1/*
2 * Copyright (c) 2000-2009 Apple Inc. All rights reserved.
3 *
4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5 *
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. The rights granted to you under the License
10 * may not be used to create, or enable the creation or redistribution of,
11 * unlawful or unlicensed copies of an Apple operating system, or to
12 * circumvent, violate, or enable the circumvention or violation of, any
13 * terms of an Apple operating system software license agreement.
14 *
15 * Please obtain a copy of the License at
16 * http://www.opensource.apple.com/apsl/ and read it before using this file.
17 *
18 * The Original Code and all software distributed under the License are
19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23 * Please see the License for the specific language governing rights and
24 * limitations under the License.
25 *
26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27 */
28/*
29 * @OSF_COPYRIGHT@
30 */
31/*
32 * Mach Operating System
33 * Copyright (c) 1991,1990,1989,1988,1987 Carnegie Mellon University
34 * All Rights Reserved.
35 *
36 * Permission to use, copy, modify and distribute this software and its
37 * documentation is hereby granted, provided that both the copyright
38 * notice and this permission notice appear in all copies of the
39 * software, derivative works or modified versions, and any portions
40 * thereof, and that both notices appear in supporting documentation.
41 *
42 * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
43 * CONDITION.  CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND FOR
44 * ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
45 *
46 * Carnegie Mellon requests users of this software to return to
47 *
48 *  Software Distribution Coordinator  or  Software.Distribution@CS.CMU.EDU
49 *  School of Computer Science
50 *  Carnegie Mellon University
51 *  Pittsburgh PA 15213-3890
52 *
53 * any improvements or extensions that they make and grant Carnegie Mellon
54 * the rights to redistribute these changes.
55 */
56/*
57 */
58/*
59 *	File:	vm/vm_page.c
60 *	Author:	Avadis Tevanian, Jr., Michael Wayne Young
61 *
62 *	Resident memory management module.
63 */
64
65#include <debug.h>
66#include <libkern/OSAtomic.h>
67
68#include <mach/clock_types.h>
69#include <mach/vm_prot.h>
70#include <mach/vm_statistics.h>
71#include <mach/sdt.h>
72#include <kern/counters.h>
73#include <kern/sched_prim.h>
74#include <kern/task.h>
75#include <kern/thread.h>
76#include <kern/kalloc.h>
77#include <kern/zalloc.h>
78#include <kern/xpr.h>
79#include <kern/ledger.h>
80#include <vm/pmap.h>
81#include <vm/vm_init.h>
82#include <vm/vm_map.h>
83#include <vm/vm_page.h>
84#include <vm/vm_pageout.h>
85#include <vm/vm_kern.h>			/* kernel_memory_allocate() */
86#include <kern/misc_protos.h>
87#include <zone_debug.h>
88#include <vm/cpm.h>
89#include <pexpert/pexpert.h>
90
91#include <vm/vm_protos.h>
92#include <vm/memory_object.h>
93#include <vm/vm_purgeable_internal.h>
94#include <vm/vm_compressor.h>
95
96#if CONFIG_PHANTOM_CACHE
97#include <vm/vm_phantom_cache.h>
98#endif
99
100#include <IOKit/IOHibernatePrivate.h>
101
102#include <sys/kdebug.h>
103
104boolean_t	hibernate_cleaning_in_progress = FALSE;
105boolean_t	vm_page_free_verify = TRUE;
106
107uint32_t	vm_lopage_free_count = 0;
108uint32_t	vm_lopage_free_limit = 0;
109uint32_t	vm_lopage_lowater    = 0;
110boolean_t	vm_lopage_refill = FALSE;
111boolean_t	vm_lopage_needed = FALSE;
112
113lck_mtx_ext_t	vm_page_queue_lock_ext;
114lck_mtx_ext_t	vm_page_queue_free_lock_ext;
115lck_mtx_ext_t	vm_purgeable_queue_lock_ext;
116
117int		speculative_age_index = 0;
118int		speculative_steal_index = 0;
119struct vm_speculative_age_q vm_page_queue_speculative[VM_PAGE_MAX_SPECULATIVE_AGE_Q + 1];
120
121
122__private_extern__ void		vm_page_init_lck_grp(void);
123
124static void		vm_page_free_prepare(vm_page_t	page);
125static vm_page_t	vm_page_grab_fictitious_common(ppnum_t phys_addr);
126
127
128
129
/*
 *	Associated with each page of user-allocatable memory
 *	is a page structure.
 */
134
135/*
136 *	These variables record the values returned by vm_page_bootstrap,
137 *	for debugging purposes.  The implementation of pmap_steal_memory
138 *	and pmap_startup here also uses them internally.
139 */
140
141vm_offset_t virtual_space_start;
142vm_offset_t virtual_space_end;
143uint32_t	vm_page_pages;
144
145/*
146 *	The vm_page_lookup() routine, which provides for fast
147 *	(virtual memory object, offset) to page lookup, employs
148 *	the following hash table.  The vm_page_{insert,remove}
149 *	routines install and remove associations in the table.
150 *	[This table is often called the virtual-to-physical,
151 *	or VP, table.]
152 */
153typedef struct {
154	vm_page_packed_t page_list;
155#if	MACH_PAGE_HASH_STATS
156	int		cur_count;		/* current count */
157	int		hi_count;		/* high water mark */
158#endif /* MACH_PAGE_HASH_STATS */
159} vm_page_bucket_t;
160
161
162#define BUCKETS_PER_LOCK	16
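
/*
 * Note: bucket locks are coarser than the buckets themselves.  Each
 * lck_spin_t in vm_page_bucket_locks covers BUCKETS_PER_LOCK (16)
 * consecutive hash buckets, so a bucket's lock is looked up as
 * vm_page_bucket_locks[hash_id / BUCKETS_PER_LOCK] (see
 * vm_page_insert_internal() and vm_page_lookup() below).
 */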
163
164vm_page_bucket_t *vm_page_buckets;		/* Array of buckets */
165unsigned int	vm_page_bucket_count = 0;	/* How big is array? */
166unsigned int	vm_page_hash_mask;		/* Mask for hash function */
167unsigned int	vm_page_hash_shift;		/* Shift for hash function */
168uint32_t	vm_page_bucket_hash;		/* Basic bucket hash */
169unsigned int	vm_page_bucket_lock_count = 0;		/* How big is array of locks? */
170
171lck_spin_t	*vm_page_bucket_locks;
172
173#if VM_PAGE_BUCKETS_CHECK
174boolean_t vm_page_buckets_check_ready = FALSE;
175#if VM_PAGE_FAKE_BUCKETS
176vm_page_bucket_t *vm_page_fake_buckets;	/* decoy buckets */
177vm_map_offset_t vm_page_fake_buckets_start, vm_page_fake_buckets_end;
178#endif /* VM_PAGE_FAKE_BUCKETS */
179#endif /* VM_PAGE_BUCKETS_CHECK */
180
181#if	MACH_PAGE_HASH_STATS
182/* This routine is only for debug.  It is intended to be called by
183 * hand by a developer using a kernel debugger.  This routine prints
184 * out vm_page_hash table statistics to the kernel debug console.
185 */
186void
187hash_debug(void)
188{
189	int	i;
190	int	numbuckets = 0;
191	int	highsum = 0;
192	int	maxdepth = 0;
193
194	for (i = 0; i < vm_page_bucket_count; i++) {
195		if (vm_page_buckets[i].hi_count) {
196			numbuckets++;
197			highsum += vm_page_buckets[i].hi_count;
198			if (vm_page_buckets[i].hi_count > maxdepth)
199				maxdepth = vm_page_buckets[i].hi_count;
200		}
201	}
202	printf("Total number of buckets: %d\n", vm_page_bucket_count);
203	printf("Number used buckets:     %d = %d%%\n",
204		numbuckets, 100*numbuckets/vm_page_bucket_count);
205	printf("Number unused buckets:   %d = %d%%\n",
206		vm_page_bucket_count - numbuckets,
207		100*(vm_page_bucket_count-numbuckets)/vm_page_bucket_count);
208	printf("Sum of bucket max depth: %d\n", highsum);
209	printf("Average bucket depth:    %d.%2d\n",
210		highsum/vm_page_bucket_count,
211		highsum%vm_page_bucket_count);
212	printf("Maximum bucket depth:    %d\n", maxdepth);
213}
214#endif /* MACH_PAGE_HASH_STATS */
215
216/*
217 *	The virtual page size is currently implemented as a runtime
218 *	variable, but is constant once initialized using vm_set_page_size.
219 *	This initialization must be done in the machine-dependent
220 *	bootstrap sequence, before calling other machine-independent
221 *	initializations.
222 *
223 *	All references to the virtual page size outside this
224 *	module must use the PAGE_SIZE, PAGE_MASK and PAGE_SHIFT
225 *	constants.
226 */
227vm_size_t	page_size  = PAGE_SIZE;
228vm_size_t	page_mask  = PAGE_MASK;
229int		page_shift = PAGE_SHIFT;
230
231/*
232 *	Resident page structures are initialized from
233 *	a template (see vm_page_alloc).
234 *
235 *	When adding a new field to the virtual memory
236 *	object structure, be sure to add initialization
237 *	(see vm_page_bootstrap).
238 */
239struct vm_page	vm_page_template;
240
241vm_page_t	vm_pages = VM_PAGE_NULL;
242unsigned int	vm_pages_count = 0;
243ppnum_t		vm_page_lowest = 0;
244
245/*
246 *	Resident pages that represent real memory
247 *	are allocated from a set of free lists,
248 *	one per color.
249 */
250unsigned int	vm_colors;
251unsigned int    vm_color_mask;			/* mask is == (vm_colors-1) */
252unsigned int	vm_cache_geometry_colors = 0;	/* set by hw dependent code during startup */
253unsigned int	vm_free_magazine_refill_limit = 0;
254queue_head_t	vm_page_queue_free[MAX_COLORS];
255unsigned int	vm_page_free_wanted;
256unsigned int	vm_page_free_wanted_privileged;
257unsigned int	vm_page_free_count;
258unsigned int	vm_page_fictitious_count;
259
260/*
261 *	Occasionally, the virtual memory system uses
262 *	resident page structures that do not refer to
263 *	real pages, for example to leave a page with
264 *	important state information in the VP table.
265 *
266 *	These page structures are allocated the way
267 *	most other kernel structures are.
268 */
269zone_t	vm_page_zone;
270vm_locks_array_t vm_page_locks;
271decl_lck_mtx_data(,vm_page_alloc_lock)
272lck_mtx_ext_t vm_page_alloc_lock_ext;
273
274unsigned int io_throttle_zero_fill;
275
276unsigned int	vm_page_local_q_count = 0;
277unsigned int	vm_page_local_q_soft_limit = 250;
278unsigned int	vm_page_local_q_hard_limit = 500;
279struct vplq     *vm_page_local_q = NULL;
280
281/* N.B. Guard and fictitious pages must not
282 * be assigned a zero phys_page value.
283 */
284/*
285 *	Fictitious pages don't have a physical address,
286 *	but we must initialize phys_page to something.
287 *	For debugging, this should be a strange value
288 *	that the pmap module can recognize in assertions.
289 */
290ppnum_t vm_page_fictitious_addr = (ppnum_t) -1;
291
292/*
293 *	Guard pages are not accessible so they don't
294 * 	need a physical address, but we need to enter
295 *	one in the pmap.
296 *	Let's make it recognizable and make sure that
297 *	we don't use a real physical page with that
298 *	physical address.
299 */
300ppnum_t vm_page_guard_addr = (ppnum_t) -2;
301
302/*
303 *	Resident page structures are also chained on
304 *	queues that are used by the page replacement
305 *	system (pageout daemon).  These queues are
306 *	defined here, but are shared by the pageout
307 *	module.  The inactive queue is broken into
 *	file-backed and anonymous queues for convenience, as the
 *	pageout daemon often assigns a higher importance to
 *	anonymous pages (it is less likely to pick them).
311 */
312queue_head_t	vm_page_queue_active;
313queue_head_t	vm_page_queue_inactive;
314queue_head_t	vm_page_queue_anonymous;	/* inactive memory queue for anonymous pages */
315queue_head_t	vm_page_queue_throttled;
316
317unsigned int	vm_page_active_count;
318unsigned int	vm_page_inactive_count;
319unsigned int	vm_page_anonymous_count;
320unsigned int	vm_page_throttled_count;
321unsigned int	vm_page_speculative_count;
322unsigned int	vm_page_wire_count;
323unsigned int	vm_page_wire_count_initial;
324unsigned int	vm_page_gobble_count = 0;
325
326#define	VM_PAGE_WIRE_COUNT_WARNING	0
327#define VM_PAGE_GOBBLE_COUNT_WARNING	0
328
329unsigned int	vm_page_purgeable_count = 0; /* # of pages purgeable now */
330unsigned int	vm_page_purgeable_wired_count = 0; /* # of purgeable pages that are wired now */
331uint64_t	vm_page_purged_count = 0;    /* total count of purged pages */
332
333unsigned int	vm_page_xpmapped_external_count = 0;
334unsigned int	vm_page_external_count = 0;
335unsigned int	vm_page_internal_count = 0;
336unsigned int	vm_page_pageable_external_count = 0;
337unsigned int	vm_page_pageable_internal_count = 0;
338
339#if DEVELOPMENT || DEBUG
340unsigned int	vm_page_speculative_recreated = 0;
341unsigned int	vm_page_speculative_created = 0;
342unsigned int	vm_page_speculative_used = 0;
343#endif
344
345queue_head_t    vm_page_queue_cleaned;
346
347unsigned int	vm_page_cleaned_count = 0;
348unsigned int	vm_pageout_enqueued_cleaned = 0;
349
350uint64_t	max_valid_dma_address = 0xffffffffffffffffULL;
351ppnum_t		max_valid_low_ppnum = 0xffffffff;
352
353
354/*
355 *	Several page replacement parameters are also
356 *	shared with this module, so that page allocation
357 *	(done here in vm_page_alloc) can trigger the
358 *	pageout daemon.
359 */
360unsigned int	vm_page_free_target = 0;
361unsigned int	vm_page_free_min = 0;
362unsigned int	vm_page_throttle_limit = 0;
363uint32_t	vm_page_creation_throttle = 0;
364unsigned int	vm_page_inactive_target = 0;
365unsigned int	vm_page_anonymous_min = 0;
366unsigned int	vm_page_inactive_min = 0;
367unsigned int	vm_page_free_reserved = 0;
368unsigned int	vm_page_throttle_count = 0;
369
370
371/*
372 *	The VM system has a couple of heuristics for deciding
373 *	that pages are "uninteresting" and should be placed
374 *	on the inactive queue as likely candidates for replacement.
375 *	These variables let the heuristics be controlled at run-time
376 *	to make experimentation easier.
377 */
378
379boolean_t vm_page_deactivate_hint = TRUE;
380
381struct vm_page_stats_reusable vm_page_stats_reusable;
382
383/*
384 *	vm_set_page_size:
385 *
386 *	Sets the page size, perhaps based upon the memory
387 *	size.  Must be called before any use of page-size
388 *	dependent functions.
389 *
390 *	Sets page_shift and page_mask from page_size.
391 */
392void
393vm_set_page_size(void)
394{
395	page_size  = PAGE_SIZE;
396	page_mask  = PAGE_MASK;
397	page_shift = PAGE_SHIFT;
398
399	if ((page_mask & page_size) != 0)
400		panic("vm_set_page_size: page size not a power of two");
401
402	for (page_shift = 0; ; page_shift++)
403		if ((1U << page_shift) == page_size)
404			break;
405}
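
/*
 * Worked example (illustrative, assuming the common 4KB page size): with
 * PAGE_SIZE == 4096 the loop above leaves page_shift == 12 and
 * page_mask == 0xFFF, so (addr & ~page_mask) truncates an address to a
 * page boundary and ((size + page_mask) & ~page_mask) rounds a size up
 * to a page boundary.
 */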
406
407#define COLOR_GROUPS_TO_STEAL	4
408
409
/* Called once during startup, once the cache geometry is known.
 */
412static void
413vm_page_set_colors( void )
414{
415	unsigned int	n, override;
416
417	if ( PE_parse_boot_argn("colors", &override, sizeof (override)) )		/* colors specified as a boot-arg? */
418		n = override;
419	else if ( vm_cache_geometry_colors )			/* do we know what the cache geometry is? */
420		n = vm_cache_geometry_colors;
421	else	n = DEFAULT_COLORS;				/* use default if all else fails */
422
423	if ( n == 0 )
424		n = 1;
425	if ( n > MAX_COLORS )
426		n = MAX_COLORS;
427
428	/* the count must be a power of 2  */
429	if ( ( n & (n - 1)) != 0  )
430		panic("vm_page_set_colors");
431
432	vm_colors = n;
433	vm_color_mask = n - 1;
434
435	vm_free_magazine_refill_limit = vm_colors * COLOR_GROUPS_TO_STEAL;
436}
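
/*
 * Illustrative note (an assumption about the callers, not code in this
 * function): with "colors=8" as a boot-arg, or a cache geometry that
 * reports 8 colors, vm_colors == 8 and vm_color_mask == 7, and the
 * per-color free lists in vm_page_queue_free[] are typically indexed by
 * the low-order bits of a page's physical page number
 * (phys_page & vm_color_mask).
 */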
437
438
439lck_grp_t		vm_page_lck_grp_free;
440lck_grp_t		vm_page_lck_grp_queue;
441lck_grp_t		vm_page_lck_grp_local;
442lck_grp_t		vm_page_lck_grp_purge;
443lck_grp_t		vm_page_lck_grp_alloc;
444lck_grp_t		vm_page_lck_grp_bucket;
445lck_grp_attr_t		vm_page_lck_grp_attr;
446lck_attr_t		vm_page_lck_attr;
447
448
449__private_extern__ void
450vm_page_init_lck_grp(void)
451{
	/*
	 * initialize the vm_page lock world
	 */
455	lck_grp_attr_setdefault(&vm_page_lck_grp_attr);
456	lck_grp_init(&vm_page_lck_grp_free, "vm_page_free", &vm_page_lck_grp_attr);
457	lck_grp_init(&vm_page_lck_grp_queue, "vm_page_queue", &vm_page_lck_grp_attr);
458	lck_grp_init(&vm_page_lck_grp_local, "vm_page_queue_local", &vm_page_lck_grp_attr);
459	lck_grp_init(&vm_page_lck_grp_purge, "vm_page_purge", &vm_page_lck_grp_attr);
460	lck_grp_init(&vm_page_lck_grp_alloc, "vm_page_alloc", &vm_page_lck_grp_attr);
461	lck_grp_init(&vm_page_lck_grp_bucket, "vm_page_bucket", &vm_page_lck_grp_attr);
462	lck_attr_setdefault(&vm_page_lck_attr);
463	lck_mtx_init_ext(&vm_page_alloc_lock, &vm_page_alloc_lock_ext, &vm_page_lck_grp_alloc, &vm_page_lck_attr);
464
465	vm_compressor_init_locks();
466}
467
468void
469vm_page_init_local_q()
470{
471	unsigned int		num_cpus;
472	unsigned int		i;
473	struct vplq     	*t_local_q;
474
475	num_cpus = ml_get_max_cpus();
476
477	/*
478	 * no point in this for a uni-processor system
479	 */
480	if (num_cpus >= 2) {
481		t_local_q = (struct vplq *)kalloc(num_cpus * sizeof(struct vplq));
482
483		for (i = 0; i < num_cpus; i++) {
484			struct vpl	*lq;
485
486			lq = &t_local_q[i].vpl_un.vpl;
487			VPL_LOCK_INIT(lq, &vm_page_lck_grp_local, &vm_page_lck_attr);
488			queue_init(&lq->vpl_queue);
489			lq->vpl_count = 0;
490			lq->vpl_internal_count = 0;
491			lq->vpl_external_count = 0;
492		}
493		vm_page_local_q_count = num_cpus;
494
495		vm_page_local_q = (struct vplq *)t_local_q;
496	}
497}
498
499
500/*
501 *	vm_page_bootstrap:
502 *
503 *	Initializes the resident memory module.
504 *
505 *	Allocates memory for the page cells, and
506 *	for the object/offset-to-page hash table headers.
507 *	Each page cell is initialized and placed on the free list.
508 *	Returns the range of available kernel virtual memory.
509 */
510
511void
512vm_page_bootstrap(
513	vm_offset_t		*startp,
514	vm_offset_t		*endp)
515{
516	register vm_page_t	m;
517	unsigned int		i;
518	unsigned int		log1;
519	unsigned int		log2;
520	unsigned int		size;
521
522	/*
523	 *	Initialize the vm_page template.
524	 */
525
526	m = &vm_page_template;
527	bzero(m, sizeof (*m));
528
529	m->pageq.next = NULL;
530	m->pageq.prev = NULL;
531	m->listq.next = NULL;
532	m->listq.prev = NULL;
533	m->next_m = VM_PAGE_PACK_PTR(VM_PAGE_NULL);
534
535	m->object = VM_OBJECT_NULL;		/* reset later */
536	m->offset = (vm_object_offset_t) -1;	/* reset later */
537
538	m->wire_count = 0;
539	m->local = FALSE;
540	m->inactive = FALSE;
541	m->active = FALSE;
542	m->pageout_queue = FALSE;
543	m->speculative = FALSE;
544	m->laundry = FALSE;
545	m->free = FALSE;
546	m->reference = FALSE;
547	m->gobbled = FALSE;
548	m->private = FALSE;
549	m->throttled = FALSE;
550	m->__unused_pageq_bits = 0;
551
552	m->phys_page = 0;		/* reset later */
553
554	m->busy = TRUE;
555	m->wanted = FALSE;
556	m->tabled = FALSE;
557	m->hashed = FALSE;
558	m->fictitious = FALSE;
559	m->pmapped = FALSE;
560	m->wpmapped = FALSE;
561	m->pageout = FALSE;
562	m->absent = FALSE;
563	m->error = FALSE;
564	m->dirty = FALSE;
565	m->cleaning = FALSE;
566	m->precious = FALSE;
567	m->clustered = FALSE;
568	m->overwriting = FALSE;
569	m->restart = FALSE;
570	m->unusual = FALSE;
571	m->encrypted = FALSE;
572	m->encrypted_cleaning = FALSE;
573	m->cs_validated = FALSE;
574	m->cs_tainted = FALSE;
575	m->no_cache = FALSE;
576	m->reusable = FALSE;
577	m->slid = FALSE;
578	m->xpmapped = FALSE;
579	m->compressor = FALSE;
580	m->written_by_kernel = FALSE;
581	m->__unused_object_bits = 0;
582
583	/*
584	 *	Initialize the page queues.
585	 */
586	vm_page_init_lck_grp();
587
588	lck_mtx_init_ext(&vm_page_queue_free_lock, &vm_page_queue_free_lock_ext, &vm_page_lck_grp_free, &vm_page_lck_attr);
589	lck_mtx_init_ext(&vm_page_queue_lock, &vm_page_queue_lock_ext, &vm_page_lck_grp_queue, &vm_page_lck_attr);
590	lck_mtx_init_ext(&vm_purgeable_queue_lock, &vm_purgeable_queue_lock_ext, &vm_page_lck_grp_purge, &vm_page_lck_attr);
591
592	for (i = 0; i < PURGEABLE_Q_TYPE_MAX; i++) {
593		int group;
594
595		purgeable_queues[i].token_q_head = 0;
596		purgeable_queues[i].token_q_tail = 0;
597		for (group = 0; group < NUM_VOLATILE_GROUPS; group++)
598		        queue_init(&purgeable_queues[i].objq[group]);
599
600		purgeable_queues[i].type = i;
601		purgeable_queues[i].new_pages = 0;
602#if MACH_ASSERT
603		purgeable_queues[i].debug_count_tokens = 0;
604		purgeable_queues[i].debug_count_objects = 0;
605#endif
	}
607	purgeable_nonvolatile_count = 0;
608	queue_init(&purgeable_nonvolatile_queue);
609
610	for (i = 0; i < MAX_COLORS; i++ )
611		queue_init(&vm_page_queue_free[i]);
612
613	queue_init(&vm_lopage_queue_free);
614	queue_init(&vm_page_queue_active);
615	queue_init(&vm_page_queue_inactive);
616	queue_init(&vm_page_queue_cleaned);
617	queue_init(&vm_page_queue_throttled);
618	queue_init(&vm_page_queue_anonymous);
619
620	for ( i = 0; i <= VM_PAGE_MAX_SPECULATIVE_AGE_Q; i++ ) {
621	        queue_init(&vm_page_queue_speculative[i].age_q);
622
623		vm_page_queue_speculative[i].age_ts.tv_sec = 0;
624		vm_page_queue_speculative[i].age_ts.tv_nsec = 0;
625	}
626	vm_page_free_wanted = 0;
627	vm_page_free_wanted_privileged = 0;
628
629	vm_page_set_colors();
630
631
632	/*
633	 *	Steal memory for the map and zone subsystems.
634	 */
635	kernel_debug_string("zone_steal_memory");
636	zone_steal_memory();
637	kernel_debug_string("vm_map_steal_memory");
638	vm_map_steal_memory();
639
640	/*
641	 *	Allocate (and initialize) the virtual-to-physical
642	 *	table hash buckets.
643	 *
644	 *	The number of buckets should be a power of two to
645	 *	get a good hash function.  The following computation
	 *	chooses the first power of two that is at least as large
	 *	as the number of physical pages in the system.
648	 */
649
650	if (vm_page_bucket_count == 0) {
651		unsigned int npages = pmap_free_pages();
652
653		vm_page_bucket_count = 1;
654		while (vm_page_bucket_count < npages)
655			vm_page_bucket_count <<= 1;
656	}
657	vm_page_bucket_lock_count = (vm_page_bucket_count + BUCKETS_PER_LOCK - 1) / BUCKETS_PER_LOCK;
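	/*
	 * Illustrative example: a machine reporting roughly 1,000,000 free
	 * physical pages gets vm_page_bucket_count == 1048576 (2^20) and,
	 * with BUCKETS_PER_LOCK == 16, 65536 bucket locks.
	 */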
658
659	vm_page_hash_mask = vm_page_bucket_count - 1;
660
661	/*
662	 *	Calculate object shift value for hashing algorithm:
663	 *		O = log2(sizeof(struct vm_object))
664	 *		B = log2(vm_page_bucket_count)
665	 *	        hash shifts the object left by
666	 *		B/2 - O
667	 */
668	size = vm_page_bucket_count;
669	for (log1 = 0; size > 1; log1++)
670		size /= 2;
671	size = sizeof(struct vm_object);
672	for (log2 = 0; size > 1; log2++)
673		size /= 2;
674	vm_page_hash_shift = log1/2 - log2 + 1;
675
676	vm_page_bucket_hash = 1 << ((log1 + 1) >> 1);		/* Get (ceiling of sqrt of table size) */
677	vm_page_bucket_hash |= 1 << ((log1 + 1) >> 2);		/* Get (ceiling of quadroot of table size) */
	vm_page_bucket_hash |= 1;							/* Set the low bit - it must always be 1 to ensure a unique series */
679
680	if (vm_page_hash_mask & vm_page_bucket_count)
681		printf("vm_page_bootstrap: WARNING -- strange page hash\n");
682
683#if VM_PAGE_BUCKETS_CHECK
684#if VM_PAGE_FAKE_BUCKETS
685	/*
686	 * Allocate a decoy set of page buckets, to detect
687	 * any stomping there.
688	 */
689	vm_page_fake_buckets = (vm_page_bucket_t *)
690		pmap_steal_memory(vm_page_bucket_count *
691				  sizeof(vm_page_bucket_t));
692	vm_page_fake_buckets_start = (vm_map_offset_t) vm_page_fake_buckets;
693	vm_page_fake_buckets_end =
694		vm_map_round_page((vm_page_fake_buckets_start +
695				   (vm_page_bucket_count *
696				    sizeof (vm_page_bucket_t))),
697				  PAGE_MASK);
698	char *cp;
699	for (cp = (char *)vm_page_fake_buckets_start;
700	     cp < (char *)vm_page_fake_buckets_end;
701	     cp++) {
702		*cp = 0x5a;
703	}
704#endif /* VM_PAGE_FAKE_BUCKETS */
705#endif /* VM_PAGE_BUCKETS_CHECK */
706
707	kernel_debug_string("vm_page_buckets");
708	vm_page_buckets = (vm_page_bucket_t *)
709		pmap_steal_memory(vm_page_bucket_count *
710				  sizeof(vm_page_bucket_t));
711
712	kernel_debug_string("vm_page_bucket_locks");
713	vm_page_bucket_locks = (lck_spin_t *)
714		pmap_steal_memory(vm_page_bucket_lock_count *
715				  sizeof(lck_spin_t));
716
717	for (i = 0; i < vm_page_bucket_count; i++) {
718		register vm_page_bucket_t *bucket = &vm_page_buckets[i];
719
720		bucket->page_list = VM_PAGE_PACK_PTR(VM_PAGE_NULL);
721#if     MACH_PAGE_HASH_STATS
722		bucket->cur_count = 0;
723		bucket->hi_count = 0;
724#endif /* MACH_PAGE_HASH_STATS */
725	}
726
727	for (i = 0; i < vm_page_bucket_lock_count; i++)
728	        lck_spin_init(&vm_page_bucket_locks[i], &vm_page_lck_grp_bucket, &vm_page_lck_attr);
729
730#if VM_PAGE_BUCKETS_CHECK
731	vm_page_buckets_check_ready = TRUE;
732#endif /* VM_PAGE_BUCKETS_CHECK */
733
734	/*
735	 *	Machine-dependent code allocates the resident page table.
736	 *	It uses vm_page_init to initialize the page frames.
737	 *	The code also returns to us the virtual space available
738	 *	to the kernel.  We don't trust the pmap module
739	 *	to get the alignment right.
740	 */
741
742	kernel_debug_string("pmap_startup");
743	pmap_startup(&virtual_space_start, &virtual_space_end);
744	virtual_space_start = round_page(virtual_space_start);
745	virtual_space_end = trunc_page(virtual_space_end);
746
747	*startp = virtual_space_start;
748	*endp = virtual_space_end;
749
750	/*
751	 *	Compute the initial "wire" count.
752	 *	Up until now, the pages which have been set aside are not under
753	 *	the VM system's control, so although they aren't explicitly
754	 *	wired, they nonetheless can't be moved. At this moment,
755	 *	all VM managed pages are "free", courtesy of pmap_startup.
756	 */
757	assert((unsigned int) atop_64(max_mem) == atop_64(max_mem));
758	vm_page_wire_count = ((unsigned int) atop_64(max_mem)) - vm_page_free_count - vm_lopage_free_count;	/* initial value */
759	vm_page_wire_count_initial = vm_page_wire_count;
760
761	printf("vm_page_bootstrap: %d free pages and %d wired pages\n",
762	       vm_page_free_count, vm_page_wire_count);
763
764	kernel_debug_string("vm_page_bootstrap complete");
765	simple_lock_init(&vm_paging_lock, 0);
766}
767
768#ifndef	MACHINE_PAGES
769/*
770 *	We implement pmap_steal_memory and pmap_startup with the help
771 *	of two simpler functions, pmap_virtual_space and pmap_next_page.
772 */
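
/*
 * In this file, pmap_steal_memory() is what carves out the page hash
 * buckets and their spin locks in vm_page_bootstrap(), and pmap_startup()
 * is what allocates the vm_pages[] array, all before the zone allocator
 * is available.
 */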
773
774void *
775pmap_steal_memory(
776	vm_size_t size)
777{
778	vm_offset_t addr, vaddr;
779	ppnum_t	phys_page;
780
781	/*
	 *	We round the size up to a multiple of sizeof (void *).
783	 */
784
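	/*
	 * Illustrative example: with 8-byte pointers, a request of 10 bytes
	 * rounds up to 16; a size that is already a multiple of 8 is left
	 * unchanged.
	 */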
785	size = (size + sizeof (void *) - 1) &~ (sizeof (void *) - 1);
786
787	/*
788	 *	If this is the first call to pmap_steal_memory,
789	 *	we have to initialize ourself.
790	 */
791
792	if (virtual_space_start == virtual_space_end) {
793		pmap_virtual_space(&virtual_space_start, &virtual_space_end);
794
795		/*
796		 *	The initial values must be aligned properly, and
797		 *	we don't trust the pmap module to do it right.
798		 */
799
800		virtual_space_start = round_page(virtual_space_start);
801		virtual_space_end = trunc_page(virtual_space_end);
802	}
803
804	/*
805	 *	Allocate virtual memory for this request.
806	 */
807
808	addr = virtual_space_start;
809	virtual_space_start += size;
810
811	//kprintf("pmap_steal_memory: %08lX - %08lX; size=%08lX\n", (long)addr, (long)virtual_space_start, (long)size);	/* (TEST/DEBUG) */
812
813	/*
814	 *	Allocate and map physical pages to back new virtual pages.
815	 */
816
817	for (vaddr = round_page(addr);
818	     vaddr < addr + size;
819	     vaddr += PAGE_SIZE) {
820
821		if (!pmap_next_page_hi(&phys_page))
822			panic("pmap_steal_memory");
823
824		/*
825		 *	XXX Logically, these mappings should be wired,
826		 *	but some pmap modules barf if they are.
827		 */
828#if defined(__LP64__)
829		pmap_pre_expand(kernel_pmap, vaddr);
830#endif
831
832		pmap_enter(kernel_pmap, vaddr, phys_page,
833			   VM_PROT_READ|VM_PROT_WRITE, VM_PROT_NONE,
834				VM_WIMG_USE_DEFAULT, FALSE);
835		/*
836		 * Account for newly stolen memory
837		 */
838		vm_page_wire_count++;
839
840	}
841
842	return (void *) addr;
843}
844
845void vm_page_release_startup(vm_page_t mem);
846void
847pmap_startup(
848	vm_offset_t *startp,
849	vm_offset_t *endp)
850{
851	unsigned int i, npages, pages_initialized, fill, fillval;
852	ppnum_t		phys_page;
853	addr64_t	tmpaddr;
854
855
856#if    defined(__LP64__)
857	/*
858	 * struct vm_page must be of size 64 due to VM_PAGE_PACK_PTR use
859	 */
860	assert(sizeof(struct vm_page) == 64);
861
862	/*
863	 * make sure we are aligned on a 64 byte boundary
864	 * for VM_PAGE_PACK_PTR (it clips off the low-order
865	 * 6 bits of the pointer)
866	 */
867	if (virtual_space_start != virtual_space_end)
868		virtual_space_start = round_page(virtual_space_start);
869#endif
870
871	/*
872	 *	We calculate how many page frames we will have
873	 *	and then allocate the page structures in one chunk.
874	 */
875
876	tmpaddr = (addr64_t)pmap_free_pages() * (addr64_t)PAGE_SIZE;	/* Get the amount of memory left */
877	tmpaddr = tmpaddr + (addr64_t)(round_page(virtual_space_start) - virtual_space_start);	/* Account for any slop */
	npages = (unsigned int)(tmpaddr / (addr64_t)(PAGE_SIZE + sizeof(*vm_pages)));	/* Figure how many pages fit once each page also pays for its vm_page structure */
879
880	vm_pages = (vm_page_t) pmap_steal_memory(npages * sizeof *vm_pages);
881
882	/*
883	 *	Initialize the page frames.
884	 */
885	kernel_debug_string("Initialize the page frames");
886	for (i = 0, pages_initialized = 0; i < npages; i++) {
887		if (!pmap_next_page(&phys_page))
888			break;
889		if (pages_initialized == 0 || phys_page < vm_page_lowest)
890			vm_page_lowest = phys_page;
891
892		vm_page_init(&vm_pages[i], phys_page, FALSE);
893		vm_page_pages++;
894		pages_initialized++;
895	}
896	vm_pages_count = pages_initialized;
897
898#if    defined(__LP64__)
899
900	if (VM_PAGE_UNPACK_PTR(VM_PAGE_PACK_PTR(&vm_pages[0])) != &vm_pages[0])
901		panic("VM_PAGE_PACK_PTR failed on &vm_pages[0] - %p", (void *)&vm_pages[0]);
902
903	if (VM_PAGE_UNPACK_PTR(VM_PAGE_PACK_PTR(&vm_pages[vm_pages_count-1])) != &vm_pages[vm_pages_count-1])
904		panic("VM_PAGE_PACK_PTR failed on &vm_pages[vm_pages_count-1] - %p", (void *)&vm_pages[vm_pages_count-1]);
905#endif
906	kernel_debug_string("page fill/release");
907	/*
908	 * Check if we want to initialize pages to a known value
909	 */
910	fill = 0;								/* Assume no fill */
911	if (PE_parse_boot_argn("fill", &fillval, sizeof (fillval))) fill = 1;			/* Set fill */
912#if	DEBUG
913	/* This slows down booting the DEBUG kernel, particularly on
914	 * large memory systems, but is worthwhile in deterministically
915	 * trapping uninitialized memory usage.
916	 */
917	if (fill == 0) {
918		fill = 1;
919		fillval = 0xDEB8F177;
920	}
921#endif
922	if (fill)
923		kprintf("Filling vm_pages with pattern: 0x%x\n", fillval);
924	// -debug code remove
925	if (2 == vm_himemory_mode) {
926		// free low -> high so high is preferred
927		for (i = 1; i <= pages_initialized; i++) {
			if (fill) fillPage(vm_pages[i - 1].phys_page, fillval);		/* Fill the page with a known value if requested at boot */
929			vm_page_release_startup(&vm_pages[i - 1]);
930		}
931	}
932	else
933	// debug code remove-
934
935	/*
936	 * Release pages in reverse order so that physical pages
937	 * initially get allocated in ascending addresses. This keeps
938	 * the devices (which must address physical memory) happy if
939	 * they require several consecutive pages.
940	 */
941	for (i = pages_initialized; i > 0; i--) {
		if (fill) fillPage(vm_pages[i - 1].phys_page, fillval);		/* Fill the page with a known value if requested at boot */
943		vm_page_release_startup(&vm_pages[i - 1]);
944	}
945
946	VM_CHECK_MEMORYSTATUS;
947
948#if 0
949	{
950		vm_page_t xx, xxo, xxl;
951		int i, j, k, l;
952
953		j = 0;													/* (BRINGUP) */
954		xxl = 0;
955
956		for( i = 0; i < vm_colors; i++ ) {
957			queue_iterate(&vm_page_queue_free[i],
958				      xx,
959				      vm_page_t,
960				      pageq) {	/* BRINGUP */
961				j++;												/* (BRINGUP) */
962				if(j > vm_page_free_count) {						/* (BRINGUP) */
963					panic("pmap_startup: too many pages, xx = %08X, xxl = %08X\n", xx, xxl);
964				}
965
966				l = vm_page_free_count - j;							/* (BRINGUP) */
967				k = 0;												/* (BRINGUP) */
968
969				if(((j - 1) & 0xFFFF) == 0) kprintf("checking number %d of %d\n", j, vm_page_free_count);
970
971				for(xxo = xx->pageq.next; xxo != &vm_page_queue_free[i]; xxo = xxo->pageq.next) {	/* (BRINGUP) */
972					k++;
973					if(k > l) panic("pmap_startup: too many in secondary check %d %d\n", k, l);
974					if((xx->phys_page & 0xFFFFFFFF) == (xxo->phys_page & 0xFFFFFFFF)) {	/* (BRINGUP) */
975						panic("pmap_startup: duplicate physaddr, xx = %08X, xxo = %08X\n", xx, xxo);
976					}
977				}
978
979				xxl = xx;
980			}
981		}
982
983		if(j != vm_page_free_count) {						/* (BRINGUP) */
984			panic("pmap_startup: vm_page_free_count does not match, calc =  %d, vm_page_free_count = %08X\n", j, vm_page_free_count);
985		}
986	}
987#endif
988
989
990	/*
991	 *	We have to re-align virtual_space_start,
992	 *	because pmap_steal_memory has been using it.
993	 */
994
995	virtual_space_start = round_page(virtual_space_start);
996
997	*startp = virtual_space_start;
998	*endp = virtual_space_end;
999}
1000#endif	/* MACHINE_PAGES */
1001
1002/*
1003 *	Routine:	vm_page_module_init
1004 *	Purpose:
1005 *		Second initialization pass, to be done after
1006 *		the basic VM system is ready.
1007 */
1008void
1009vm_page_module_init(void)
1010{
1011	vm_page_zone = zinit((vm_size_t) sizeof(struct vm_page),
1012			     0, PAGE_SIZE, "vm pages");
1013
1014#if	ZONE_DEBUG
1015	zone_debug_disable(vm_page_zone);
1016#endif	/* ZONE_DEBUG */
1017
1018	zone_change(vm_page_zone, Z_CALLERACCT, FALSE);
1019	zone_change(vm_page_zone, Z_EXPAND, FALSE);
1020	zone_change(vm_page_zone, Z_EXHAUST, TRUE);
1021	zone_change(vm_page_zone, Z_FOREIGN, TRUE);
1022	zone_change(vm_page_zone, Z_GZALLOC_EXEMPT, TRUE);
1023        /*
1024         * Adjust zone statistics to account for the real pages allocated
1025         * in vm_page_create(). [Q: is this really what we want?]
1026         */
1027        vm_page_zone->count += vm_page_pages;
1028        vm_page_zone->sum_count += vm_page_pages;
1029        vm_page_zone->cur_size += vm_page_pages * vm_page_zone->elem_size;
1030}
1031
1032/*
1033 *	Routine:	vm_page_create
1034 *	Purpose:
1035 *		After the VM system is up, machine-dependent code
1036 *		may stumble across more physical memory.  For example,
1037 *		memory that it was reserving for a frame buffer.
1038 *		vm_page_create turns this memory into available pages.
1039 */
1040
1041void
1042vm_page_create(
1043	ppnum_t start,
1044	ppnum_t end)
1045{
1046	ppnum_t		phys_page;
1047	vm_page_t 	m;
1048
1049	for (phys_page = start;
1050	     phys_page < end;
1051	     phys_page++) {
1052		while ((m = (vm_page_t) vm_page_grab_fictitious_common(phys_page))
1053			== VM_PAGE_NULL)
1054			vm_page_more_fictitious();
1055
1056		m->fictitious = FALSE;
1057		pmap_clear_noencrypt(phys_page);
1058
1059		vm_page_pages++;
1060		vm_page_release(m);
1061	}
1062}
1063
1064/*
1065 *	vm_page_hash:
1066 *
1067 *	Distributes the object/offset key pair among hash buckets.
1068 *
1069 *	NOTE:	The bucket count must be a power of 2
1070 */
1071#define vm_page_hash(object, offset) (\
1072	( (natural_t)((uintptr_t)object * vm_page_bucket_hash) + ((uint32_t)atop_64(offset) ^ vm_page_bucket_hash))\
1073	 & vm_page_hash_mask)
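
/*
 * Illustrative note on the hash above: the object pointer is scrambled by
 * multiplying with vm_page_bucket_hash, the page index of the offset
 * (atop_64(offset)) is XORed with the same constant, and the sum is
 * reduced to a bucket index with vm_page_hash_mask.  Masking only works
 * as a cheap modulo because vm_page_bucket_count is a power of two, which
 * is why vm_page_bootstrap() insists on that.
 */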
1074
1075
1076/*
1077 *	vm_page_insert:		[ internal use only ]
1078 *
1079 *	Inserts the given mem entry into the object/object-page
1080 *	table and object list.
1081 *
1082 *	The object must be locked.
1083 */
1084void
1085vm_page_insert(
1086	vm_page_t		mem,
1087	vm_object_t		object,
1088	vm_object_offset_t	offset)
1089{
1090	vm_page_insert_internal(mem, object, offset, FALSE, TRUE, FALSE);
1091}
1092
1093void
1094vm_page_insert_internal(
1095	vm_page_t		mem,
1096	vm_object_t		object,
1097	vm_object_offset_t	offset,
1098	boolean_t		queues_lock_held,
1099	boolean_t		insert_in_hash,
1100	boolean_t		batch_pmap_op)
1101{
1102	vm_page_bucket_t	*bucket;
1103	lck_spin_t		*bucket_lock;
1104	int			hash_id;
1105	task_t			owner;
1106
1107        XPR(XPR_VM_PAGE,
1108                "vm_page_insert, object 0x%X offset 0x%X page 0x%X\n",
1109                object, offset, mem, 0,0);
1110#if 0
1111	/*
1112	 * we may not hold the page queue lock
1113	 * so this check isn't safe to make
1114	 */
1115	VM_PAGE_CHECK(mem);
1116#endif
1117
1118	assert(page_aligned(offset));
1119
1120	/* the vm_submap_object is only a placeholder for submaps */
1121	assert(object != vm_submap_object);
1122
1123	vm_object_lock_assert_exclusive(object);
1124#if DEBUG
1125	lck_mtx_assert(&vm_page_queue_lock,
1126		       queues_lock_held ? LCK_MTX_ASSERT_OWNED
1127		       			: LCK_MTX_ASSERT_NOTOWNED);
1128#endif	/* DEBUG */
1129
1130	if (insert_in_hash == TRUE) {
1131#if DEBUG || VM_PAGE_CHECK_BUCKETS
1132		if (mem->tabled || mem->object != VM_OBJECT_NULL)
1133			panic("vm_page_insert: page %p for (obj=%p,off=0x%llx) "
1134			      "already in (obj=%p,off=0x%llx)",
1135			      mem, object, offset, mem->object, mem->offset);
1136#endif
1137		assert(!object->internal || offset < object->vo_size);
1138
1139		/* only insert "pageout" pages into "pageout" objects,
1140		 * and normal pages into normal objects */
1141		assert(object->pageout == mem->pageout);
1142
1143		assert(vm_page_lookup(object, offset) == VM_PAGE_NULL);
1144
1145		/*
1146		 *	Record the object/offset pair in this page
1147		 */
1148
1149		mem->object = object;
1150		mem->offset = offset;
1151
1152		/*
		 *	Insert it into the object/offset hash table
1154		 */
1155		hash_id = vm_page_hash(object, offset);
1156		bucket = &vm_page_buckets[hash_id];
1157		bucket_lock = &vm_page_bucket_locks[hash_id / BUCKETS_PER_LOCK];
1158
1159		lck_spin_lock(bucket_lock);
1160
1161		mem->next_m = bucket->page_list;
1162		bucket->page_list = VM_PAGE_PACK_PTR(mem);
1163		assert(mem == VM_PAGE_UNPACK_PTR(bucket->page_list));
1164
1165#if     MACH_PAGE_HASH_STATS
1166		if (++bucket->cur_count > bucket->hi_count)
1167			bucket->hi_count = bucket->cur_count;
1168#endif /* MACH_PAGE_HASH_STATS */
1169		mem->hashed = TRUE;
1170		lck_spin_unlock(bucket_lock);
1171	}
1172
1173	{
1174		unsigned int    cache_attr;
1175
1176		cache_attr = object->wimg_bits & VM_WIMG_MASK;
1177
1178		if (cache_attr != VM_WIMG_USE_DEFAULT) {
1179			PMAP_SET_CACHE_ATTR(mem, object, cache_attr, batch_pmap_op);
1180		}
1181	}
1182	/*
1183	 *	Now link into the object's list of backed pages.
1184	 */
1185	VM_PAGE_INSERT(mem, object);
1186	mem->tabled = TRUE;
1187
1188	/*
1189	 *	Show that the object has one more resident page.
1190	 */
1191
1192	object->resident_page_count++;
1193	if (VM_PAGE_WIRED(mem)) {
1194		object->wired_page_count++;
1195	}
1196	assert(object->resident_page_count >= object->wired_page_count);
1197
1198	if (object->internal) {
1199		OSAddAtomic(1, &vm_page_internal_count);
1200	} else {
1201		OSAddAtomic(1, &vm_page_external_count);
1202	}
1203
1204	/*
1205	 * It wouldn't make sense to insert a "reusable" page in
1206	 * an object (the page would have been marked "reusable" only
1207	 * at the time of a madvise(MADV_FREE_REUSABLE) if it was already
1208	 * in the object at that time).
	 * But a page could be inserted in an "all_reusable" object, if
1210	 * something faults it in (a vm_read() from another task or a
1211	 * "use-after-free" issue in user space, for example).  It can
1212	 * also happen if we're relocating a page from that object to
1213	 * a different physical page during a physically-contiguous
1214	 * allocation.
1215	 */
1216	assert(!mem->reusable);
1217	if (mem->object->all_reusable) {
1218		OSAddAtomic(+1, &vm_page_stats_reusable.reusable_count);
1219	}
1220
1221	if (object->purgable == VM_PURGABLE_DENY) {
1222		owner = TASK_NULL;
1223	} else {
1224		owner = object->vo_purgeable_owner;
1225	}
1226	if (owner &&
1227	    (object->purgable == VM_PURGABLE_NONVOLATILE ||
1228	     VM_PAGE_WIRED(mem))) {
1229		/* more non-volatile bytes */
1230		ledger_credit(owner->ledger,
1231			      task_ledgers.purgeable_nonvolatile,
1232			      PAGE_SIZE);
1233		/* more footprint */
1234		ledger_credit(owner->ledger,
1235			      task_ledgers.phys_footprint,
1236			      PAGE_SIZE);
1237
1238	} else if (owner &&
1239		   (object->purgable == VM_PURGABLE_VOLATILE ||
1240		    object->purgable == VM_PURGABLE_EMPTY)) {
1241		assert(! VM_PAGE_WIRED(mem));
1242		/* more volatile bytes */
1243		ledger_credit(owner->ledger,
1244			      task_ledgers.purgeable_volatile,
1245			      PAGE_SIZE);
1246	}
1247
1248	if (object->purgable == VM_PURGABLE_VOLATILE) {
1249		if (VM_PAGE_WIRED(mem)) {
1250			OSAddAtomic(+1, &vm_page_purgeable_wired_count);
1251		} else {
1252			OSAddAtomic(+1, &vm_page_purgeable_count);
1253		}
1254	} else if (object->purgable == VM_PURGABLE_EMPTY &&
1255		   mem->throttled) {
1256		/*
1257		 * This page belongs to a purged VM object but hasn't
1258		 * been purged (because it was "busy").
1259		 * It's in the "throttled" queue and hence not
1260		 * visible to vm_pageout_scan().  Move it to a pageable
1261		 * queue, so that it can eventually be reclaimed, instead
1262		 * of lingering in the "empty" object.
1263		 */
1264		if (queues_lock_held == FALSE)
1265			vm_page_lockspin_queues();
1266		vm_page_deactivate(mem);
1267		if (queues_lock_held == FALSE)
1268			vm_page_unlock_queues();
1269	}
1270
1271#if VM_OBJECT_TRACKING_OP_MODIFIED
1272	if (vm_object_tracking_inited &&
1273	    object->internal &&
1274	    object->resident_page_count == 0 &&
1275	    object->pager == NULL &&
1276	    object->shadow != NULL &&
1277	    object->shadow->copy == object) {
1278		void *bt[VM_OBJECT_TRACKING_BTDEPTH];
1279		int numsaved = 0;
1280
		numsaved = OSBacktrace(bt, VM_OBJECT_TRACKING_BTDEPTH);
1282		btlog_add_entry(vm_object_tracking_btlog,
1283				object,
1284				VM_OBJECT_TRACKING_OP_MODIFIED,
1285				bt,
1286				numsaved);
1287	}
1288#endif /* VM_OBJECT_TRACKING_OP_MODIFIED */
1289}
1290
1291/*
1292 *	vm_page_replace:
1293 *
1294 *	Exactly like vm_page_insert, except that we first
1295 *	remove any existing page at the given offset in object.
1296 *
1297 *	The object must be locked.
1298 */
1299void
1300vm_page_replace(
1301	register vm_page_t		mem,
1302	register vm_object_t		object,
1303	register vm_object_offset_t	offset)
1304{
1305	vm_page_bucket_t *bucket;
1306	vm_page_t	 found_m = VM_PAGE_NULL;
1307	lck_spin_t	*bucket_lock;
1308	int		hash_id;
1309
1310#if 0
1311	/*
1312	 * we don't hold the page queue lock
1313	 * so this check isn't safe to make
1314	 */
1315	VM_PAGE_CHECK(mem);
1316#endif
1317	vm_object_lock_assert_exclusive(object);
1318#if DEBUG || VM_PAGE_CHECK_BUCKETS
1319	if (mem->tabled || mem->object != VM_OBJECT_NULL)
1320		panic("vm_page_replace: page %p for (obj=%p,off=0x%llx) "
1321		      "already in (obj=%p,off=0x%llx)",
1322		      mem, object, offset, mem->object, mem->offset);
1323	lck_mtx_assert(&vm_page_queue_lock, LCK_MTX_ASSERT_NOTOWNED);
1324#endif
1325	/*
1326	 *	Record the object/offset pair in this page
1327	 */
1328
1329	mem->object = object;
1330	mem->offset = offset;
1331
1332	/*
	 *	Insert it into the object/offset hash table,
1334	 *	replacing any page that might have been there.
1335	 */
1336
1337	hash_id = vm_page_hash(object, offset);
1338	bucket = &vm_page_buckets[hash_id];
1339	bucket_lock = &vm_page_bucket_locks[hash_id / BUCKETS_PER_LOCK];
1340
1341	lck_spin_lock(bucket_lock);
1342
1343	if (bucket->page_list) {
1344		vm_page_packed_t *mp = &bucket->page_list;
1345		vm_page_t m = VM_PAGE_UNPACK_PTR(*mp);
1346
1347		do {
1348			if (m->object == object && m->offset == offset) {
1349				/*
1350				 * Remove old page from hash list
1351				 */
1352				*mp = m->next_m;
1353				m->hashed = FALSE;
1354
1355				found_m = m;
1356				break;
1357			}
1358			mp = &m->next_m;
1359		} while ((m = VM_PAGE_UNPACK_PTR(*mp)));
1360
1361		mem->next_m = bucket->page_list;
1362	} else {
1363		mem->next_m = VM_PAGE_PACK_PTR(VM_PAGE_NULL);
1364	}
1365	/*
1366	 * insert new page at head of hash list
1367	 */
1368	bucket->page_list = VM_PAGE_PACK_PTR(mem);
1369	mem->hashed = TRUE;
1370
1371	lck_spin_unlock(bucket_lock);
1372
1373	if (found_m) {
1374	        /*
1375		 * there was already a page at the specified
1376		 * offset for this object... remove it from
1377		 * the object and free it back to the free list
1378		 */
1379		vm_page_free_unlocked(found_m, FALSE);
1380	}
1381	vm_page_insert_internal(mem, object, offset, FALSE, FALSE, FALSE);
1382}
1383
1384/*
1385 *	vm_page_remove:		[ internal use only ]
1386 *
1387 *	Removes the given mem entry from the object/offset-page
1388 *	table and the object page list.
1389 *
1390 *	The object must be locked.
1391 */
1392
1393void
1394vm_page_remove(
1395	vm_page_t	mem,
1396	boolean_t	remove_from_hash)
1397{
1398	vm_page_bucket_t *bucket;
1399	vm_page_t	this;
1400	lck_spin_t	*bucket_lock;
1401	int		hash_id;
1402	task_t		owner;
1403
1404        XPR(XPR_VM_PAGE,
1405                "vm_page_remove, object 0x%X offset 0x%X page 0x%X\n",
1406                mem->object, mem->offset,
1407		mem, 0,0);
1408
1409	vm_object_lock_assert_exclusive(mem->object);
1410	assert(mem->tabled);
1411	assert(!mem->cleaning);
1412	assert(!mem->laundry);
1413#if 0
1414	/*
1415	 * we don't hold the page queue lock
1416	 * so this check isn't safe to make
1417	 */
1418	VM_PAGE_CHECK(mem);
1419#endif
1420	if (remove_from_hash == TRUE) {
1421		/*
		 *	Remove from the object/offset hash table
1423		 */
1424		hash_id = vm_page_hash(mem->object, mem->offset);
1425		bucket = &vm_page_buckets[hash_id];
1426		bucket_lock = &vm_page_bucket_locks[hash_id / BUCKETS_PER_LOCK];
1427
1428		lck_spin_lock(bucket_lock);
1429
1430		if ((this = VM_PAGE_UNPACK_PTR(bucket->page_list)) == mem) {
1431			/* optimize for common case */
1432
1433			bucket->page_list = mem->next_m;
1434		} else {
1435			vm_page_packed_t	*prev;
1436
1437			for (prev = &this->next_m;
1438			     (this = VM_PAGE_UNPACK_PTR(*prev)) != mem;
1439			     prev = &this->next_m)
1440				continue;
1441			*prev = this->next_m;
1442		}
1443#if     MACH_PAGE_HASH_STATS
1444		bucket->cur_count--;
1445#endif /* MACH_PAGE_HASH_STATS */
1446		mem->hashed = FALSE;
1447		lck_spin_unlock(bucket_lock);
1448	}
1449	/*
1450	 *	Now remove from the object's list of backed pages.
1451	 */
1452
1453	VM_PAGE_REMOVE(mem);
1454
1455	/*
1456	 *	And show that the object has one fewer resident
1457	 *	page.
1458	 */
1459
1460	assert(mem->object->resident_page_count > 0);
1461	mem->object->resident_page_count--;
1462
1463	if (mem->object->internal) {
1464#if DEBUG
1465		assert(vm_page_internal_count);
1466#endif /* DEBUG */
1467
1468		OSAddAtomic(-1, &vm_page_internal_count);
1469	} else {
1470		assert(vm_page_external_count);
1471		OSAddAtomic(-1, &vm_page_external_count);
1472
1473		if (mem->xpmapped) {
1474			assert(vm_page_xpmapped_external_count);
1475			OSAddAtomic(-1, &vm_page_xpmapped_external_count);
1476		}
1477	}
1478	if (!mem->object->internal && (mem->object->objq.next || mem->object->objq.prev)) {
1479		if (mem->object->resident_page_count == 0)
1480			vm_object_cache_remove(mem->object);
1481	}
1482
1483	if (VM_PAGE_WIRED(mem)) {
1484		assert(mem->object->wired_page_count > 0);
1485		mem->object->wired_page_count--;
1486	}
1487	assert(mem->object->resident_page_count >=
1488	       mem->object->wired_page_count);
1489	if (mem->reusable) {
1490		assert(mem->object->reusable_page_count > 0);
1491		mem->object->reusable_page_count--;
1492		assert(mem->object->reusable_page_count <=
1493		       mem->object->resident_page_count);
1494		mem->reusable = FALSE;
1495		OSAddAtomic(-1, &vm_page_stats_reusable.reusable_count);
1496		vm_page_stats_reusable.reused_remove++;
1497	} else if (mem->object->all_reusable) {
1498		OSAddAtomic(-1, &vm_page_stats_reusable.reusable_count);
1499		vm_page_stats_reusable.reused_remove++;
1500	}
1501
1502	if (mem->object->purgable == VM_PURGABLE_DENY) {
1503		owner = TASK_NULL;
1504	} else {
1505		owner = mem->object->vo_purgeable_owner;
1506	}
1507	if (owner &&
1508	    (mem->object->purgable == VM_PURGABLE_NONVOLATILE ||
1509	     VM_PAGE_WIRED(mem))) {
1510		/* less non-volatile bytes */
1511		ledger_debit(owner->ledger,
1512			     task_ledgers.purgeable_nonvolatile,
1513			     PAGE_SIZE);
1514		/* less footprint */
1515		ledger_debit(owner->ledger,
1516			     task_ledgers.phys_footprint,
1517			     PAGE_SIZE);
1518	} else if (owner &&
1519		   (mem->object->purgable == VM_PURGABLE_VOLATILE ||
1520		    mem->object->purgable == VM_PURGABLE_EMPTY)) {
1521		assert(! VM_PAGE_WIRED(mem));
1522		/* less volatile bytes */
1523		ledger_debit(owner->ledger,
1524			     task_ledgers.purgeable_volatile,
1525			     PAGE_SIZE);
1526	}
1527	if (mem->object->purgable == VM_PURGABLE_VOLATILE) {
1528		if (VM_PAGE_WIRED(mem)) {
1529			assert(vm_page_purgeable_wired_count > 0);
1530			OSAddAtomic(-1, &vm_page_purgeable_wired_count);
1531		} else {
1532			assert(vm_page_purgeable_count > 0);
1533			OSAddAtomic(-1, &vm_page_purgeable_count);
1534		}
1535	}
1536	if (mem->object->set_cache_attr == TRUE)
1537		pmap_set_cache_attributes(mem->phys_page, 0);
1538
1539	mem->tabled = FALSE;
1540	mem->object = VM_OBJECT_NULL;
1541	mem->offset = (vm_object_offset_t) -1;
1542}
1543
1544
1545/*
1546 *	vm_page_lookup:
1547 *
1548 *	Returns the page associated with the object/offset
1549 *	pair specified; if none is found, VM_PAGE_NULL is returned.
1550 *
1551 *	The object must be locked.  No side effects.
1552 */
1553
1554unsigned long vm_page_lookup_hint = 0;
1555unsigned long vm_page_lookup_hint_next = 0;
1556unsigned long vm_page_lookup_hint_prev = 0;
1557unsigned long vm_page_lookup_hint_miss = 0;
1558unsigned long vm_page_lookup_bucket_NULL = 0;
1559unsigned long vm_page_lookup_miss = 0;
1560
1561
1562vm_page_t
1563vm_page_lookup(
1564	vm_object_t		object,
1565	vm_object_offset_t	offset)
1566{
1567	vm_page_t	mem;
1568	vm_page_bucket_t *bucket;
1569	queue_entry_t	qe;
1570	lck_spin_t	*bucket_lock;
1571	int		hash_id;
1572
1573	vm_object_lock_assert_held(object);
1574	mem = object->memq_hint;
1575
1576	if (mem != VM_PAGE_NULL) {
1577		assert(mem->object == object);
1578
1579		if (mem->offset == offset) {
1580			vm_page_lookup_hint++;
1581			return mem;
1582		}
1583		qe = queue_next(&mem->listq);
1584
1585		if (! queue_end(&object->memq, qe)) {
1586			vm_page_t	next_page;
1587
1588			next_page = (vm_page_t) qe;
1589			assert(next_page->object == object);
1590
1591			if (next_page->offset == offset) {
1592				vm_page_lookup_hint_next++;
1593				object->memq_hint = next_page; /* new hint */
1594				return next_page;
1595			}
1596		}
1597		qe = queue_prev(&mem->listq);
1598
1599		if (! queue_end(&object->memq, qe)) {
1600			vm_page_t prev_page;
1601
1602			prev_page = (vm_page_t) qe;
1603			assert(prev_page->object == object);
1604
1605			if (prev_page->offset == offset) {
1606				vm_page_lookup_hint_prev++;
1607				object->memq_hint = prev_page; /* new hint */
1608				return prev_page;
1609			}
1610		}
1611	}
1612	/*
1613	 * Search the hash table for this object/offset pair
1614	 */
1615	hash_id = vm_page_hash(object, offset);
1616	bucket = &vm_page_buckets[hash_id];
1617
1618	/*
1619	 * since we hold the object lock, we are guaranteed that no
1620	 * new pages can be inserted into this object... this in turn
	 * guarantees that the page we're looking for can't exist
	 * if the bucket it hashes to is currently NULL, even when looked
	 * at outside the scope of the hash bucket lock... this is a
	 * really cheap optimization to avoid taking the lock
1625	 */
1626	if (!bucket->page_list) {
1627	        vm_page_lookup_bucket_NULL++;
1628
1629	        return (VM_PAGE_NULL);
1630	}
1631	bucket_lock = &vm_page_bucket_locks[hash_id / BUCKETS_PER_LOCK];
1632
1633	lck_spin_lock(bucket_lock);
1634
1635	for (mem = VM_PAGE_UNPACK_PTR(bucket->page_list); mem != VM_PAGE_NULL; mem = VM_PAGE_UNPACK_PTR(mem->next_m)) {
1636#if 0
1637		/*
1638		 * we don't hold the page queue lock
1639		 * so this check isn't safe to make
1640		 */
1641		VM_PAGE_CHECK(mem);
1642#endif
1643		if ((mem->object == object) && (mem->offset == offset))
1644			break;
1645	}
1646	lck_spin_unlock(bucket_lock);
1647
1648	if (mem != VM_PAGE_NULL) {
1649		if (object->memq_hint != VM_PAGE_NULL) {
1650			vm_page_lookup_hint_miss++;
1651		}
1652		assert(mem->object == object);
1653		object->memq_hint = mem;
1654	} else
1655	        vm_page_lookup_miss++;
1656
1657	return(mem);
1658}
1659
1660
1661/*
1662 *	vm_page_rename:
1663 *
1664 *	Move the given memory entry from its
1665 *	current object to the specified target object/offset.
1666 *
1667 *	The object must be locked.
1668 */
1669void
1670vm_page_rename(
1671	register vm_page_t		mem,
1672	register vm_object_t		new_object,
1673	vm_object_offset_t		new_offset,
1674	boolean_t			encrypted_ok)
1675{
1676	boolean_t	internal_to_external, external_to_internal;
1677
1678	assert(mem->object != new_object);
1679
1680	/*
1681	 * ENCRYPTED SWAP:
1682	 * The encryption key is based on the page's memory object
1683	 * (aka "pager") and paging offset.  Moving the page to
1684	 * another VM object changes its "pager" and "paging_offset"
1685	 * so it has to be decrypted first, or we would lose the key.
1686	 *
1687	 * One exception is VM object collapsing, where we transfer pages
1688	 * from one backing object to its parent object.  This operation also
1689	 * transfers the paging information, so the <pager,paging_offset> info
1690	 * should remain consistent.  The caller (vm_object_do_collapse())
1691	 * sets "encrypted_ok" in this case.
1692	 */
1693	if (!encrypted_ok && mem->encrypted) {
1694		panic("vm_page_rename: page %p is encrypted\n", mem);
1695	}
1696
1697        XPR(XPR_VM_PAGE,
1698                "vm_page_rename, new object 0x%X, offset 0x%X page 0x%X\n",
1699                new_object, new_offset,
1700		mem, 0,0);
1701
1702	/*
	 *	Changes to mem->object require the page queues lock because
	 *	the pageout daemon uses that lock to get the object.
1705	 */
1706	vm_page_lockspin_queues();
1707
1708	internal_to_external = FALSE;
1709	external_to_internal = FALSE;
1710
1711	if (mem->local) {
1712		/*
1713		 * it's much easier to get the vm_page_pageable_xxx accounting correct
1714		 * if we first move the page to the active queue... it's going to end
1715		 * up there anyway, and we don't do vm_page_rename's frequently enough
1716		 * for this to matter.
1717		 */
1718		VM_PAGE_QUEUES_REMOVE(mem);
1719		vm_page_activate(mem);
1720	}
1721	if (mem->active || mem->inactive || mem->speculative) {
1722		if (mem->object->internal && !new_object->internal) {
1723			internal_to_external = TRUE;
1724		}
1725		if (!mem->object->internal && new_object->internal) {
1726			external_to_internal = TRUE;
1727		}
1728	}
1729
1730    	vm_page_remove(mem, TRUE);
1731	vm_page_insert_internal(mem, new_object, new_offset, TRUE, TRUE, FALSE);
1732
1733	if (internal_to_external) {
1734		vm_page_pageable_internal_count--;
1735		vm_page_pageable_external_count++;
1736	} else if (external_to_internal) {
1737		vm_page_pageable_external_count--;
1738		vm_page_pageable_internal_count++;
1739	}
1740
1741	vm_page_unlock_queues();
1742}
1743
1744/*
1745 *	vm_page_init:
1746 *
1747 *	Initialize the fields in a new page.
1748 *	This takes a structure with random values and initializes it
1749 *	so that it can be given to vm_page_release or vm_page_insert.
1750 */
1751void
1752vm_page_init(
1753	vm_page_t	mem,
1754	ppnum_t		phys_page,
1755	boolean_t	lopage)
1756{
1757	assert(phys_page);
1758
1759#if	DEBUG
1760	if ((phys_page != vm_page_fictitious_addr) && (phys_page != vm_page_guard_addr)) {
1761		if (!(pmap_valid_page(phys_page))) {
1762			panic("vm_page_init: non-DRAM phys_page 0x%x\n", phys_page);
1763		}
1764	}
1765#endif
1766	*mem = vm_page_template;
1767	mem->phys_page = phys_page;
1768#if 0
1769	/*
1770	 * we're leaving this turned off for now... currently pages
1771	 * come off the free list and are either immediately dirtied/referenced
1772	 * due to zero-fill or COW faults, or are used to read or write files...
1773	 * in the file I/O case, the UPL mechanism takes care of clearing
1774	 * the state of the HW ref/mod bits in a somewhat fragile way.
1775	 * Since we may change the way this works in the future (to toughen it up),
1776	 * I'm leaving this as a reminder of where these bits could get cleared
1777	 */
1778
1779	/*
1780	 * make sure both the h/w referenced and modified bits are
1781	 * clear at this point... we are especially dependent on
1782	 * not finding a 'stale' h/w modified in a number of spots
1783	 * once this page goes back into use
1784	 */
1785	pmap_clear_refmod(phys_page, VM_MEM_MODIFIED | VM_MEM_REFERENCED);
1786#endif
1787	mem->lopage = lopage;
1788}
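
/*
 * Typical use (see pmap_startup() above): each physical page handed back
 * by pmap_next_page() is run through
 *	vm_page_init(&vm_pages[i], phys_page, FALSE);
 * before being released to the free lists with vm_page_release_startup().
 */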
1789
1790/*
1791 *	vm_page_grab_fictitious:
1792 *
1793 *	Remove a fictitious page from the free list.
1794 *	Returns VM_PAGE_NULL if there are no free pages.
1795 */
1796int	c_vm_page_grab_fictitious = 0;
1797int	c_vm_page_grab_fictitious_failed = 0;
1798int	c_vm_page_release_fictitious = 0;
1799int	c_vm_page_more_fictitious = 0;
1800
1801vm_page_t
1802vm_page_grab_fictitious_common(
1803	ppnum_t phys_addr)
1804{
1805	vm_page_t	m;
1806
1807	if ((m = (vm_page_t)zget(vm_page_zone))) {
1808
1809		vm_page_init(m, phys_addr, FALSE);
1810		m->fictitious = TRUE;
1811
1812		c_vm_page_grab_fictitious++;
1813	} else
1814		c_vm_page_grab_fictitious_failed++;
1815
1816	return m;
1817}
1818
1819vm_page_t
1820vm_page_grab_fictitious(void)
1821{
1822	return vm_page_grab_fictitious_common(vm_page_fictitious_addr);
1823}
1824
1825vm_page_t
1826vm_page_grab_guard(void)
1827{
1828	return vm_page_grab_fictitious_common(vm_page_guard_addr);
1829}
1830
1831
1832/*
1833 *	vm_page_release_fictitious:
1834 *
1835 *	Release a fictitious page to the zone pool
1836 */
1837void
1838vm_page_release_fictitious(
1839	vm_page_t m)
1840{
1841	assert(!m->free);
1842	assert(m->fictitious);
1843	assert(m->phys_page == vm_page_fictitious_addr ||
1844	       m->phys_page == vm_page_guard_addr);
1845
1846	c_vm_page_release_fictitious++;
1847
1848	zfree(vm_page_zone, m);
1849}
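
/*
 * Illustrative sketch (not compiled): the expected pairing of
 * vm_page_grab_fictitious() and vm_page_release_fictitious().  The
 * "use as a placeholder" step is hypothetical; real callers typically
 * insert the fictitious page into an object while it is needed and
 * remove it again before releasing it.
 */
#if 0
	vm_page_t	fict;

	while ((fict = vm_page_grab_fictitious()) == VM_PAGE_NULL)
		vm_page_more_fictitious();	/* may block to refill vm_page_zone */

	/* ... use fict as a placeholder (e.g. a busy marker) ... */

	vm_page_release_fictitious(fict);	/* returns it to vm_page_zone */
#endif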
1850
1851/*
1852 *	vm_page_more_fictitious:
1853 *
1854 *	Add more fictitious pages to the zone.
1855 *	Allowed to block. This routine is tightly coupled
1856 *	to the zones code, for several reasons:
1857 *	1. we need to carve some page structures out of physical
1858 *	   memory before zones work, so they _cannot_ come from
1859 *	   the zone_map.
1860 *	2. the zone needs to be collectable in order to prevent
1861 *	   growth without bound. These structures are used by
1862 *	   the device pager (by the hundreds and thousands), as
1863 *	   private pages for pageout, and as blocking pages for
1864 *	   pagein. Temporary bursts in demand should not result in
1865 *	   permanent allocation of a resource.
1866 *	3. To smooth allocation humps, we allocate single pages
1867 *	   with kernel_memory_allocate(), and cram them into the
1868 *	   zone.
1869 */
1870
1871void vm_page_more_fictitious(void)
1872{
1873	vm_offset_t	addr;
1874	kern_return_t	retval;
1875
1876	c_vm_page_more_fictitious++;
1877
1878	/*
1879	 * Allocate a single page from the zone_map. Do not wait if no physical
1880	 * pages are immediately available, and do not zero the space. We need
1881	 * our own blocking lock here to keep multiple simultaneous
1882	 * requests from piling up on the zone_map lock. Exactly
1883	 * one (of our) threads should be potentially waiting on the map lock.
1884	 * If the winner is not vm-privileged, the page allocation will fail,
1885	 * and that thread will temporarily block here in vm_page_wait().
1886	 */
1887	lck_mtx_lock(&vm_page_alloc_lock);
1888	/*
1889	 * If another thread allocated space, just bail out now.
1890	 */
1891	if (zone_free_count(vm_page_zone) > 5) {
1892		/*
1893		 * The number "5" is a small number that is larger than the
1894		 * number of fictitious pages that any single caller will
1895		 * attempt to allocate. Otherwise, a thread will attempt to
1896		 * acquire a fictitious page (vm_page_grab_fictitious), fail,
1897		 * release all of the resources and locks already acquired,
1898		 * and then call this routine. This routine finds the pages
1899		 * that the caller released, so it declines to allocate new space,
1900		 * and the process repeats indefinitely. The largest known number
1901		 * of fictitious pages required in this manner is 2; 5 is
1902		 * simply a somewhat larger number.
1903		 */
1904		lck_mtx_unlock(&vm_page_alloc_lock);
1905		return;
1906	}
1907
1908	retval = kernel_memory_allocate(zone_map,
1909					&addr, PAGE_SIZE, VM_PROT_ALL,
1910					KMA_KOBJECT|KMA_NOPAGEWAIT);
1911	if (retval != KERN_SUCCESS) {
1912		/*
1913		 * No page was available. Drop the
1914		 * lock to give another thread a chance at it, and
1915		 * wait for the pageout daemon to make progress.
1916		 */
1917		lck_mtx_unlock(&vm_page_alloc_lock);
1918		vm_page_wait(THREAD_UNINT);
1919		return;
1920	}
1921
1922	/* Increment zone page count. We account for all memory managed by the zone in z->page_count */
1923	OSAddAtomic64(1, &(vm_page_zone->page_count));
1924
1925	zcram(vm_page_zone, addr, PAGE_SIZE);
1926
1927	lck_mtx_unlock(&vm_page_alloc_lock);
1928}
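
/*
 * Illustrative sketch (not compiled): the retry discipline the comment
 * above assumes.  A caller that fails to grab a fictitious page drops
 * whatever it holds, asks for more, and starts over; the "caller's
 * locks / resources" steps are hypothetical placeholders.
 */
#if 0
	vm_page_t	fict;
retry:
	/* ... acquire caller's locks / resources ... */

	if ((fict = vm_page_grab_fictitious()) == VM_PAGE_NULL) {
		/* ... release caller's locks / resources ... */
		vm_page_more_fictitious();	/* refills vm_page_zone, may block */
		goto retry;
	}
#endif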
1929
1930
1931/*
1932 *	vm_pool_low():
1933 *
1934 *	Return true if it is not likely that a non-vm_privileged thread
1935 *	can get memory without blocking.  Advisory only, since the
1936 *	situation may change under us.
1937 */
1938int
1939vm_pool_low(void)
1940{
1941	/* No locking, at worst we will fib. */
1942	return( vm_page_free_count <= vm_page_free_reserved );
1943}
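
/*
 * Illustrative sketch (not compiled): vm_pool_low() is advisory and
 * taken without locks, so callers generally use it only to choose a
 * cheaper, non-blocking strategy; the fallback shown here is a
 * hypothetical example, not a required pattern.
 */
#if 0
	if (vm_pool_low()) {
		/* free memory is scarce: skip the optional allocation */
		return (KERN_RESOURCE_SHORTAGE);
	}
	/* otherwise proceed with the normal (possibly allocating) path */
#endif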
1944
1945
1946
1947/*
1948 * this is an interface to support bring-up of drivers
1949 * on platforms with physical memory > 4G...
1950 */
1951int		vm_himemory_mode = 2;
1952
1953
1954/*
1955 * this interface exists to support hardware controllers
1956 * incapable of generating DMAs with more than 32 bits
1957 * of address on platforms with physical memory > 4G...
1958 */
1959unsigned int	vm_lopages_allocated_q = 0;
1960unsigned int	vm_lopages_allocated_cpm_success = 0;
1961unsigned int	vm_lopages_allocated_cpm_failed = 0;
1962queue_head_t	vm_lopage_queue_free;
1963
1964vm_page_t
1965vm_page_grablo(void)
1966{
1967	vm_page_t	mem;
1968
1969	if (vm_lopage_needed == FALSE)
1970	        return (vm_page_grab());
1971
1972	lck_mtx_lock_spin(&vm_page_queue_free_lock);
1973
1974        if ( !queue_empty(&vm_lopage_queue_free)) {
1975                queue_remove_first(&vm_lopage_queue_free,
1976                                   mem,
1977                                   vm_page_t,
1978                                   pageq);
1979		assert(vm_lopage_free_count);
1980
1981                vm_lopage_free_count--;
1982		vm_lopages_allocated_q++;
1983
1984		if (vm_lopage_free_count < vm_lopage_lowater)
1985			vm_lopage_refill = TRUE;
1986
1987		lck_mtx_unlock(&vm_page_queue_free_lock);
1988	} else {
1989		lck_mtx_unlock(&vm_page_queue_free_lock);
1990
1991		if (cpm_allocate(PAGE_SIZE, &mem, atop(0xffffffff), 0, FALSE, KMA_LOMEM) != KERN_SUCCESS) {
1992
1993			lck_mtx_lock_spin(&vm_page_queue_free_lock);
1994			vm_lopages_allocated_cpm_failed++;
1995			lck_mtx_unlock(&vm_page_queue_free_lock);
1996
1997			return (VM_PAGE_NULL);
1998		}
1999		mem->busy = TRUE;
2000
2001		vm_page_lockspin_queues();
2002
2003		mem->gobbled = FALSE;
2004		vm_page_gobble_count--;
2005		vm_page_wire_count--;
2006
2007		vm_lopages_allocated_cpm_success++;
2008		vm_page_unlock_queues();
2009	}
2010	assert(mem->busy);
2011	assert(!mem->free);
2012	assert(!mem->pmapped);
2013	assert(!mem->wpmapped);
2014	assert(!pmap_is_noencrypt(mem->phys_page));
2015
2016	mem->pageq.next = NULL;
2017	mem->pageq.prev = NULL;
2018
2019	return (mem);
2020}
2021
2022
2023/*
2024 *	vm_page_grab:
2025 *
2026 *	first try to grab a page from the per-cpu free list...
2027 *	this must be done while pre-emption is disabled... if
2028 * 	a page is available, we're done...
2029 *	if no page is available, grab the vm_page_queue_free_lock
2030 *	and see if current number of free pages would allow us
2031 * 	to grab at least 1... if not, return VM_PAGE_NULL as before...
2032 *	if there are pages available, disable preemption and
2033 * 	recheck the state of the per-cpu free list... we could
2034 *	have been preempted and moved to a different cpu, or
2035 * 	some other thread could have re-filled it... if still
2036 *	empty, figure out how many pages we can steal from the
2037 *	global free queue and move to the per-cpu queue...
2038 *	return one of these pages when done... only wake up the
2039 * 	pageout_scan thread if we moved pages from the global
2040 *	list... no need for the wakeup if we've satisfied the
2041 *	request from the per-cpu queue.
2042 */
2043
2044
2045vm_page_t
2046vm_page_grab( void )
2047{
2048	vm_page_t	mem;
2049
2050
2051	disable_preemption();
2052
2053	if ((mem = PROCESSOR_DATA(current_processor(), free_pages))) {
2054return_page_from_cpu_list:
2055	        PROCESSOR_DATA(current_processor(), page_grab_count) += 1;
2056	        PROCESSOR_DATA(current_processor(), free_pages) = mem->pageq.next;
2057
2058	        enable_preemption();
2059		mem->pageq.next = NULL;
2060
2061		assert(mem->listq.next == NULL && mem->listq.prev == NULL);
2062		assert(mem->tabled == FALSE);
2063		assert(mem->object == VM_OBJECT_NULL);
2064		assert(!mem->laundry);
2065		assert(!mem->free);
2066		assert(pmap_verify_free(mem->phys_page));
2067		assert(mem->busy);
2068		assert(!mem->encrypted);
2069		assert(!mem->pmapped);
2070		assert(!mem->wpmapped);
2071		assert(!mem->active);
2072		assert(!mem->inactive);
2073		assert(!mem->throttled);
2074		assert(!mem->speculative);
2075		assert(!pmap_is_noencrypt(mem->phys_page));
2076
2077		return mem;
2078	}
2079	enable_preemption();
2080
2081
2082	/*
2083	 *	Optionally produce warnings if the wire or gobble
2084	 *	counts exceed some threshold.
2085	 */
2086#if VM_PAGE_WIRE_COUNT_WARNING
2087	if (vm_page_wire_count >= VM_PAGE_WIRE_COUNT_WARNING) {
2088		printf("mk: vm_page_grab(): high wired page count of %d\n",
2089			vm_page_wire_count);
2090	}
2091#endif
2092#if VM_PAGE_GOBBLE_COUNT_WARNING
2093	if (vm_page_gobble_count >= VM_PAGE_GOBBLE_COUNT_WARNING) {
2094		printf("mk: vm_page_grab(): high gobbled page count of %d\n",
2095			vm_page_gobble_count);
2096	}
2097#endif
2098	lck_mtx_lock_spin(&vm_page_queue_free_lock);
2099
2100	/*
2101	 *	Only let privileged threads (involved in pageout)
2102	 *	dip into the reserved pool.
2103	 */
2104	if ((vm_page_free_count < vm_page_free_reserved) &&
2105	    !(current_thread()->options & TH_OPT_VMPRIV)) {
2106		lck_mtx_unlock(&vm_page_queue_free_lock);
2107		mem = VM_PAGE_NULL;
2108	}
2109	else {
2110	       vm_page_t	head;
2111	       vm_page_t	tail;
2112	       unsigned int	pages_to_steal;
2113	       unsigned int	color;
2114
2115	       while ( vm_page_free_count == 0 ) {
2116
2117			lck_mtx_unlock(&vm_page_queue_free_lock);
2118			/*
2119			 * must be a privileged thread to be
2120			 * in this state since a non-privileged
2121			 * thread would have bailed if we were
2122			 * under the vm_page_free_reserved mark
2123			 */
2124			VM_PAGE_WAIT();
2125			lck_mtx_lock_spin(&vm_page_queue_free_lock);
2126		}
2127
2128		disable_preemption();
2129
2130		if ((mem = PROCESSOR_DATA(current_processor(), free_pages))) {
2131			lck_mtx_unlock(&vm_page_queue_free_lock);
2132
2133		        /*
2134			 * we got preempted and moved to another processor
2135			 * or we got preempted and someone else ran and filled the cache
2136			 */
2137			goto return_page_from_cpu_list;
2138		}
2139		if (vm_page_free_count <= vm_page_free_reserved)
2140		        pages_to_steal = 1;
2141		else {
2142			if (vm_free_magazine_refill_limit <= (vm_page_free_count - vm_page_free_reserved))
2143				pages_to_steal = vm_free_magazine_refill_limit;
2144			else
2145			        pages_to_steal = (vm_page_free_count - vm_page_free_reserved);
2146		}
2147		color = PROCESSOR_DATA(current_processor(), start_color);
2148		head = tail = NULL;
2149
2150		vm_page_free_count -= pages_to_steal;
2151
2152		while (pages_to_steal--) {
2153
2154			while (queue_empty(&vm_page_queue_free[color]))
2155			        color = (color + 1) & vm_color_mask;
2156
2157			queue_remove_first(&vm_page_queue_free[color],
2158					   mem,
2159					   vm_page_t,
2160					   pageq);
2161			mem->pageq.next = NULL;
2162			mem->pageq.prev = NULL;
2163
2164			assert(!mem->active);
2165			assert(!mem->inactive);
2166			assert(!mem->throttled);
2167			assert(!mem->speculative);
2168
2169			color = (color + 1) & vm_color_mask;
2170
2171			if (head == NULL)
2172				head = mem;
2173			else
2174			        tail->pageq.next = (queue_t)mem;
2175		        tail = mem;
2176
2177			assert(mem->listq.next == NULL && mem->listq.prev == NULL);
2178			assert(mem->tabled == FALSE);
2179			assert(mem->object == VM_OBJECT_NULL);
2180			assert(!mem->laundry);
2181			assert(mem->free);
2182			mem->free = FALSE;
2183
2184			assert(pmap_verify_free(mem->phys_page));
2185			assert(mem->busy);
2186			assert(!mem->free);
2187			assert(!mem->encrypted);
2188			assert(!mem->pmapped);
2189			assert(!mem->wpmapped);
2190			assert(!pmap_is_noencrypt(mem->phys_page));
2191		}
2192		lck_mtx_unlock(&vm_page_queue_free_lock);
2193
2194		PROCESSOR_DATA(current_processor(), free_pages) = head->pageq.next;
2195		PROCESSOR_DATA(current_processor(), start_color) = color;
2196
2197		/*
2198		 * satisfy this request
2199		 */
2200	        PROCESSOR_DATA(current_processor(), page_grab_count) += 1;
2201		mem = head;
2202		mem->pageq.next = NULL;
2203
2204		enable_preemption();
2205	}
2206	/*
2207	 *	Decide if we should poke the pageout daemon.
2208	 *	We do this if the free count is less than the low
2209	 *	water mark, or if the free count is less than the high
2210	 *	water mark (but above the low water mark) and the inactive
2211	 *	count is less than its target.
2212	 *
2213	 *	We don't have the counts locked ... if they change a little,
2214	 *	it doesn't really matter.
2215	 */
2216	if ((vm_page_free_count < vm_page_free_min) ||
2217	     ((vm_page_free_count < vm_page_free_target) &&
2218	      ((vm_page_inactive_count + vm_page_speculative_count) < vm_page_inactive_min)))
2219	         thread_wakeup((event_t) &vm_page_free_wanted);
2220
2221	VM_CHECK_MEMORYSTATUS;
2222
2223//	dbgLog(mem->phys_page, vm_page_free_count, vm_page_wire_count, 4);	/* (TEST/DEBUG) */
2224
2225	return mem;
2226}
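
/*
 * Illustrative sketch (not compiled): the canonical way callers handle
 * a VM_PAGE_NULL return from vm_page_grab() is to block in
 * VM_PAGE_WAIT() and retry, mirroring what the comment above describes
 * for non-privileged threads.
 */
#if 0
	vm_page_t	m;

	while ((m = vm_page_grab()) == VM_PAGE_NULL)
		VM_PAGE_WAIT();		/* sleep until the free pool is replenished */

	/* m is busy, unwired and not yet inserted in any object */
#endif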
2227
2228/*
2229 *	vm_page_release:
2230 *
2231 *	Return a page to the free list.
2232 */
2233
2234void
2235vm_page_release(
2236	register vm_page_t	mem)
2237{
2238	unsigned int	color;
2239	int	need_wakeup = 0;
2240	int	need_priv_wakeup = 0;
2241
2242
2243	assert(!mem->private && !mem->fictitious);
2244	if (vm_page_free_verify) {
2245		assert(pmap_verify_free(mem->phys_page));
2246	}
2247//	dbgLog(mem->phys_page, vm_page_free_count, vm_page_wire_count, 5);	/* (TEST/DEBUG) */
2248
2249	pmap_clear_noencrypt(mem->phys_page);
2250
2251	lck_mtx_lock_spin(&vm_page_queue_free_lock);
2252#if DEBUG
2253	if (mem->free)
2254		panic("vm_page_release");
2255#endif
2256
2257	assert(mem->busy);
2258	assert(!mem->laundry);
2259	assert(mem->object == VM_OBJECT_NULL);
2260	assert(mem->pageq.next == NULL &&
2261	       mem->pageq.prev == NULL);
2262	assert(mem->listq.next == NULL &&
2263	       mem->listq.prev == NULL);
2264
2265	if ((mem->lopage == TRUE || vm_lopage_refill == TRUE) &&
2266	    vm_lopage_free_count < vm_lopage_free_limit &&
2267	    mem->phys_page < max_valid_low_ppnum) {
2268	        /*
2269		 * this exists to support hardware controllers
2270		 * incapable of generating DMAs with more than 32 bits
2271		 * of address on platforms with physical memory > 4G...
2272		 */
2273		queue_enter_first(&vm_lopage_queue_free,
2274				  mem,
2275				  vm_page_t,
2276				  pageq);
2277		vm_lopage_free_count++;
2278
2279		if (vm_lopage_free_count >= vm_lopage_free_limit)
2280			vm_lopage_refill = FALSE;
2281
2282		mem->lopage = TRUE;
2283	} else {
2284		mem->lopage = FALSE;
2285		mem->free = TRUE;
2286
2287	        color = mem->phys_page & vm_color_mask;
2288		queue_enter_first(&vm_page_queue_free[color],
2289				  mem,
2290				  vm_page_t,
2291				  pageq);
2292		vm_page_free_count++;
2293		/*
2294		 *	Check if we should wake up someone waiting for page.
2295		 *	But don't bother waking them unless they can allocate.
2296		 *
2297		 *	We wakeup only one thread, to prevent starvation.
2298		 *	Because the scheduling system handles wait queues FIFO,
2299		 *	if we wake up all waiting threads, one greedy thread
2300		 *	can starve multiple well-behaved threads.  When the threads
2301		 *	all wake up, the greedy thread runs first, grabs the page,
2302		 *	and waits for another page.  It will be the first to run
2303		 *	when the next page is freed.
2304		 *
2305		 *	However, there is a slight danger here.
2306		 *	The thread we wake might not use the free page.
2307		 *	Then the other threads could wait indefinitely
2308		 *	while the page goes unused.  To forestall this,
2309		 *	the pageout daemon will keep making free pages
2310		 *	as long as vm_page_free_wanted is non-zero.
2311		 */
2312
2313		assert(vm_page_free_count > 0);
2314		if (vm_page_free_wanted_privileged > 0) {
2315		        vm_page_free_wanted_privileged--;
2316			need_priv_wakeup = 1;
2317		} else if (vm_page_free_wanted > 0 &&
2318			   vm_page_free_count > vm_page_free_reserved) {
2319		        vm_page_free_wanted--;
2320			need_wakeup = 1;
2321		}
2322	}
2323	lck_mtx_unlock(&vm_page_queue_free_lock);
2324
2325	if (need_priv_wakeup)
2326		thread_wakeup_one((event_t) &vm_page_free_wanted_privileged);
2327	else if (need_wakeup)
2328		thread_wakeup_one((event_t) &vm_page_free_count);
2329
2330	VM_CHECK_MEMORYSTATUS;
2331}
2332
2333/*
2334 * This version of vm_page_release() is used only at startup
2335 * when we are single-threaded and pages are being released
2336 * for the first time. Hence, no locking or unnecessary checks are made.
2337 * Note: VM_CHECK_MEMORYSTATUS invoked by the caller.
2338 */
2339void
2340vm_page_release_startup(
2341	register vm_page_t	mem)
2342{
2343	queue_t	queue_free;
2344
2345	if (vm_lopage_free_count < vm_lopage_free_limit &&
2346	    mem->phys_page < max_valid_low_ppnum) {
2347		mem->lopage = TRUE;
2348		vm_lopage_free_count++;
2349		queue_free = &vm_lopage_queue_free;
2350	} else {
2351		mem->lopage = FALSE;
2352		mem->free = TRUE;
2353		vm_page_free_count++;
2354		queue_free = &vm_page_queue_free[mem->phys_page & vm_color_mask];
2355	}
2356	queue_enter_first(queue_free, mem, vm_page_t, pageq);
2357}
2358
2359/*
2360 *	vm_page_wait:
2361 *
2362 *	Wait for a page to become available.
2363 *	If there are plenty of free pages, then we don't sleep.
2364 *
2365 *	Returns:
2366 *		TRUE:  There may be another page, try again
2367 *		FALSE: We were interrupted out of our wait, don't try again
2368 */
2369
2370boolean_t
2371vm_page_wait(
2372	int	interruptible )
2373{
2374	/*
2375	 *	We can't use vm_page_free_reserved to make this
2376	 *	determination.  Consider: some thread might
2377	 *	need to allocate two pages.  The first allocation
2378	 *	succeeds, the second fails.  After the first page is freed,
2379	 *	a call to vm_page_wait must really block.
2380	 */
2381	kern_return_t	wait_result;
2382	int          	need_wakeup = 0;
2383	int		is_privileged = current_thread()->options & TH_OPT_VMPRIV;
2384
2385	lck_mtx_lock_spin(&vm_page_queue_free_lock);
2386
2387	if (is_privileged && vm_page_free_count) {
2388		lck_mtx_unlock(&vm_page_queue_free_lock);
2389		return TRUE;
2390	}
2391	if (vm_page_free_count < vm_page_free_target) {
2392
2393	        if (is_privileged) {
2394		        if (vm_page_free_wanted_privileged++ == 0)
2395			        need_wakeup = 1;
2396			wait_result = assert_wait((event_t)&vm_page_free_wanted_privileged, interruptible);
2397		} else {
2398		        if (vm_page_free_wanted++ == 0)
2399			        need_wakeup = 1;
2400			wait_result = assert_wait((event_t)&vm_page_free_count, interruptible);
2401		}
2402		lck_mtx_unlock(&vm_page_queue_free_lock);
2403		counter(c_vm_page_wait_block++);
2404
2405		if (need_wakeup)
2406			thread_wakeup((event_t)&vm_page_free_wanted);
2407
2408		if (wait_result == THREAD_WAITING) {
2409			VM_DEBUG_EVENT(vm_page_wait_block, VM_PAGE_WAIT_BLOCK, DBG_FUNC_START,
2410				       vm_page_free_wanted_privileged, vm_page_free_wanted, 0, 0);
2411			wait_result = thread_block(THREAD_CONTINUE_NULL);
2412			VM_DEBUG_EVENT(vm_page_wait_block, VM_PAGE_WAIT_BLOCK, DBG_FUNC_END, 0, 0, 0, 0);
2413		}
2414
2415		return(wait_result == THREAD_AWAKENED);
2416	} else {
2417		lck_mtx_unlock(&vm_page_queue_free_lock);
2418		return TRUE;
2419	}
2420}
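
/*
 * Illustrative sketch (not compiled): when the wait is interruptible,
 * a FALSE return must be honored instead of retrying; the error
 * handling shown is a hypothetical example.
 */
#if 0
	vm_page_t	m;

	for (;;) {
		if ((m = vm_page_grab()) != VM_PAGE_NULL)
			break;
		if (!vm_page_wait(THREAD_ABORTSAFE))
			return (KERN_ABORTED);	/* interrupted: don't retry */
	}
#endif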
2421
2422/*
2423 *	vm_page_alloc:
2424 *
2425 *	Allocate and return a memory cell associated
2426 *	with this VM object/offset pair.
2427 *
2428 *	Object must be locked.
2429 */
2430
2431vm_page_t
2432vm_page_alloc(
2433	vm_object_t		object,
2434	vm_object_offset_t	offset)
2435{
2436	register vm_page_t	mem;
2437
2438	vm_object_lock_assert_exclusive(object);
2439	mem = vm_page_grab();
2440	if (mem == VM_PAGE_NULL)
2441		return VM_PAGE_NULL;
2442
2443	vm_page_insert(mem, object, offset);
2444
2445	return(mem);
2446}
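
/*
 * Illustrative sketch (not compiled): vm_page_alloc() requires the
 * object to be locked exclusively across the call; "object" and
 * "offset" stand in for hypothetical caller state.
 */
#if 0
	vm_object_lock(object);

	while ((m = vm_page_alloc(object, offset)) == VM_PAGE_NULL) {
		vm_object_unlock(object);
		VM_PAGE_WAIT();
		vm_object_lock(object);
	}
	/* m is busy and now resident at (object, offset) */
#endif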
2447
2448vm_page_t
2449vm_page_alloclo(
2450	vm_object_t		object,
2451	vm_object_offset_t	offset)
2452{
2453	register vm_page_t	mem;
2454
2455	vm_object_lock_assert_exclusive(object);
2456	mem = vm_page_grablo();
2457	if (mem == VM_PAGE_NULL)
2458		return VM_PAGE_NULL;
2459
2460	vm_page_insert(mem, object, offset);
2461
2462	return(mem);
2463}
2464
2465
2466/*
2467 *	vm_page_alloc_guard:
2468 *
2469 * 	Allocate a fictitious page which will be used
2470 *	as a guard page.  The page will be inserted into
2471 *	the object and returned to the caller.
2472 */
2473
2474vm_page_t
2475vm_page_alloc_guard(
2476	vm_object_t		object,
2477	vm_object_offset_t	offset)
2478{
2479	register vm_page_t	mem;
2480
2481	vm_object_lock_assert_exclusive(object);
2482	mem = vm_page_grab_guard();
2483	if (mem == VM_PAGE_NULL)
2484		return VM_PAGE_NULL;
2485
2486	vm_page_insert(mem, object, offset);
2487
2488	return(mem);
2489}
2490
2491
2492counter(unsigned int c_laundry_pages_freed = 0;)
2493
2494/*
2495 *	vm_page_free_prepare:
2496 *
2497 *	Removes page from any queue it may be on
2498 *	and disassociates it from its VM object.
2499 *
2500 *	Object and page queues must be locked prior to entry.
2501 */
2502static void
2503vm_page_free_prepare(
2504	vm_page_t	mem)
2505{
2506	vm_page_free_prepare_queues(mem);
2507	vm_page_free_prepare_object(mem, TRUE);
2508}
2509
2510
2511void
2512vm_page_free_prepare_queues(
2513	vm_page_t	mem)
2514{
2515	VM_PAGE_CHECK(mem);
2516	assert(!mem->free);
2517	assert(!mem->cleaning);
2518
2519#if MACH_ASSERT || DEBUG
2520	lck_mtx_assert(&vm_page_queue_lock, LCK_MTX_ASSERT_OWNED);
2521	if (mem->free)
2522		panic("vm_page_free: freeing page on free list\n");
2523#endif /* MACH_ASSERT || DEBUG */
2524	if (mem->object) {
2525		vm_object_lock_assert_exclusive(mem->object);
2526	}
2527	if (mem->laundry) {
2528		/*
2529		 * We may have to free a page while it's being laundered
2530		 * if we lost its pager (due to a forced unmount, for example).
2531		 * We need to call vm_pageout_steal_laundry() before removing
2532		 * the page from its VM object, so that we can remove it
2533		 * from its pageout queue and adjust the laundry accounting
2534		 */
2535		vm_pageout_steal_laundry(mem, TRUE);
2536		counter(++c_laundry_pages_freed);
2537	}
2538
2539	VM_PAGE_QUEUES_REMOVE(mem);	/* clears local/active/inactive/throttled/speculative */
2540
2541	if (VM_PAGE_WIRED(mem)) {
2542		if (mem->object) {
2543			assert(mem->object->wired_page_count > 0);
2544			mem->object->wired_page_count--;
2545			assert(mem->object->resident_page_count >=
2546			       mem->object->wired_page_count);
2547
2548			if (mem->object->purgable == VM_PURGABLE_VOLATILE) {
2549				OSAddAtomic(+1, &vm_page_purgeable_count);
2550				assert(vm_page_purgeable_wired_count > 0);
2551				OSAddAtomic(-1, &vm_page_purgeable_wired_count);
2552			}
2553			if ((mem->object->purgable == VM_PURGABLE_VOLATILE ||
2554			     mem->object->purgable == VM_PURGABLE_EMPTY) &&
2555			    mem->object->vo_purgeable_owner != TASK_NULL) {
2556				task_t owner;
2557
2558				owner = mem->object->vo_purgeable_owner;
2559				/*
2560				 * While wired, this page was accounted
2561				 * as "non-volatile" but it should now
2562				 * be accounted as "volatile".
2563				 */
2564				/* one less "non-volatile"... */
2565				ledger_debit(owner->ledger,
2566					     task_ledgers.purgeable_nonvolatile,
2567					     PAGE_SIZE);
2568				/* ... and "phys_footprint" */
2569				ledger_debit(owner->ledger,
2570					     task_ledgers.phys_footprint,
2571					     PAGE_SIZE);
2572				/* one more "volatile" */
2573				ledger_credit(owner->ledger,
2574					      task_ledgers.purgeable_volatile,
2575					      PAGE_SIZE);
2576			}
2577		}
2578		if (!mem->private && !mem->fictitious)
2579			vm_page_wire_count--;
2580		mem->wire_count = 0;
2581		assert(!mem->gobbled);
2582	} else if (mem->gobbled) {
2583		if (!mem->private && !mem->fictitious)
2584			vm_page_wire_count--;
2585		vm_page_gobble_count--;
2586	}
2587}
2588
2589
2590void
2591vm_page_free_prepare_object(
2592	vm_page_t	mem,
2593	boolean_t	remove_from_hash)
2594{
2595	if (mem->tabled)
2596		vm_page_remove(mem, remove_from_hash);	/* clears tabled, object, offset */
2597
2598	PAGE_WAKEUP(mem);		/* clears wanted */
2599
2600	if (mem->private) {
2601		mem->private = FALSE;
2602		mem->fictitious = TRUE;
2603		mem->phys_page = vm_page_fictitious_addr;
2604	}
2605	if ( !mem->fictitious) {
2606		vm_page_init(mem, mem->phys_page, mem->lopage);
2607	}
2608}
2609
2610
2611/*
2612 *	vm_page_free:
2613 *
2614 *	Returns the given page to the free list,
2615 *	disassociating it with any VM object.
2616 *
2617 *	Object and page queues must be locked prior to entry.
2618 */
2619void
2620vm_page_free(
2621	vm_page_t	mem)
2622{
2623	vm_page_free_prepare(mem);
2624
2625	if (mem->fictitious) {
2626		vm_page_release_fictitious(mem);
2627	} else {
2628		vm_page_release(mem);
2629	}
2630}
2631
2632
2633void
2634vm_page_free_unlocked(
2635	vm_page_t	mem,
2636	boolean_t	remove_from_hash)
2637{
2638	vm_page_lockspin_queues();
2639	vm_page_free_prepare_queues(mem);
2640	vm_page_unlock_queues();
2641
2642	vm_page_free_prepare_object(mem, remove_from_hash);
2643
2644	if (mem->fictitious) {
2645		vm_page_release_fictitious(mem);
2646	} else {
2647		vm_page_release(mem);
2648	}
2649}
2650
2651
2652/*
2653 * Free a list of pages.  The list can be up to several hundred pages,
2654 * as batched up by vm_pageout_scan().
2655 * The big win is not having to take the free list lock once
2656 * per page.
2657 */
2658void
2659vm_page_free_list(
2660	vm_page_t	freeq,
2661	boolean_t	prepare_object)
2662{
2663        vm_page_t	mem;
2664        vm_page_t	nxt;
2665	vm_page_t	local_freeq;
2666	int		pg_count;
2667
2668	while (freeq) {
2669
2670		pg_count = 0;
2671		local_freeq = VM_PAGE_NULL;
2672		mem = freeq;
2673
2674		/*
2675		 * break up the processing into smaller chunks so
2676		 * that we can 'pipeline' the pages onto the
2677		 * free list w/o introducing too much
2678		 * contention on the global free queue lock
2679		 */
2680		while (mem && pg_count < 64) {
2681
2682			assert(!mem->inactive);
2683			assert(!mem->active);
2684			assert(!mem->throttled);
2685			assert(!mem->free);
2686			assert(!mem->speculative);
2687			assert(!VM_PAGE_WIRED(mem));
2688			assert(mem->pageq.prev == NULL);
2689
2690			nxt = (vm_page_t)(mem->pageq.next);
2691
2692			if (vm_page_free_verify && !mem->fictitious && !mem->private) {
2693				assert(pmap_verify_free(mem->phys_page));
2694			}
2695			if (prepare_object == TRUE)
2696				vm_page_free_prepare_object(mem, TRUE);
2697
2698			if (!mem->fictitious) {
2699				assert(mem->busy);
2700
2701				if ((mem->lopage == TRUE || vm_lopage_refill == TRUE) &&
2702				    vm_lopage_free_count < vm_lopage_free_limit &&
2703				    mem->phys_page < max_valid_low_ppnum) {
2704					mem->pageq.next = NULL;
2705					vm_page_release(mem);
2706				} else {
2707					/*
2708					 * IMPORTANT: we can't set the page "free" here
2709					 * because that would make the page eligible for
2710					 * a physically-contiguous allocation (see
2711					 * vm_page_find_contiguous()) right away (we don't
2712					 * hold the vm_page_queue_free lock).  That would
2713					 * cause trouble because the page is not actually
2714					 * in the free queue yet...
2715					 */
2716					mem->pageq.next = (queue_entry_t)local_freeq;
2717					local_freeq = mem;
2718					pg_count++;
2719
2720					pmap_clear_noencrypt(mem->phys_page);
2721				}
2722			} else {
2723				assert(mem->phys_page == vm_page_fictitious_addr ||
2724				       mem->phys_page == vm_page_guard_addr);
2725				vm_page_release_fictitious(mem);
2726			}
2727			mem = nxt;
2728		}
2729		freeq = mem;
2730
2731		if ( (mem = local_freeq) ) {
2732			unsigned int	avail_free_count;
2733			unsigned int	need_wakeup = 0;
2734			unsigned int	need_priv_wakeup = 0;
2735
2736			lck_mtx_lock_spin(&vm_page_queue_free_lock);
2737
2738			while (mem) {
2739				int	color;
2740
2741				nxt = (vm_page_t)(mem->pageq.next);
2742
2743				assert(!mem->free);
2744				assert(mem->busy);
2745				mem->free = TRUE;
2746
2747				color = mem->phys_page & vm_color_mask;
2748				queue_enter_first(&vm_page_queue_free[color],
2749						  mem,
2750						  vm_page_t,
2751						  pageq);
2752				mem = nxt;
2753			}
2754			vm_page_free_count += pg_count;
2755			avail_free_count = vm_page_free_count;
2756
2757			if (vm_page_free_wanted_privileged > 0 && avail_free_count > 0) {
2758
2759				if (avail_free_count < vm_page_free_wanted_privileged) {
2760					need_priv_wakeup = avail_free_count;
2761					vm_page_free_wanted_privileged -= avail_free_count;
2762					avail_free_count = 0;
2763				} else {
2764					need_priv_wakeup = vm_page_free_wanted_privileged;
2765					avail_free_count -= vm_page_free_wanted_privileged;
2766					vm_page_free_wanted_privileged = 0;
2767				}
2768			}
2769			if (vm_page_free_wanted > 0 && avail_free_count > vm_page_free_reserved) {
2770				unsigned int  available_pages;
2771
2772				available_pages = avail_free_count - vm_page_free_reserved;
2773
2774				if (available_pages >= vm_page_free_wanted) {
2775					need_wakeup = vm_page_free_wanted;
2776					vm_page_free_wanted = 0;
2777				} else {
2778					need_wakeup = available_pages;
2779					vm_page_free_wanted -= available_pages;
2780				}
2781			}
2782			lck_mtx_unlock(&vm_page_queue_free_lock);
2783
2784			if (need_priv_wakeup != 0) {
2785				/*
2786				 * There shouldn't be that many VM-privileged threads,
2787				 * so let's wake them all up, even if we don't quite
2788				 * have enough pages to satisfy them all.
2789				 */
2790				thread_wakeup((event_t)&vm_page_free_wanted_privileged);
2791			}
2792			if (need_wakeup != 0 && vm_page_free_wanted == 0) {
2793				/*
2794				 * We don't expect to have any more waiters
2795				 * after this, so let's wake them all up at
2796				 * once.
2797				 */
2798				thread_wakeup((event_t) &vm_page_free_count);
2799			} else for (; need_wakeup != 0; need_wakeup--) {
2800				/*
2801				 * Wake up one waiter per page we just released.
2802				 */
2803				thread_wakeup_one((event_t) &vm_page_free_count);
2804			}
2805
2806			VM_CHECK_MEMORYSTATUS;
2807		}
2808	}
2809}
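
/*
 * Illustrative sketch (not compiled): callers build a singly linked
 * batch through pageq.next and hand the whole batch to
 * vm_page_free_list(), paying for the free-list lock once per batch
 * rather than once per page.  Each page must already be busy, unwired
 * and off the paging queues, as the asserts above require.
 */
#if 0
	vm_page_t	local_freeq = VM_PAGE_NULL;

	/* for each page being released: */
	mem->pageq.next = (queue_entry_t) local_freeq;
	local_freeq = mem;

	/* once the batch is complete: */
	if (local_freeq != VM_PAGE_NULL)
		vm_page_free_list(local_freeq, TRUE);
#endif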
2810
2811
2812/*
2813 *	vm_page_wire:
2814 *
2815 *	Mark this page as wired down by yet
2816 *	another map, removing it from paging queues
2817 *	as necessary.
2818 *
2819 *	The page's object and the page queues must be locked.
2820 */
2821void
2822vm_page_wire(
2823	register vm_page_t	mem)
2824{
2825
2826//	dbgLog(current_thread(), mem->offset, mem->object, 1);	/* (TEST/DEBUG) */
2827
2828	VM_PAGE_CHECK(mem);
2829	if (mem->object) {
2830		vm_object_lock_assert_exclusive(mem->object);
2831	} else {
2832		/*
2833		 * In theory, the page should be in an object before it
2834		 * gets wired, since we need to hold the object lock
2835		 * to update some fields in the page structure.
2836		 * However, some code (i386 pmap, for example) might want
2837		 * to wire a page before it gets inserted into an object.
2838		 * That's somewhat OK, as long as nobody else can get to
2839		 * that page and update it at the same time.
2840		 */
2841	}
2842#if DEBUG
2843	lck_mtx_assert(&vm_page_queue_lock, LCK_MTX_ASSERT_OWNED);
2844#endif
2845	if ( !VM_PAGE_WIRED(mem)) {
2846
2847		if (mem->pageout_queue) {
2848			mem->pageout = FALSE;
2849			vm_pageout_throttle_up(mem);
2850		}
2851		VM_PAGE_QUEUES_REMOVE(mem);
2852
2853		if (mem->object) {
2854			mem->object->wired_page_count++;
2855			assert(mem->object->resident_page_count >=
2856			       mem->object->wired_page_count);
2857			if (mem->object->purgable == VM_PURGABLE_VOLATILE) {
2858				assert(vm_page_purgeable_count > 0);
2859				OSAddAtomic(-1, &vm_page_purgeable_count);
2860				OSAddAtomic(1, &vm_page_purgeable_wired_count);
2861			}
2862			if ((mem->object->purgable == VM_PURGABLE_VOLATILE ||
2863			     mem->object->purgable == VM_PURGABLE_EMPTY) &&
2864			    mem->object->vo_purgeable_owner != TASK_NULL) {
2865				task_t owner;
2866
2867				owner = mem->object->vo_purgeable_owner;
2868				/* less volatile bytes */
2869				ledger_debit(owner->ledger,
2870					     task_ledgers.purgeable_volatile,
2871					     PAGE_SIZE);
2872				/* more not-quite-volatile bytes */
2873				ledger_credit(owner->ledger,
2874					      task_ledgers.purgeable_nonvolatile,
2875					      PAGE_SIZE);
2876				/* more footprint */
2877				ledger_credit(owner->ledger,
2878					      task_ledgers.phys_footprint,
2879					      PAGE_SIZE);
2880			}
2881			if (mem->object->all_reusable) {
2882				/*
2883				 * Wired pages are not counted as "re-usable"
2884				 * in "all_reusable" VM objects, so nothing
2885				 * to do here.
2886				 */
2887			} else if (mem->reusable) {
2888				/*
2889				 * This page is not "re-usable" when it's
2890				 * wired, so adjust its state and the
2891				 * accounting.
2892				 */
2893				vm_object_reuse_pages(mem->object,
2894						      mem->offset,
2895						      mem->offset+PAGE_SIZE_64,
2896						      FALSE);
2897			}
2898		}
2899		assert(!mem->reusable);
2900
2901		if (!mem->private && !mem->fictitious && !mem->gobbled)
2902			vm_page_wire_count++;
2903		if (mem->gobbled)
2904			vm_page_gobble_count--;
2905		mem->gobbled = FALSE;
2906
2907		VM_CHECK_MEMORYSTATUS;
2908
2909		/*
2910		 * ENCRYPTED SWAP:
2911		 * The page could be encrypted, but
2912		 * we don't have to decrypt it here
2913		 * because we don't guarantee that the
2914		 * data is actually valid at this point.
2915		 * The page will get decrypted in
2916		 * vm_fault_wire() if needed.
2917		 */
2918	}
2919	assert(!mem->gobbled);
2920	mem->wire_count++;
2921	VM_PAGE_CHECK(mem);
2922}
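
/*
 * Illustrative sketch (not compiled): wire and unwire calls must be
 * balanced, and both require the page's object lock and the page
 * queues lock, as the comments above and below state.
 */
#if 0
	vm_object_lock(object);
	vm_page_lockspin_queues();

	vm_page_wire(m);		/* removes m from the paging queues */
	/* ... m cannot be paged out while its wire_count is non-zero ... */
	vm_page_unwire(m, TRUE);	/* last unwire puts m back on a paging queue */

	vm_page_unlock_queues();
	vm_object_unlock(object);
#endif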
2923
2924/*
2925 *      vm_page_gobble:
2926 *
2927 *      Mark this page as consumed by the vm/ipc/xmm subsystems.
2928 *
2929 *      Called only for freshly vm_page_grab()ed pages - w/ nothing locked.
2930 */
2931void
2932vm_page_gobble(
2933        register vm_page_t      mem)
2934{
2935        vm_page_lockspin_queues();
2936        VM_PAGE_CHECK(mem);
2937
2938	assert(!mem->gobbled);
2939	assert( !VM_PAGE_WIRED(mem));
2940
2941        if (!mem->gobbled && !VM_PAGE_WIRED(mem)) {
2942                if (!mem->private && !mem->fictitious)
2943                        vm_page_wire_count++;
2944        }
2945	vm_page_gobble_count++;
2946        mem->gobbled = TRUE;
2947        vm_page_unlock_queues();
2948}
2949
2950/*
2951 *	vm_page_unwire:
2952 *
2953 *	Release one wiring of this page, potentially
2954 *	enabling it to be paged again.
2955 *
2956 *	The page's object and the page queues must be locked.
2957 */
2958void
2959vm_page_unwire(
2960	vm_page_t	mem,
2961	boolean_t	queueit)
2962{
2963
2964//	dbgLog(current_thread(), mem->offset, mem->object, 0);	/* (TEST/DEBUG) */
2965
2966	VM_PAGE_CHECK(mem);
2967	assert(VM_PAGE_WIRED(mem));
2968	assert(mem->object != VM_OBJECT_NULL);
2969#if DEBUG
2970	vm_object_lock_assert_exclusive(mem->object);
2971	lck_mtx_assert(&vm_page_queue_lock, LCK_MTX_ASSERT_OWNED);
2972#endif
2973	if (--mem->wire_count == 0) {
2974		assert(!mem->private && !mem->fictitious);
2975		vm_page_wire_count--;
2976		assert(mem->object->wired_page_count > 0);
2977		mem->object->wired_page_count--;
2978		assert(mem->object->resident_page_count >=
2979		       mem->object->wired_page_count);
2980		if (mem->object->purgable == VM_PURGABLE_VOLATILE) {
2981			OSAddAtomic(+1, &vm_page_purgeable_count);
2982			assert(vm_page_purgeable_wired_count > 0);
2983			OSAddAtomic(-1, &vm_page_purgeable_wired_count);
2984		}
2985		if ((mem->object->purgable == VM_PURGABLE_VOLATILE ||
2986		     mem->object->purgable == VM_PURGABLE_EMPTY) &&
2987		    mem->object->vo_purgeable_owner != TASK_NULL) {
2988			task_t owner;
2989
2990			owner = mem->object->vo_purgeable_owner;
2991			/* more volatile bytes */
2992			ledger_credit(owner->ledger,
2993				      task_ledgers.purgeable_volatile,
2994				      PAGE_SIZE);
2995			/* less not-quite-volatile bytes */
2996			ledger_debit(owner->ledger,
2997				     task_ledgers.purgeable_nonvolatile,
2998				     PAGE_SIZE);
2999			/* less footprint */
3000			ledger_debit(owner->ledger,
3001				     task_ledgers.phys_footprint,
3002				     PAGE_SIZE);
3003		}
3004		assert(mem->object != kernel_object);
3005		assert(mem->pageq.next == NULL && mem->pageq.prev == NULL);
3006
3007		if (queueit == TRUE) {
3008			if (mem->object->purgable == VM_PURGABLE_EMPTY) {
3009				vm_page_deactivate(mem);
3010			} else {
3011				vm_page_activate(mem);
3012			}
3013		}
3014
3015		VM_CHECK_MEMORYSTATUS;
3016
3017	}
3018	VM_PAGE_CHECK(mem);
3019}
3020
3021/*
3022 *	vm_page_deactivate:
3023 *
3024 *	Returns the given page to the inactive list,
3025 *	indicating that no physical maps have access
3026 *	to this page.  [Used by the physical mapping system.]
3027 *
3028 *	The page queues must be locked.
3029 */
3030void
3031vm_page_deactivate(
3032	vm_page_t	m)
3033{
3034	vm_page_deactivate_internal(m, TRUE);
3035}
3036
3037
3038void
3039vm_page_deactivate_internal(
3040	vm_page_t	m,
3041	boolean_t	clear_hw_reference)
3042{
3043
3044	VM_PAGE_CHECK(m);
3045	assert(m->object != kernel_object);
3046	assert(m->phys_page != vm_page_guard_addr);
3047
3048//	dbgLog(m->phys_page, vm_page_free_count, vm_page_wire_count, 6);	/* (TEST/DEBUG) */
3049#if DEBUG
3050	lck_mtx_assert(&vm_page_queue_lock, LCK_MTX_ASSERT_OWNED);
3051#endif
3052	/*
3053	 *	This page is no longer very interesting.  If it was
3054	 *	interesting (active or inactive/referenced), then we
3055	 *	clear the reference bit and (re)enter it in the
3056	 *	inactive queue.  Note wired pages should not have
3057	 *	their reference bit cleared.
3058	 */
3059	assert ( !(m->absent && !m->unusual));
3060
3061	if (m->gobbled) {		/* can this happen? */
3062		assert( !VM_PAGE_WIRED(m));
3063
3064		if (!m->private && !m->fictitious)
3065			vm_page_wire_count--;
3066		vm_page_gobble_count--;
3067		m->gobbled = FALSE;
3068	}
3069	/*
3070	 * if this page is currently on the pageout queue, we can't do the
3071	 * VM_PAGE_QUEUES_REMOVE (which doesn't handle the pageout queue case)
3072	 * and we can't remove it manually since we would need the object lock
3073	 * (which is not required here) to decrement the activity_in_progress
3074	 * reference which is held on the object while the page is in the pageout queue...
3075	 * just let the normal laundry processing proceed
3076	 */
3077	if (m->laundry || m->pageout_queue || m->private || m->fictitious || m->compressor || (VM_PAGE_WIRED(m)))
3078		return;
3079
3080	if (!m->absent && clear_hw_reference == TRUE)
3081		pmap_clear_reference(m->phys_page);
3082
3083	m->reference = FALSE;
3084	m->no_cache = FALSE;
3085
3086	if (!m->inactive) {
3087		VM_PAGE_QUEUES_REMOVE(m);
3088
3089		if (!VM_DYNAMIC_PAGING_ENABLED(memory_manager_default) &&
3090		    m->dirty && m->object->internal &&
3091		    (m->object->purgable == VM_PURGABLE_DENY ||
3092		     m->object->purgable == VM_PURGABLE_NONVOLATILE ||
3093		     m->object->purgable == VM_PURGABLE_VOLATILE)) {
3094			queue_enter(&vm_page_queue_throttled, m, vm_page_t, pageq);
3095			m->throttled = TRUE;
3096			vm_page_throttled_count++;
3097		} else {
3098			if (m->object->named && m->object->ref_count == 1) {
3099			        vm_page_speculate(m, FALSE);
3100#if DEVELOPMENT || DEBUG
3101				vm_page_speculative_recreated++;
3102#endif
3103			} else {
3104				VM_PAGE_ENQUEUE_INACTIVE(m, FALSE);
3105			}
3106		}
3107	}
3108}
3109
3110/*
3111 * vm_page_enqueue_cleaned
3112 *
3113 * Put the page on the cleaned queue, mark it cleaned, etc.
3114 * Being on the cleaned queue (and having m->clean_queue set)
3115 * does ** NOT ** guarantee that the page is clean!
3116 *
3117 * Call with the queues lock held.
3118 */
3119
3120void vm_page_enqueue_cleaned(vm_page_t m)
3121{
3122	assert(m->phys_page != vm_page_guard_addr);
3123#if DEBUG
3124	lck_mtx_assert(&vm_page_queue_lock, LCK_MTX_ASSERT_OWNED);
3125#endif
3126	assert( !(m->absent && !m->unusual));
3127
3128	if (m->gobbled) {
3129		assert( !VM_PAGE_WIRED(m));
3130		if (!m->private && !m->fictitious)
3131			vm_page_wire_count--;
3132		vm_page_gobble_count--;
3133		m->gobbled = FALSE;
3134	}
3135	/*
3136	 * if this page is currently on the pageout queue, we can't do the
3137	 * VM_PAGE_QUEUES_REMOVE (which doesn't handle the pageout queue case)
3138	 * and we can't remove it manually since we would need the object lock
3139	 * (which is not required here) to decrement the activity_in_progress
3140	 * reference which is held on the object while the page is in the pageout queue...
3141	 * just let the normal laundry processing proceed
3142	 */
3143	if (m->laundry || m->clean_queue || m->pageout_queue || m->private || m->fictitious)
3144		return;
3145
3146	VM_PAGE_QUEUES_REMOVE(m);
3147
3148	queue_enter(&vm_page_queue_cleaned, m, vm_page_t, pageq);
3149	m->clean_queue = TRUE;
3150	vm_page_cleaned_count++;
3151
3152	m->inactive = TRUE;
3153	vm_page_inactive_count++;
3154	if (m->object->internal) {
3155		vm_page_pageable_internal_count++;
3156	} else {
3157		vm_page_pageable_external_count++;
3158	}
3159
3160	vm_pageout_enqueued_cleaned++;
3161}
3162
3163/*
3164 *	vm_page_activate:
3165 *
3166 *	Put the specified page on the active list (if appropriate).
3167 *
3168 *	The page queues must be locked.
3169 */
3170
3171void
3172vm_page_activate(
3173	register vm_page_t	m)
3174{
3175	VM_PAGE_CHECK(m);
3176#ifdef	FIXME_4778297
3177	assert(m->object != kernel_object);
3178#endif
3179	assert(m->phys_page != vm_page_guard_addr);
3180#if DEBUG
3181	lck_mtx_assert(&vm_page_queue_lock, LCK_MTX_ASSERT_OWNED);
3182#endif
3183	assert( !(m->absent && !m->unusual));
3184
3185	if (m->gobbled) {
3186		assert( !VM_PAGE_WIRED(m));
3187		if (!m->private && !m->fictitious)
3188			vm_page_wire_count--;
3189		vm_page_gobble_count--;
3190		m->gobbled = FALSE;
3191	}
3192	/*
3193	 * if this page is currently on the pageout queue, we can't do the
3194	 * VM_PAGE_QUEUES_REMOVE (which doesn't handle the pageout queue case)
3195	 * and we can't remove it manually since we would need the object lock
3196	 * (which is not required here) to decrement the activity_in_progress
3197	 * reference which is held on the object while the page is in the pageout queue...
3198	 * just let the normal laundry processing proceed
3199	 */
3200	if (m->laundry || m->pageout_queue || m->private || m->fictitious || m->compressor)
3201		return;
3202
3203#if DEBUG
3204	if (m->active)
3205	        panic("vm_page_activate: already active");
3206#endif
3207
3208	if (m->speculative) {
3209		DTRACE_VM2(pgrec, int, 1, (uint64_t *), NULL);
3210		DTRACE_VM2(pgfrec, int, 1, (uint64_t *), NULL);
3211	}
3212
3213	VM_PAGE_QUEUES_REMOVE(m);
3214
3215	if ( !VM_PAGE_WIRED(m)) {
3216
3217		if (!VM_DYNAMIC_PAGING_ENABLED(memory_manager_default) &&
3218		    m->dirty && m->object->internal &&
3219		    (m->object->purgable == VM_PURGABLE_DENY ||
3220		     m->object->purgable == VM_PURGABLE_NONVOLATILE ||
3221		     m->object->purgable == VM_PURGABLE_VOLATILE)) {
3222			queue_enter(&vm_page_queue_throttled, m, vm_page_t, pageq);
3223			m->throttled = TRUE;
3224			vm_page_throttled_count++;
3225		} else {
3226			queue_enter(&vm_page_queue_active, m, vm_page_t, pageq);
3227			m->active = TRUE;
3228			vm_page_active_count++;
3229			if (m->object->internal) {
3230				vm_page_pageable_internal_count++;
3231			} else {
3232				vm_page_pageable_external_count++;
3233			}
3234		}
3235		m->reference = TRUE;
3236		m->no_cache = FALSE;
3237	}
3238	VM_PAGE_CHECK(m);
3239}
3240
3241
3242/*
3243 *      vm_page_speculate:
3244 *
3245 *      Put the specified page on the speculative list (if appropriate).
3246 *
3247 *      The page queues must be locked.
3248 */
3249void
3250vm_page_speculate(
3251	vm_page_t	m,
3252	boolean_t	new)
3253{
3254        struct vm_speculative_age_q	*aq;
3255
3256	VM_PAGE_CHECK(m);
3257	assert(m->object != kernel_object);
3258	assert(m->phys_page != vm_page_guard_addr);
3259#if DEBUG
3260	lck_mtx_assert(&vm_page_queue_lock, LCK_MTX_ASSERT_OWNED);
3261#endif
3262	assert( !(m->absent && !m->unusual));
3263
3264	/*
3265	 * if this page is currently on the pageout queue, we can't do the
3266	 * VM_PAGE_QUEUES_REMOVE (which doesn't handle the pageout queue case)
3267	 * and we can't remove it manually since we would need the object lock
3268	 * (which is not required here) to decrement the activity_in_progress
3269	 * reference which is held on the object while the page is in the pageout queue...
3270	 * just let the normal laundry processing proceed
3271	 */
3272	if (m->laundry || m->pageout_queue || m->private || m->fictitious || m->compressor)
3273		return;
3274
3275	VM_PAGE_QUEUES_REMOVE(m);
3276
3277	if ( !VM_PAGE_WIRED(m)) {
3278	        mach_timespec_t		ts;
3279		clock_sec_t sec;
3280		clock_nsec_t nsec;
3281
3282	        clock_get_system_nanotime(&sec, &nsec);
3283		ts.tv_sec = (unsigned int) sec;
3284		ts.tv_nsec = nsec;
3285
3286		if (vm_page_speculative_count == 0) {
3287
3288			speculative_age_index = VM_PAGE_MIN_SPECULATIVE_AGE_Q;
3289			speculative_steal_index = VM_PAGE_MIN_SPECULATIVE_AGE_Q;
3290
3291			aq = &vm_page_queue_speculative[speculative_age_index];
3292
3293		        /*
3294			 * set the timer to begin a new group
3295			 */
3296			aq->age_ts.tv_sec = vm_page_speculative_q_age_ms / 1000;
3297			aq->age_ts.tv_nsec = (vm_page_speculative_q_age_ms % 1000) * 1000 * NSEC_PER_USEC;
3298
3299			ADD_MACH_TIMESPEC(&aq->age_ts, &ts);
3300		} else {
3301			aq = &vm_page_queue_speculative[speculative_age_index];
3302
3303			if (CMP_MACH_TIMESPEC(&ts, &aq->age_ts) >= 0) {
3304
3305			        speculative_age_index++;
3306
3307				if (speculative_age_index > VM_PAGE_MAX_SPECULATIVE_AGE_Q)
3308				        speculative_age_index = VM_PAGE_MIN_SPECULATIVE_AGE_Q;
3309				if (speculative_age_index == speculative_steal_index) {
3310				        speculative_steal_index = speculative_age_index + 1;
3311
3312					if (speculative_steal_index > VM_PAGE_MAX_SPECULATIVE_AGE_Q)
3313					        speculative_steal_index = VM_PAGE_MIN_SPECULATIVE_AGE_Q;
3314				}
3315				aq = &vm_page_queue_speculative[speculative_age_index];
3316
3317				if (!queue_empty(&aq->age_q))
3318				        vm_page_speculate_ageit(aq);
3319
3320				aq->age_ts.tv_sec = vm_page_speculative_q_age_ms / 1000;
3321				aq->age_ts.tv_nsec = (vm_page_speculative_q_age_ms % 1000) * 1000 * NSEC_PER_USEC;
3322
3323				ADD_MACH_TIMESPEC(&aq->age_ts, &ts);
3324			}
3325		}
3326		enqueue_tail(&aq->age_q, &m->pageq);
3327		m->speculative = TRUE;
3328		vm_page_speculative_count++;
3329		if (m->object->internal) {
3330			vm_page_pageable_internal_count++;
3331		} else {
3332			vm_page_pageable_external_count++;
3333		}
3334
3335		if (new == TRUE) {
3336			vm_object_lock_assert_exclusive(m->object);
3337
3338		        m->object->pages_created++;
3339#if DEVELOPMENT || DEBUG
3340			vm_page_speculative_created++;
3341#endif
3342		}
3343	}
3344	VM_PAGE_CHECK(m);
3345}
3346
3347
3348/*
3349 * move pages from the specified aging bin to
3350 * the speculative bin that pageout_scan claims from
3351 *
3352 *      The page queues must be locked.
3353 */
3354void
3355vm_page_speculate_ageit(struct vm_speculative_age_q *aq)
3356{
3357        struct vm_speculative_age_q	*sq;
3358	vm_page_t	t;
3359
3360	sq = &vm_page_queue_speculative[VM_PAGE_SPECULATIVE_AGED_Q];
3361
3362	if (queue_empty(&sq->age_q)) {
3363	        sq->age_q.next = aq->age_q.next;
3364		sq->age_q.prev = aq->age_q.prev;
3365
3366		t = (vm_page_t)sq->age_q.next;
3367		t->pageq.prev = &sq->age_q;
3368
3369		t = (vm_page_t)sq->age_q.prev;
3370		t->pageq.next = &sq->age_q;
3371	} else {
3372	        t = (vm_page_t)sq->age_q.prev;
3373		t->pageq.next = aq->age_q.next;
3374
3375		t = (vm_page_t)aq->age_q.next;
3376		t->pageq.prev = sq->age_q.prev;
3377
3378		t = (vm_page_t)aq->age_q.prev;
3379		t->pageq.next = &sq->age_q;
3380
3381		sq->age_q.prev = aq->age_q.prev;
3382	}
3383	queue_init(&aq->age_q);
3384}
3385
3386
3387void
3388vm_page_lru(
3389	vm_page_t	m)
3390{
3391	VM_PAGE_CHECK(m);
3392	assert(m->object != kernel_object);
3393	assert(m->phys_page != vm_page_guard_addr);
3394
3395#if DEBUG
3396	lck_mtx_assert(&vm_page_queue_lock, LCK_MTX_ASSERT_OWNED);
3397#endif
3398	/*
3399	 * if this page is currently on the pageout queue, we can't do the
3400	 * VM_PAGE_QUEUES_REMOVE (which doesn't handle the pageout queue case)
3401	 * and we can't remove it manually since we would need the object lock
3402	 * (which is not required here) to decrement the activity_in_progress
3403	 * reference which is held on the object while the page is in the pageout queue...
3404	 * just let the normal laundry processing proceed
3405	 */
3406	if (m->laundry || m->pageout_queue || m->private || m->compressor || (VM_PAGE_WIRED(m)))
3407		return;
3408
3409	m->no_cache = FALSE;
3410
3411	VM_PAGE_QUEUES_REMOVE(m);
3412
3413	VM_PAGE_ENQUEUE_INACTIVE(m, FALSE);
3414}
3415
3416
3417void
3418vm_page_reactivate_all_throttled(void)
3419{
3420	vm_page_t	first_throttled, last_throttled;
3421	vm_page_t	first_active;
3422	vm_page_t	m;
3423	int		extra_active_count;
3424	int		extra_internal_count, extra_external_count;
3425
3426	if (!VM_DYNAMIC_PAGING_ENABLED(memory_manager_default))
3427		return;
3428
3429	extra_active_count = 0;
3430	extra_internal_count = 0;
3431	extra_external_count = 0;
3432	vm_page_lock_queues();
3433	if (! queue_empty(&vm_page_queue_throttled)) {
3434		/*
3435		 * Switch "throttled" pages to "active".
3436		 */
3437		queue_iterate(&vm_page_queue_throttled, m, vm_page_t, pageq) {
3438			VM_PAGE_CHECK(m);
3439			assert(m->throttled);
3440			assert(!m->active);
3441			assert(!m->inactive);
3442			assert(!m->speculative);
3443			assert(!VM_PAGE_WIRED(m));
3444
3445			extra_active_count++;
3446			if (m->object->internal) {
3447				extra_internal_count++;
3448			} else {
3449				extra_external_count++;
3450			}
3451
3452			m->throttled = FALSE;
3453			m->active = TRUE;
3454			VM_PAGE_CHECK(m);
3455		}
3456
3457		/*
3458		 * Transfer the entire throttled queue to the regular LRU page queues.
3459		 * We insert it at the head of the active queue, so that these pages
3460		 * get re-evaluated by the LRU algorithm first, since they've been
3461		 * completely out of it until now.
3462		 */
3463		first_throttled = (vm_page_t) queue_first(&vm_page_queue_throttled);
3464		last_throttled = (vm_page_t) queue_last(&vm_page_queue_throttled);
3465		first_active = (vm_page_t) queue_first(&vm_page_queue_active);
3466		if (queue_empty(&vm_page_queue_active)) {
3467			queue_last(&vm_page_queue_active) = (queue_entry_t) last_throttled;
3468		} else {
3469			queue_prev(&first_active->pageq) = (queue_entry_t) last_throttled;
3470		}
3471		queue_first(&vm_page_queue_active) = (queue_entry_t) first_throttled;
3472		queue_prev(&first_throttled->pageq) = (queue_entry_t) &vm_page_queue_active;
3473		queue_next(&last_throttled->pageq) = (queue_entry_t) first_active;
3474
3475#if DEBUG
3476		printf("reactivated %d throttled pages\n", vm_page_throttled_count);
3477#endif
3478		queue_init(&vm_page_queue_throttled);
3479		/*
3480		 * Adjust the global page counts.
3481		 */
3482		vm_page_active_count += extra_active_count;
3483		vm_page_pageable_internal_count += extra_internal_count;
3484		vm_page_pageable_external_count += extra_external_count;
3485		vm_page_throttled_count = 0;
3486	}
3487	assert(vm_page_throttled_count == 0);
3488	assert(queue_empty(&vm_page_queue_throttled));
3489	vm_page_unlock_queues();
3490}
3491
3492
3493/*
3494 * move pages from the indicated local queue to the global active queue;
3495 * it's OK to fail if we're below the hard limit and force == FALSE.
3496 * the nolocks == TRUE case allows this function to be run on
3497 * the hibernate path
3498 */
3499
3500void
3501vm_page_reactivate_local(uint32_t lid, boolean_t force, boolean_t nolocks)
3502{
3503	struct vpl	*lq;
3504	vm_page_t	first_local, last_local;
3505	vm_page_t	first_active;
3506	vm_page_t	m;
3507	uint32_t	count = 0;
3508
3509	if (vm_page_local_q == NULL)
3510		return;
3511
3512	lq = &vm_page_local_q[lid].vpl_un.vpl;
3513
3514	if (nolocks == FALSE) {
3515		if (lq->vpl_count < vm_page_local_q_hard_limit && force == FALSE) {
3516			if ( !vm_page_trylockspin_queues())
3517				return;
3518		} else
3519			vm_page_lockspin_queues();
3520
3521		VPL_LOCK(&lq->vpl_lock);
3522	}
3523	if (lq->vpl_count) {
3524		/*
3525		 * Switch "local" pages to "active".
3526		 */
3527		assert(!queue_empty(&lq->vpl_queue));
3528
3529		queue_iterate(&lq->vpl_queue, m, vm_page_t, pageq) {
3530			VM_PAGE_CHECK(m);
3531			assert(m->local);
3532			assert(!m->active);
3533			assert(!m->inactive);
3534			assert(!m->speculative);
3535			assert(!VM_PAGE_WIRED(m));
3536			assert(!m->throttled);
3537			assert(!m->fictitious);
3538
3539			if (m->local_id != lid)
3540				panic("vm_page_reactivate_local: found vm_page_t(%p) with wrong cpuid", m);
3541
3542			m->local_id = 0;
3543			m->local = FALSE;
3544			m->active = TRUE;
3545			VM_PAGE_CHECK(m);
3546
3547			count++;
3548		}
3549		if (count != lq->vpl_count)
3550			panic("vm_page_reactivate_local: count = %d, vm_page_local_count = %d\n", count, lq->vpl_count);
3551
3552		/*
3553		 * Transfer the entire local queue to the regular LRU page queues.
3554		 */
3555		first_local = (vm_page_t) queue_first(&lq->vpl_queue);
3556		last_local = (vm_page_t) queue_last(&lq->vpl_queue);
3557		first_active = (vm_page_t) queue_first(&vm_page_queue_active);
3558
3559		if (queue_empty(&vm_page_queue_active)) {
3560			queue_last(&vm_page_queue_active) = (queue_entry_t) last_local;
3561		} else {
3562			queue_prev(&first_active->pageq) = (queue_entry_t) last_local;
3563		}
3564		queue_first(&vm_page_queue_active) = (queue_entry_t) first_local;
3565		queue_prev(&first_local->pageq) = (queue_entry_t) &vm_page_queue_active;
3566		queue_next(&last_local->pageq) = (queue_entry_t) first_active;
3567
3568		queue_init(&lq->vpl_queue);
3569		/*
3570		 * Adjust the global page counts.
3571		 */
3572		vm_page_active_count += lq->vpl_count;
3573		vm_page_pageable_internal_count += lq->vpl_internal_count;
3574		vm_page_pageable_external_count += lq->vpl_external_count;
3575		lq->vpl_count = 0;
3576		lq->vpl_internal_count = 0;
3577		lq->vpl_external_count = 0;
3578	}
3579	assert(queue_empty(&lq->vpl_queue));
3580
3581	if (nolocks == FALSE) {
3582		VPL_UNLOCK(&lq->vpl_lock);
3583		vm_page_unlock_queues();
3584	}
3585}
3586
3587/*
3588 *	vm_page_part_zero_fill:
3589 *
3590 *	Zero-fill a part of the page.
3591 */
3592#define PMAP_ZERO_PART_PAGE_IMPLEMENTED
3593void
3594vm_page_part_zero_fill(
3595	vm_page_t	m,
3596	vm_offset_t	m_pa,
3597	vm_size_t	len)
3598{
3599
3600#if 0
3601	/*
3602	 * we don't hold the page queue lock
3603	 * so this check isn't safe to make
3604	 */
3605	VM_PAGE_CHECK(m);
3606#endif
3607
3608#ifdef PMAP_ZERO_PART_PAGE_IMPLEMENTED
3609	pmap_zero_part_page(m->phys_page, m_pa, len);
3610#else
3611	vm_page_t	tmp;
3612	while (1) {
3613       		tmp = vm_page_grab();
3614		if (tmp == VM_PAGE_NULL) {
3615			vm_page_wait(THREAD_UNINT);
3616			continue;
3617		}
3618		break;
3619	}
3620	vm_page_zero_fill(tmp);
3621	if(m_pa != 0) {
3622		vm_page_part_copy(m, 0, tmp, 0, m_pa);
3623	}
3624	if((m_pa + len) <  PAGE_SIZE) {
3625		vm_page_part_copy(m, m_pa + len, tmp,
3626				m_pa + len, PAGE_SIZE - (m_pa + len));
3627	}
3628	vm_page_copy(tmp,m);
3629	VM_PAGE_FREE(tmp);
3630#endif
3631
3632}
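
/*
 * Illustrative sketch (not compiled): a typical use of
 * vm_page_part_zero_fill() is zeroing the tail of the last page of a
 * file past end-of-file; "valid_bytes" is a hypothetical name for the
 * number of valid bytes in the page.
 */
#if 0
	if (valid_bytes < PAGE_SIZE)
		vm_page_part_zero_fill(m, valid_bytes, PAGE_SIZE - valid_bytes);
#endif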
3633
3634/*
3635 *	vm_page_zero_fill:
3636 *
3637 *	Zero-fill the specified page.
3638 */
3639void
3640vm_page_zero_fill(
3641	vm_page_t	m)
3642{
3643        XPR(XPR_VM_PAGE,
3644                "vm_page_zero_fill, object 0x%X offset 0x%X page 0x%X\n",
3645                m->object, m->offset, m, 0,0);
3646#if 0
3647	/*
3648	 * we don't hold the page queue lock
3649	 * so this check isn't safe to make
3650	 */
3651	VM_PAGE_CHECK(m);
3652#endif
3653
3654//	dbgTrace(0xAEAEAEAE, m->phys_page, 0);		/* (BRINGUP) */
3655	pmap_zero_page(m->phys_page);
3656}
3657
3658/*
3659 *	vm_page_part_copy:
3660 *
3661 *	copy part of one page to another
3662 */
3663
3664void
3665vm_page_part_copy(
3666	vm_page_t	src_m,
3667	vm_offset_t	src_pa,
3668	vm_page_t	dst_m,
3669	vm_offset_t	dst_pa,
3670	vm_size_t	len)
3671{
3672#if 0
3673	/*
3674	 * we don't hold the page queue lock
3675	 * so this check isn't safe to make
3676	 */
3677	VM_PAGE_CHECK(src_m);
3678	VM_PAGE_CHECK(dst_m);
3679#endif
3680	pmap_copy_part_page(src_m->phys_page, src_pa,
3681			dst_m->phys_page, dst_pa, len);
3682}
3683
3684/*
3685 *	vm_page_copy:
3686 *
3687 *	Copy one page to another
3688 *
3689 * ENCRYPTED SWAP:
3690 * The source page should not be encrypted.  The caller should
3691 * make sure the page is decrypted first, if necessary.
3692 */
3693
3694int vm_page_copy_cs_validations = 0;
3695int vm_page_copy_cs_tainted = 0;
3696
3697void
3698vm_page_copy(
3699	vm_page_t	src_m,
3700	vm_page_t	dest_m)
3701{
3702        XPR(XPR_VM_PAGE,
3703        "vm_page_copy, object 0x%X offset 0x%X to object 0x%X offset 0x%X\n",
3704        src_m->object, src_m->offset,
3705	dest_m->object, dest_m->offset,
3706	0);
3707#if 0
3708	/*
3709	 * we don't hold the page queue lock
3710	 * so this check isn't safe to make
3711	 */
3712	VM_PAGE_CHECK(src_m);
3713	VM_PAGE_CHECK(dest_m);
3714#endif
3715	vm_object_lock_assert_held(src_m->object);
3716
3717	/*
3718	 * ENCRYPTED SWAP:
3719	 * The source page should not be encrypted at this point.
3720	 * The destination page will therefore not contain encrypted
3721	 * data after the copy.
3722	 */
3723	if (src_m->encrypted) {
3724		panic("vm_page_copy: source page %p is encrypted\n", src_m);
3725	}
3726	dest_m->encrypted = FALSE;
3727
3728	if (src_m->object != VM_OBJECT_NULL &&
3729	    src_m->object->code_signed) {
3730		/*
3731		 * We're copying a page from a code-signed object.
3732		 * Whoever ends up mapping the copy page might care about
3733		 * the original page's integrity, so let's validate the
3734		 * source page now.
3735		 */
3736		vm_page_copy_cs_validations++;
3737		vm_page_validate_cs(src_m);
3738	}
3739
3740	if (vm_page_is_slideable(src_m)) {
3741		boolean_t was_busy = src_m->busy;
3742		src_m->busy = TRUE;
3743		(void) vm_page_slide(src_m, 0);
3744		assert(src_m->busy);
3745		if (!was_busy) {
3746			PAGE_WAKEUP_DONE(src_m);
3747		}
3748	}
3749
3750	/*
3751	 * Propagate the cs_tainted bit to the copy page. Do not propagate
3752	 * the cs_validated bit.
3753	 */
3754	dest_m->cs_tainted = src_m->cs_tainted;
3755	if (dest_m->cs_tainted) {
3756		vm_page_copy_cs_tainted++;
3757	}
3758	dest_m->slid = src_m->slid;
3759	dest_m->error = src_m->error; /* sliding src_m might have failed... */
3760	pmap_copy_page(src_m->phys_page, dest_m->phys_page);
3761}
3762
3763#if MACH_ASSERT
3764static void
3765_vm_page_print(
3766	vm_page_t	p)
3767{
3768	printf("vm_page %p:\n", p);
3769	printf("  pageq: next=%p prev=%p\n", p->pageq.next, p->pageq.prev);
3770	printf("  listq: next=%p prev=%p\n", p->listq.next, p->listq.prev);
3771	printf("  next=%p\n", VM_PAGE_UNPACK_PTR(p->next_m));
3772	printf("  object=%p offset=0x%llx\n", p->object, p->offset);
3773	printf("  wire_count=%u\n", p->wire_count);
3774
3775	printf("  %slocal, %sinactive, %sactive, %spageout_queue, %sspeculative, %slaundry\n",
3776	       (p->local ? "" : "!"),
3777	       (p->inactive ? "" : "!"),
3778	       (p->active ? "" : "!"),
3779	       (p->pageout_queue ? "" : "!"),
3780	       (p->speculative ? "" : "!"),
3781	       (p->laundry ? "" : "!"));
3782	printf("  %sfree, %sref, %sgobbled, %sprivate, %sthrottled\n",
3783	       (p->free ? "" : "!"),
3784	       (p->reference ? "" : "!"),
3785	       (p->gobbled ? "" : "!"),
3786	       (p->private ? "" : "!"),
3787	       (p->throttled ? "" : "!"));
3788	printf("  %sbusy, %swanted, %stabled, %sfictitious, %spmapped, %swpmapped\n",
3789		(p->busy ? "" : "!"),
3790		(p->wanted ? "" : "!"),
3791		(p->tabled ? "" : "!"),
3792		(p->fictitious ? "" : "!"),
3793		(p->pmapped ? "" : "!"),
3794		(p->wpmapped ? "" : "!"));
3795	printf("  %spageout, %sabsent, %serror, %sdirty, %scleaning, %sprecious, %sclustered\n",
3796	       (p->pageout ? "" : "!"),
3797	       (p->absent ? "" : "!"),
3798	       (p->error ? "" : "!"),
3799	       (p->dirty ? "" : "!"),
3800	       (p->cleaning ? "" : "!"),
3801	       (p->precious ? "" : "!"),
3802	       (p->clustered ? "" : "!"));
3803	printf("  %soverwriting, %srestart, %sunusual, %sencrypted, %sencrypted_cleaning\n",
3804	       (p->overwriting ? "" : "!"),
3805	       (p->restart ? "" : "!"),
3806	       (p->unusual ? "" : "!"),
3807	       (p->encrypted ? "" : "!"),
3808	       (p->encrypted_cleaning ? "" : "!"));
3809	printf("  %scs_validated, %scs_tainted, %sno_cache\n",
3810	       (p->cs_validated ? "" : "!"),
3811	       (p->cs_tainted ? "" : "!"),
3812	       (p->no_cache ? "" : "!"));
3813
3814	printf("phys_page=0x%x\n", p->phys_page);
3815}
3816
3817/*
3818 *	Check that the list of pages is ordered by
3819 *	ascending physical address and has no holes.
3820 */
3821static int
3822vm_page_verify_contiguous(
3823	vm_page_t	pages,
3824	unsigned int	npages)
3825{
3826	register vm_page_t	m;
3827	unsigned int		page_count;
3828	vm_offset_t		prev_addr;
3829
3830	prev_addr = pages->phys_page;
3831	page_count = 1;
3832	for (m = NEXT_PAGE(pages); m != VM_PAGE_NULL; m = NEXT_PAGE(m)) {
3833		if (m->phys_page != prev_addr + 1) {
3834			printf("m %p prev_addr 0x%lx, current addr 0x%x\n",
3835			       m, (long)prev_addr, m->phys_page);
3836			printf("pages %p page_count %d npages %d\n", pages, page_count, npages);
3837			panic("vm_page_verify_contiguous:  not contiguous!");
3838		}
3839		prev_addr = m->phys_page;
3840		++page_count;
3841	}
3842	if (page_count != npages) {
3843		printf("pages %p actual count 0x%x but requested 0x%x\n",
3844		       pages, page_count, npages);
3845		panic("vm_page_verify_contiguous:  count error");
3846	}
3847	return 1;
3848}
3849
3850
3851/*
3852 *	Check the free lists for proper length etc.
3853 */
3854static boolean_t vm_page_verify_this_free_list_enabled = FALSE;
3855static unsigned int
3856vm_page_verify_free_list(
3857	queue_head_t	*vm_page_queue,
3858	unsigned int	color,
3859	vm_page_t	look_for_page,
3860	boolean_t	expect_page)
3861{
3862	unsigned int 	npages;
3863	vm_page_t	m;
3864	vm_page_t	prev_m;
3865	boolean_t	found_page;
3866
3867	if (! vm_page_verify_this_free_list_enabled)
3868		return 0;
3869
3870	found_page = FALSE;
3871	npages = 0;
3872	prev_m = (vm_page_t) vm_page_queue;
3873	queue_iterate(vm_page_queue,
3874		      m,
3875		      vm_page_t,
3876		      pageq) {
3877
3878		if (m == look_for_page) {
3879			found_page = TRUE;
3880		}
3881		if ((vm_page_t) m->pageq.prev != prev_m)
3882			panic("vm_page_verify_free_list(color=%u, npages=%u): page %p corrupted prev ptr %p instead of %p\n",
3883			      color, npages, m, m->pageq.prev, prev_m);
3884		if ( ! m->busy )
3885			panic("vm_page_verify_free_list(color=%u, npages=%u): page %p not busy\n",
3886			      color, npages, m);
3887		if (color != (unsigned int) -1) {
3888			if ((m->phys_page & vm_color_mask) != color)
3889				panic("vm_page_verify_free_list(color=%u, npages=%u): page %p wrong color %u instead of %u\n",
3890				      color, npages, m, m->phys_page & vm_color_mask, color);
3891			if ( ! m->free )
3892				panic("vm_page_verify_free_list(color=%u, npages=%u): page %p not free\n",
3893				      color, npages, m);
3894		}
3895		++npages;
3896		prev_m = m;
3897	}
3898	if (look_for_page != VM_PAGE_NULL) {
3899		unsigned int other_color;
3900
3901		if (expect_page && !found_page) {
3902			printf("vm_page_verify_free_list(color=%u, npages=%u): page %p not found phys=%u\n",
3903			       color, npages, look_for_page, look_for_page->phys_page);
3904			_vm_page_print(look_for_page);
3905			for (other_color = 0;
3906			     other_color < vm_colors;
3907			     other_color++) {
3908				if (other_color == color)
3909					continue;
3910				vm_page_verify_free_list(&vm_page_queue_free[other_color],
3911							 other_color, look_for_page, FALSE);
3912			}
3913			if (color == (unsigned int) -1) {
3914				vm_page_verify_free_list(&vm_lopage_queue_free,
3915							 (unsigned int) -1, look_for_page, FALSE);
3916			}
3917			panic("vm_page_verify_free_list(color=%u)\n", color);
3918		}
3919		if (!expect_page && found_page) {
3920			printf("vm_page_verify_free_list(color=%u, npages=%u): page %p found phys=%u\n",
3921			       color, npages, look_for_page, look_for_page->phys_page);
3922		}
3923	}
3924	return npages;
3925}
3926
3927static boolean_t vm_page_verify_all_free_lists_enabled = FALSE;
3928static void
3929vm_page_verify_free_lists( void )
3930{
3931	unsigned int	color, npages, nlopages;
3932	boolean_t	toggle = TRUE;
3933
3934	if (! vm_page_verify_all_free_lists_enabled)
3935		return;
3936
3937	npages = 0;
3938
3939	lck_mtx_lock(&vm_page_queue_free_lock);
3940
3941	if (vm_page_verify_this_free_list_enabled == TRUE) {
3942		/*
3943		 * This variable has been set globally for extra checking of
3944		 * each free list Q. Since we didn't set it, we don't own it
3945		 * and we shouldn't toggle it.
3946		 */
3947		toggle = FALSE;
3948	}
3949
3950	if (toggle == TRUE) {
3951		vm_page_verify_this_free_list_enabled = TRUE;
3952	}
3953
3954	for( color = 0; color < vm_colors; color++ ) {
3955		npages += vm_page_verify_free_list(&vm_page_queue_free[color],
3956						   color, VM_PAGE_NULL, FALSE);
3957	}
3958	nlopages = vm_page_verify_free_list(&vm_lopage_queue_free,
3959					    (unsigned int) -1,
3960					    VM_PAGE_NULL, FALSE);
3961	if (npages != vm_page_free_count || nlopages != vm_lopage_free_count)
3962		panic("vm_page_verify_free_lists:  "
3963		      "npages %u free_count %d nlopages %u lo_free_count %u",
3964		      npages, vm_page_free_count, nlopages, vm_lopage_free_count);
3965
3966	if (toggle == TRUE) {
3967		vm_page_verify_this_free_list_enabled = FALSE;
3968	}
3969
3970	lck_mtx_unlock(&vm_page_queue_free_lock);
3971}
3972
3973void
3974vm_page_queues_assert(
3975	vm_page_t	mem,
3976	int		val)
3977{
3978#if DEBUG
3979	lck_mtx_assert(&vm_page_queue_lock, LCK_MTX_ASSERT_OWNED);
3980#endif
3981	if (mem->free + mem->active + mem->inactive + mem->speculative +
3982	    mem->throttled + mem->pageout_queue > (val)) {
3983		_vm_page_print(mem);
3984		panic("vm_page_queues_assert(%p, %d)\n", mem, val);
3985	}
3986	if (VM_PAGE_WIRED(mem)) {
3987		assert(!mem->active);
3988		assert(!mem->inactive);
3989		assert(!mem->speculative);
3990		assert(!mem->throttled);
3991		assert(!mem->pageout_queue);
3992	}
3993}
3994#endif	/* MACH_ASSERT */
3995
3996
3997/*
3998 *	CONTIGUOUS PAGE ALLOCATION
3999 *
4000 *	Find a region large enough to contain at least n pages
4001 *	of contiguous physical memory.
4002 *
4003 *	This is done by traversing the vm_page_t array in a linear fashion
4004 *	we assume that the vm_page_t array has the available physical pages in an
4005 *	ordered, ascending list... this is currently true of all our implementations
4006 * 	and must remain so... there can be 'holes' in the array...  we also can
4007 *	no longer tolerate the vm_page_t's in the list being 'freed' and reclaimed
4008 * 	which used to happen via 'vm_page_convert'... that function was no longer
4009 * 	being called and was removed...
4010 *
4011 *	The basic flow consists of stabilizing some of the interesting state of
4012 *	a vm_page_t behind the vm_page_queue and vm_page_free locks... we start our
4013 *	sweep at the beginning of the array looking for pages that meet our criteria
4014 *	for a 'stealable' page... currently we are pretty conservative... if the page
4015 *	meets these criteria and is physically contiguous to the previous page in the 'run'
4016 * 	we keep developing it.  If we hit a page that doesn't fit, we reset our state
4017 *	and start to develop a new run... if at this point we've already considered
4018 * 	at least MAX_CONSIDERED_BEFORE_YIELD pages, we'll drop the 2 locks we hold,
4019 *	and mutex_pause (which will yield the processor), to keep the latency low with
4020 *	respect to other threads trying to acquire free pages (or move pages from q to q),
4021 *	and then continue from the spot we left off... we only make 1 pass through the
4022 *	array.  Once we have a 'run' that is long enough, we'll go into the loop
4023 * 	which steals the pages from the queues they're currently on... pages on the free
4024 *	queue can be stolen directly... pages that are on any of the other queues
4025 *	must be removed from the object they are tabled on... this requires taking the
4026 * 	object lock... we do this as a 'try' to prevent deadlocks... if the 'try' fails
4027 *	or if the state of the page behind the vm_object lock is no longer viable, we'll
4028 *	dump the pages we've currently stolen back to the free list, and pick up our
4029 *	scan from the point where we aborted the 'current' run.
4030 *
4031 *
4032 *	Requirements:
4033 *		- neither vm_page_queue nor vm_free_list lock can be held on entry
4034 *
4035 *	Returns a pointer to a list of gobbled/wired pages or VM_PAGE_NULL.
4036 *
4037 * Algorithm: a condensed, illustrative sketch follows below.
4038 */
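#if 0
/*
 * Condensed, illustrative sketch of the scan described above (not compiled).
 * Locking, page substitution and the abort/retry paths are omitted, and
 * page_is_stealable() is a hypothetical helper standing in for the series
 * of state checks actually made in vm_page_find_contiguous() below.
 */
	for (page_idx = last_idx; npages < contig_pages && page_idx < vm_pages_count; page_idx++) {
		m = &vm_pages[page_idx];

		if (!page_is_stealable(m)) {
			RESET_STATE_OF_RUN();		/* page unusable... start a new run */
		} else if (npages && m->phys_page == prevcontaddr + 1) {
			npages++;			/* extends the current run */
			prevcontaddr = m->phys_page;
		} else {
			npages = 1;			/* stealable but not contiguous... new run */
			start_idx = page_idx;
			prevcontaddr = m->phys_page;
		}
		if (++considered > MAX_CONSIDERED_BEFORE_YIELD && npages <= 1) {
			/*
			 * drop the vm_page_queue and free list locks, mutex_pause()
			 * to yield, then re-take the locks and reset the run so
			 * that the lock hold times stay bounded
			 */
		}
	}
#endif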
4039
4040#define	MAX_CONSIDERED_BEFORE_YIELD	1000
4041
4042
4043#define RESET_STATE_OF_RUN()	\
4044	MACRO_BEGIN		\
4045	prevcontaddr = -2;	\
4046	start_pnum = -1;	\
4047	free_considered = 0;	\
4048	substitute_needed = 0;	\
4049	npages = 0;		\
4050	MACRO_END
4051
4052/*
4053 * Can we steal in-use (i.e. not free) pages when searching for
4054 * physically-contiguous pages ?
4055 */
4056#define VM_PAGE_FIND_CONTIGUOUS_CAN_STEAL 1
4057
4058static unsigned int vm_page_find_contiguous_last_idx = 0,  vm_page_lomem_find_contiguous_last_idx = 0;
4059#if DEBUG
4060int vm_page_find_contig_debug = 0;
4061#endif
4062
4063static vm_page_t
4064vm_page_find_contiguous(
4065	unsigned int	contig_pages,
4066	ppnum_t		max_pnum,
4067	ppnum_t     pnum_mask,
4068	boolean_t	wire,
4069	int		flags)
4070{
4071	vm_page_t	m = NULL;
4072	ppnum_t		prevcontaddr;
4073	ppnum_t		start_pnum;
4074	unsigned int	npages, considered, scanned;
4075	unsigned int	page_idx, start_idx, last_idx, orig_last_idx;
4076	unsigned int	idx_last_contig_page_found = 0;
4077	int		free_considered, free_available;
4078	int		substitute_needed;
4079	boolean_t	wrapped;
4080#if DEBUG
4081	clock_sec_t	tv_start_sec, tv_end_sec;
4082	clock_usec_t	tv_start_usec, tv_end_usec;
4083#endif
4084#if MACH_ASSERT
4085	int		yielded = 0;
4086	int		dumped_run = 0;
4087	int		stolen_pages = 0;
4088	int		compressed_pages = 0;
4089#endif
4090
4091	if (contig_pages == 0)
4092		return VM_PAGE_NULL;
4093
4094#if MACH_ASSERT
4095	vm_page_verify_free_lists();
4096#endif
4097#if DEBUG
4098	clock_get_system_microtime(&tv_start_sec, &tv_start_usec);
4099#endif
4100	PAGE_REPLACEMENT_ALLOWED(TRUE);
4101
4102	vm_page_lock_queues();
4103	lck_mtx_lock(&vm_page_queue_free_lock);
4104
4105	RESET_STATE_OF_RUN();
4106
4107	scanned = 0;
4108	considered = 0;
4109	free_available = vm_page_free_count - vm_page_free_reserved;
4110
4111	wrapped = FALSE;
4112
4113	if(flags & KMA_LOMEM)
4114		idx_last_contig_page_found = vm_page_lomem_find_contiguous_last_idx;
4115	else
4116		idx_last_contig_page_found =  vm_page_find_contiguous_last_idx;
4117
4118	orig_last_idx = idx_last_contig_page_found;
4119	last_idx = orig_last_idx;
4120
4121	for (page_idx = last_idx, start_idx = last_idx;
4122	     npages < contig_pages && page_idx < vm_pages_count;
4123	     page_idx++) {
4124retry:
4125		if (wrapped &&
4126		    npages == 0 &&
4127		    page_idx >= orig_last_idx) {
4128			/*
4129			 * We're back where we started and we haven't
4130			 * found any suitable contiguous range.  Let's
4131			 * give up.
4132			 */
4133			break;
4134		}
4135		scanned++;
4136		m = &vm_pages[page_idx];
4137
4138		assert(!m->fictitious);
4139		assert(!m->private);
4140
4141		if (max_pnum && m->phys_page > max_pnum) {
4142			/* no more low pages... */
4143			break;
4144		}
4145		if (!npages && ((m->phys_page & pnum_mask) != 0)) {
4146			/*
4147			 * not aligned
4148			 */
4149			RESET_STATE_OF_RUN();
4150
4151		} else if (VM_PAGE_WIRED(m) || m->gobbled ||
4152			   m->encrypted_cleaning ||
4153			   m->pageout_queue || m->laundry || m->wanted ||
4154			   m->cleaning || m->overwriting || m->pageout) {
4155			/*
4156			 * page is in a transient state
4157			 * or a state we don't want to deal
4158			 * with, so don't consider it which
4159			 * means starting a new run
4160			 */
4161			RESET_STATE_OF_RUN();
4162
4163		} else if (!m->free && !m->active && !m->inactive && !m->speculative && !m->throttled && !m->compressor) {
4164			/*
4165			 * page needs to be on one of our queues
4166			 * or it needs to belong to the compressor pool
4167			 * in order for it to be stable behind the
4168			 * locks we hold at this point...
4169			 * if not, don't consider it which
4170			 * means starting a new run
4171			 */
4172			RESET_STATE_OF_RUN();
4173
4174		} else if (!m->free && (!m->tabled || m->busy)) {
4175			/*
4176			 * pages on the free list are always 'busy'
4177			 * so we couldn't test for 'busy' in the check
4178			 * for the transient states... pages that are
4179			 * 'free' are never 'tabled', so we also couldn't
4180			 * test for 'tabled'.  So we check here to make
4181			 * sure that a non-free page is not busy and is
4182			 * tabled on an object...
4183			 * if not, don't consider it which
4184			 * means starting a new run
4185			 */
4186			RESET_STATE_OF_RUN();
4187
4188		} else {
4189			if (m->phys_page != prevcontaddr + 1) {
4190				if ((m->phys_page & pnum_mask) != 0) {
4191					RESET_STATE_OF_RUN();
4192					goto did_consider;
4193				} else {
4194					npages = 1;
4195					start_idx = page_idx;
4196					start_pnum = m->phys_page;
4197				}
4198			} else {
4199				npages++;
4200			}
4201			prevcontaddr = m->phys_page;
4202
4203			VM_PAGE_CHECK(m);
4204			if (m->free) {
4205				free_considered++;
4206			} else {
4207				/*
4208				 * This page is not free.
4209				 * If we can't steal used pages,
4210				 * we have to give up this run
4211				 * and keep looking.
4212				 * Otherwise, we might need to
4213				 * move the contents of this page
4214				 * into a substitute page.
4215				 */
4216#if VM_PAGE_FIND_CONTIGUOUS_CAN_STEAL
4217				if (m->pmapped || m->dirty || m->precious) {
4218					substitute_needed++;
4219				}
4220#else
4221				RESET_STATE_OF_RUN();
4222#endif
4223			}
4224
4225			if ((free_considered + substitute_needed) > free_available) {
4226				/*
4227				 * if we let this run continue
4228				 * we will end up dropping the vm_page_free_count
4229				 * below the reserve limit... we need to abort
4230				 * this run, but we can at least re-consider this
4231				 * page... thus the jump back to 'retry'
4232				 */
4233				RESET_STATE_OF_RUN();
4234
4235				if (free_available && considered <= MAX_CONSIDERED_BEFORE_YIELD) {
4236					considered++;
4237					goto retry;
4238				}
4239				/*
4240				 * free_available == 0
4241				 * so can't consider any free pages... if
4242				 * we went to retry in this case, we'd
4243				 * get stuck looking at the same page
4244				 * w/o making any forward progress
4245				 * we also want to take this path if we've already
4246				 * reached our limit that controls the lock latency
4247				 */
4248			}
4249		}
4250did_consider:
4251		if (considered > MAX_CONSIDERED_BEFORE_YIELD && npages <= 1) {
4252
4253			PAGE_REPLACEMENT_ALLOWED(FALSE);
4254
4255			lck_mtx_unlock(&vm_page_queue_free_lock);
4256			vm_page_unlock_queues();
4257
4258			mutex_pause(0);
4259
4260			PAGE_REPLACEMENT_ALLOWED(TRUE);
4261
4262			vm_page_lock_queues();
4263			lck_mtx_lock(&vm_page_queue_free_lock);
4264
4265			RESET_STATE_OF_RUN();
4266			/*
4267			 * reset our free page limit since we
4268			 * dropped the lock protecting the vm_page_free_queue
4269			 */
4270			free_available = vm_page_free_count - vm_page_free_reserved;
4271			considered = 0;
4272#if MACH_ASSERT
4273			yielded++;
4274#endif
4275			goto retry;
4276		}
4277		considered++;
4278	}
4279	m = VM_PAGE_NULL;
4280
4281	if (npages != contig_pages) {
4282		if (!wrapped) {
4283			/*
4284			 * We didn't find a contiguous range but we didn't
4285			 * start from the very first page.
4286			 * Start again from the very first page.
4287			 */
4288			RESET_STATE_OF_RUN();
4289			if( flags & KMA_LOMEM)
4290				idx_last_contig_page_found  = vm_page_lomem_find_contiguous_last_idx = 0;
4291			else
4292				idx_last_contig_page_found = vm_page_find_contiguous_last_idx = 0;
4293			last_idx = 0;
4294			page_idx = last_idx;
4295			wrapped = TRUE;
4296			goto retry;
4297		}
4298		lck_mtx_unlock(&vm_page_queue_free_lock);
4299	} else {
4300		vm_page_t	m1;
4301		vm_page_t	m2;
4302		unsigned int	cur_idx;
4303		unsigned int	tmp_start_idx;
4304		vm_object_t	locked_object = VM_OBJECT_NULL;
4305		boolean_t	abort_run = FALSE;
4306
4307		assert(page_idx - start_idx == contig_pages);
4308
4309		tmp_start_idx = start_idx;
4310
4311		/*
4312		 * first pass through to pull the free pages
4313		 * off of the free queue so that in case we
4314		 * need substitute pages, we won't grab any
4315		 * of the free pages in the run... we'll clear
4316		 * the 'free' bit in the 2nd pass, and even in
4317		 * an abort_run case, we'll collect all of the
4318		 * free pages in this run and return them to the free list
4319		 */
4320		while (start_idx < page_idx) {
4321
4322			m1 = &vm_pages[start_idx++];
4323
4324#if !VM_PAGE_FIND_CONTIGUOUS_CAN_STEAL
4325			assert(m1->free);
4326#endif
4327
4328			if (m1->free) {
4329				unsigned int color;
4330
4331				color = m1->phys_page & vm_color_mask;
4332#if MACH_ASSERT
4333				vm_page_verify_free_list(&vm_page_queue_free[color], color, m1, TRUE);
4334#endif
4335				queue_remove(&vm_page_queue_free[color],
4336					     m1,
4337					     vm_page_t,
4338					     pageq);
4339				m1->pageq.next = NULL;
4340				m1->pageq.prev = NULL;
4341#if MACH_ASSERT
4342				vm_page_verify_free_list(&vm_page_queue_free[color], color, VM_PAGE_NULL, FALSE);
4343#endif
4344				/*
4345				 * Clear the "free" bit so that this page
4346				 * does not get considered for another
4347				 * concurrent physically-contiguous allocation.
4348				 */
4349				m1->free = FALSE;
4350				assert(m1->busy);
4351
4352				vm_page_free_count--;
4353			}
4354		}
4355		if( flags & KMA_LOMEM)
4356			vm_page_lomem_find_contiguous_last_idx = page_idx;
4357		else
4358			vm_page_find_contiguous_last_idx = page_idx;
4359
4360		/*
4361		 * we can drop the free queue lock at this point since
4362		 * we've pulled any 'free' candidates off of the list
4363		 * we need it dropped so that we can do a vm_page_grab
4364		 * when substituting for pmapped/dirty pages
4365		 */
4366		lck_mtx_unlock(&vm_page_queue_free_lock);
4367
4368		start_idx = tmp_start_idx;
4369		cur_idx = page_idx - 1;
4370
4371		while (start_idx++ < page_idx) {
4372			/*
4373			 * must go through the list from back to front
4374			 * so that the page list is created in the
4375			 * correct order - low -> high phys addresses
4376			 */
4377			m1 = &vm_pages[cur_idx--];
4378
4379			assert(!m1->free);
4380
4381			if (m1->object == VM_OBJECT_NULL) {
4382				/*
4383				 * page has already been removed from
4384				 * the free list in the 1st pass
4385				 */
4386				assert(m1->offset == (vm_object_offset_t) -1);
4387				assert(m1->busy);
4388				assert(!m1->wanted);
4389				assert(!m1->laundry);
4390			} else {
4391				vm_object_t object;
4392				int refmod;
4393				boolean_t disconnected, reusable;
4394
4395				if (abort_run == TRUE)
4396					continue;
4397
4398				object = m1->object;
4399
4400				if (object != locked_object) {
4401					if (locked_object) {
4402						vm_object_unlock(locked_object);
4403						locked_object = VM_OBJECT_NULL;
4404					}
4405					if (vm_object_lock_try(object))
4406						locked_object = object;
4407				}
4408				if (locked_object == VM_OBJECT_NULL ||
4409				    (VM_PAGE_WIRED(m1) || m1->gobbled ||
4410				     m1->encrypted_cleaning ||
4411				     m1->pageout_queue || m1->laundry || m1->wanted ||
4412				     m1->cleaning || m1->overwriting || m1->pageout || m1->busy)) {
4413
4414					if (locked_object) {
4415						vm_object_unlock(locked_object);
4416						locked_object = VM_OBJECT_NULL;
4417					}
4418					tmp_start_idx = cur_idx;
4419					abort_run = TRUE;
4420					continue;
4421				}
4422
4423				disconnected = FALSE;
4424				reusable = FALSE;
4425
4426				if ((m1->reusable ||
4427				     m1->object->all_reusable) &&
4428				    m1->inactive &&
4429				    !m1->dirty &&
4430				    !m1->reference) {
4431					/* reusable page... */
4432					refmod = pmap_disconnect(m1->phys_page);
4433					disconnected = TRUE;
4434					if (refmod == 0) {
4435						/*
4436						 * ... not reused: can steal
4437						 * without relocating contents.
4438						 */
4439						reusable = TRUE;
4440					}
4441				}
4442
4443				if ((m1->pmapped &&
4444				     ! reusable) ||
4445				    m1->dirty ||
4446				    m1->precious) {
4447					vm_object_offset_t offset;
4448
4449					m2 = vm_page_grab();
4450
4451					if (m2 == VM_PAGE_NULL) {
4452						if (locked_object) {
4453							vm_object_unlock(locked_object);
4454							locked_object = VM_OBJECT_NULL;
4455						}
4456						tmp_start_idx = cur_idx;
4457						abort_run = TRUE;
4458						continue;
4459					}
4460					if (! disconnected) {
4461						if (m1->pmapped)
4462							refmod = pmap_disconnect(m1->phys_page);
4463						else
4464							refmod = 0;
4465					}
4466
4467					/* copy the page's contents */
4468					pmap_copy_page(m1->phys_page, m2->phys_page);
4469					/* copy the page's state */
4470					assert(!VM_PAGE_WIRED(m1));
4471					assert(!m1->free);
4472					assert(!m1->pageout_queue);
4473					assert(!m1->laundry);
4474					m2->reference	= m1->reference;
4475					assert(!m1->gobbled);
4476					assert(!m1->private);
4477					m2->no_cache	= m1->no_cache;
4478					m2->xpmapped	= 0;
4479					assert(!m1->busy);
4480					assert(!m1->wanted);
4481					assert(!m1->fictitious);
4482					m2->pmapped	= m1->pmapped; /* should flush cache ? */
4483					m2->wpmapped	= m1->wpmapped;
4484					assert(!m1->pageout);
4485					m2->absent	= m1->absent;
4486					m2->error	= m1->error;
4487					m2->dirty	= m1->dirty;
4488					assert(!m1->cleaning);
4489					m2->precious	= m1->precious;
4490					m2->clustered	= m1->clustered;
4491					assert(!m1->overwriting);
4492					m2->restart	= m1->restart;
4493					m2->unusual	= m1->unusual;
4494					m2->encrypted	= m1->encrypted;
4495					assert(!m1->encrypted_cleaning);
4496					m2->cs_validated = m1->cs_validated;
4497					m2->cs_tainted	= m1->cs_tainted;
4498
4499					/*
4500					 * If m1 had really been reusable,
4501					 * we would have just stolen it, so
4502					 * let's not propagate its "reusable"
4503					 * bit and assert that m2 is not
4504					 * marked as "reusable".
4505					 */
4506					// m2->reusable	= m1->reusable;
4507					assert(!m2->reusable);
4508
4509					assert(!m1->lopage);
4510					m2->slid	= m1->slid;
4511					m2->compressor	= m1->compressor;
4512
4513					/*
4514					 * page may need to be flushed if
4515					 * it is marshalled into a UPL
4516					 * that is going to be used by a device
4517					 * that doesn't support coherency
4518					 */
4519					m2->written_by_kernel = TRUE;
4520
4521					/*
4522					 * make sure we clear the ref/mod state
4523					 * from the pmap layer... else we risk
4524					 * inheriting state from the last time
4525					 * this page was used...
4526					 */
4527					pmap_clear_refmod(m2->phys_page, VM_MEM_MODIFIED | VM_MEM_REFERENCED);
4528
4529					if (refmod & VM_MEM_REFERENCED)
4530						m2->reference = TRUE;
4531					if (refmod & VM_MEM_MODIFIED) {
4532						SET_PAGE_DIRTY(m2, TRUE);
4533					}
4534					offset = m1->offset;
4535
4536					/*
4537					 * completely cleans up the state
4538					 * of the page so that it is ready
4539					 * to be put onto the free list, or
4540					 * for this purpose it looks like it
4541					 * just came off of the free list
4542					 */
4543					vm_page_free_prepare(m1);
4544
4545					/*
4546					 * now put the substitute page
4547					 * on the object
4548					 */
4549					vm_page_insert_internal(m2, locked_object, offset, TRUE, TRUE, FALSE);
4550
4551					if (m2->compressor) {
4552						m2->pmapped = TRUE;
4553						m2->wpmapped = TRUE;
4554
4555						PMAP_ENTER(kernel_pmap, m2->offset, m2,
4556							   VM_PROT_READ | VM_PROT_WRITE, VM_PROT_NONE, 0, TRUE);
4557#if MACH_ASSERT
4558						compressed_pages++;
4559#endif
4560					} else {
4561						if (m2->reference)
4562							vm_page_activate(m2);
4563						else
4564							vm_page_deactivate(m2);
4565					}
4566					PAGE_WAKEUP_DONE(m2);
4567
4568				} else {
4569					assert(!m1->compressor);
4570
4571					/*
4572					 * completely cleans up the state
4573					 * of the page so that it is ready
4574					 * to be put onto the free list, or
4575					 * for this purpose it looks like it
4576					 * just came off of the free list
4577					 */
4578					vm_page_free_prepare(m1);
4579				}
4580#if MACH_ASSERT
4581				stolen_pages++;
4582#endif
4583			}
4584			m1->pageq.next = (queue_entry_t) m;
4585			m1->pageq.prev = NULL;
4586			m = m1;
4587		}
4588		if (locked_object) {
4589			vm_object_unlock(locked_object);
4590			locked_object = VM_OBJECT_NULL;
4591		}
4592
4593		if (abort_run == TRUE) {
4594			if (m != VM_PAGE_NULL) {
4595				vm_page_free_list(m, FALSE);
4596			}
4597#if MACH_ASSERT
4598			dumped_run++;
4599#endif
4600			/*
4601			 * want the index of the last
4602			 * page in this run that was
4603			 * successfully 'stolen', so back
4604			 * it up 1 for the auto-decrement on use
4605			 * and 1 more to bump back over this page
4606			 */
4607			page_idx = tmp_start_idx + 2;
4608			if (page_idx >= vm_pages_count) {
4609				if (wrapped)
4610					goto done_scanning;
4611				page_idx = last_idx = 0;
4612				wrapped = TRUE;
4613			}
4614			abort_run = FALSE;
4615
4616			/*
4617			 * We didn't find a contiguous range but we didn't
4618			 * start from the very first page.
4619			 * Start again from the very first page.
4620			 */
4621			RESET_STATE_OF_RUN();
4622
4623			if( flags & KMA_LOMEM)
4624				idx_last_contig_page_found  = vm_page_lomem_find_contiguous_last_idx = page_idx;
4625			else
4626				idx_last_contig_page_found = vm_page_find_contiguous_last_idx = page_idx;
4627
4628			last_idx = page_idx;
4629
4630			lck_mtx_lock(&vm_page_queue_free_lock);
4631			/*
4632			 * reset our free page limit since we
4633			 * dropped the lock protecting the vm_page_free_queue
4634			 */
4635			free_available = vm_page_free_count - vm_page_free_reserved;
4636			goto retry;
4637		}
4638
4639		for (m1 = m; m1 != VM_PAGE_NULL; m1 = NEXT_PAGE(m1)) {
4640
4641			if (wire == TRUE)
4642				m1->wire_count++;
4643			else
4644				m1->gobbled = TRUE;
4645		}
4646		if (wire == FALSE)
4647			vm_page_gobble_count += npages;
4648
4649		/*
4650		 * gobbled pages are also counted as wired pages
4651		 */
4652		vm_page_wire_count += npages;
4653
4654 		assert(vm_page_verify_contiguous(m, npages));
4655	}
4656done_scanning:
4657	PAGE_REPLACEMENT_ALLOWED(FALSE);
4658
4659	vm_page_unlock_queues();
4660
4661#if DEBUG
4662	clock_get_system_microtime(&tv_end_sec, &tv_end_usec);
4663
4664	tv_end_sec -= tv_start_sec;
4665	if (tv_end_usec < tv_start_usec) {
4666		tv_end_sec--;
4667		tv_end_usec += 1000000;
4668	}
4669	tv_end_usec -= tv_start_usec;
4670	if (tv_end_usec >= 1000000) {
4671		tv_end_sec++;
4672		tv_end_usec -= 1000000;
4673	}
4674	if (vm_page_find_contig_debug) {
4675		printf("%s(num=%d,low=%d): found %d pages at 0x%llx in %ld.%06ds...  started at %d...  scanned %d pages...  yielded %d times...  dumped run %d times... stole %d pages... stole %d compressed pages\n",
4676		       __func__, contig_pages, max_pnum, npages, (vm_object_offset_t)start_pnum << PAGE_SHIFT,
4677		       (long)tv_end_sec, tv_end_usec, orig_last_idx,
4678		       scanned, yielded, dumped_run, stolen_pages, compressed_pages);
4679	}
4680
4681#endif
4682#if MACH_ASSERT
4683	vm_page_verify_free_lists();
4684#endif
4685	return m;
4686}
4687
4688/*
4689 *	Allocate a list of contiguous, wired pages.
4690 */
4691kern_return_t
4692cpm_allocate(
4693	vm_size_t	size,
4694	vm_page_t	*list,
4695	ppnum_t		max_pnum,
4696	ppnum_t		pnum_mask,
4697	boolean_t	wire,
4698	int		flags)
4699{
4700	vm_page_t		pages;
4701	unsigned int		npages;
4702
4703	if (size % PAGE_SIZE != 0)
4704		return KERN_INVALID_ARGUMENT;
4705
4706	npages = (unsigned int) (size / PAGE_SIZE);
4707	if (npages != size / PAGE_SIZE) {
4708		/* 32-bit overflow */
4709		return KERN_INVALID_ARGUMENT;
4710	}
4711
4712	/*
4713	 *	Obtain a pointer to a subset of the free
4714	 *	list large enough to satisfy the request;
4715	 *	the region will be physically contiguous.
4716	 */
4717	pages = vm_page_find_contiguous(npages, max_pnum, pnum_mask, wire, flags);
4718
4719	if (pages == VM_PAGE_NULL)
4720		return KERN_NO_SPACE;
4721	/*
4722	 * determine need for wakeups
4723	 */
4724	if ((vm_page_free_count < vm_page_free_min) ||
4725	     ((vm_page_free_count < vm_page_free_target) &&
4726	      ((vm_page_inactive_count + vm_page_speculative_count) < vm_page_inactive_min)))
4727	         thread_wakeup((event_t) &vm_page_free_wanted);
4728
4729	VM_CHECK_MEMORYSTATUS;
4730
4731	/*
4732	 *	The CPM pages should now be available and
4733	 *	ordered by ascending physical address.
4734	 */
4735	assert(vm_page_verify_contiguous(pages, npages));
4736
4737	*list = pages;
4738	return KERN_SUCCESS;
4739}
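#if 0
/*
 * Illustrative sketch only (not compiled): a hypothetical in-kernel caller
 * asking for 16 wired, physically contiguous pages that all sit below the
 * 4GB physical boundary.  example_grab_dma_pages() is a made-up name.
 */
static kern_return_t
example_grab_dma_pages(vm_page_t *page_list)
{
	vm_size_t	size = 16 * PAGE_SIZE;		/* must be a multiple of PAGE_SIZE */
	ppnum_t		max_pnum = (ppnum_t)(((1ULL << 32) >> PAGE_SHIFT) - 1);
	ppnum_t		pnum_mask = 0;			/* no alignment constraint on the run */

	/* returns KERN_NO_SPACE if a suitable contiguous run can't be assembled */
	return cpm_allocate(size, page_list, max_pnum, pnum_mask, TRUE, 0);
}
#endif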
4740
4741
4742unsigned int vm_max_delayed_work_limit = DEFAULT_DELAYED_WORK_LIMIT;
4743
4744/*
4745 * when working on a 'run' of pages, it is necessary to hold
4746 * the vm_page_queue_lock (a hot global lock) for certain operations
4747 * on the page... however, the majority of the work can be done
4748 * while merely holding the object lock... in fact there are certain
4749 * collections of pages that don't require any work brokered by the
4750 * vm_page_queue_lock... to mitigate the time spent behind the global
4751 * lock, go to a 2 pass algorithm... collect pages up to DELAYED_WORK_LIMIT
4752 * while doing all of the work that doesn't require the vm_page_queue_lock...
4753 * then call vm_page_do_delayed_work to acquire the vm_page_queue_lock and do the
4754 * necessary work for each page... we will grab the busy bit on the page
4755 * if it's not already held so that vm_page_do_delayed_work can drop the object lock
4756 * if it can't immediately take the vm_page_queue_lock in order to compete
4757 * for the locks in the same order that vm_pageout_scan takes them.
4758 * the operation names are modeled after the names of the routines that
4759 * need to be called, which makes the required changes obvious in the
4760 * original loop... a condensed usage sketch follows below
4761 */
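#if 0
/*
 * Condensed usage sketch only (not compiled): a hypothetical caller batching
 * up deactivations for an object it already has locked.  example_age_object()
 * is a made-up name; a real caller would skip busy/wired/fictitious pages and
 * would loop, flushing each full batch, rather than stopping at the first one.
 */
static void
example_age_object(vm_object_t object)
{
	struct vm_page_delayed_work	dw_array[DEFAULT_DELAYED_WORK_LIMIT];
	struct vm_page_delayed_work	*dwp = &dw_array[0];
	int				dw_count = 0;
	vm_page_t			m;

	queue_iterate(&object->memq, m, vm_page_t, listq) {
		/*
		 * pass 1: do the work that only needs the object lock,
		 * then record the queue-lock work for later
		 */
		dwp->dw_m = m;
		dwp->dw_mask = DW_vm_page_deactivate_internal | DW_clear_reference;
		dwp++;

		if (++dw_count >= DEFAULT_DELAYED_WORK_LIMIT)
			break;
	}
	if (dw_count) {
		/* pass 2: take the vm_page_queue_lock once for the whole batch */
		vm_page_do_delayed_work(object, &dw_array[0], dw_count);
	}
}
#endif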
4762
4763void
4764vm_page_do_delayed_work(
4765	vm_object_t 	object,
4766	struct vm_page_delayed_work *dwp,
4767	int		dw_count)
4768{
4769	int		j;
4770	vm_page_t	m;
4771        vm_page_t       local_free_q = VM_PAGE_NULL;
4772
4773	/*
4774	 * pageout_scan takes the vm_page_lock_queues first
4775	 * then tries for the object lock... to avoid what
4776	 * is effectively a lock inversion, we'll go to the
4777	 * trouble of taking them in that same order... otherwise
4778	 * if this object contains the majority of the pages resident
4779	 * in the UBC (or a small set of large objects actively being
4780	 * worked on contain the majority of the pages), we could
4781	 * cause the pageout_scan thread to 'starve' in its attempt
4782	 * to find pages to move to the free queue, since it has to
4783	 * successfully acquire the object lock of any candidate page
4784	 * before it can steal/clean it.
4785	 */
4786	if (!vm_page_trylockspin_queues()) {
4787		vm_object_unlock(object);
4788
4789		vm_page_lockspin_queues();
4790
4791		for (j = 0; ; j++) {
4792			if (!vm_object_lock_avoid(object) &&
4793			    _vm_object_lock_try(object))
4794				break;
4795			vm_page_unlock_queues();
4796			mutex_pause(j);
4797			vm_page_lockspin_queues();
4798		}
4799	}
4800	for (j = 0; j < dw_count; j++, dwp++) {
4801
4802		m = dwp->dw_m;
4803
4804		if (dwp->dw_mask & DW_vm_pageout_throttle_up)
4805			vm_pageout_throttle_up(m);
4806#if CONFIG_PHANTOM_CACHE
4807		if (dwp->dw_mask & DW_vm_phantom_cache_update)
4808			vm_phantom_cache_update(m);
4809#endif
4810		if (dwp->dw_mask & DW_vm_page_wire)
4811			vm_page_wire(m);
4812		else if (dwp->dw_mask & DW_vm_page_unwire) {
4813			boolean_t	queueit;
4814
4815			queueit = (dwp->dw_mask & (DW_vm_page_free | DW_vm_page_deactivate_internal)) ? FALSE : TRUE;
4816
4817			vm_page_unwire(m, queueit);
4818		}
4819		if (dwp->dw_mask & DW_vm_page_free) {
4820			vm_page_free_prepare_queues(m);
4821
4822			assert(m->pageq.next == NULL && m->pageq.prev == NULL);
4823			/*
4824			 * Add this page to our list of reclaimed pages,
4825			 * to be freed later.
4826			 */
4827			m->pageq.next = (queue_entry_t) local_free_q;
4828			local_free_q = m;
4829		} else {
4830			if (dwp->dw_mask & DW_vm_page_deactivate_internal)
4831				vm_page_deactivate_internal(m, FALSE);
4832			else if (dwp->dw_mask & DW_vm_page_activate) {
4833				if (m->active == FALSE) {
4834					vm_page_activate(m);
4835				}
4836			}
4837			else if (dwp->dw_mask & DW_vm_page_speculate)
4838				vm_page_speculate(m, TRUE);
4839			else if (dwp->dw_mask & DW_enqueue_cleaned) {
4840				/*
4841				 * if we didn't hold the object lock and did this,
4842				 * we might disconnect the page, then someone might
4843				 * soft fault it back in, then we would put it on the
4844				 * cleaned queue, and so we would have a referenced (maybe even dirty)
4845				 * page on that queue, which we don't want
4846				 */
4847				int refmod_state = pmap_disconnect(m->phys_page);
4848
4849				if ((refmod_state & VM_MEM_REFERENCED)) {
4850					/*
4851					 * this page has been touched since it got cleaned; let's activate it
4852					 * if it hasn't already been
4853					 */
4854					vm_pageout_enqueued_cleaned++;
4855					vm_pageout_cleaned_reactivated++;
4856					vm_pageout_cleaned_commit_reactivated++;
4857
4858					if (m->active == FALSE)
4859						vm_page_activate(m);
4860				} else {
4861					m->reference = FALSE;
4862					vm_page_enqueue_cleaned(m);
4863				}
4864			}
4865			else if (dwp->dw_mask & DW_vm_page_lru)
4866				vm_page_lru(m);
4867			else if (dwp->dw_mask & DW_VM_PAGE_QUEUES_REMOVE) {
4868				if ( !m->pageout_queue)
4869					VM_PAGE_QUEUES_REMOVE(m);
4870			}
4871			if (dwp->dw_mask & DW_set_reference)
4872				m->reference = TRUE;
4873			else if (dwp->dw_mask & DW_clear_reference)
4874				m->reference = FALSE;
4875
4876			if (dwp->dw_mask & DW_move_page) {
4877				if ( !m->pageout_queue) {
4878					VM_PAGE_QUEUES_REMOVE(m);
4879
4880					assert(m->object != kernel_object);
4881
4882					VM_PAGE_ENQUEUE_INACTIVE(m, FALSE);
4883				}
4884			}
4885			if (dwp->dw_mask & DW_clear_busy)
4886				m->busy = FALSE;
4887
4888			if (dwp->dw_mask & DW_PAGE_WAKEUP)
4889				PAGE_WAKEUP(m);
4890		}
4891	}
4892	vm_page_unlock_queues();
4893
4894	if (local_free_q)
4895		vm_page_free_list(local_free_q, TRUE);
4896
4897	VM_CHECK_MEMORYSTATUS;
4898
4899}
4900
4901kern_return_t
4902vm_page_alloc_list(
4903	int	page_count,
4904	int	flags,
4905	vm_page_t *list)
4906{
4907	vm_page_t	lo_page_list = VM_PAGE_NULL;
4908	vm_page_t	mem;
4909	int		i;
4910
4911	if ( !(flags & KMA_LOMEM))
4912		panic("vm_page_alloc_list: called w/o KMA_LOMEM");
4913
4914	for (i = 0; i < page_count; i++) {
4915
4916		mem = vm_page_grablo();
4917
4918		if (mem == VM_PAGE_NULL) {
4919			if (lo_page_list)
4920				vm_page_free_list(lo_page_list, FALSE);
4921
4922			*list = VM_PAGE_NULL;
4923
4924			return (KERN_RESOURCE_SHORTAGE);
4925		}
4926		mem->pageq.next = (queue_entry_t) lo_page_list;
4927		lo_page_list = mem;
4928	}
4929	*list = lo_page_list;
4930
4931	return (KERN_SUCCESS);
4932}
4933
4934void
4935vm_page_set_offset(vm_page_t page, vm_object_offset_t offset)
4936{
4937	page->offset = offset;
4938}
4939
4940vm_page_t
4941vm_page_get_next(vm_page_t page)
4942{
4943	return ((vm_page_t) page->pageq.next);
4944}
4945
4946vm_object_offset_t
4947vm_page_get_offset(vm_page_t page)
4948{
4949	return (page->offset);
4950}
4951
4952ppnum_t
4953vm_page_get_phys_page(vm_page_t page)
4954{
4955	return (page->phys_page);
4956}
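#if 0
/*
 * Illustrative sketch only (not compiled): a hypothetical consumer of
 * vm_page_alloc_list() walking the returned chain with the accessors
 * above.  example_fill_scatter_list() and 'phys' are made-up names; the
 * pages must eventually be returned with vm_page_free_list().
 */
static kern_return_t
example_fill_scatter_list(int page_count, ppnum_t *phys)
{
	vm_page_t	list;
	vm_page_t	mem;
	int		i = 0;
	kern_return_t	kr;

	/* vm_page_alloc_list() currently requires KMA_LOMEM */
	kr = vm_page_alloc_list(page_count, KMA_LOMEM, &list);
	if (kr != KERN_SUCCESS)
		return (kr);

	for (mem = list; mem != VM_PAGE_NULL; mem = vm_page_get_next(mem))
		phys[i++] = vm_page_get_phys_page(mem);

	return (KERN_SUCCESS);
}
#endif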
4957
4958
4959/* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */
4960
4961#if HIBERNATION
4962
4963static vm_page_t hibernate_gobble_queue;
4964
4965extern boolean_t (* volatile consider_buffer_cache_collect)(int);
4966
4967static int  hibernate_drain_pageout_queue(struct vm_pageout_queue *);
4968static int  hibernate_flush_dirty_pages(int);
4969static int  hibernate_flush_queue(queue_head_t *, int);
4970
4971void hibernate_flush_wait(void);
4972void hibernate_mark_in_progress(void);
4973void hibernate_clear_in_progress(void);
4974
4975void		hibernate_free_range(int, int);
4976void		hibernate_hash_insert_page(vm_page_t);
4977uint32_t	hibernate_mark_as_unneeded(addr64_t, addr64_t, hibernate_page_list_t *, hibernate_page_list_t *);
4978void		hibernate_rebuild_vm_structs(void);
4979uint32_t	hibernate_teardown_vm_structs(hibernate_page_list_t *, hibernate_page_list_t *);
4980ppnum_t		hibernate_lookup_paddr(unsigned int);
4981
4982struct hibernate_statistics {
4983	int hibernate_considered;
4984	int hibernate_reentered_on_q;
4985	int hibernate_found_dirty;
4986	int hibernate_skipped_cleaning;
4987	int hibernate_skipped_transient;
4988	int hibernate_skipped_precious;
4989	int hibernate_skipped_external;
4990	int hibernate_queue_nolock;
4991	int hibernate_queue_paused;
4992	int hibernate_throttled;
4993	int hibernate_throttle_timeout;
4994	int hibernate_drained;
4995	int hibernate_drain_timeout;
4996	int cd_lock_failed;
4997	int cd_found_precious;
4998	int cd_found_wired;
4999	int cd_found_busy;
5000	int cd_found_unusual;
5001	int cd_found_cleaning;
5002	int cd_found_laundry;
5003	int cd_found_dirty;
5004	int cd_found_xpmapped;
5005	int cd_skipped_xpmapped;
5006	int cd_local_free;
5007	int cd_total_free;
5008	int cd_vm_page_wire_count;
5009	int cd_vm_struct_pages_unneeded;
5010	int cd_pages;
5011	int cd_discarded;
5012	int cd_count_wire;
5013} hibernate_stats;
5014
5015
5016/*
5017 * clamp the number of 'xpmapped' pages we'll sweep into the hibernation image
5018 * so that we don't overrun the estimated image size, which would
5019 * result in a hibernation failure.
5020 */
5021#define	HIBERNATE_XPMAPPED_LIMIT	40000
5022
5023
5024static int
5025hibernate_drain_pageout_queue(struct vm_pageout_queue *q)
5026{
5027	wait_result_t	wait_result;
5028
5029	vm_page_lock_queues();
5030
5031	while ( !queue_empty(&q->pgo_pending) ) {
5032
5033		q->pgo_draining = TRUE;
5034
5035		assert_wait_timeout((event_t) (&q->pgo_laundry+1), THREAD_INTERRUPTIBLE, 5000, 1000*NSEC_PER_USEC);
5036
5037		vm_page_unlock_queues();
5038
5039		wait_result = thread_block(THREAD_CONTINUE_NULL);
5040
5041		if (wait_result == THREAD_TIMED_OUT && !queue_empty(&q->pgo_pending)) {
5042			hibernate_stats.hibernate_drain_timeout++;
5043
5044			if (q == &vm_pageout_queue_external)
5045				return (0);
5046
5047			return (1);
5048		}
5049		vm_page_lock_queues();
5050
5051		hibernate_stats.hibernate_drained++;
5052	}
5053	vm_page_unlock_queues();
5054
5055	return (0);
5056}
5057
5058
5059boolean_t hibernate_skip_external = FALSE;
5060
5061static int
5062hibernate_flush_queue(queue_head_t *q, int qcount)
5063{
5064	vm_page_t	m;
5065	vm_object_t	l_object = NULL;
5066	vm_object_t	m_object = NULL;
5067	int		refmod_state = 0;
5068	int		try_failed_count = 0;
5069	int		retval = 0;
5070	int		current_run = 0;
5071	struct	vm_pageout_queue *iq;
5072	struct	vm_pageout_queue *eq;
5073	struct	vm_pageout_queue *tq;
5074
5075
5076	KERNEL_DEBUG_CONSTANT(IOKDBG_CODE(DBG_HIBERNATE, 4) | DBG_FUNC_START, q, qcount, 0, 0, 0);
5077
5078	iq = &vm_pageout_queue_internal;
5079	eq = &vm_pageout_queue_external;
5080
5081	vm_page_lock_queues();
5082
5083	while (qcount && !queue_empty(q)) {
5084
5085		if (current_run++ == 1000) {
5086			if (hibernate_should_abort()) {
5087				retval = 1;
5088				break;
5089			}
5090			current_run = 0;
5091		}
5092
5093		m = (vm_page_t) queue_first(q);
5094		m_object = m->object;
5095
5096		/*
5097		 * check to see if we currently are working
5098		 * with the same object... if so, we've
5099		 * already got the lock
5100		 */
5101		if (m_object != l_object) {
5102		        /*
5103			 * the object associated with candidate page is
5104			 * different from the one we were just working
5105			 * with... dump the lock if we still own it
5106			 */
5107		        if (l_object != NULL) {
5108			        vm_object_unlock(l_object);
5109				l_object = NULL;
5110			}
5111			/*
5112			 * Try to lock object; since we've already got the
5113			 * page queues lock, we can only 'try' for this one.
5114			 * if the 'try' fails, we need to do a mutex_pause
5115			 * to allow the owner of the object lock a chance to
5116			 * run...
5117			 */
5118			if ( !vm_object_lock_try_scan(m_object)) {
5119
5120				if (try_failed_count > 20) {
5121					hibernate_stats.hibernate_queue_nolock++;
5122
5123					goto reenter_pg_on_q;
5124				}
5125				vm_pageout_scan_wants_object = m_object;
5126
5127				vm_page_unlock_queues();
5128				mutex_pause(try_failed_count++);
5129				vm_page_lock_queues();
5130
5131				hibernate_stats.hibernate_queue_paused++;
5132				continue;
5133			} else {
5134				l_object = m_object;
5135				vm_pageout_scan_wants_object = VM_OBJECT_NULL;
5136			}
5137		}
5138		if ( !m_object->alive || m->encrypted_cleaning || m->cleaning || m->laundry || m->busy || m->absent || m->error) {
5139			/*
5140			 * page is not to be cleaned
5141			 * put it back on the head of its queue
5142			 */
5143			if (m->cleaning)
5144				hibernate_stats.hibernate_skipped_cleaning++;
5145			else
5146				hibernate_stats.hibernate_skipped_transient++;
5147
5148			goto reenter_pg_on_q;
5149		}
5150		if (m_object->copy == VM_OBJECT_NULL) {
5151			if (m_object->purgable == VM_PURGABLE_VOLATILE || m_object->purgable == VM_PURGABLE_EMPTY) {
5152				/*
5153				 * let the normal hibernate image path
5154				 * deal with these
5155				 */
5156				goto reenter_pg_on_q;
5157			}
5158		}
5159		if ( !m->dirty && m->pmapped) {
5160		        refmod_state = pmap_get_refmod(m->phys_page);
5161
5162			if ((refmod_state & VM_MEM_MODIFIED)) {
5163				SET_PAGE_DIRTY(m, FALSE);
5164			}
5165		} else
5166			refmod_state = 0;
5167
5168		if ( !m->dirty) {
5169			/*
5170			 * page is not to be cleaned
5171			 * put it back on the head of its queue
5172			 */
5173			if (m->precious)
5174				hibernate_stats.hibernate_skipped_precious++;
5175
5176			goto reenter_pg_on_q;
5177		}
5178
5179		if (hibernate_skip_external == TRUE && !m_object->internal) {
5180
5181			hibernate_stats.hibernate_skipped_external++;
5182
5183			goto reenter_pg_on_q;
5184		}
5185		tq = NULL;
5186
5187		if (m_object->internal) {
5188			if (VM_PAGE_Q_THROTTLED(iq))
5189				tq = iq;
5190		} else if (VM_PAGE_Q_THROTTLED(eq))
5191			tq = eq;
5192
5193		if (tq != NULL) {
5194			wait_result_t	wait_result;
5195			int		wait_count = 5;
5196
5197		        if (l_object != NULL) {
5198			        vm_object_unlock(l_object);
5199				l_object = NULL;
5200			}
5201			vm_pageout_scan_wants_object = VM_OBJECT_NULL;
5202
5203			while (retval == 0) {
5204
5205				tq->pgo_throttled = TRUE;
5206
5207				assert_wait_timeout((event_t) &tq->pgo_laundry, THREAD_INTERRUPTIBLE, 1000, 1000*NSEC_PER_USEC);
5208
5209				vm_page_unlock_queues();
5210
5211				wait_result = thread_block(THREAD_CONTINUE_NULL);
5212
5213				vm_page_lock_queues();
5214
5215				if (wait_result != THREAD_TIMED_OUT)
5216					break;
5217                                if (!VM_PAGE_Q_THROTTLED(tq))
5218                                        break;
5219
5220				if (hibernate_should_abort())
5221					retval = 1;
5222
5223				if (--wait_count == 0) {
5224
5225					hibernate_stats.hibernate_throttle_timeout++;
5226
5227					if (tq == eq) {
5228						hibernate_skip_external = TRUE;
5229						break;
5230					}
5231					retval = 1;
5232				}
5233			}
5234			if (retval)
5235				break;
5236
5237			hibernate_stats.hibernate_throttled++;
5238
5239			continue;
5240		}
5241		/*
5242		 * we've already factored out pages in the laundry which
5243		 * means this page can't be on the pageout queue so it's
5244		 * safe to do the VM_PAGE_QUEUES_REMOVE
5245		 */
5246                assert(!m->pageout_queue);
5247
5248		VM_PAGE_QUEUES_REMOVE(m);
5249
5250		if (COMPRESSED_PAGER_IS_ACTIVE && m_object->internal == TRUE)
5251			pmap_disconnect_options(m->phys_page, PMAP_OPTIONS_COMPRESSOR, NULL);
5252
5253		vm_pageout_cluster(m, FALSE);
5254
5255		hibernate_stats.hibernate_found_dirty++;
5256
5257		goto next_pg;
5258
5259reenter_pg_on_q:
5260		queue_remove(q, m, vm_page_t, pageq);
5261		queue_enter(q, m, vm_page_t, pageq);
5262
5263		hibernate_stats.hibernate_reentered_on_q++;
5264next_pg:
5265		hibernate_stats.hibernate_considered++;
5266
5267		qcount--;
5268		try_failed_count = 0;
5269	}
5270	if (l_object != NULL) {
5271		vm_object_unlock(l_object);
5272		l_object = NULL;
5273	}
5274	vm_pageout_scan_wants_object = VM_OBJECT_NULL;
5275
5276	vm_page_unlock_queues();
5277
5278	KERNEL_DEBUG_CONSTANT(IOKDBG_CODE(DBG_HIBERNATE, 4) | DBG_FUNC_END, hibernate_stats.hibernate_found_dirty, retval, 0, 0, 0);
5279
5280	return (retval);
5281}
5282
5283
5284static int
5285hibernate_flush_dirty_pages(int pass)
5286{
5287	struct vm_speculative_age_q	*aq;
5288	uint32_t	i;
5289
5290	if (vm_page_local_q) {
5291		for (i = 0; i < vm_page_local_q_count; i++)
5292			vm_page_reactivate_local(i, TRUE, FALSE);
5293	}
5294
5295	for (i = 0; i <= VM_PAGE_MAX_SPECULATIVE_AGE_Q; i++) {
5296		int		qcount;
5297		vm_page_t	m;
5298
5299		aq = &vm_page_queue_speculative[i];
5300
5301		if (queue_empty(&aq->age_q))
5302			continue;
5303		qcount = 0;
5304
5305		vm_page_lockspin_queues();
5306
5307		queue_iterate(&aq->age_q,
5308			      m,
5309			      vm_page_t,
5310			      pageq)
5311		{
5312			qcount++;
5313		}
5314		vm_page_unlock_queues();
5315
5316		if (qcount) {
5317			if (hibernate_flush_queue(&aq->age_q, qcount))
5318				return (1);
5319		}
5320	}
5321	if (hibernate_flush_queue(&vm_page_queue_inactive, vm_page_inactive_count - vm_page_anonymous_count - vm_page_cleaned_count))
5322		return (1);
5323	if (hibernate_flush_queue(&vm_page_queue_anonymous, vm_page_anonymous_count))
5324		return (1);
5325	if (hibernate_flush_queue(&vm_page_queue_cleaned, vm_page_cleaned_count))
5326		return (1);
5327	if (hibernate_drain_pageout_queue(&vm_pageout_queue_internal))
5328		return (1);
5329
5330	if (COMPRESSED_PAGER_IS_ACTIVE && pass == 1)
5331		vm_compressor_record_warmup_start();
5332
5333	if (hibernate_flush_queue(&vm_page_queue_active, vm_page_active_count)) {
5334		if (COMPRESSED_PAGER_IS_ACTIVE && pass == 1)
5335			vm_compressor_record_warmup_end();
5336		return (1);
5337	}
5338	if (hibernate_drain_pageout_queue(&vm_pageout_queue_internal)) {
5339		if (COMPRESSED_PAGER_IS_ACTIVE && pass == 1)
5340			vm_compressor_record_warmup_end();
5341		return (1);
5342	}
5343	if (COMPRESSED_PAGER_IS_ACTIVE && pass == 1)
5344		vm_compressor_record_warmup_end();
5345
5346	if (hibernate_skip_external == FALSE && hibernate_drain_pageout_queue(&vm_pageout_queue_external))
5347		return (1);
5348
5349	return (0);
5350}
5351
5352
5353void
5354hibernate_reset_stats()
5355{
5356	bzero(&hibernate_stats, sizeof(struct hibernate_statistics));
5357}
5358
5359
5360int
5361hibernate_flush_memory()
5362{
5363	int	retval;
5364
5365	KERNEL_DEBUG_CONSTANT(IOKDBG_CODE(DBG_HIBERNATE, 3) | DBG_FUNC_START, vm_page_free_count, 0, 0, 0, 0);
5366
5367	hibernate_cleaning_in_progress = TRUE;
5368	hibernate_skip_external = FALSE;
5369
5370	if ((retval = hibernate_flush_dirty_pages(1)) == 0) {
5371
5372		if (COMPRESSED_PAGER_IS_ACTIVE) {
5373
5374				KERNEL_DEBUG_CONSTANT(IOKDBG_CODE(DBG_HIBERNATE, 10) | DBG_FUNC_START, VM_PAGE_COMPRESSOR_COUNT, 0, 0, 0, 0);
5375
5376				vm_compressor_flush();
5377
5378				KERNEL_DEBUG_CONSTANT(IOKDBG_CODE(DBG_HIBERNATE, 10) | DBG_FUNC_END, VM_PAGE_COMPRESSOR_COUNT, 0, 0, 0, 0);
5379		}
5380		if (consider_buffer_cache_collect != NULL) {
5381			unsigned int orig_wire_count;
5382
5383			KERNEL_DEBUG_CONSTANT(IOKDBG_CODE(DBG_HIBERNATE, 7) | DBG_FUNC_START, 0, 0, 0, 0, 0);
5384			orig_wire_count = vm_page_wire_count;
5385
5386			(void)(*consider_buffer_cache_collect)(1);
5387			consider_zone_gc(TRUE);
5388
5389			HIBLOG("hibernate_flush_memory: buffer_cache_gc freed up %d wired pages\n", orig_wire_count - vm_page_wire_count);
5390
5391			KERNEL_DEBUG_CONSTANT(IOKDBG_CODE(DBG_HIBERNATE, 7) | DBG_FUNC_END, orig_wire_count - vm_page_wire_count, 0, 0, 0, 0);
5392		}
5393	}
5394	hibernate_cleaning_in_progress = FALSE;
5395
5396	KERNEL_DEBUG_CONSTANT(IOKDBG_CODE(DBG_HIBERNATE, 3) | DBG_FUNC_END, vm_page_free_count, hibernate_stats.hibernate_found_dirty, retval, 0, 0);
5397
5398	if (retval && COMPRESSED_PAGER_IS_ACTIVE)
5399		HIBLOG("hibernate_flush_memory() failed to finish - vm_page_compressor_count(%d)\n", VM_PAGE_COMPRESSOR_COUNT);
5400
5401
5402    HIBPRINT("hibernate_flush_memory() considered(%d) reentered_on_q(%d) found_dirty(%d)\n",
5403                hibernate_stats.hibernate_considered,
5404                hibernate_stats.hibernate_reentered_on_q,
5405                hibernate_stats.hibernate_found_dirty);
5406    HIBPRINT("   skipped_cleaning(%d) skipped_transient(%d) skipped_precious(%d) skipped_external(%d) queue_nolock(%d)\n",
5407                hibernate_stats.hibernate_skipped_cleaning,
5408                hibernate_stats.hibernate_skipped_transient,
5409                hibernate_stats.hibernate_skipped_precious,
5410                hibernate_stats.hibernate_skipped_external,
5411                hibernate_stats.hibernate_queue_nolock);
5412    HIBPRINT("   queue_paused(%d) throttled(%d) throttle_timeout(%d) drained(%d) drain_timeout(%d)\n",
5413                hibernate_stats.hibernate_queue_paused,
5414                hibernate_stats.hibernate_throttled,
5415                hibernate_stats.hibernate_throttle_timeout,
5416                hibernate_stats.hibernate_drained,
5417                hibernate_stats.hibernate_drain_timeout);
5418
5419	return (retval);
5420}
5421
5422
5423static void
5424hibernate_page_list_zero(hibernate_page_list_t *list)
5425{
5426    uint32_t             bank;
5427    hibernate_bitmap_t * bitmap;
5428
5429    bitmap = &list->bank_bitmap[0];
5430    for (bank = 0; bank < list->bank_count; bank++)
5431    {
5432        uint32_t last_bit;
5433
5434	bzero((void *) &bitmap->bitmap[0], bitmap->bitmapwords << 2);
5435        // set out-of-bound bits at end of bitmap.
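        // e.g. a bank spanning 40 pages: last_bit == (40 & 31) == 8, so
        // (0xFFFFFFFF >> 8) pre-sets the low 24 bits of the final word,
        // i.e. the bit positions that lie past the bank's last page.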
5436        last_bit = ((bitmap->last_page - bitmap->first_page + 1) & 31);
5437	if (last_bit)
5438	    bitmap->bitmap[bitmap->bitmapwords - 1] = (0xFFFFFFFF >> last_bit);
5439
5440	bitmap = (hibernate_bitmap_t *) &bitmap->bitmap[bitmap->bitmapwords];
5441    }
5442}
5443
5444void
5445hibernate_gobble_pages(uint32_t gobble_count, uint32_t free_page_time)
5446{
5447    uint32_t i;
5448    vm_page_t m;
5449    uint64_t start, end, timeout, nsec;
5450    clock_interval_to_deadline(free_page_time, 1000 * 1000 /*ms*/, &timeout);
5451    clock_get_uptime(&start);
5452
5453    for (i = 0; i < gobble_count; i++)
5454    {
5455	while (VM_PAGE_NULL == (m = vm_page_grab()))
5456	{
5457	    clock_get_uptime(&end);
5458	    if (end >= timeout)
5459		break;
5460	    VM_PAGE_WAIT();
5461	}
5462	if (!m)
5463	    break;
5464	m->busy = FALSE;
5465	vm_page_gobble(m);
5466
5467	m->pageq.next = (queue_entry_t) hibernate_gobble_queue;
5468	hibernate_gobble_queue = m;
5469    }
5470
5471    clock_get_uptime(&end);
5472    absolutetime_to_nanoseconds(end - start, &nsec);
5473    HIBLOG("Gobbled %d pages, time: %qd ms\n", i, nsec / 1000000ULL);
5474}
5475
5476void
5477hibernate_free_gobble_pages(void)
5478{
5479    vm_page_t m, next;
5480    uint32_t  count = 0;
5481
5482    m = (vm_page_t) hibernate_gobble_queue;
5483    while(m)
5484    {
5485        next = (vm_page_t) m->pageq.next;
5486        vm_page_free(m);
5487        count++;
5488        m = next;
5489    }
5490    hibernate_gobble_queue = VM_PAGE_NULL;
5491
5492    if (count)
5493        HIBLOG("Freed %d pages\n", count);
5494}
5495
5496static boolean_t
5497hibernate_consider_discard(vm_page_t m, boolean_t preflight)
5498{
5499    vm_object_t object = NULL;
5500    int                  refmod_state;
5501    boolean_t            discard = FALSE;
5502
5503    do
5504    {
5505        if (m->private)
5506            panic("hibernate_consider_discard: private");
5507
5508        if (!vm_object_lock_try(m->object)) {
5509	    if (!preflight) hibernate_stats.cd_lock_failed++;
5510            break;
5511	}
5512        object = m->object;
5513
5514	if (VM_PAGE_WIRED(m)) {
5515	    if (!preflight) hibernate_stats.cd_found_wired++;
5516            break;
5517	}
5518        if (m->precious) {
5519	    if (!preflight) hibernate_stats.cd_found_precious++;
5520            break;
5521	}
5522        if (m->busy || !object->alive) {
5523           /*
5524            *	Somebody is playing with this page.
5525            */
5526	    if (!preflight) hibernate_stats.cd_found_busy++;
5527            break;
5528	}
5529        if (m->absent || m->unusual || m->error) {
5530           /*
            * If it's unusual in any way, ignore it
5532            */
5533	    if (!preflight) hibernate_stats.cd_found_unusual++;
5534            break;
5535	}
5536        if (m->cleaning) {
5537	    if (!preflight) hibernate_stats.cd_found_cleaning++;
5538            break;
5539	}
5540	if (m->laundry) {
5541	    if (!preflight) hibernate_stats.cd_found_laundry++;
5542            break;
5543	}
5544        if (!m->dirty)
5545        {
5546            refmod_state = pmap_get_refmod(m->phys_page);
5547
5548            if (refmod_state & VM_MEM_REFERENCED)
5549                m->reference = TRUE;
5550            if (refmod_state & VM_MEM_MODIFIED) {
5551              	SET_PAGE_DIRTY(m, FALSE);
5552	    }
5553        }
5554
5555        /*
5556         * If it's clean or purgeable we can discard the page on wakeup.
5557         */
5558        discard = (!m->dirty)
5559		    || (VM_PURGABLE_VOLATILE == object->purgable)
5560		    || (VM_PURGABLE_EMPTY    == object->purgable);
5561
5562
5563        if (discard == FALSE) {
5564		if (!preflight)
5565			hibernate_stats.cd_found_dirty++;
5566        } else if (m->xpmapped && m->reference && !object->internal) {
5567		if (hibernate_stats.cd_found_xpmapped < HIBERNATE_XPMAPPED_LIMIT) {
5568			if (!preflight)
5569				hibernate_stats.cd_found_xpmapped++;
5570			discard = FALSE;
5571		} else {
5572			if (!preflight)
5573				hibernate_stats.cd_skipped_xpmapped++;
5574		}
5575        }
5576    }
5577    while (FALSE);
5578
5579    if (object)
5580        vm_object_unlock(object);
5581
5582    return (discard);
5583}
5584
5585
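/*
 * Free a page that hibernate_consider_discard() said could be discarded.
 * If the page belongs to a volatile purgeable object, the object is
 * removed from its purgeable queue, marked VM_PURGABLE_EMPTY, and
 * vm_page_purgeable_count is reduced, since the object is effectively
 * being purged.
 */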
5586static void
5587hibernate_discard_page(vm_page_t m)
5588{
5589    if (m->absent || m->unusual || m->error)
5590       /*
5591        * If it's unusual in any way, ignore it
5592        */
5593        return;
5594
5595#if MACH_ASSERT || DEBUG
5596    vm_object_t object = m->object;
5597    if (!vm_object_lock_try(m->object))
5598	panic("hibernate_discard_page(%p) !vm_object_lock_try", m);
5599#else
5600    /* No need to lock the page queue for the token delete; hibernate_vm_unlock()
5601       makes sure these locks are uncontended before sleeping */
5602#endif /* MACH_ASSERT || DEBUG */
5603
5604    if (m->pmapped == TRUE)
5605    {
5606        __unused int refmod_state = pmap_disconnect(m->phys_page);
5607    }
5608
5609    if (m->laundry)
5610        panic("hibernate_discard_page(%p) laundry", m);
5611    if (m->private)
5612        panic("hibernate_discard_page(%p) private", m);
5613    if (m->fictitious)
5614        panic("hibernate_discard_page(%p) fictitious", m);
5615
5616    if (VM_PURGABLE_VOLATILE == m->object->purgable)
5617    {
5618	/* object should be on a queue */
5619        assert((m->object->objq.next != NULL) && (m->object->objq.prev != NULL));
5620        purgeable_q_t old_queue = vm_purgeable_object_remove(m->object);
5621        assert(old_queue);
5622	if (m->object->purgeable_when_ripe) {
5623		vm_purgeable_token_delete_first(old_queue);
5624	}
5625        m->object->purgable = VM_PURGABLE_EMPTY;
5626
5627	/*
5628	 * Purgeable ledgers:  pages of VOLATILE and EMPTY objects are
5629	 * accounted in the "volatile" ledger, so no change here.
5630	 * We have to update vm_page_purgeable_count, though, since we're
5631	 * effectively purging this object.
5632	 */
5633	unsigned int delta;
5634	assert(m->object->resident_page_count >= m->object->wired_page_count);
5635	delta = (m->object->resident_page_count - m->object->wired_page_count);
5636	assert(vm_page_purgeable_count >= delta);
5637	assert(delta > 0);
5638	OSAddAtomic(-delta, (SInt32 *)&vm_page_purgeable_count);
5639    }
5640
5641    vm_page_free(m);
5642
5643#if MACH_ASSERT || DEBUG
5644    vm_object_unlock(object);
5645#endif	/* MACH_ASSERT || DEBUG */
5646}
5647
5648/*
5649 Grab locks for hibernate_page_list_setall()
5650*/
5651void
5652hibernate_vm_lock_queues(void)
5653{
5654    vm_object_lock(compressor_object);
5655    vm_page_lock_queues();
5656    lck_mtx_lock(&vm_page_queue_free_lock);
5657
5658    if (vm_page_local_q) {
5659	uint32_t  i;
5660	for (i = 0; i < vm_page_local_q_count; i++) {
5661	    struct vpl	*lq;
5662	    lq = &vm_page_local_q[i].vpl_un.vpl;
5663	    VPL_LOCK(&lq->vpl_lock);
5664	}
5665    }
5666}
5667
5668void
5669hibernate_vm_unlock_queues(void)
5670{
5671    if (vm_page_local_q) {
5672	uint32_t  i;
5673	for (i = 0; i < vm_page_local_q_count; i++) {
5674	    struct vpl	*lq;
5675	    lq = &vm_page_local_q[i].vpl_un.vpl;
5676	    VPL_UNLOCK(&lq->vpl_lock);
5677	}
5678    }
5679    lck_mtx_unlock(&vm_page_queue_free_lock);
5680    vm_page_unlock_queues();
5681    vm_object_unlock(compressor_object);
5682}
5683
5684/*
5685 A zero bit in the bitmaps means the page needs to be saved.  All pages default to
5686 being saved; pages known to the VM not to need saving are subtracted (their bits are set).
5687 Wired pages to be saved are present in page_list_wired, pageable ones in page_list.
5688*/
5689
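/*
 * When preflight is TRUE only the counts are computed; the page lists are
 * not written and no pages are discarded.  When will_discard is TRUE the
 * pages found discardable are discarded here (discard_all).
 */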
5690void
5691hibernate_page_list_setall(hibernate_page_list_t * page_list,
5692			   hibernate_page_list_t * page_list_wired,
5693			   hibernate_page_list_t * page_list_pal,
5694			   boolean_t preflight,
5695			   boolean_t will_discard,
5696			   uint32_t * pagesOut)
5697{
5698    uint64_t start, end, nsec;
5699    vm_page_t m;
5700    vm_page_t next;
5701    uint32_t pages = page_list->page_count;
5702    uint32_t count_anonymous = 0, count_throttled = 0, count_compressor = 0;
5703    uint32_t count_inactive = 0, count_active = 0, count_speculative = 0, count_cleaned = 0;
5704    uint32_t count_wire = pages;
5705    uint32_t count_discard_active    = 0;
5706    uint32_t count_discard_inactive  = 0;
5707    uint32_t count_discard_cleaned   = 0;
5708    uint32_t count_discard_purgeable = 0;
5709    uint32_t count_discard_speculative = 0;
5710    uint32_t count_discard_vm_struct_pages = 0;
5711    uint32_t i;
5712    uint32_t             bank;
5713    hibernate_bitmap_t * bitmap;
5714    hibernate_bitmap_t * bitmap_wired;
5715    boolean_t			 discard_all;
5716    boolean_t            discard;
5717
5718    HIBLOG("hibernate_page_list_setall(preflight %d) start %p, %p\n", preflight, page_list, page_list_wired);
5719
5720    if (preflight) {
5721        page_list       = NULL;
5722        page_list_wired = NULL;
5723        page_list_pal   = NULL;
5724		discard_all     = FALSE;
5725    } else {
5726		discard_all     = will_discard;
5727    }
5728
5729#if MACH_ASSERT || DEBUG
5730    if (!preflight)
5731    {
5732        vm_page_lock_queues();
5733	if (vm_page_local_q) {
5734	    for (i = 0; i < vm_page_local_q_count; i++) {
5735		struct vpl	*lq;
5736		lq = &vm_page_local_q[i].vpl_un.vpl;
5737		VPL_LOCK(&lq->vpl_lock);
5738	    }
5739	}
5740    }
5741#endif  /* MACH_ASSERT || DEBUG */
5742
5743
5744    KERNEL_DEBUG_CONSTANT(IOKDBG_CODE(DBG_HIBERNATE, 8) | DBG_FUNC_START, count_wire, 0, 0, 0, 0);
5745
5746    clock_get_uptime(&start);
5747
5748    if (!preflight) {
5749	hibernate_page_list_zero(page_list);
5750	hibernate_page_list_zero(page_list_wired);
5751	hibernate_page_list_zero(page_list_pal);
5752
5753	hibernate_stats.cd_vm_page_wire_count = vm_page_wire_count;
5754	hibernate_stats.cd_pages = pages;
5755    }
5756
5757    if (vm_page_local_q) {
5758	    for (i = 0; i < vm_page_local_q_count; i++)
5759		    vm_page_reactivate_local(i, TRUE, !preflight);
5760    }
5761
5762    if (preflight) {
5763	vm_object_lock(compressor_object);
5764	vm_page_lock_queues();
5765	lck_mtx_lock(&vm_page_queue_free_lock);
5766    }
5767
5768    m = (vm_page_t) hibernate_gobble_queue;
5769    while (m)
5770    {
5771	pages--;
5772	count_wire--;
5773	if (!preflight) {
5774	    hibernate_page_bitset(page_list,       TRUE, m->phys_page);
5775	    hibernate_page_bitset(page_list_wired, TRUE, m->phys_page);
5776	}
5777	m = (vm_page_t) m->pageq.next;
5778    }
5779
5780    if (!preflight) for( i = 0; i < real_ncpus; i++ )
5781    {
5782	if (cpu_data_ptr[i] && cpu_data_ptr[i]->cpu_processor)
5783	{
5784	    for (m = PROCESSOR_DATA(cpu_data_ptr[i]->cpu_processor, free_pages); m; m = (vm_page_t)m->pageq.next)
5785	    {
5786		pages--;
5787		count_wire--;
5788		hibernate_page_bitset(page_list,       TRUE, m->phys_page);
5789		hibernate_page_bitset(page_list_wired, TRUE, m->phys_page);
5790
5791		hibernate_stats.cd_local_free++;
5792		hibernate_stats.cd_total_free++;
5793	    }
5794	}
5795    }
5796
5797    for( i = 0; i < vm_colors; i++ )
5798    {
5799	queue_iterate(&vm_page_queue_free[i],
5800		      m,
5801		      vm_page_t,
5802		      pageq)
5803	{
5804	    pages--;
5805	    count_wire--;
5806	    if (!preflight) {
5807		hibernate_page_bitset(page_list,       TRUE, m->phys_page);
5808		hibernate_page_bitset(page_list_wired, TRUE, m->phys_page);
5809
5810		hibernate_stats.cd_total_free++;
5811	    }
5812	}
5813    }
5814
5815    queue_iterate(&vm_lopage_queue_free,
5816		  m,
5817		  vm_page_t,
5818		  pageq)
5819    {
5820	pages--;
5821	count_wire--;
5822	if (!preflight) {
5823	    hibernate_page_bitset(page_list,       TRUE, m->phys_page);
5824	    hibernate_page_bitset(page_list_wired, TRUE, m->phys_page);
5825
5826	    hibernate_stats.cd_total_free++;
5827	}
5828    }
5829
5830    m = (vm_page_t) queue_first(&vm_page_queue_throttled);
5831    while (m && !queue_end(&vm_page_queue_throttled, (queue_entry_t)m))
5832    {
5833        next = (vm_page_t) m->pageq.next;
5834	discard = FALSE;
5835        if ((kIOHibernateModeDiscardCleanInactive & gIOHibernateMode)
5836         && hibernate_consider_discard(m, preflight))
5837        {
5838            if (!preflight) hibernate_page_bitset(page_list, TRUE, m->phys_page);
5839            count_discard_inactive++;
5840            discard = discard_all;
5841        }
5842        else
5843            count_throttled++;
5844	count_wire--;
5845	if (!preflight) hibernate_page_bitset(page_list_wired, TRUE, m->phys_page);
5846
5847        if (discard) hibernate_discard_page(m);
5848	m = next;
5849    }
5850
5851    m = (vm_page_t) queue_first(&vm_page_queue_anonymous);
5852    while (m && !queue_end(&vm_page_queue_anonymous, (queue_entry_t)m))
5853    {
5854        next = (vm_page_t) m->pageq.next;
5855	discard = FALSE;
5856        if ((kIOHibernateModeDiscardCleanInactive & gIOHibernateMode)
5857         && hibernate_consider_discard(m, preflight))
5858        {
5859            if (!preflight) hibernate_page_bitset(page_list, TRUE, m->phys_page);
5860	    if (m->dirty)
5861		count_discard_purgeable++;
5862	    else
5863		count_discard_inactive++;
5864            discard = discard_all;
5865        }
5866        else
5867            count_anonymous++;
5868	count_wire--;
5869	if (!preflight) hibernate_page_bitset(page_list_wired, TRUE, m->phys_page);
5870        if (discard)    hibernate_discard_page(m);
5871	m = next;
5872    }
5873
5874    m = (vm_page_t) queue_first(&vm_page_queue_cleaned);
5875    while (m && !queue_end(&vm_page_queue_cleaned, (queue_entry_t)m))
5876    {
5877        next = (vm_page_t) m->pageq.next;
5878	discard = FALSE;
5879        if ((kIOHibernateModeDiscardCleanInactive & gIOHibernateMode)
5880         && hibernate_consider_discard(m, preflight))
5881        {
5882            if (!preflight) hibernate_page_bitset(page_list, TRUE, m->phys_page);
5883	    if (m->dirty)
5884		count_discard_purgeable++;
5885	    else
5886		count_discard_cleaned++;
5887            discard = discard_all;
5888        }
5889        else
5890            count_cleaned++;
5891	count_wire--;
5892	if (!preflight) hibernate_page_bitset(page_list_wired, TRUE, m->phys_page);
5893        if (discard)    hibernate_discard_page(m);
5894	m = next;
5895    }
5896
5897    m = (vm_page_t) queue_first(&vm_page_queue_active);
5898    while (m && !queue_end(&vm_page_queue_active, (queue_entry_t)m))
5899    {
5900        next = (vm_page_t) m->pageq.next;
5901	discard = FALSE;
5902        if ((kIOHibernateModeDiscardCleanActive & gIOHibernateMode)
5903         && hibernate_consider_discard(m, preflight))
5904        {
5905            if (!preflight) hibernate_page_bitset(page_list, TRUE, m->phys_page);
5906	    if (m->dirty)
5907		count_discard_purgeable++;
5908	    else
5909		count_discard_active++;
5910            discard = discard_all;
5911        }
5912        else
5913            count_active++;
5914	count_wire--;
5915	if (!preflight) hibernate_page_bitset(page_list_wired, TRUE, m->phys_page);
5916        if (discard)    hibernate_discard_page(m);
5917	m = next;
5918    }
5919
5920    m = (vm_page_t) queue_first(&vm_page_queue_inactive);
5921    while (m && !queue_end(&vm_page_queue_inactive, (queue_entry_t)m))
5922    {
5923        next = (vm_page_t) m->pageq.next;
5924	discard = FALSE;
5925        if ((kIOHibernateModeDiscardCleanInactive & gIOHibernateMode)
5926         && hibernate_consider_discard(m, preflight))
5927        {
5928            if (!preflight) hibernate_page_bitset(page_list, TRUE, m->phys_page);
5929	    if (m->dirty)
5930		count_discard_purgeable++;
5931	    else
5932		count_discard_inactive++;
5933            discard = discard_all;
5934        }
5935        else
5936            count_inactive++;
5937	count_wire--;
5938	if (!preflight) hibernate_page_bitset(page_list_wired, TRUE, m->phys_page);
5939        if (discard)    hibernate_discard_page(m);
5940	m = next;
5941    }
5942
5943    for( i = 0; i <= VM_PAGE_MAX_SPECULATIVE_AGE_Q; i++ )
5944    {
5945	m = (vm_page_t) queue_first(&vm_page_queue_speculative[i].age_q);
5946	while (m && !queue_end(&vm_page_queue_speculative[i].age_q, (queue_entry_t)m))
5947	{
5948	    next = (vm_page_t) m->pageq.next;
5949	    discard = FALSE;
5950	    if ((kIOHibernateModeDiscardCleanInactive & gIOHibernateMode)
5951	     && hibernate_consider_discard(m, preflight))
5952	    {
5953		if (!preflight) hibernate_page_bitset(page_list, TRUE, m->phys_page);
5954		count_discard_speculative++;
5955		discard = discard_all;
5956	    }
5957	    else
5958		count_speculative++;
5959	    count_wire--;
5960	    if (!preflight) hibernate_page_bitset(page_list_wired, TRUE, m->phys_page);
5961	    if (discard)    hibernate_discard_page(m);
5962	    m = next;
5963	}
5964    }
5965
5966    queue_iterate(&compressor_object->memq, m, vm_page_t, listq)
5967    {
5968        count_compressor++;
5969	count_wire--;
5970	if (!preflight) hibernate_page_bitset(page_list_wired, TRUE, m->phys_page);
5971    }
5972
5973    if (preflight == FALSE && discard_all == TRUE) {
5974	    KERNEL_DEBUG_CONSTANT(IOKDBG_CODE(DBG_HIBERNATE, 12) | DBG_FUNC_START, 0, 0, 0, 0, 0);
5975
5976	    HIBLOG("hibernate_teardown started\n");
5977	    count_discard_vm_struct_pages = hibernate_teardown_vm_structs(page_list, page_list_wired);
5978	    HIBLOG("hibernate_teardown completed - discarded %d\n", count_discard_vm_struct_pages);
5979
5980	    pages -= count_discard_vm_struct_pages;
5981	    count_wire -= count_discard_vm_struct_pages;
5982
5983	    hibernate_stats.cd_vm_struct_pages_unneeded = count_discard_vm_struct_pages;
5984
5985	    KERNEL_DEBUG_CONSTANT(IOKDBG_CODE(DBG_HIBERNATE, 13) | DBG_FUNC_END, 0, 0, 0, 0, 0);
5986    }
5987
5988    if (!preflight) {
5989	// pull wired from hibernate_bitmap
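	// (set in page_list every bit that is clear in page_list_wired, so pages
	// that must be saved as wired are excluded from the pageable list)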
5990	bitmap = &page_list->bank_bitmap[0];
5991	bitmap_wired = &page_list_wired->bank_bitmap[0];
5992	for (bank = 0; bank < page_list->bank_count; bank++)
5993	{
5994	    for (i = 0; i < bitmap->bitmapwords; i++)
5995		bitmap->bitmap[i] = bitmap->bitmap[i] | ~bitmap_wired->bitmap[i];
5996	    bitmap       = (hibernate_bitmap_t *) &bitmap->bitmap      [bitmap->bitmapwords];
5997	    bitmap_wired = (hibernate_bitmap_t *) &bitmap_wired->bitmap[bitmap_wired->bitmapwords];
5998	}
5999    }
6000
6001    // machine dependent adjustments
6002    hibernate_page_list_setall_machine(page_list, page_list_wired, preflight, &pages);
6003
6004    if (!preflight) {
6005	hibernate_stats.cd_count_wire = count_wire;
6006	hibernate_stats.cd_discarded = count_discard_active + count_discard_inactive + count_discard_purgeable +
6007		count_discard_speculative + count_discard_cleaned + count_discard_vm_struct_pages;
6008    }
6009
6010    clock_get_uptime(&end);
6011    absolutetime_to_nanoseconds(end - start, &nsec);
6012    HIBLOG("hibernate_page_list_setall time: %qd ms\n", nsec / 1000000ULL);
6013
6014    HIBLOG("pages %d, wire %d, act %d, inact %d, cleaned %d spec %d, zf %d, throt %d, compr %d, xpmapped %d\n  %s discard act %d inact %d purgeable %d spec %d cleaned %d\n",
6015	   pages, count_wire, count_active, count_inactive, count_cleaned, count_speculative, count_anonymous, count_throttled, count_compressor, hibernate_stats.cd_found_xpmapped,
6016	        discard_all ? "did" : "could",
6017	        count_discard_active, count_discard_inactive, count_discard_purgeable, count_discard_speculative, count_discard_cleaned);
6018
6019    if (hibernate_stats.cd_skipped_xpmapped)
6020	    HIBLOG("WARNING: hibernate_page_list_setall skipped %d xpmapped pages\n", hibernate_stats.cd_skipped_xpmapped);
6021
6022    *pagesOut = pages - count_discard_active - count_discard_inactive - count_discard_purgeable - count_discard_speculative - count_discard_cleaned;
6023
6024    if (preflight && will_discard) *pagesOut -= count_compressor + count_throttled + count_anonymous + count_inactive + count_cleaned + count_speculative + count_active;
6025
6026#if MACH_ASSERT || DEBUG
6027    if (!preflight)
6028    {
6029	if (vm_page_local_q) {
6030	    for (i = 0; i < vm_page_local_q_count; i++) {
6031		struct vpl	*lq;
6032		lq = &vm_page_local_q[i].vpl_un.vpl;
6033		VPL_UNLOCK(&lq->vpl_lock);
6034	    }
6035	}
6036        vm_page_unlock_queues();
6037    }
6038#endif  /* MACH_ASSERT || DEBUG */
6039
6040    if (preflight) {
6041	lck_mtx_unlock(&vm_page_queue_free_lock);
6042	vm_page_unlock_queues();
6043	vm_object_unlock(compressor_object);
6044    }
6045
6046    KERNEL_DEBUG_CONSTANT(IOKDBG_CODE(DBG_HIBERNATE, 8) | DBG_FUNC_END, count_wire, *pagesOut, 0, 0, 0);
6047}
6048
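/*
 * Walk the anonymous, speculative, inactive, active and cleaned queues
 * and free every page whose bit is set in page_list, i.e. the pages
 * hibernate_page_list_setall() reported as discardable.
 */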
6049void
6050hibernate_page_list_discard(hibernate_page_list_t * page_list)
6051{
6052    uint64_t  start, end, nsec;
6053    vm_page_t m;
6054    vm_page_t next;
6055    uint32_t  i;
6056    uint32_t  count_discard_active    = 0;
6057    uint32_t  count_discard_inactive  = 0;
6058    uint32_t  count_discard_purgeable = 0;
6059    uint32_t  count_discard_cleaned   = 0;
6060    uint32_t  count_discard_speculative = 0;
6061
6062
6063#if MACH_ASSERT || DEBUG
6064        vm_page_lock_queues();
6065	if (vm_page_local_q) {
6066	    for (i = 0; i < vm_page_local_q_count; i++) {
6067		struct vpl	*lq;
6068		lq = &vm_page_local_q[i].vpl_un.vpl;
6069		VPL_LOCK(&lq->vpl_lock);
6070	    }
6071	}
6072#endif  /* MACH_ASSERT || DEBUG */
6073
6074    clock_get_uptime(&start);
6075
6076    m = (vm_page_t) queue_first(&vm_page_queue_anonymous);
6077    while (m && !queue_end(&vm_page_queue_anonymous, (queue_entry_t)m))
6078    {
6079        next = (vm_page_t) m->pageq.next;
6080        if (hibernate_page_bittst(page_list, m->phys_page))
6081        {
6082	    if (m->dirty)
6083		count_discard_purgeable++;
6084	    else
6085		count_discard_inactive++;
6086            hibernate_discard_page(m);
6087        }
6088        m = next;
6089    }
6090
6091    for( i = 0; i <= VM_PAGE_MAX_SPECULATIVE_AGE_Q; i++ )
6092    {
6093       m = (vm_page_t) queue_first(&vm_page_queue_speculative[i].age_q);
6094       while (m && !queue_end(&vm_page_queue_speculative[i].age_q, (queue_entry_t)m))
6095       {
6096           next = (vm_page_t) m->pageq.next;
6097           if (hibernate_page_bittst(page_list, m->phys_page))
6098           {
6099               count_discard_speculative++;
6100               hibernate_discard_page(m);
6101           }
6102           m = next;
6103       }
6104    }
6105
6106    m = (vm_page_t) queue_first(&vm_page_queue_inactive);
6107    while (m && !queue_end(&vm_page_queue_inactive, (queue_entry_t)m))
6108    {
6109        next = (vm_page_t) m->pageq.next;
6110        if (hibernate_page_bittst(page_list, m->phys_page))
6111        {
6112	    if (m->dirty)
6113		count_discard_purgeable++;
6114	    else
6115		count_discard_inactive++;
6116            hibernate_discard_page(m);
6117        }
6118        m = next;
6119    }
6120
6121    m = (vm_page_t) queue_first(&vm_page_queue_active);
6122    while (m && !queue_end(&vm_page_queue_active, (queue_entry_t)m))
6123    {
6124        next = (vm_page_t) m->pageq.next;
6125        if (hibernate_page_bittst(page_list, m->phys_page))
6126        {
6127	    if (m->dirty)
6128		count_discard_purgeable++;
6129	    else
6130		count_discard_active++;
6131            hibernate_discard_page(m);
6132        }
6133        m = next;
6134    }
6135
6136    m = (vm_page_t) queue_first(&vm_page_queue_cleaned);
6137    while (m && !queue_end(&vm_page_queue_cleaned, (queue_entry_t)m))
6138    {
6139        next = (vm_page_t) m->pageq.next;
6140        if (hibernate_page_bittst(page_list, m->phys_page))
6141        {
6142	    if (m->dirty)
6143		count_discard_purgeable++;
6144	    else
6145		count_discard_cleaned++;
6146            hibernate_discard_page(m);
6147        }
6148        m = next;
6149    }
6150
6151#if MACH_ASSERT || DEBUG
6152	if (vm_page_local_q) {
6153	    for (i = 0; i < vm_page_local_q_count; i++) {
6154		struct vpl	*lq;
6155		lq = &vm_page_local_q[i].vpl_un.vpl;
6156		VPL_UNLOCK(&lq->vpl_lock);
6157	    }
6158	}
6159        vm_page_unlock_queues();
6160#endif  /* MACH_ASSERT || DEBUG */
6161
6162    clock_get_uptime(&end);
6163    absolutetime_to_nanoseconds(end - start, &nsec);
6164    HIBLOG("hibernate_page_list_discard time: %qd ms, discarded act %d inact %d purgeable %d spec %d cleaned %d\n",
6165                nsec / 1000000ULL,
6166	        count_discard_active, count_discard_inactive, count_discard_purgeable, count_discard_speculative, count_discard_cleaned);
6167}
6168
6169boolean_t       hibernate_paddr_map_inited = FALSE;
6170boolean_t       hibernate_rebuild_needed = FALSE;
6171unsigned int	hibernate_teardown_last_valid_compact_indx = -1;
6172vm_page_t	hibernate_rebuild_hash_list = NULL;
6173
6174unsigned int	hibernate_teardown_found_tabled_pages = 0;
6175unsigned int	hibernate_teardown_found_created_pages = 0;
6176unsigned int	hibernate_teardown_found_free_pages = 0;
6177unsigned int	hibernate_teardown_vm_page_free_count;
6178
6179
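/*
 * A ppnum_mapping describes a run of vm_pages[] entries whose physical
 * page numbers are contiguous, so that an index into vm_pages[] can be
 * translated back to a physical page number after the array has been
 * compacted.
 */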
6180struct ppnum_mapping {
6181	struct ppnum_mapping	*ppnm_next;
6182	ppnum_t			ppnm_base_paddr;
6183	unsigned int		ppnm_sindx;
6184	unsigned int		ppnm_eindx;
6185};
6186
6187struct ppnum_mapping	*ppnm_head;
6188struct ppnum_mapping	*ppnm_last_found = NULL;
6189
6190
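/*
 * Build the ppnum_mapping list describing the physically contiguous runs
 * in vm_pages[].  Only done once (hibernate_paddr_map_inited).
 */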
6191void
6192hibernate_create_paddr_map()
6193{
6194	unsigned int	i;
6195	ppnum_t		next_ppnum_in_run = 0;
6196	struct ppnum_mapping *ppnm = NULL;
6197
6198	if (hibernate_paddr_map_inited == FALSE) {
6199
6200		for (i = 0; i < vm_pages_count; i++) {
6201
6202			if (ppnm)
6203				ppnm->ppnm_eindx = i;
6204
6205			if (ppnm == NULL || vm_pages[i].phys_page != next_ppnum_in_run) {
6206
6207				ppnm = kalloc(sizeof(struct ppnum_mapping));
6208
6209				ppnm->ppnm_next = ppnm_head;
6210				ppnm_head = ppnm;
6211
6212				ppnm->ppnm_sindx = i;
6213				ppnm->ppnm_base_paddr = vm_pages[i].phys_page;
6214			}
6215			next_ppnum_in_run = vm_pages[i].phys_page + 1;
6216		}
6217		ppnm->ppnm_eindx++;
6218
6219		hibernate_paddr_map_inited = TRUE;
6220	}
6221}
6222
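/*
 * Translate a vm_pages[] index to its physical page number using the
 * ppnum_mapping list; the last matching run is cached in ppnm_last_found.
 */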
6223ppnum_t
6224hibernate_lookup_paddr(unsigned int indx)
6225{
6226	struct ppnum_mapping *ppnm = NULL;
6227
6228	ppnm = ppnm_last_found;
6229
6230	if (ppnm) {
6231		if (indx >= ppnm->ppnm_sindx && indx < ppnm->ppnm_eindx)
6232			goto done;
6233	}
6234	for (ppnm = ppnm_head; ppnm; ppnm = ppnm->ppnm_next) {
6235
6236		if (indx >= ppnm->ppnm_sindx && indx < ppnm->ppnm_eindx) {
6237			ppnm_last_found = ppnm;
6238			break;
6239		}
6240	}
6241	if (ppnm == NULL)
6242		panic("hibernate_lookup_paddr of %d failed\n", indx);
6243done:
6244	return (ppnm->ppnm_base_paddr + (indx - ppnm->ppnm_sindx));
6245}
6246
6247
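/*
 * Mark the physical pages backing the page-aligned portion of the kernel
 * virtual range [saddr, eaddr) as not needing to be saved, by setting
 * their bits in both page_list and page_list_wired.  Returns the number
 * of pages marked.
 */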
6248uint32_t
6249hibernate_mark_as_unneeded(addr64_t saddr, addr64_t eaddr, hibernate_page_list_t *page_list, hibernate_page_list_t *page_list_wired)
6250{
6251	addr64_t	saddr_aligned;
6252	addr64_t	eaddr_aligned;
6253	addr64_t	addr;
6254	ppnum_t		paddr;
6255	unsigned int	mark_as_unneeded_pages = 0;
6256
6257	saddr_aligned = (saddr + PAGE_MASK_64) & ~PAGE_MASK_64;
6258	eaddr_aligned = eaddr & ~PAGE_MASK_64;
6259
6260	for (addr = saddr_aligned; addr < eaddr_aligned; addr += PAGE_SIZE_64) {
6261
6262		paddr = pmap_find_phys(kernel_pmap, addr);
6263
6264		assert(paddr);
6265
6266		hibernate_page_bitset(page_list,       TRUE, paddr);
6267		hibernate_page_bitset(page_list_wired, TRUE, paddr);
6268
6269		mark_as_unneeded_pages++;
6270	}
6271	return (mark_as_unneeded_pages);
6272}
6273
6274
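/*
 * Re-insert a page at the head of its vm_page object/offset hash bucket.
 */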
6275void
6276hibernate_hash_insert_page(vm_page_t mem)
6277{
6278	vm_page_bucket_t *bucket;
6279	int		hash_id;
6280
6281	assert(mem->hashed);
6282	assert(mem->object);
6283	assert(mem->offset != (vm_object_offset_t) -1);
6284
6285	/*
6286	 *	Insert it into the object/offset hash table
6287	 */
6288	hash_id = vm_page_hash(mem->object, mem->offset);
6289	bucket = &vm_page_buckets[hash_id];
6290
6291	mem->next_m = bucket->page_list;
6292	bucket->page_list = VM_PAGE_PACK_PTR(mem);
6293}
6294
6295
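/*
 * Re-initialize vm_pages[sindx..eindx) as free pages and put them back on
 * the per-color free queues, updating vm_page_free_count.
 */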
6296void
6297hibernate_free_range(int sindx, int eindx)
6298{
6299	vm_page_t	mem;
6300	unsigned int	color;
6301
6302	while (sindx < eindx) {
6303		mem = &vm_pages[sindx];
6304
6305		vm_page_init(mem, hibernate_lookup_paddr(sindx), FALSE);
6306
6307		mem->lopage = FALSE;
6308		mem->free = TRUE;
6309
6310	        color = mem->phys_page & vm_color_mask;
6311		queue_enter_first(&vm_page_queue_free[color],
6312				  mem,
6313				  vm_page_t,
6314				  pageq);
6315		vm_page_free_count++;
6316
6317		sindx++;
6318	}
6319}
6320
6321
6322extern void hibernate_rebuild_pmap_structs(void);
6323
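/*
 * Undo hibernate_teardown_vm_structs() on wake: move each compacted
 * vm_page_t back to its original slot, rebuild the object/offset hash,
 * re-create the free list from the holes, re-insert the pages saved on
 * hibernate_rebuild_hash_list, and rebuild the pmap structures.
 */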
6324void
6325hibernate_rebuild_vm_structs(void)
6326{
6327	int		cindx, sindx, eindx;
6328	vm_page_t	mem, tmem, mem_next;
6329	AbsoluteTime	startTime, endTime;
6330	uint64_t	nsec;
6331
6332	if (hibernate_rebuild_needed == FALSE)
6333		return;
6334
6335	KERNEL_DEBUG_CONSTANT(IOKDBG_CODE(DBG_HIBERNATE, 13) | DBG_FUNC_START, 0, 0, 0, 0, 0);
6336	HIBLOG("hibernate_rebuild started\n");
6337
6338	clock_get_uptime(&startTime);
6339
6340	hibernate_rebuild_pmap_structs();
6341
6342	bzero(&vm_page_buckets[0], vm_page_bucket_count * sizeof(vm_page_bucket_t));
6343	eindx = vm_pages_count;
6344
6345	for (cindx = hibernate_teardown_last_valid_compact_indx; cindx >= 0; cindx--) {
6346
6347		mem = &vm_pages[cindx];
6348		/*
6349		 * hibernate_teardown_vm_structs leaves the location where
6350		 * this vm_page_t must be located in "next_m".
6351		 */
6352		tmem = VM_PAGE_UNPACK_PTR(mem->next_m);
6353		mem->next_m = VM_PAGE_PACK_PTR(NULL);
6354
6355		sindx = (int)(tmem - &vm_pages[0]);
6356
6357		if (mem != tmem) {
6358			/*
6359			 * this vm_page_t was moved by hibernate_teardown_vm_structs,
6360			 * so move it back to its real location
6361			 */
6362			*tmem = *mem;
6363			mem = tmem;
6364		}
6365		if (mem->hashed)
6366			hibernate_hash_insert_page(mem);
6367		/*
6368		 * the 'hole' between this vm_page_t and the previous
6369		 * vm_page_t we moved needs to be initialized as
6370		 * a range of free vm_page_t's
6371		 */
6372		hibernate_free_range(sindx + 1, eindx);
6373
6374		eindx = sindx;
6375	}
6376	if (sindx)
6377		hibernate_free_range(0, sindx);
6378
6379	assert(vm_page_free_count == hibernate_teardown_vm_page_free_count);
6380
6381	/*
6382	 * process the list of vm_page_t's that were entered in the hash,
6383	 * but were not located in the vm_pages array... these are
6384	 * vm_page_t's that were created on the fly (i.e. fictitious)
6385	 */
6386	for (mem = hibernate_rebuild_hash_list; mem; mem = mem_next) {
6387		mem_next = VM_PAGE_UNPACK_PTR(mem->next_m);
6388
6389		mem->next_m = VM_PAGE_PACK_PTR(NULL);
6390		hibernate_hash_insert_page(mem);
6391	}
6392	hibernate_rebuild_hash_list = NULL;
6393
6394        clock_get_uptime(&endTime);
6395        SUB_ABSOLUTETIME(&endTime, &startTime);
6396        absolutetime_to_nanoseconds(endTime, &nsec);
6397
6398	HIBLOG("hibernate_rebuild completed - took %qd msecs\n", nsec / 1000000ULL);
6399
6400	hibernate_rebuild_needed = FALSE;
6401
6402	KERNEL_DEBUG_CONSTANT(IOKDBG_CODE(DBG_HIBERNATE, 13) | DBG_FUNC_END, 0, 0, 0, 0, 0);
6403}
6404
6405
6406extern void hibernate_teardown_pmap_structs(addr64_t *, addr64_t *);
6407
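/*
 * Compact vm_pages[] by moving in-use entries into the slots occupied by
 * free pages (recording each entry's original location in next_m), so that
 * the unused tail of the array, the page hash buckets and unneeded pmap
 * structures can all be excluded from the hibernation image.  Returns the
 * number of pages marked as unneeded and sets hibernate_rebuild_needed so
 * the layout is restored at wake.
 */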
6408uint32_t
6409hibernate_teardown_vm_structs(hibernate_page_list_t *page_list, hibernate_page_list_t *page_list_wired)
6410{
6411	unsigned int	i;
6412	unsigned int	compact_target_indx;
6413	vm_page_t	mem, mem_next;
6414	vm_page_bucket_t *bucket;
6415	unsigned int	mark_as_unneeded_pages = 0;
6416	unsigned int	unneeded_vm_page_bucket_pages = 0;
6417	unsigned int	unneeded_vm_pages_pages = 0;
6418	unsigned int	unneeded_pmap_pages = 0;
6419	addr64_t	start_of_unneeded = 0;
6420	addr64_t	end_of_unneeded = 0;
6421
6422
6423	if (hibernate_should_abort())
6424		return (0);
6425
6426	HIBLOG("hibernate_teardown: wired_pages %d, free_pages %d, active_pages %d, inactive_pages %d, speculative_pages %d, cleaned_pages %d, compressor_pages %d\n",
6427	       vm_page_wire_count, vm_page_free_count, vm_page_active_count, vm_page_inactive_count, vm_page_speculative_count,
6428	       vm_page_cleaned_count, compressor_object->resident_page_count);
6429
6430	for (i = 0; i < vm_page_bucket_count; i++) {
6431
6432		bucket = &vm_page_buckets[i];
6433
6434		for (mem = VM_PAGE_UNPACK_PTR(bucket->page_list); mem != VM_PAGE_NULL; mem = mem_next) {
6435			assert(mem->hashed);
6436
6437			mem_next = VM_PAGE_UNPACK_PTR(mem->next_m);
6438
6439			if (mem < &vm_pages[0] || mem >= &vm_pages[vm_pages_count]) {
6440				mem->next_m = VM_PAGE_PACK_PTR(hibernate_rebuild_hash_list);
6441				hibernate_rebuild_hash_list = mem;
6442			}
6443		}
6444	}
6445	unneeded_vm_page_bucket_pages = hibernate_mark_as_unneeded((addr64_t)&vm_page_buckets[0], (addr64_t)&vm_page_buckets[vm_page_bucket_count], page_list, page_list_wired);
6446	mark_as_unneeded_pages += unneeded_vm_page_bucket_pages;
6447
6448	hibernate_teardown_vm_page_free_count = vm_page_free_count;
6449
6450	compact_target_indx = 0;
6451
6452	for (i = 0; i < vm_pages_count; i++) {
6453
6454		mem = &vm_pages[i];
6455
6456		if (mem->free) {
6457			unsigned int color;
6458
6459			assert(mem->busy);
6460			assert(!mem->lopage);
6461
6462			color = mem->phys_page & vm_color_mask;
6463
6464			queue_remove(&vm_page_queue_free[color],
6465				     mem,
6466				     vm_page_t,
6467				     pageq);
6468			mem->pageq.next = NULL;
6469			mem->pageq.prev = NULL;
6470
6471			vm_page_free_count--;
6472
6473			hibernate_teardown_found_free_pages++;
6474
6475			if ( !vm_pages[compact_target_indx].free)
6476				compact_target_indx = i;
6477		} else {
6478			/*
6479			 * record this vm_page_t's original location;
6480			 * we need this even if it doesn't get moved,
6481			 * as an indicator to the rebuild function that
6482			 * we don't have to move it
6483			 */
6484			mem->next_m = VM_PAGE_PACK_PTR(mem);
6485
6486			if (vm_pages[compact_target_indx].free) {
6487				/*
6488				 * we've got a hole to fill, so
6489				 * move this vm_page_t to its new home
6490				 */
6491				vm_pages[compact_target_indx] = *mem;
6492				mem->free = TRUE;
6493
6494				hibernate_teardown_last_valid_compact_indx = compact_target_indx;
6495				compact_target_indx++;
6496			} else
6497				hibernate_teardown_last_valid_compact_indx = i;
6498		}
6499	}
6500	unneeded_vm_pages_pages = hibernate_mark_as_unneeded((addr64_t)&vm_pages[hibernate_teardown_last_valid_compact_indx+1],
6501							     (addr64_t)&vm_pages[vm_pages_count-1], page_list, page_list_wired);
6502	mark_as_unneeded_pages += unneeded_vm_pages_pages;
6503
6504	hibernate_teardown_pmap_structs(&start_of_unneeded, &end_of_unneeded);
6505
6506	if (start_of_unneeded) {
6507		unneeded_pmap_pages = hibernate_mark_as_unneeded(start_of_unneeded, end_of_unneeded, page_list, page_list_wired);
6508		mark_as_unneeded_pages += unneeded_pmap_pages;
6509	}
6510	HIBLOG("hibernate_teardown: mark_as_unneeded_pages %d, %d, %d\n", unneeded_vm_page_bucket_pages, unneeded_vm_pages_pages, unneeded_pmap_pages);
6511
6512	hibernate_rebuild_needed = TRUE;
6513
6514	return (mark_as_unneeded_pages);
6515}
6516
6517
6518#endif /* HIBERNATION */
6519
6520/* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */
6521
6522#include <mach_vm_debug.h>
6523#if	MACH_VM_DEBUG
6524
6525#include <mach_debug/hash_info.h>
6526#include <vm/vm_debug.h>
6527
6528/*
6529 *	Routine:	vm_page_info
6530 *	Purpose:
6531 *		Return information about the global VP table.
6532 *		Fills the buffer with as much information as possible
6533 *		and returns the desired size of the buffer.
6534 *	Conditions:
6535 *		Nothing locked.  The caller should provide
6536 *		possibly-pageable memory.
6537 */
6538
6539unsigned int
6540vm_page_info(
6541	hash_info_bucket_t *info,
6542	unsigned int count)
6543{
6544	unsigned int i;
6545	lck_spin_t	*bucket_lock;
6546
6547	if (vm_page_bucket_count < count)
6548		count = vm_page_bucket_count;
6549
6550	for (i = 0; i < count; i++) {
6551		vm_page_bucket_t *bucket = &vm_page_buckets[i];
6552		unsigned int bucket_count = 0;
6553		vm_page_t m;
6554
6555		bucket_lock = &vm_page_bucket_locks[i / BUCKETS_PER_LOCK];
6556		lck_spin_lock(bucket_lock);
6557
6558		for (m = VM_PAGE_UNPACK_PTR(bucket->page_list); m != VM_PAGE_NULL; m = VM_PAGE_UNPACK_PTR(m->next_m))
6559			bucket_count++;
6560
6561		lck_spin_unlock(bucket_lock);
6562
6563		/* don't touch pageable memory while holding locks */
6564		info[i].hib_count = bucket_count;
6565	}
6566
6567	return vm_page_bucket_count;
6568}
6569#endif	/* MACH_VM_DEBUG */
6570
6571#if VM_PAGE_BUCKETS_CHECK
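/*
 * Sanity-check the vm_page hash: every bucketed page must be marked
 * "hashed" and must hash to the bucket it is found in; also verify the
 * fake-bucket guard pattern when VM_PAGE_FAKE_BUCKETS is enabled.
 */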
6572void
6573vm_page_buckets_check(void)
6574{
6575	unsigned int i;
6576	vm_page_t p;
6577	unsigned int p_hash;
6578	vm_page_bucket_t *bucket;
6579	lck_spin_t	*bucket_lock;
6580
6581	if (!vm_page_buckets_check_ready) {
6582		return;
6583	}
6584
6585#if HIBERNATION
6586	if (hibernate_rebuild_needed ||
6587	    hibernate_rebuild_hash_list) {
6588		panic("BUCKET_CHECK: hibernation in progress: "
6589		      "rebuild_needed=%d rebuild_hash_list=%p\n",
6590		      hibernate_rebuild_needed,
6591		      hibernate_rebuild_hash_list);
6592	}
6593#endif /* HIBERNATION */
6594
6595#if VM_PAGE_FAKE_BUCKETS
6596	char *cp;
6597	for (cp = (char *) vm_page_fake_buckets_start;
6598	     cp < (char *) vm_page_fake_buckets_end;
6599	     cp++) {
6600		if (*cp != 0x5a) {
6601			panic("BUCKET_CHECK: corruption at %p in fake buckets "
6602			      "[0x%llx:0x%llx]\n",
6603			      cp,
6604			      (uint64_t) vm_page_fake_buckets_start,
6605			      (uint64_t) vm_page_fake_buckets_end);
6606		}
6607	}
6608#endif /* VM_PAGE_FAKE_BUCKETS */
6609
6610	for (i = 0; i < vm_page_bucket_count; i++) {
6611		bucket = &vm_page_buckets[i];
6612		if (!bucket->page_list) {
6613			continue;
6614		}
6615
6616		bucket_lock = &vm_page_bucket_locks[i / BUCKETS_PER_LOCK];
6617		lck_spin_lock(bucket_lock);
6618		p = VM_PAGE_UNPACK_PTR(bucket->page_list);
6619		while (p != VM_PAGE_NULL) {
6620			if (!p->hashed) {
6621				panic("BUCKET_CHECK: page %p (%p,0x%llx) "
6622				      "hash %d in bucket %d at %p "
6623				      "is not hashed\n",
6624				      p, p->object, p->offset,
6625				      p_hash, i, bucket);
6626			}
6627			p_hash = vm_page_hash(p->object, p->offset);
6628			if (p_hash != i) {
6629				panic("BUCKET_CHECK: corruption in bucket %d "
6630				      "at %p: page %p object %p offset 0x%llx "
6631				      "hash %d\n",
6632				      i, bucket, p, p->object, p->offset,
6633				      p_hash);
6634			}
6635			p = VM_PAGE_UNPACK_PTR(p->next_m);
6636		}
6637		lck_spin_unlock(bucket_lock);
6638	}
6639
6640//	printf("BUCKET_CHECK: checked buckets\n");
6641}
6642#endif /* VM_PAGE_BUCKETS_CHECK */
6643