1/*
2 * Copyright (c) 2000-2007 Apple Inc. All rights reserved.
3 *
4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5 *
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. The rights granted to you under the License
10 * may not be used to create, or enable the creation or redistribution of,
11 * unlawful or unlicensed copies of an Apple operating system, or to
12 * circumvent, violate, or enable the circumvention or violation of, any
13 * terms of an Apple operating system software license agreement.
14 *
15 * Please obtain a copy of the License at
16 * http://www.opensource.apple.com/apsl/ and read it before using this file.
17 *
18 * The Original Code and all software distributed under the License are
19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23 * Please see the License for the specific language governing rights and
24 * limitations under the License.
25 *
26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27 */
28/*
29 * @OSF_COPYRIGHT@
30 */
31/*
32 * Mach Operating System
33 * Copyright (c) 1991,1990,1989,1988,1987 Carnegie Mellon University
34 * All Rights Reserved.
35 *
36 * Permission to use, copy, modify and distribute this software and its
37 * documentation is hereby granted, provided that both the copyright
38 * notice and this permission notice appear in all copies of the
39 * software, derivative works or modified versions, and any portions
40 * thereof, and that both notices appear in supporting documentation.
41 *
42 * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
43 * CONDITION.  CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND FOR
44 * ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
45 *
46 * Carnegie Mellon requests users of this software to return to
47 *
48 *  Software Distribution Coordinator  or  Software.Distribution@CS.CMU.EDU
49 *  School of Computer Science
50 *  Carnegie Mellon University
51 *  Pittsburgh PA 15213-3890
52 *
53 * any improvements or extensions that they make and grant Carnegie Mellon
54 * the rights to redistribute these changes.
55 */
56/*
57 */
58/*
59 *	File:	vm_fault.c
60 *	Author:	Avadis Tevanian, Jr., Michael Wayne Young
61 *
62 *	Page fault handling module.
63 */
64
65#include <mach_cluster_stats.h>
66#include <mach_pagemap.h>
67#include <mach_kdb.h>
68#include <libkern/OSAtomic.h>
69
70#include <mach/mach_types.h>
71#include <mach/kern_return.h>
72#include <mach/message.h>	/* for error codes */
73#include <mach/vm_param.h>
74#include <mach/vm_behavior.h>
75#include <mach/memory_object.h>
76				/* For memory_object_data_{request,unlock} */
77#include <mach/sdt.h>
78
79#include <kern/kern_types.h>
80#include <kern/host_statistics.h>
81#include <kern/counters.h>
82#include <kern/task.h>
83#include <kern/thread.h>
84#include <kern/sched_prim.h>
85#include <kern/host.h>
86#include <kern/xpr.h>
87#include <kern/mach_param.h>
88#include <kern/macro_help.h>
89#include <kern/zalloc.h>
90#include <kern/misc_protos.h>
91
92#include <ppc/proc_reg.h>
93
94#include <vm/vm_fault.h>
95#include <vm/vm_map.h>
96#include <vm/vm_object.h>
97#include <vm/vm_page.h>
98#include <vm/vm_kern.h>
99#include <vm/pmap.h>
100#include <vm/vm_pageout.h>
101#include <vm/vm_protos.h>
102#include <vm/vm_external.h>
103#include <vm/memory_object.h>
104#include <vm/vm_purgeable_internal.h>	/* Needed by some vm_page.h macros */
105
106#include <sys/kdebug.h>
107
108#define VM_FAULT_CLASSIFY	0
109
110/* Zero-filled pages are marked "m->zero_fill" and put on the
111 * special zero-fill inactive queue  only if they belong to
112 * an object at least this big.
113 */
114#define	VM_ZF_OBJECT_SIZE_THRESHOLD	(0x200000)
115
116#define TRACEFAULTPAGE 0 /* (TEST/DEBUG) */
117
118int	vm_object_pagein_throttle = 16;
119
120extern int cs_debug;
121
122#if	MACH_KDB
123extern struct db_watchpoint *db_watchpoint_list;
124#endif	/* MACH_KDB */
125
126
127/* Forward declarations of internal routines. */
128extern kern_return_t vm_fault_wire_fast(
129				vm_map_t	map,
130				vm_map_offset_t	va,
131				vm_map_entry_t	entry,
132				pmap_t		pmap,
133				vm_map_offset_t	pmap_addr);
134
135extern void vm_fault_continue(void);
136
137extern void vm_fault_copy_cleanup(
138				vm_page_t	page,
139				vm_page_t	top_page);
140
141extern void vm_fault_copy_dst_cleanup(
142				vm_page_t	page);
143
144#if	VM_FAULT_CLASSIFY
145extern void vm_fault_classify(vm_object_t	object,
146			  vm_object_offset_t	offset,
147			  vm_prot_t		fault_type);
148
149extern void vm_fault_classify_init(void);
150#endif
151
152
153unsigned long vm_cs_validates = 0;
154unsigned long vm_cs_revalidates = 0;
155unsigned long vm_cs_query_modified = 0;
156unsigned long vm_cs_validated_dirtied = 0;
157
158#if CONFIG_ENFORCE_SIGNED_CODE
159#if SECURE_KERNEL
160const int cs_enforcement_disable=0;
161#else
162int cs_enforcement_disable=1;
163#endif
164#endif
165
166/*
167 *	Routine:	vm_fault_init
168 *	Purpose:
169 *		Initialize our private data structures.
170 */
171void
172vm_fault_init(void)
173{
174#if !SECURE_KERNEL
175#if CONFIG_ENFORCE_SIGNED_CODE
176	PE_parse_boot_argn("cs_enforcement_disable", &cs_enforcement_disable, sizeof (cs_enforcement_disable));
177#endif
178	PE_parse_boot_argn("cs_debug", &cs_debug, sizeof (cs_debug));
179#endif
180}
181
182/*
183 *	Routine:	vm_fault_cleanup
184 *	Purpose:
185 *		Clean up the result of vm_fault_page.
186 *	Results:
187 *		The paging reference for "object" is released.
188 *		"object" is unlocked.
189 *		If "top_page" is not null,  "top_page" is
190 *		freed and the paging reference for the object
191 *		containing it is released.
192 *
193 *	In/out conditions:
194 *		"object" must be locked.
195 */
196void
197vm_fault_cleanup(
198	register vm_object_t	object,
199	register vm_page_t	top_page)
200{
201	vm_object_paging_end(object);
202	vm_object_unlock(object);
203
204	if (top_page != VM_PAGE_NULL) {
205	        object = top_page->object;
206
207		vm_object_lock(object);
208		VM_PAGE_FREE(top_page);
209		vm_object_paging_end(object);
210		vm_object_unlock(object);
211	}
212}
213
214#if	MACH_CLUSTER_STATS
215#define MAXCLUSTERPAGES 16
216struct {
217	unsigned long pages_in_cluster;
218	unsigned long pages_at_higher_offsets;
219	unsigned long pages_at_lower_offsets;
220} cluster_stats_in[MAXCLUSTERPAGES];
221#define CLUSTER_STAT(clause)	clause
222#define CLUSTER_STAT_HIGHER(x)	\
223	((cluster_stats_in[(x)].pages_at_higher_offsets)++)
224#define CLUSTER_STAT_LOWER(x)	\
225	 ((cluster_stats_in[(x)].pages_at_lower_offsets)++)
226#define CLUSTER_STAT_CLUSTER(x)	\
227	((cluster_stats_in[(x)].pages_in_cluster)++)
228#else	/* MACH_CLUSTER_STATS */
229#define CLUSTER_STAT(clause)
230#endif	/* MACH_CLUSTER_STATS */
231
232#define ALIGNED(x) (((x) & (PAGE_SIZE_64 - 1)) == 0)
233
234
235boolean_t	vm_page_deactivate_behind = TRUE;
236/*
237 * default sizes given VM_BEHAVIOR_DEFAULT reference behavior
238 */
239int vm_default_ahead = 0;
240int vm_default_behind = MAX_UPL_TRANSFER;
241
242#define MAX_SEQUENTIAL_RUN	(1024 * 1024 * 1024)
243
244/*
245 * vm_page_is_sequential
246 *
247 * Determine if sequential access is in progress
248 * in accordance with the behavior specified.
249 * Update state to indicate current access pattern.
250 *
251 * object must have at least the shared lock held
252 */
253static
254void
255vm_fault_is_sequential(
256	vm_object_t		object,
257	vm_object_offset_t	offset,
258	vm_behavior_t		behavior)
259{
260        vm_object_offset_t	last_alloc;
261	int			sequential;
262	int			orig_sequential;
263
264        last_alloc = object->last_alloc;
265	sequential = object->sequential;
266	orig_sequential = sequential;
267
268	switch (behavior) {
269	case VM_BEHAVIOR_RANDOM:
270	        /*
271		 * reset indicator of sequential behavior
272		 */
273	        sequential = 0;
274	        break;
275
276	case VM_BEHAVIOR_SEQUENTIAL:
277	        if (offset && last_alloc == offset - PAGE_SIZE_64) {
278		        /*
279			 * advance indicator of sequential behavior
280			 */
281		        if (sequential < MAX_SEQUENTIAL_RUN)
282			        sequential += PAGE_SIZE;
283		} else {
284		        /*
285			 * reset indicator of sequential behavior
286			 */
287		        sequential = 0;
288		}
289	        break;
290
291	case VM_BEHAVIOR_RSEQNTL:
292	        if (last_alloc && last_alloc == offset + PAGE_SIZE_64) {
293		        /*
294			 * advance indicator of sequential behavior
295			 */
296		        if (sequential > -MAX_SEQUENTIAL_RUN)
297			        sequential -= PAGE_SIZE;
298		} else {
299		        /*
300			 * reset indicator of sequential behavior
301			 */
302		        sequential = 0;
303		}
304	        break;
305
306	case VM_BEHAVIOR_DEFAULT:
307	default:
308	        if (offset && last_alloc == (offset - PAGE_SIZE_64)) {
309		        /*
310			 * advance indicator of sequential behavior
311			 */
312		        if (sequential < 0)
313			        sequential = 0;
314		        if (sequential < MAX_SEQUENTIAL_RUN)
315			        sequential += PAGE_SIZE;
316
317		} else if (last_alloc && last_alloc == (offset + PAGE_SIZE_64)) {
318		        /*
319			 * advance indicator of sequential behavior
320			 */
321		        if (sequential > 0)
322			        sequential = 0;
323		        if (sequential > -MAX_SEQUENTIAL_RUN)
324			        sequential -= PAGE_SIZE;
325		} else {
326		        /*
327			 * reset indicator of sequential behavior
328			 */
329		        sequential = 0;
330		}
331	        break;
332	}
333	if (sequential != orig_sequential) {
334	        if (!OSCompareAndSwap(orig_sequential, sequential, (UInt32 *)&object->sequential)) {
335		        /*
336			 * if someone else has already updated object->sequential
337			 * don't bother trying to update it or object->last_alloc
338			 */
339		        return;
340		}
341	}
342	/*
343	 * I'd like to do this with a OSCompareAndSwap64, but that
344	 * doesn't exist for PPC...  however, it shouldn't matter
345	 * that much... last_alloc is maintained so that we can determine
346	 * if a sequential access pattern is taking place... if only
347	 * one thread is banging on this object, no problem with the unprotected
348	 * update... if 2 or more threads are banging away, we run the risk of
349	 * someone seeing a mangled update... however, in the face of multiple
350	 * accesses, no sequential access pattern can develop anyway, so we
351	 * haven't lost any real info.
352	 */
353	object->last_alloc = offset;
354}
355
356
357/*
358 * vm_page_deactivate_behind
359 *
360 * Determine if sequential access is in progress
361 * in accordance with the behavior specified.  If
362 * so, compute a potential page to deactivate and
363 * deactivate it.
364 *
365 * object must be locked.
366 *
367 * return TRUE if we actually deactivate a page
368 */
369static
370boolean_t
371vm_fault_deactivate_behind(
372	vm_object_t		object,
373	vm_object_offset_t	offset,
374	vm_behavior_t		behavior)
375{
376	vm_page_t	m = NULL;
377	int		sequential_run;
378	int		sequential_behavior = VM_BEHAVIOR_SEQUENTIAL;
379
380#if TRACEFAULTPAGE
381	dbgTrace(0xBEEF0018, (unsigned int) object, (unsigned int) vm_fault_deactivate_behind);	/* (TEST/DEBUG) */
382#endif
383
384	if (object == kernel_object || vm_page_deactivate_behind == FALSE) {
385		/*
386		 * Do not deactivate pages from the kernel object: they
387		 * are not intended to become pageable.
388		 * or we've disabled the deactivate behind mechanism
389		 */
390		return FALSE;
391	}
392	if ((sequential_run = object->sequential)) {
393		  if (sequential_run < 0) {
394		          sequential_behavior = VM_BEHAVIOR_RSEQNTL;
395			  sequential_run = 0 - sequential_run;
396		  } else {
397		          sequential_behavior = VM_BEHAVIOR_SEQUENTIAL;
398		  }
399	}
400	switch (behavior) {
401	case VM_BEHAVIOR_RANDOM:
402		break;
403	case VM_BEHAVIOR_SEQUENTIAL:
404	        if (sequential_run >= (int)PAGE_SIZE)
405			m = vm_page_lookup(object, offset - PAGE_SIZE_64);
406		break;
407	case VM_BEHAVIOR_RSEQNTL:
408	        if (sequential_run >= (int)PAGE_SIZE)
409			m = vm_page_lookup(object, offset + PAGE_SIZE_64);
410		break;
411	case VM_BEHAVIOR_DEFAULT:
412	default:
413	{	vm_object_offset_t behind = vm_default_behind * PAGE_SIZE_64;
414
415	        /*
416		 * determine if the run of sequential accesss has been
417		 * long enough on an object with default access behavior
418		 * to consider it for deactivation
419		 */
420		if ((uint64_t)sequential_run >= behind) {
421		        if (sequential_behavior == VM_BEHAVIOR_SEQUENTIAL) {
422			        if (offset >= behind)
423				        m = vm_page_lookup(object, offset - behind);
424			} else {
425			        if (offset < -behind)
426				        m = vm_page_lookup(object, offset + behind);
427			}
428		}
429		break;
430	}
431	}
432	if (m) {
433		if (!m->busy && !m->no_cache && !m->throttled && !m->fictitious && !m->absent) {
434		        pmap_clear_reference(m->phys_page);
435			m->deactivated = TRUE;
436#if TRACEFAULTPAGE
437			dbgTrace(0xBEEF0019, (unsigned int) object, (unsigned int) m);	/* (TEST/DEBUG) */
438#endif
439			return TRUE;
440		}
441	}
442	return FALSE;
443}
444
445
446/*
447 * check for various conditions that would
448 * prevent us from creating a ZF page...
449 * cleanup is based on being called from vm_fault_page
450 *
451 * object must be locked
452 * object == m->object
453 */
454static vm_fault_return_t
455vm_fault_check(vm_object_t object, vm_page_t m, vm_page_t first_m, boolean_t interruptible_state)
456{
457        if (object->shadow_severed) {
458	        /*
459		 * the shadow chain was severed
460		 * just have to return an error at this point
461		 */
462	        if (m != VM_PAGE_NULL)
463		        VM_PAGE_FREE(m);
464		vm_fault_cleanup(object, first_m);
465
466		thread_interrupt_level(interruptible_state);
467
468		return (VM_FAULT_MEMORY_ERROR);
469	}
470	if (vm_backing_store_low) {
471	        /*
472		 * are we protecting the system from
473		 * backing store exhaustion.  If so
474		 * sleep unless we are privileged.
475		 */
476	        if (!(current_task()->priv_flags & VM_BACKING_STORE_PRIV)) {
477
478			if (m != VM_PAGE_NULL)
479			        VM_PAGE_FREE(m);
480			vm_fault_cleanup(object, first_m);
481
482		        assert_wait((event_t)&vm_backing_store_low, THREAD_UNINT);
483
484			thread_block(THREAD_CONTINUE_NULL);
485			thread_interrupt_level(interruptible_state);
486
487			return (VM_FAULT_RETRY);
488		}
489	}
490	if (VM_PAGE_ZFILL_THROTTLED()) {
491	        /*
492		 * we're throttling zero-fills...
493		 * treat this as if we couldn't grab a page
494		 */
495	        if (m != VM_PAGE_NULL)
496		        VM_PAGE_FREE(m);
497		vm_fault_cleanup(object, first_m);
498
499		thread_interrupt_level(interruptible_state);
500
501		return (VM_FAULT_MEMORY_SHORTAGE);
502	}
503	return (VM_FAULT_SUCCESS);
504}
505
506
507/*
508 * do the work to zero fill a page and
509 * inject it into the correct paging queue
510 *
511 * m->object must be locked
512 * page queue lock must NOT be held
513 */
514static int
515vm_fault_zero_page(vm_page_t m, boolean_t no_zero_fill)
516{
517        int my_fault = DBG_ZERO_FILL_FAULT;
518
519	/*
520	 * This is is a zero-fill page fault...
521	 *
522	 * Checking the page lock is a waste of
523	 * time;  this page was absent, so
524	 * it can't be page locked by a pager.
525	 *
526	 * we also consider it undefined
527	 * with respect to instruction
528	 * execution.  i.e. it is the responsibility
529	 * of higher layers to call for an instruction
530	 * sync after changing the contents and before
531	 * sending a program into this area.  We
532	 * choose this approach for performance
533	 */
534	m->pmapped = TRUE;
535
536	m->cs_validated = FALSE;
537	m->cs_tainted = FALSE;
538
539	if (no_zero_fill == TRUE)
540	        my_fault = DBG_NZF_PAGE_FAULT;
541	else {
542		vm_page_zero_fill(m);
543
544		VM_STAT_INCR(zero_fill_count);
545		DTRACE_VM2(zfod, int, 1, (uint64_t *), NULL);
546	}
547	assert(!m->laundry);
548	assert(m->object != kernel_object);
549	//assert(m->pageq.next == NULL && m->pageq.prev == NULL);
550
551	if (!IP_VALID(memory_manager_default) &&
552		(m->object->purgable == VM_PURGABLE_DENY ||
553		 m->object->purgable == VM_PURGABLE_NONVOLATILE ||
554		 m->object->purgable == VM_PURGABLE_VOLATILE )) {
555		vm_page_lock_queues();
556
557                queue_enter(&vm_page_queue_throttled, m, vm_page_t, pageq);
558                m->throttled = TRUE;
559                vm_page_throttled_count++;
560
561		vm_page_unlock_queues();
562	} else {
563		if (m->object->size > VM_ZF_OBJECT_SIZE_THRESHOLD) {
564			m->zero_fill = TRUE;
565			OSAddAtomic(1, (SInt32 *)&vm_zf_count);
566		}
567	}
568	return (my_fault);
569}
570
571
572/*
573 *	Routine:	vm_fault_page
574 *	Purpose:
575 *		Find the resident page for the virtual memory
576 *		specified by the given virtual memory object
577 *		and offset.
578 *	Additional arguments:
579 *		The required permissions for the page is given
580 *		in "fault_type".  Desired permissions are included
581 *		in "protection".
582 *		fault_info is passed along to determine pagein cluster
583 *		limits... it contains the expected reference pattern,
584 *		cluster size if available, etc...
585 *
586 *		If the desired page is known to be resident (for
587 *		example, because it was previously wired down), asserting
588 *		the "unwiring" parameter will speed the search.
589 *
590 *		If the operation can be interrupted (by thread_abort
591 *		or thread_terminate), then the "interruptible"
592 *		parameter should be asserted.
593 *
594 *	Results:
595 *		The page containing the proper data is returned
596 *		in "result_page".
597 *
598 *	In/out conditions:
599 *		The source object must be locked and referenced,
600 *		and must donate one paging reference.  The reference
601 *		is not affected.  The paging reference and lock are
602 *		consumed.
603 *
604 *		If the call succeeds, the object in which "result_page"
605 *		resides is left locked and holding a paging reference.
606 *		If this is not the original object, a busy page in the
607 *		original object is returned in "top_page", to prevent other
608 *		callers from pursuing this same data, along with a paging
609 *		reference for the original object.  The "top_page" should
610 *		be destroyed when this guarantee is no longer required.
611 *		The "result_page" is also left busy.  It is not removed
612 *		from the pageout queues.
613 */
614
615vm_fault_return_t
616vm_fault_page(
617	/* Arguments: */
618	vm_object_t	first_object,	/* Object to begin search */
619	vm_object_offset_t first_offset,	/* Offset into object */
620	vm_prot_t	fault_type,	/* What access is requested */
621	boolean_t	must_be_resident,/* Must page be resident? */
622	/* Modifies in place: */
623	vm_prot_t	*protection,	/* Protection for mapping */
624	/* Returns: */
625	vm_page_t	*result_page,	/* Page found, if successful */
626	vm_page_t	*top_page,	/* Page in top object, if
627					 * not result_page.  */
628	int             *type_of_fault, /* if non-null, fill in with type of fault
629					 * COW, zero-fill, etc... returned in trace point */
630	/* More arguments: */
631	kern_return_t	*error_code,	/* code if page is in error */
632	boolean_t	no_zero_fill,	/* don't zero fill absent pages */
633#if MACH_PAGEMAP
634	boolean_t	data_supply,	/* treat as data_supply if
635					 * it is a write fault and a full
636					 * page is provided */
637#else
638	__unused boolean_t data_supply,
639#endif
640	vm_object_fault_info_t fault_info)
641{
642	vm_page_t		m;
643	vm_object_t		object;
644	vm_object_offset_t	offset;
645	vm_page_t		first_m;
646	vm_object_t		next_object;
647	vm_object_t		copy_object;
648	boolean_t		look_for_page;
649	vm_prot_t		access_required = fault_type;
650	vm_prot_t		wants_copy_flag;
651	CLUSTER_STAT(int pages_at_higher_offsets;)
652	CLUSTER_STAT(int pages_at_lower_offsets;)
653	kern_return_t		wait_result;
654	boolean_t		interruptible_state;
655	vm_fault_return_t	error;
656	int			my_fault;
657	uint32_t		try_failed_count;
658	int			interruptible; /* how may fault be interrupted? */
659	memory_object_t		pager;
660
661/*
662 * MACH page map - an optional optimization where a bit map is maintained
663 * by the VM subsystem for internal objects to indicate which pages of
664 * the object currently reside on backing store.  This existence map
665 * duplicates information maintained by the vnode pager.  It is
666 * created at the time of the first pageout against the object, i.e.
667 * at the same time pager for the object is created.  The optimization
668 * is designed to eliminate pager interaction overhead, if it is
669 * 'known' that the page does not exist on backing store.
670 *
671 * MUST_ASK_PAGER() evaluates to TRUE if the page specified by object/offset is
672 * either marked as paged out in the existence map for the object or no
673 * existence map exists for the object.  MUST_ASK_PAGER() is one of the
674 * criteria in the decision to invoke the pager.   It is also used as one
675 * of the criteria to terminate the scan for adjacent pages in a clustered
676 * pagein operation.  Note that MUST_ASK_PAGER() always evaluates to TRUE for
677 * permanent objects.  Note also that if the pager for an internal object
678 * has not been created, the pager is not invoked regardless of the value
679 * of MUST_ASK_PAGER() and that clustered pagein scans are only done on an object
680 * for which a pager has been created.
681 *
682 * PAGED_OUT() evaluates to TRUE if the page specified by the object/offset
683 * is marked as paged out in the existence map for the object.  PAGED_OUT()
684 * PAGED_OUT() is used to determine if a page has already been pushed
685 * into a copy object in order to avoid a redundant page out operation.
686 */
687#if MACH_PAGEMAP
688#define MUST_ASK_PAGER(o, f) (vm_external_state_get((o)->existence_map, (f)) \
689			!= VM_EXTERNAL_STATE_ABSENT)
690#define PAGED_OUT(o, f) (vm_external_state_get((o)->existence_map, (f)) \
691			== VM_EXTERNAL_STATE_EXISTS)
692#else
693#define MUST_ASK_PAGER(o, f) (TRUE)
694#define PAGED_OUT(o, f) (FALSE)
695#endif
696
697/*
698 *	Recovery actions
699 */
700#define PREPARE_RELEASE_PAGE(m)				\
701	MACRO_BEGIN					\
702	vm_page_lock_queues();				\
703	MACRO_END
704
705#define DO_RELEASE_PAGE(m)				\
706	MACRO_BEGIN					\
707	PAGE_WAKEUP_DONE(m);				\
708	if (!m->active && !m->inactive && !m->throttled)\
709		vm_page_activate(m);			\
710	vm_page_unlock_queues();			\
711	MACRO_END
712
713#define RELEASE_PAGE(m)					\
714	MACRO_BEGIN					\
715	PREPARE_RELEASE_PAGE(m);			\
716	DO_RELEASE_PAGE(m);				\
717	MACRO_END
718
719#if TRACEFAULTPAGE
720	dbgTrace(0xBEEF0002, (unsigned int) first_object, (unsigned int) first_offset);	/* (TEST/DEBUG) */
721#endif
722
723
724#if	MACH_KDB
725		/*
726		 *	If there are watchpoints set, then
727		 *	we don't want to give away write permission
728		 *	on a read fault.  Make the task write fault,
729		 *	so that the watchpoint code notices the access.
730		 */
731	    if (db_watchpoint_list) {
732		/*
733		 *	If we aren't asking for write permission,
734		 *	then don't give it away.  We're using write
735		 *	faults to set the dirty bit.
736		 */
737		if (!(fault_type & VM_PROT_WRITE))
738			*protection &= ~VM_PROT_WRITE;
739	}
740#endif	/* MACH_KDB */
741
742	interruptible = fault_info->interruptible;
743	interruptible_state = thread_interrupt_level(interruptible);
744
745	/*
746	 *	INVARIANTS (through entire routine):
747	 *
748	 *	1)	At all times, we must either have the object
749	 *		lock or a busy page in some object to prevent
750	 *		some other thread from trying to bring in
751	 *		the same page.
752	 *
753	 *		Note that we cannot hold any locks during the
754	 *		pager access or when waiting for memory, so
755	 *		we use a busy page then.
756	 *
757	 *	2)	To prevent another thread from racing us down the
758	 *		shadow chain and entering a new page in the top
759	 *		object before we do, we must keep a busy page in
760	 *		the top object while following the shadow chain.
761	 *
762	 *	3)	We must increment paging_in_progress on any object
763	 *		for which we have a busy page before dropping
764	 *		the object lock
765	 *
766	 *	4)	We leave busy pages on the pageout queues.
767	 *		If the pageout daemon comes across a busy page,
768	 *		it will remove the page from the pageout queues.
769	 */
770
771	object = first_object;
772	offset = first_offset;
773	first_m = VM_PAGE_NULL;
774	access_required = fault_type;
775
776
777	XPR(XPR_VM_FAULT,
778		"vm_f_page: obj 0x%X, offset 0x%X, type %d, prot %d\n",
779		(integer_t)object, offset, fault_type, *protection, 0);
780
781	/*
782	 * default type of fault
783	 */
784	my_fault = DBG_CACHE_HIT_FAULT;
785
786	while (TRUE) {
787#if TRACEFAULTPAGE
788		dbgTrace(0xBEEF0003, (unsigned int) 0, (unsigned int) 0);	/* (TEST/DEBUG) */
789#endif
790		if (!object->alive) {
791		        /*
792			 * object is no longer valid
793			 * clean up and return error
794			 */
795			vm_fault_cleanup(object, first_m);
796			thread_interrupt_level(interruptible_state);
797
798			return (VM_FAULT_MEMORY_ERROR);
799		}
800
801		/*
802		 * See whether the page at 'offset' is resident
803		 */
804		m = vm_page_lookup(object, offset);
805#if TRACEFAULTPAGE
806		dbgTrace(0xBEEF0004, (unsigned int) m, (unsigned int) object);	/* (TEST/DEBUG) */
807#endif
808		if (m != VM_PAGE_NULL) {
809
810			if (m->busy) {
811			        /*
812				 * The page is being brought in,
813				 * wait for it and then retry.
814				 *
815				 * A possible optimization: if the page
816				 * is known to be resident, we can ignore
817				 * pages that are absent (regardless of
818				 * whether they're busy).
819				 */
820#if TRACEFAULTPAGE
821				dbgTrace(0xBEEF0005, (unsigned int) m, (unsigned int) 0);	/* (TEST/DEBUG) */
822#endif
823				wait_result = PAGE_SLEEP(object, m, interruptible);
824				XPR(XPR_VM_FAULT,
825				    "vm_f_page: block busy obj 0x%X, offset 0x%X, page 0x%X\n",
826					(integer_t)object, offset,
827					(integer_t)m, 0, 0);
828				counter(c_vm_fault_page_block_busy_kernel++);
829
830				if (wait_result != THREAD_AWAKENED) {
831					vm_fault_cleanup(object, first_m);
832					thread_interrupt_level(interruptible_state);
833
834					if (wait_result == THREAD_RESTART)
835					        return (VM_FAULT_RETRY);
836					else
837						return (VM_FAULT_INTERRUPTED);
838				}
839				continue;
840			}
841
842			if (m->phys_page == vm_page_guard_addr) {
843				/*
844				 * Guard page: off limits !
845				 */
846				if (fault_type == VM_PROT_NONE) {
847					/*
848					 * The fault is not requesting any
849					 * access to the guard page, so it must
850					 * be just to wire or unwire it.
851					 * Let's pretend it succeeded...
852					 */
853					m->busy = TRUE;
854					*result_page = m;
855					assert(first_m == VM_PAGE_NULL);
856					*top_page = first_m;
857					if (type_of_fault)
858						*type_of_fault = DBG_GUARD_FAULT;
859					return VM_FAULT_SUCCESS;
860				} else {
861					/*
862					 * The fault requests access to the
863					 * guard page: let's deny that !
864					 */
865					vm_fault_cleanup(object, first_m);
866					thread_interrupt_level(interruptible_state);
867					return VM_FAULT_MEMORY_ERROR;
868				}
869			}
870
871			if (m->error) {
872			        /*
873				 * The page is in error, give up now.
874				 */
875#if TRACEFAULTPAGE
876				dbgTrace(0xBEEF0006, (unsigned int) m, (unsigned int) error_code);	/* (TEST/DEBUG) */
877#endif
878				if (error_code)
879				        *error_code = KERN_MEMORY_ERROR;
880				VM_PAGE_FREE(m);
881
882				vm_fault_cleanup(object, first_m);
883				thread_interrupt_level(interruptible_state);
884
885				return (VM_FAULT_MEMORY_ERROR);
886			}
887			if (m->restart) {
888			        /*
889				 * The pager wants us to restart
890				 * at the top of the chain,
891				 * typically because it has moved the
892				 * page to another pager, then do so.
893				 */
894#if TRACEFAULTPAGE
895				dbgTrace(0xBEEF0007, (unsigned int) m, (unsigned int) 0);	/* (TEST/DEBUG) */
896#endif
897				VM_PAGE_FREE(m);
898
899				vm_fault_cleanup(object, first_m);
900				thread_interrupt_level(interruptible_state);
901
902				return (VM_FAULT_RETRY);
903			}
904			if (m->absent) {
905			        /*
906				 * The page isn't busy, but is absent,
907				 * therefore it's deemed "unavailable".
908				 *
909				 * Remove the non-existent page (unless it's
910				 * in the top object) and move on down to the
911				 * next object (if there is one).
912				 */
913#if TRACEFAULTPAGE
914				dbgTrace(0xBEEF0008, (unsigned int) m, (unsigned int) object->shadow);	/* (TEST/DEBUG) */
915#endif
916				next_object = object->shadow;
917
918				if (next_object == VM_OBJECT_NULL) {
919					/*
920					 * Absent page at bottom of shadow
921					 * chain; zero fill the page we left
922					 * busy in the first object, and free
923					 * the absent page.
924					 */
925					assert(!must_be_resident);
926
927					/*
928					 * check for any conditions that prevent
929					 * us from creating a new zero-fill page
930					 * vm_fault_check will do all of the
931					 * fault cleanup in the case of an error condition
932					 * including resetting the thread_interrupt_level
933					 */
934					error = vm_fault_check(object, m, first_m, interruptible_state);
935
936					if (error != VM_FAULT_SUCCESS)
937					        return (error);
938
939					XPR(XPR_VM_FAULT,
940					    "vm_f_page: zero obj 0x%X, off 0x%X, page 0x%X, first_obj 0x%X\n",
941						(integer_t)object, offset,
942						(integer_t)m,
943						(integer_t)first_object, 0);
944
945					if (object != first_object) {
946					        /*
947						 * free the absent page we just found
948						 */
949						VM_PAGE_FREE(m);
950
951						/*
952						 * drop reference and lock on current object
953						 */
954						vm_object_paging_end(object);
955						vm_object_unlock(object);
956
957						/*
958						 * grab the original page we
959						 * 'soldered' in place and
960						 * retake lock on 'first_object'
961						 */
962						m = first_m;
963						first_m = VM_PAGE_NULL;
964
965						object = first_object;
966						offset = first_offset;
967
968						vm_object_lock(object);
969					} else {
970					        /*
971						 * we're going to use the absent page we just found
972						 * so convert it to a 'busy' page
973						 */
974					        m->absent = FALSE;
975						m->busy = TRUE;
976					}
977					/*
978					 * zero-fill the page and put it on
979					 * the correct paging queue
980					 */
981					my_fault = vm_fault_zero_page(m, no_zero_fill);
982
983					break;
984				} else {
985					if (must_be_resident)
986						vm_object_paging_end(object);
987					else if (object != first_object) {
988						vm_object_paging_end(object);
989						VM_PAGE_FREE(m);
990					} else {
991						first_m = m;
992						m->absent = FALSE;
993						m->busy = TRUE;
994
995						vm_page_lockspin_queues();
996						VM_PAGE_QUEUES_REMOVE(m);
997						vm_page_unlock_queues();
998					}
999					XPR(XPR_VM_FAULT,
1000					    "vm_f_page: unavail obj 0x%X, off 0x%X, next_obj 0x%X, newoff 0x%X\n",
1001						(integer_t)object, offset,
1002						(integer_t)next_object,
1003						offset+object->shadow_offset,0);
1004
1005					offset += object->shadow_offset;
1006					fault_info->lo_offset += object->shadow_offset;
1007					fault_info->hi_offset += object->shadow_offset;
1008					access_required = VM_PROT_READ;
1009
1010					vm_object_lock(next_object);
1011					vm_object_unlock(object);
1012					object = next_object;
1013					vm_object_paging_begin(object);
1014
1015					/*
1016					 * reset to default type of fault
1017					 */
1018					my_fault = DBG_CACHE_HIT_FAULT;
1019
1020					continue;
1021				}
1022			}
1023			if ((m->cleaning)
1024			    && ((object != first_object) || (object->copy != VM_OBJECT_NULL))
1025			    && (fault_type & VM_PROT_WRITE)) {
1026				/*
1027				 * This is a copy-on-write fault that will
1028				 * cause us to revoke access to this page, but
1029				 * this page is in the process of being cleaned
1030				 * in a clustered pageout. We must wait until
1031				 * the cleaning operation completes before
1032				 * revoking access to the original page,
1033				 * otherwise we might attempt to remove a
1034				 * wired mapping.
1035				 */
1036#if TRACEFAULTPAGE
1037				dbgTrace(0xBEEF0009, (unsigned int) m, (unsigned int) offset);	/* (TEST/DEBUG) */
1038#endif
1039				XPR(XPR_VM_FAULT,
1040				    "vm_f_page: cleaning obj 0x%X, offset 0x%X, page 0x%X\n",
1041					(integer_t)object, offset,
1042					(integer_t)m, 0, 0);
1043				/*
1044				 * take an extra ref so that object won't die
1045				 */
1046				vm_object_reference_locked(object);
1047
1048				vm_fault_cleanup(object, first_m);
1049
1050				counter(c_vm_fault_page_block_backoff_kernel++);
1051				vm_object_lock(object);
1052				assert(object->ref_count > 0);
1053
1054				m = vm_page_lookup(object, offset);
1055
1056				if (m != VM_PAGE_NULL && m->cleaning) {
1057					PAGE_ASSERT_WAIT(m, interruptible);
1058
1059					vm_object_unlock(object);
1060					wait_result = thread_block(THREAD_CONTINUE_NULL);
1061					vm_object_deallocate(object);
1062
1063					goto backoff;
1064				} else {
1065					vm_object_unlock(object);
1066
1067					vm_object_deallocate(object);
1068					thread_interrupt_level(interruptible_state);
1069
1070					return (VM_FAULT_RETRY);
1071				}
1072			}
1073			if (type_of_fault == NULL && m->speculative) {
1074			        /*
1075				 * If we were passed a non-NULL pointer for
1076				 * "type_of_fault", than we came from
1077				 * vm_fault... we'll let it deal with
1078				 * this condition, since it
1079				 * needs to see m->speculative to correctly
1080				 * account the pageins, otherwise...
1081				 * take it off the speculative queue, we'll
1082				 * let the caller of vm_fault_page deal
1083				 * with getting it onto the correct queue
1084				 */
1085			        vm_page_lockspin_queues();
1086			        VM_PAGE_QUEUES_REMOVE(m);
1087			        vm_page_unlock_queues();
1088			}
1089
1090			if (m->encrypted) {
1091				/*
1092				 * ENCRYPTED SWAP:
1093				 * the user needs access to a page that we
1094				 * encrypted before paging it out.
1095				 * Decrypt the page now.
1096				 * Keep it busy to prevent anyone from
1097				 * accessing it during the decryption.
1098				 */
1099				m->busy = TRUE;
1100				vm_page_decrypt(m, 0);
1101				assert(object == m->object);
1102				assert(m->busy);
1103				PAGE_WAKEUP_DONE(m);
1104
1105				/*
1106				 * Retry from the top, in case
1107				 * something changed while we were
1108				 * decrypting.
1109				 */
1110				continue;
1111			}
1112			ASSERT_PAGE_DECRYPTED(m);
1113
1114			if (m->object->code_signed) {
1115				/*
1116				 * CODE SIGNING:
1117				 * We just paged in a page from a signed
1118				 * memory object but we don't need to
1119				 * validate it now.  We'll validate it if
1120				 * when it gets mapped into a user address
1121				 * space for the first time or when the page
1122				 * gets copied to another object as a result
1123				 * of a copy-on-write.
1124				 */
1125			}
1126
1127			/*
1128			 * We mark the page busy and leave it on
1129			 * the pageout queues.  If the pageout
1130			 * deamon comes across it, then it will
1131			 * remove the page from the queue, but not the object
1132			 */
1133#if TRACEFAULTPAGE
1134			dbgTrace(0xBEEF000B, (unsigned int) m, (unsigned int) 0);	/* (TEST/DEBUG) */
1135#endif
1136			XPR(XPR_VM_FAULT,
1137			    "vm_f_page: found page obj 0x%X, offset 0x%X, page 0x%X\n",
1138				(integer_t)object, offset, (integer_t)m, 0, 0);
1139			assert(!m->busy);
1140			assert(!m->absent);
1141
1142			m->busy = TRUE;
1143			break;
1144		}
1145
1146
1147		/*
1148		 * we get here when there is no page present in the object at
1149		 * the offset we're interested in... we'll allocate a page
1150		 * at this point if the pager associated with
1151		 * this object can provide the data or we're the top object...
1152		 * object is locked;  m == NULL
1153		 */
1154		look_for_page =	(object->pager_created && (MUST_ASK_PAGER(object, offset) == TRUE) && !data_supply);
1155
1156#if TRACEFAULTPAGE
1157		dbgTrace(0xBEEF000C, (unsigned int) look_for_page, (unsigned int) object);	/* (TEST/DEBUG) */
1158#endif
1159		if ((look_for_page || (object == first_object)) && !must_be_resident && !object->phys_contiguous) {
1160			/*
1161			 * Allocate a new page for this object/offset pair
1162			 */
1163			m = vm_page_grab();
1164#if TRACEFAULTPAGE
1165			dbgTrace(0xBEEF000D, (unsigned int) m, (unsigned int) object);	/* (TEST/DEBUG) */
1166#endif
1167			if (m == VM_PAGE_NULL) {
1168
1169				vm_fault_cleanup(object, first_m);
1170				thread_interrupt_level(interruptible_state);
1171
1172				return (VM_FAULT_MEMORY_SHORTAGE);
1173			}
1174			vm_page_insert(m, object, offset);
1175		}
1176		if (look_for_page && !must_be_resident) {
1177			kern_return_t	rc;
1178
1179			/*
1180			 *	If the memory manager is not ready, we
1181			 *	cannot make requests.
1182			 */
1183			if (!object->pager_ready) {
1184#if TRACEFAULTPAGE
1185				dbgTrace(0xBEEF000E, (unsigned int) 0, (unsigned int) 0);	/* (TEST/DEBUG) */
1186#endif
1187				if (m != VM_PAGE_NULL)
1188				        VM_PAGE_FREE(m);
1189
1190				XPR(XPR_VM_FAULT,
1191				"vm_f_page: ready wait obj 0x%X, offset 0x%X\n",
1192					(integer_t)object, offset, 0, 0, 0);
1193
1194				/*
1195				 * take an extra ref so object won't die
1196				 */
1197				vm_object_reference_locked(object);
1198				vm_fault_cleanup(object, first_m);
1199				counter(c_vm_fault_page_block_backoff_kernel++);
1200
1201				vm_object_lock(object);
1202				assert(object->ref_count > 0);
1203
1204				if (!object->pager_ready) {
1205					wait_result = vm_object_assert_wait(object, VM_OBJECT_EVENT_PAGER_READY, interruptible);
1206
1207					vm_object_unlock(object);
1208					if (wait_result == THREAD_WAITING)
1209						wait_result = thread_block(THREAD_CONTINUE_NULL);
1210					vm_object_deallocate(object);
1211
1212					goto backoff;
1213				} else {
1214					vm_object_unlock(object);
1215					vm_object_deallocate(object);
1216					thread_interrupt_level(interruptible_state);
1217
1218					return (VM_FAULT_RETRY);
1219				}
1220			}
1221			if (!object->internal && !object->phys_contiguous && object->paging_in_progress > vm_object_pagein_throttle) {
1222				/*
1223				 * If there are too many outstanding page
1224				 * requests pending on this external object, we
1225				 * wait for them to be resolved now.
1226				 */
1227#if TRACEFAULTPAGE
1228				dbgTrace(0xBEEF0010, (unsigned int) m, (unsigned int) 0);	/* (TEST/DEBUG) */
1229#endif
1230				if (m != VM_PAGE_NULL)
1231					VM_PAGE_FREE(m);
1232				/*
1233				 * take an extra ref so object won't die
1234				 */
1235				vm_object_reference_locked(object);
1236
1237				vm_fault_cleanup(object, first_m);
1238
1239				counter(c_vm_fault_page_block_backoff_kernel++);
1240
1241				vm_object_lock(object);
1242				assert(object->ref_count > 0);
1243
1244				if (object->paging_in_progress > vm_object_pagein_throttle) {
1245				        vm_object_assert_wait(object, VM_OBJECT_EVENT_PAGING_IN_PROGRESS, interruptible);
1246
1247					vm_object_unlock(object);
1248					wait_result = thread_block(THREAD_CONTINUE_NULL);
1249					vm_object_deallocate(object);
1250
1251					goto backoff;
1252				} else {
1253					vm_object_unlock(object);
1254					vm_object_deallocate(object);
1255					thread_interrupt_level(interruptible_state);
1256
1257					return (VM_FAULT_RETRY);
1258				}
1259			}
1260			if (m != VM_PAGE_NULL) {
1261			        /*
1262				 * Indicate that the page is waiting for data
1263				 * from the memory manager.
1264				 */
1265			        m->list_req_pending = TRUE;
1266				m->absent = TRUE;
1267			}
1268
1269#if TRACEFAULTPAGE
1270			dbgTrace(0xBEEF0012, (unsigned int) object, (unsigned int) 0);	/* (TEST/DEBUG) */
1271#endif
1272
1273			/*
1274			 * It's possible someone called vm_object_destroy while we weren't
1275			 * holding the object lock.  If that has happened, then bail out
1276			 * here.
1277			 */
1278
1279			pager = object->pager;
1280
1281			if (pager == MEMORY_OBJECT_NULL) {
1282				vm_fault_cleanup(object, first_m);
1283				thread_interrupt_level(interruptible_state);
1284				return VM_FAULT_MEMORY_ERROR;
1285			}
1286
1287			/*
1288			 * We have an absent page in place for the faulting offset,
1289			 * so we can release the object lock.
1290			 */
1291
1292			vm_object_unlock(object);
1293
1294			/*
1295			 * If this object uses a copy_call strategy,
1296			 * and we are interested in a copy of this object
1297			 * (having gotten here only by following a
1298			 * shadow chain), then tell the memory manager
1299			 * via a flag added to the desired_access
1300			 * parameter, so that it can detect a race
1301			 * between our walking down the shadow chain
1302			 * and its pushing pages up into a copy of
1303			 * the object that it manages.
1304			 */
1305			if (object->copy_strategy == MEMORY_OBJECT_COPY_CALL && object != first_object)
1306				wants_copy_flag = VM_PROT_WANTS_COPY;
1307			else
1308				wants_copy_flag = VM_PROT_NONE;
1309
1310			XPR(XPR_VM_FAULT,
1311			    "vm_f_page: data_req obj 0x%X, offset 0x%X, page 0x%X, acc %d\n",
1312				(integer_t)object, offset, (integer_t)m,
1313				access_required | wants_copy_flag, 0);
1314
1315			/*
1316			 * Call the memory manager to retrieve the data.
1317			 */
1318			rc = memory_object_data_request(
1319				pager,
1320				offset + object->paging_offset,
1321				PAGE_SIZE,
1322				access_required | wants_copy_flag,
1323				(memory_object_fault_info_t)fault_info);
1324
1325#if TRACEFAULTPAGE
1326			dbgTrace(0xBEEF0013, (unsigned int) object, (unsigned int) rc);	/* (TEST/DEBUG) */
1327#endif
1328			vm_object_lock(object);
1329
1330			if (rc != KERN_SUCCESS) {
1331
1332				vm_fault_cleanup(object, first_m);
1333				thread_interrupt_level(interruptible_state);
1334
1335				return ((rc == MACH_SEND_INTERRUPTED) ?
1336					VM_FAULT_INTERRUPTED :
1337					VM_FAULT_MEMORY_ERROR);
1338			}
1339			if ((interruptible != THREAD_UNINT) && (current_thread()->sched_mode & TH_MODE_ABORT)) {
1340
1341				vm_fault_cleanup(object, first_m);
1342				thread_interrupt_level(interruptible_state);
1343
1344				return (VM_FAULT_INTERRUPTED);
1345			}
1346			if (m == VM_PAGE_NULL && object->phys_contiguous) {
1347				/*
1348				 * No page here means that the object we
1349				 * initially looked up was "physically
1350				 * contiguous" (i.e. device memory).  However,
1351				 * with Virtual VRAM, the object might not
1352				 * be backed by that device memory anymore,
1353				 * so we're done here only if the object is
1354				 * still "phys_contiguous".
1355				 * Otherwise, if the object is no longer
1356				 * "phys_contiguous", we need to retry the
1357				 * page fault against the object's new backing
1358				 * store (different memory object).
1359				 */
1360				break;
1361			}
1362			/*
1363			 * potentially a pagein fault
1364			 * if we make it through the state checks
1365			 * above, than we'll count it as such
1366			 */
1367			my_fault = DBG_PAGEIN_FAULT;
1368
1369			/*
1370			 * Retry with same object/offset, since new data may
1371			 * be in a different page (i.e., m is meaningless at
1372			 * this point).
1373			 */
1374			continue;
1375		}
1376
1377		/*
1378		 * We get here if the object has no pager, or an existence map
1379		 * exists and indicates the page isn't present on the pager
1380		 * or we're unwiring a page.  If a pager exists, but there
1381		 * is no existence map, then the m->absent case above handles
1382		 * the ZF case when the pager can't provide the page
1383		 */
1384#if TRACEFAULTPAGE
1385		dbgTrace(0xBEEF0014, (unsigned int) object, (unsigned int) m);	/* (TEST/DEBUG) */
1386#endif
1387		if (object == first_object)
1388			first_m = m;
1389		else
1390			assert(m == VM_PAGE_NULL);
1391
1392		XPR(XPR_VM_FAULT,
1393		    "vm_f_page: no pager obj 0x%X, offset 0x%X, page 0x%X, next_obj 0x%X\n",
1394			(integer_t)object, offset, (integer_t)m,
1395			(integer_t)object->shadow, 0);
1396
1397		next_object = object->shadow;
1398
1399		if (next_object == VM_OBJECT_NULL) {
1400			/*
1401			 * we've hit the bottom of the shadown chain,
1402			 * fill the page in the top object with zeros.
1403			 */
1404			assert(!must_be_resident);
1405
1406			if (object != first_object) {
1407				vm_object_paging_end(object);
1408				vm_object_unlock(object);
1409
1410				object = first_object;
1411				offset = first_offset;
1412				vm_object_lock(object);
1413			}
1414			m = first_m;
1415			assert(m->object == object);
1416			first_m = VM_PAGE_NULL;
1417
1418			/*
1419			 * check for any conditions that prevent
1420			 * us from creating a new zero-fill page
1421			 * vm_fault_check will do all of the
1422			 * fault cleanup in the case of an error condition
1423			 * including resetting the thread_interrupt_level
1424			 */
1425			error = vm_fault_check(object, m, first_m, interruptible_state);
1426
1427			if (error != VM_FAULT_SUCCESS)
1428			        return (error);
1429
1430			if (m == VM_PAGE_NULL) {
1431				m = vm_page_grab();
1432
1433				if (m == VM_PAGE_NULL) {
1434					vm_fault_cleanup(object, VM_PAGE_NULL);
1435					thread_interrupt_level(interruptible_state);
1436
1437					return (VM_FAULT_MEMORY_SHORTAGE);
1438				}
1439				vm_page_insert(m, object, offset);
1440			}
1441			my_fault = vm_fault_zero_page(m, no_zero_fill);
1442
1443			break;
1444
1445		} else {
1446		        /*
1447			 * Move on to the next object.  Lock the next
1448			 * object before unlocking the current one.
1449			 */
1450			if ((object != first_object) || must_be_resident)
1451				vm_object_paging_end(object);
1452
1453			offset += object->shadow_offset;
1454			fault_info->lo_offset += object->shadow_offset;
1455			fault_info->hi_offset += object->shadow_offset;
1456			access_required = VM_PROT_READ;
1457
1458			vm_object_lock(next_object);
1459			vm_object_unlock(object);
1460
1461			object = next_object;
1462			vm_object_paging_begin(object);
1463		}
1464	}
1465
1466	/*
1467	 *	PAGE HAS BEEN FOUND.
1468	 *
1469	 *	This page (m) is:
1470	 *		busy, so that we can play with it;
1471	 *		not absent, so that nobody else will fill it;
1472	 *		possibly eligible for pageout;
1473	 *
1474	 *	The top-level page (first_m) is:
1475	 *		VM_PAGE_NULL if the page was found in the
1476	 *		 top-level object;
1477	 *		busy, not absent, and ineligible for pageout.
1478	 *
1479	 *	The current object (object) is locked.  A paging
1480	 *	reference is held for the current and top-level
1481	 *	objects.
1482	 */
1483
1484#if TRACEFAULTPAGE
1485	dbgTrace(0xBEEF0015, (unsigned int) object, (unsigned int) m);	/* (TEST/DEBUG) */
1486#endif
1487#if	EXTRA_ASSERTIONS
1488	if (m != VM_PAGE_NULL) {
1489		assert(m->busy && !m->absent);
1490		assert((first_m == VM_PAGE_NULL) ||
1491			(first_m->busy && !first_m->absent &&
1492			 !first_m->active && !first_m->inactive));
1493	}
1494#endif	/* EXTRA_ASSERTIONS */
1495
1496	/*
1497	 * ENCRYPTED SWAP:
1498	 * If we found a page, we must have decrypted it before we
1499	 * get here...
1500	 */
1501	if (m != VM_PAGE_NULL) {
1502		ASSERT_PAGE_DECRYPTED(m);
1503	}
1504
1505	XPR(XPR_VM_FAULT,
1506	    "vm_f_page: FOUND obj 0x%X, off 0x%X, page 0x%X, 1_obj 0x%X, 1_m 0x%X\n",
1507		(integer_t)object, offset, (integer_t)m,
1508		(integer_t)first_object, (integer_t)first_m);
1509
1510	/*
1511	 * If the page is being written, but isn't
1512	 * already owned by the top-level object,
1513	 * we have to copy it into a new page owned
1514	 * by the top-level object.
1515	 */
1516	if ((object != first_object) && (m != VM_PAGE_NULL)) {
1517
1518#if TRACEFAULTPAGE
1519		dbgTrace(0xBEEF0016, (unsigned int) object, (unsigned int) fault_type);	/* (TEST/DEBUG) */
1520#endif
1521	    	if (fault_type & VM_PROT_WRITE) {
1522			vm_page_t copy_m;
1523
1524			/*
1525			 * We only really need to copy if we
1526			 * want to write it.
1527			 */
1528			assert(!must_be_resident);
1529
1530			/*
1531			 * are we protecting the system from
1532			 * backing store exhaustion.  If so
1533			 * sleep unless we are privileged.
1534			 */
1535			if (vm_backing_store_low) {
1536				if (!(current_task()->priv_flags & VM_BACKING_STORE_PRIV)) {
1537
1538					RELEASE_PAGE(m);
1539					vm_fault_cleanup(object, first_m);
1540
1541					assert_wait((event_t)&vm_backing_store_low, THREAD_UNINT);
1542
1543					thread_block(THREAD_CONTINUE_NULL);
1544					thread_interrupt_level(interruptible_state);
1545
1546					return (VM_FAULT_RETRY);
1547				}
1548			}
1549			/*
1550			 * If we try to collapse first_object at this
1551			 * point, we may deadlock when we try to get
1552			 * the lock on an intermediate object (since we
1553			 * have the bottom object locked).  We can't
1554			 * unlock the bottom object, because the page
1555			 * we found may move (by collapse) if we do.
1556			 *
1557			 * Instead, we first copy the page.  Then, when
1558			 * we have no more use for the bottom object,
1559			 * we unlock it and try to collapse.
1560			 *
1561			 * Note that we copy the page even if we didn't
1562			 * need to... that's the breaks.
1563			 */
1564
1565			/*
1566			 * Allocate a page for the copy
1567			 */
1568			copy_m = vm_page_grab();
1569
1570			if (copy_m == VM_PAGE_NULL) {
1571				RELEASE_PAGE(m);
1572
1573				vm_fault_cleanup(object, first_m);
1574				thread_interrupt_level(interruptible_state);
1575
1576				return (VM_FAULT_MEMORY_SHORTAGE);
1577			}
1578			XPR(XPR_VM_FAULT,
1579			    "vm_f_page: page_copy obj 0x%X, offset 0x%X, m 0x%X, copy_m 0x%X\n",
1580				(integer_t)object, offset,
1581				(integer_t)m, (integer_t)copy_m, 0);
1582
1583			vm_page_copy(m, copy_m);
1584
1585			/*
1586			 * If another map is truly sharing this
1587			 * page with us, we have to flush all
1588			 * uses of the original page, since we
1589			 * can't distinguish those which want the
1590			 * original from those which need the
1591			 * new copy.
1592			 *
1593			 * XXXO If we know that only one map has
1594			 * access to this page, then we could
1595			 * avoid the pmap_disconnect() call.
1596			 */
1597			if (m->pmapped)
1598			        pmap_disconnect(m->phys_page);
1599
1600			assert(!m->cleaning);
1601
1602			/*
1603			 * We no longer need the old page or object.
1604			 */
1605			PAGE_WAKEUP_DONE(m);
1606			vm_object_paging_end(object);
1607			vm_object_unlock(object);
1608
1609			my_fault = DBG_COW_FAULT;
1610			VM_STAT_INCR(cow_faults);
1611			DTRACE_VM2(cow_fault, int, 1, (uint64_t *), NULL);
1612			current_task()->cow_faults++;
1613
1614			object = first_object;
1615			offset = first_offset;
1616
1617			vm_object_lock(object);
1618			/*
1619			 * get rid of the place holder
1620			 * page that we soldered in earlier
1621			 */
1622			VM_PAGE_FREE(first_m);
1623			first_m = VM_PAGE_NULL;
1624
1625			/*
1626			 * and replace it with the
1627			 * page we just copied into
1628			 */
1629			assert(copy_m->busy);
1630			vm_page_insert(copy_m, object, offset);
1631			copy_m->dirty = TRUE;
1632
1633			m = copy_m;
1634			/*
1635			 * Now that we've gotten the copy out of the
1636			 * way, let's try to collapse the top object.
1637			 * But we have to play ugly games with
1638			 * paging_in_progress to do that...
1639			 */
1640			vm_object_paging_end(object);
1641			vm_object_collapse(object, offset, TRUE);
1642			vm_object_paging_begin(object);
1643
1644		} else
1645		    	*protection &= (~VM_PROT_WRITE);
1646	}
1647	/*
1648	 * Now check whether the page needs to be pushed into the
1649	 * copy object.  The use of asymmetric copy on write for
1650	 * shared temporary objects means that we may do two copies to
1651	 * satisfy the fault; one above to get the page from a
1652	 * shadowed object, and one here to push it into the copy.
1653	 */
1654	try_failed_count = 0;
1655
1656	while ((copy_object = first_object->copy) != VM_OBJECT_NULL && (m != VM_PAGE_NULL)) {
1657		vm_object_offset_t	copy_offset;
1658		vm_page_t		copy_m;
1659
1660#if TRACEFAULTPAGE
1661		dbgTrace(0xBEEF0017, (unsigned int) copy_object, (unsigned int) fault_type);	/* (TEST/DEBUG) */
1662#endif
1663		/*
1664		 * If the page is being written, but hasn't been
1665		 * copied to the copy-object, we have to copy it there.
1666		 */
1667		if ((fault_type & VM_PROT_WRITE) == 0) {
1668			*protection &= ~VM_PROT_WRITE;
1669			break;
1670		}
1671
1672		/*
1673		 * If the page was guaranteed to be resident,
1674		 * we must have already performed the copy.
1675		 */
1676		if (must_be_resident)
1677			break;
1678
1679		/*
1680		 * Try to get the lock on the copy_object.
1681		 */
1682		if (!vm_object_lock_try(copy_object)) {
1683
1684			vm_object_unlock(object);
1685			try_failed_count++;
1686
1687			mutex_pause(try_failed_count);	/* wait a bit */
1688			vm_object_lock(object);
1689
1690			continue;
1691		}
1692		try_failed_count = 0;
1693
1694		/*
1695		 * Make another reference to the copy-object,
1696		 * to keep it from disappearing during the
1697		 * copy.
1698		 */
1699		vm_object_reference_locked(copy_object);
1700
1701		/*
1702		 * Does the page exist in the copy?
1703		 */
1704		copy_offset = first_offset - copy_object->shadow_offset;
1705
1706		if (copy_object->size <= copy_offset)
1707			/*
1708			 * Copy object doesn't cover this page -- do nothing.
1709			 */
1710			;
1711		else if ((copy_m = vm_page_lookup(copy_object, copy_offset)) != VM_PAGE_NULL) {
1712			/*
1713			 * Page currently exists in the copy object
1714			 */
1715			if (copy_m->busy) {
1716				/*
1717				 * If the page is being brought
1718				 * in, wait for it and then retry.
1719				 */
1720				RELEASE_PAGE(m);
1721
1722				/*
1723				 * take an extra ref so object won't die
1724				 */
1725				vm_object_reference_locked(copy_object);
1726				vm_object_unlock(copy_object);
1727				vm_fault_cleanup(object, first_m);
1728				counter(c_vm_fault_page_block_backoff_kernel++);
1729
1730				vm_object_lock(copy_object);
1731				assert(copy_object->ref_count > 0);
1732				VM_OBJ_RES_DECR(copy_object);
1733				vm_object_lock_assert_exclusive(copy_object);
1734				copy_object->ref_count--;
1735				assert(copy_object->ref_count > 0);
1736				copy_m = vm_page_lookup(copy_object, copy_offset);
1737				/*
1738				 * ENCRYPTED SWAP:
1739				 * it's OK if the "copy_m" page is encrypted,
1740				 * because we're not moving it nor handling its
1741				 * contents.
1742				 */
1743				if (copy_m != VM_PAGE_NULL && copy_m->busy) {
1744					PAGE_ASSERT_WAIT(copy_m, interruptible);
1745
1746					vm_object_unlock(copy_object);
1747					wait_result = thread_block(THREAD_CONTINUE_NULL);
1748					vm_object_deallocate(copy_object);
1749
1750					goto backoff;
1751				} else {
1752					vm_object_unlock(copy_object);
1753					vm_object_deallocate(copy_object);
1754					thread_interrupt_level(interruptible_state);
1755
1756					return (VM_FAULT_RETRY);
1757				}
1758			}
1759		}
1760		else if (!PAGED_OUT(copy_object, copy_offset)) {
1761			/*
1762			 * If PAGED_OUT is TRUE, then the page used to exist
1763			 * in the copy-object, and has already been paged out.
1764			 * We don't need to repeat this. If PAGED_OUT is
1765			 * FALSE, then either we don't know (!pager_created,
1766			 * for example) or it hasn't been paged out.
1767			 * (VM_EXTERNAL_STATE_UNKNOWN||VM_EXTERNAL_STATE_ABSENT)
1768			 * We must copy the page to the copy object.
1769			 */
1770
1771			if (vm_backing_store_low) {
1772			        /*
1773				 * we are protecting the system from
1774				 * backing store exhaustion.  If so
1775				 * sleep unless we are privileged.
1776				 */
1777				if (!(current_task()->priv_flags & VM_BACKING_STORE_PRIV)) {
1778					assert_wait((event_t)&vm_backing_store_low, THREAD_UNINT);
1779
1780					RELEASE_PAGE(m);
1781					VM_OBJ_RES_DECR(copy_object);
1782					vm_object_lock_assert_exclusive(copy_object);
1783					copy_object->ref_count--;
1784					assert(copy_object->ref_count > 0);
1785
1786					vm_object_unlock(copy_object);
1787					vm_fault_cleanup(object, first_m);
1788					thread_block(THREAD_CONTINUE_NULL);
1789					thread_interrupt_level(interruptible_state);
1790
1791					return (VM_FAULT_RETRY);
1792				}
1793			}
1794			/*
1795			 * Allocate a page for the copy
1796			 */
1797			copy_m = vm_page_alloc(copy_object, copy_offset);
1798
1799			if (copy_m == VM_PAGE_NULL) {
1800				RELEASE_PAGE(m);
1801
1802				VM_OBJ_RES_DECR(copy_object);
1803				vm_object_lock_assert_exclusive(copy_object);
1804				copy_object->ref_count--;
1805				assert(copy_object->ref_count > 0);
1806
1807				vm_object_unlock(copy_object);
1808				vm_fault_cleanup(object, first_m);
1809				thread_interrupt_level(interruptible_state);
1810
1811				return (VM_FAULT_MEMORY_SHORTAGE);
1812			}
1813			/*
1814			 * Must copy page into copy-object.
1815			 */
1816			vm_page_copy(m, copy_m);
1817
1818			/*
1819			 * If the old page was in use by any users
1820			 * of the copy-object, it must be removed
1821			 * from all pmaps.  (We can't know which
1822			 * pmaps use it.)
1823			 */
1824			if (m->pmapped)
1825			        pmap_disconnect(m->phys_page);
1826
1827			/*
1828			 * If there's a pager, then immediately
1829			 * page out this page, using the "initialize"
1830			 * option.  Else, we use the copy.
1831			 */
1832		 	if ((!copy_object->pager_created)
1833#if MACH_PAGEMAP
1834			    || vm_external_state_get(copy_object->existence_map, copy_offset) == VM_EXTERNAL_STATE_ABSENT
1835#endif
1836			    ) {
1837
1838				vm_page_lockspin_queues();
1839				assert(!m->cleaning);
1840				vm_page_activate(copy_m);
1841				vm_page_unlock_queues();
1842
1843				copy_m->dirty = TRUE;
1844				PAGE_WAKEUP_DONE(copy_m);
1845			}
1846			else {
1847				assert(copy_m->busy == TRUE);
1848				assert(!m->cleaning);
1849
1850				/*
1851				 * dirty is protected by the object lock
1852				 */
1853				copy_m->dirty = TRUE;
1854
1855				/*
1856				 * The page is already ready for pageout:
1857				 * not on pageout queues and busy.
1858				 * Unlock everything except the
1859				 * copy_object itself.
1860				 */
1861				vm_object_unlock(object);
1862
1863				/*
1864				 * Write the page to the copy-object,
1865				 * flushing it from the kernel.
1866				 */
1867				vm_pageout_initialize_page(copy_m);
1868
1869				/*
1870				 * Since the pageout may have
1871				 * temporarily dropped the
1872				 * copy_object's lock, we
1873				 * check whether we'll have
1874				 * to deallocate the hard way.
1875				 */
1876				if ((copy_object->shadow != object) || (copy_object->ref_count == 1)) {
1877					vm_object_unlock(copy_object);
1878					vm_object_deallocate(copy_object);
1879					vm_object_lock(object);
1880
1881					continue;
1882				}
1883				/*
1884				 * Pick back up the old object's
1885				 * lock.  [It is safe to do so,
1886				 * since it must be deeper in the
1887				 * object tree.]
1888				 */
1889				vm_object_lock(object);
1890			}
1891			/*
1892			 * Because we're pushing a page upward
1893			 * in the object tree, we must restart
1894			 * any faults that are waiting here.
1895			 * [Note that this is an expansion of
1896			 * PAGE_WAKEUP that uses the THREAD_RESTART
1897			 * wait result].  Can't turn off the page's
1898			 * busy bit because we're not done with it.
1899			 */
1900			if (m->wanted) {
1901				m->wanted = FALSE;
1902				thread_wakeup_with_result((event_t) m, THREAD_RESTART);
1903			}
1904		}
1905		/*
1906		 * The reference count on copy_object must be
1907		 * at least 2: one for our extra reference,
1908		 * and at least one from the outside world
1909		 * (we checked that when we last locked
1910		 * copy_object).
1911		 */
1912		vm_object_lock_assert_exclusive(copy_object);
1913		copy_object->ref_count--;
1914		assert(copy_object->ref_count > 0);
1915
1916		VM_OBJ_RES_DECR(copy_object);
1917		vm_object_unlock(copy_object);
1918
1919		break;
1920	}
1921	*result_page = m;
1922	*top_page = first_m;
1923
1924	XPR(XPR_VM_FAULT,
1925		"vm_f_page: DONE obj 0x%X, offset 0x%X, m 0x%X, first_m 0x%X\n",
1926		(integer_t)object, offset, (integer_t)m, (integer_t)first_m, 0);
1927
1928	if (m != VM_PAGE_NULL) {
1929		if (my_fault == DBG_PAGEIN_FAULT) {
1930
1931			VM_STAT_INCR(pageins);
1932			DTRACE_VM2(pgin, int, 1, (uint64_t *), NULL);
1933			DTRACE_VM2(maj_fault, int, 1, (uint64_t *), NULL);
1934			current_task()->pageins++;
1935
1936			if (m->object->internal) {
1937				DTRACE_VM2(anonpgin, int, 1, (uint64_t *), NULL);
1938			} else {
1939				DTRACE_VM2(fspgin, int, 1, (uint64_t *), NULL);
1940			}
1941
1942		        /*
1943			 * evaluate access pattern and update state
1944			 * vm_fault_deactivate_behind depends on the
1945			 * state being up to date
1946			 */
1947		        vm_fault_is_sequential(object, offset, fault_info->behavior);
1948
1949			vm_fault_deactivate_behind(object, offset, fault_info->behavior);
1950		}
1951		if (type_of_fault)
1952		        *type_of_fault = my_fault;
1953	} else
1954		vm_object_unlock(object);
1955
1956	thread_interrupt_level(interruptible_state);
1957
1958#if TRACEFAULTPAGE
1959	dbgTrace(0xBEEF001A, (unsigned int) VM_FAULT_SUCCESS, 0);	/* (TEST/DEBUG) */
1960#endif
1961	return (VM_FAULT_SUCCESS);
1962
1963backoff:
1964	thread_interrupt_level(interruptible_state);
1965
1966	if (wait_result == THREAD_INTERRUPTED)
1967		return (VM_FAULT_INTERRUPTED);
1968	return (VM_FAULT_RETRY);
1969
1970#undef	RELEASE_PAGE
1971}
1972
1973
1974
1975/*
1976 * CODE SIGNING:
1977 * When soft faulting a page, we have to validate the page if:
1978 * 1. the page is being mapped in user space
1979 * 2. the page hasn't already been found to be "tainted"
1980 * 3. the page belongs to a code-signed object
1981 * 4. the page has not been validated yet or has been mapped for write.
1982 */
1983#define VM_FAULT_NEED_CS_VALIDATION(pmap, page)				\
1984	((pmap) != kernel_pmap /*1*/ &&					\
1985	 !(page)->cs_tainted /*2*/ &&					\
1986	 (page)->object->code_signed /*3*/ &&				\
1987	 (!(page)->cs_validated || (page)->wpmapped /*4*/))
1988
1989
1990/*
1991 * page queue lock must NOT be held
1992 * m->object must be locked
1993 *
1994 * NOTE: m->object could be locked "shared" only if we are called
1995 * from vm_fault() as part of a soft fault.  If so, we must be
1996 * careful not to modify the VM object in any way that is not
1997 * legal under a shared lock...
1998 */
1999unsigned long cs_enter_tainted_rejected = 0;
2000unsigned long cs_enter_tainted_accepted = 0;
2001kern_return_t
2002vm_fault_enter(vm_page_t m,
2003	       pmap_t pmap,
2004	       vm_map_offset_t vaddr,
2005	       vm_prot_t prot,
2006	       boolean_t wired,
2007	       boolean_t change_wiring,
2008	       boolean_t no_cache,
2009	       int *type_of_fault)
2010{
2011	unsigned int	cache_attr;
2012	kern_return_t	kr;
2013	boolean_t	previously_pmapped = m->pmapped;
2014
2015	vm_object_lock_assert_held(m->object);
2016#if DEBUG
2017	mutex_assert(&vm_page_queue_lock, MA_NOTOWNED);
2018#endif /* DEBUG */
2019
2020	if (m->phys_page == vm_page_guard_addr) {
2021		assert(m->fictitious);
2022		return KERN_SUCCESS;
2023	}
2024
2025        cache_attr = ((unsigned int)m->object->wimg_bits) & VM_WIMG_MASK;
2026
2027	if (m->pmapped == FALSE) {
2028		/*
2029		 * This is the first time this page is being
2030		 * mapped in an address space (pmapped == FALSE).
2031		 *
2032		 * Part of that page may still be in the data cache
2033		 * and not flushed to memory.  In case we end up
2034		 * accessing that page via the instruction cache,
2035		 * we need to ensure that the 2 caches are in sync.
2036		 */
2037		pmap_sync_page_data_phys(m->phys_page);
2038
2039		if ((*type_of_fault == DBG_CACHE_HIT_FAULT) && m->clustered) {
2040		        /*
2041			 * found it in the cache, but this
2042			 * is the first fault-in of the page (m->pmapped == FALSE)
2043			 * so it must have come in as part of
2044			 * a cluster... account 1 pagein against it
2045			 */
2046		        VM_STAT_INCR(pageins);
2047			DTRACE_VM2(pgin, int, 1, (uint64_t *), NULL);
2048
2049			if (m->object->internal) {
2050				DTRACE_VM2(anonpgin, int, 1, (uint64_t *), NULL);
2051			} else {
2052				DTRACE_VM2(fspgin, int, 1, (uint64_t *), NULL);
2053			}
2054
2055			current_task()->pageins++;
2056
2057			*type_of_fault = DBG_PAGEIN_FAULT;
2058		}
2059		VM_PAGE_CONSUME_CLUSTERED(m);
2060
2061	} else if (cache_attr != VM_WIMG_DEFAULT)
2062	        pmap_sync_page_attributes_phys(m->phys_page);
2063
2064	if (*type_of_fault != DBG_COW_FAULT) {
2065		DTRACE_VM2(as_fault, int, 1, (uint64_t *), NULL);
2066
2067		if (pmap == kernel_pmap) {
2068			DTRACE_VM2(kernel_asflt, int, 1, (uint64_t *), NULL);
2069		}
2070	}
2071
2072	if (VM_FAULT_NEED_CS_VALIDATION(pmap, m)) {
2073		vm_object_lock_assert_exclusive(m->object);
2074
2075		if (m->cs_validated) {
2076			vm_cs_revalidates++;
2077		}
2078
2079		/* VM map is locked, so 1 ref will remain on VM object */
2080		vm_page_validate_cs(m);
2081	}
2082
2083	if (m->cs_tainted	/* always invalidate a tainted page */
2084#if CONFIG_ENFORCE_SIGNED_CODE
2085	    /*
2086	     * Code Signing enforcement invalidates an executable page that
2087	     * has no code directory, and thus could not be validated.
2088	     */
2089	    || ((prot & VM_PROT_EXECUTE) && !m->cs_validated )
2090#endif
2091		) {
2092		/*
2093		 * CODE SIGNING:
2094		 * This page has been tainted and can not be trusted.
2095		 * Let's notify the current process and let it take any
2096		 * necessary precautions before we enter the tainted page
2097		 * into its address space.
2098		 */
2099		kr = KERN_SUCCESS;
2100#if CONFIG_ENFORCE_SIGNED_CODE
2101		if (!cs_enforcement_disable) {
2102#endif
2103			if (cs_invalid_page((addr64_t) vaddr)) {
2104				/* reject the tainted page: abort the page fault */
2105				kr = KERN_MEMORY_ERROR;
2106				cs_enter_tainted_rejected++;
2107			} else {
2108				/* proceed with the tainted page */
2109				kr = KERN_SUCCESS;
2110				cs_enter_tainted_accepted++;
2111			}
2112#if CONFIG_ENFORCE_SIGNED_CODE
2113		}
2114#endif
2115		if (cs_debug || kr != KERN_SUCCESS) {
2116			printf("CODESIGNING: vm_fault_enter(0x%llx): "
2117			       "page %p obj %p off 0x%llx *** INVALID PAGE ***\n",
2118			       (long long)vaddr, m, m->object, m->offset);
2119		}
2120	} else {
2121		/* proceed with the valid page */
2122		kr = KERN_SUCCESS;
2123	}
2124
2125	if (kr == KERN_SUCCESS) {
2126	        /*
2127		 * NOTE: we may only hold the vm_object lock SHARED
2128		 * at this point, but the update of pmapped is ok
2129		 * since this is the ONLY bit updated behind the SHARED
2130		 * lock... however, we need to figure out how to do an atomic
2131		 * update on a bit field to make this less fragile... right
2132		 * now I don't know how to coerce 'C' to give me the offset info
2133		 * that's needed for an AtomicCompareAndSwap
2134		 */
2135		m->pmapped = TRUE;
2136		if (prot & VM_PROT_WRITE) {
2137			vm_object_lock_assert_exclusive(m->object);
2138			m->wpmapped = TRUE;
2139		}
2140
2141		PMAP_ENTER(pmap, vaddr, m, prot, cache_attr, wired);
2142	}
2143
2144	/*
2145	 * Hold queues lock to manipulate
2146	 * the page queues.  Change wiring
2147	 * case is obvious.
2148	 */
2149	if (change_wiring) {
2150	        vm_page_lockspin_queues();
2151
2152		if (wired) {
2153			if (kr == KERN_SUCCESS) {
2154				vm_page_wire(m);
2155			}
2156		} else {
2157		        vm_page_unwire(m);
2158		}
2159		vm_page_unlock_queues();
2160
2161	} else {
2162	        if (kr != KERN_SUCCESS) {
2163		        vm_page_lock_queues();
2164		        vm_page_deactivate(m);
2165		        vm_page_unlock_queues();
2166		} else {
2167		        if (((!m->active && !m->inactive) || no_cache) && !m->wire_count && !m->throttled) {
2168			        vm_page_lockspin_queues();
2169				/*
2170				 * test again now that we hold the page queue lock
2171				 */
2172				if (((!m->active && !m->inactive) || no_cache) && !m->wire_count) {
2173
2174					/*
2175					 * If this is a no_cache mapping and the page has never been
2176					 * mapped before or was previously a no_cache page, then we
2177					 * want to leave pages in the speculative state so that they
2178					 * can be readily recycled if free memory runs low.  Otherwise
2179					 * the page is activated as normal.
2180					 */
2181
2182					if (no_cache && (!previously_pmapped || m->no_cache)) {
2183						m->no_cache = TRUE;
2184
2185						if (m->active || m->inactive)
2186							VM_PAGE_QUEUES_REMOVE(m);
2187
2188						if (!m->speculative)
2189							vm_page_speculate(m, TRUE);
2190
2191					} else if (!m->active && !m->inactive)
2192						vm_page_activate(m);
2193
2194				}
2195
2196				vm_page_unlock_queues();
2197			}
2198		}
2199	}
2200	return kr;
2201}
2202
2203
2204/*
2205 *	Routine:	vm_fault
2206 *	Purpose:
2207 *		Handle page faults, including pseudo-faults
2208 *		used to change the wiring status of pages.
2209 *	Returns:
2210 *		Explicit continuations have been removed.
2211 *	Implementation:
2212 *		vm_fault and vm_fault_page save mucho state
2213 *		in the moral equivalent of a closure.  The state
2214 *		structure is allocated when first entering vm_fault
2215 *		and deallocated when leaving vm_fault.
2216 */
2217
2218extern int _map_enter_debug;
2219
2220unsigned long vm_fault_collapse_total = 0;
2221unsigned long vm_fault_collapse_skipped = 0;
2222
2223kern_return_t
2224vm_fault(
2225	vm_map_t	map,
2226	vm_map_offset_t	vaddr,
2227	vm_prot_t	fault_type,
2228	boolean_t	change_wiring,
2229	int		interruptible,
2230	pmap_t		caller_pmap,
2231	vm_map_offset_t	caller_pmap_addr)
2232{
2233	vm_map_version_t	version;	/* Map version for verificiation */
2234	boolean_t		wired;		/* Should mapping be wired down? */
2235	vm_object_t		object;		/* Top-level object */
2236	vm_object_offset_t	offset;		/* Top-level offset */
2237	vm_prot_t		prot;		/* Protection for mapping */
2238	vm_object_t		old_copy_object; /* Saved copy object */
2239	vm_page_t		result_page;	/* Result of vm_fault_page */
2240	vm_page_t		top_page;	/* Placeholder page */
2241	kern_return_t		kr;
2242
2243	vm_page_t		m;	/* Fast access to result_page */
2244	kern_return_t		error_code;
2245	vm_object_t		cur_object;
2246	vm_object_offset_t	cur_offset;
2247	vm_page_t		cur_m;
2248	vm_object_t		new_object;
2249	int                     type_of_fault;
2250	pmap_t			pmap;
2251	boolean_t		interruptible_state;
2252	vm_map_t		real_map = map;
2253	vm_map_t		original_map = map;
2254	vm_prot_t		original_fault_type;
2255	struct vm_object_fault_info fault_info;
2256	boolean_t		need_collapse = FALSE;
2257	int			object_lock_type = 0;
2258	int			cur_object_lock_type;
2259	vm_object_t		top_object = VM_OBJECT_NULL;
2260
2261
2262	KERNEL_DEBUG_CONSTANT((MACHDBG_CODE(DBG_MACH_VM, 2)) | DBG_FUNC_START,
2263			      (int)((uint64_t)vaddr >> 32),
2264			      (int)vaddr,
2265			      0,
2266			      0,
2267			      0);
2268
2269	if (get_preemption_level() != 0) {
2270	        KERNEL_DEBUG_CONSTANT((MACHDBG_CODE(DBG_MACH_VM, 2)) | DBG_FUNC_END,
2271				      (int)((uint64_t)vaddr >> 32),
2272				      (int)vaddr,
2273				      KERN_FAILURE,
2274				      0,
2275				      0);
2276
2277		return (KERN_FAILURE);
2278	}
2279	interruptible_state = thread_interrupt_level(interruptible);
2280
2281	VM_STAT_INCR(faults);
2282	current_task()->faults++;
2283	original_fault_type = fault_type;
2284
2285	if (fault_type & VM_PROT_WRITE)
2286	        object_lock_type = OBJECT_LOCK_EXCLUSIVE;
2287	else
2288	        object_lock_type = OBJECT_LOCK_SHARED;
2289
2290	cur_object_lock_type = OBJECT_LOCK_SHARED;
2291
2292RetryFault:
2293	/*
2294	 * assume we will hit a page in the cache
2295	 * otherwise, explicitly override with
2296	 * the real fault type once we determine it
2297	 */
2298	type_of_fault = DBG_CACHE_HIT_FAULT;
2299
2300	/*
2301	 *	Find the backing store object and offset into
2302	 *	it to begin the search.
2303	 */
2304	fault_type = original_fault_type;
2305	map = original_map;
2306	vm_map_lock_read(map);
2307
2308	kr = vm_map_lookup_locked(&map, vaddr, fault_type,
2309				  object_lock_type, &version,
2310				  &object, &offset, &prot, &wired,
2311				  &fault_info,
2312				  &real_map);
2313
2314	if (kr != KERN_SUCCESS) {
2315		vm_map_unlock_read(map);
2316		goto done;
2317	}
2318	pmap = real_map->pmap;
2319	fault_info.interruptible = interruptible;
2320
2321	/*
2322	 * If the page is wired, we must fault for the current protection
2323	 * value, to avoid further faults.
2324	 */
2325	if (wired) {
2326		fault_type = prot | VM_PROT_WRITE;
2327		/*
2328		 * since we're treating this fault as a 'write'
2329		 * we must hold the top object lock exclusively
2330		 */
2331		if (object_lock_type == OBJECT_LOCK_SHARED) {
2332
2333		        object_lock_type = OBJECT_LOCK_EXCLUSIVE;
2334
2335			if (vm_object_lock_upgrade(object) == FALSE) {
2336			        /*
2337				 * couldn't upgrade, so explictly
2338				 * take the lock exclusively
2339				 */
2340			        vm_object_lock(object);
2341			}
2342		}
2343	}
2344
2345#if	VM_FAULT_CLASSIFY
2346	/*
2347	 *	Temporary data gathering code
2348	 */
2349	vm_fault_classify(object, offset, fault_type);
2350#endif
2351	/*
2352	 *	Fast fault code.  The basic idea is to do as much as
2353	 *	possible while holding the map lock and object locks.
2354	 *      Busy pages are not used until the object lock has to
2355	 *	be dropped to do something (copy, zero fill, pmap enter).
2356	 *	Similarly, paging references aren't acquired until that
2357	 *	point, and object references aren't used.
2358	 *
2359	 *	If we can figure out what to do
2360	 *	(zero fill, copy on write, pmap enter) while holding
2361	 *	the locks, then it gets done.  Otherwise, we give up,
2362	 *	and use the original fault path (which doesn't hold
2363	 *	the map lock, and relies on busy pages).
2364	 *	The give up cases include:
2365	 * 		- Have to talk to pager.
2366	 *		- Page is busy, absent or in error.
2367	 *		- Pager has locked out desired access.
2368	 *		- Fault needs to be restarted.
2369	 *		- Have to push page into copy object.
2370	 *
2371	 *	The code is an infinite loop that moves one level down
2372	 *	the shadow chain each time.  cur_object and cur_offset
2373	 * 	refer to the current object being examined. object and offset
2374	 *	are the original object from the map.  The loop is at the
2375	 *	top level if and only if object and cur_object are the same.
2376	 *
2377	 *	Invariants:  Map lock is held throughout.  Lock is held on
2378	 *		original object and cur_object (if different) when
2379	 *		continuing or exiting loop.
2380	 *
2381	 */
2382
2383
2384	/*
2385	 * If this page is to be inserted in a copy delay object
2386	 * for writing, and if the object has a copy, then the
2387	 * copy delay strategy is implemented in the slow fault page.
2388	 */
2389	if (object->copy_strategy == MEMORY_OBJECT_COPY_DELAY &&
2390	    object->copy != VM_OBJECT_NULL && (fault_type & VM_PROT_WRITE))
2391	        goto handle_copy_delay;
2392
2393	cur_object = object;
2394	cur_offset = offset;
2395
2396	while (TRUE) {
2397		m = vm_page_lookup(cur_object, cur_offset);
2398
2399		if (m != VM_PAGE_NULL) {
2400			if (m->busy) {
2401			        wait_result_t	result;
2402
2403				/*
2404				 * in order to do the PAGE_ASSERT_WAIT, we must
2405				 * have object that 'm' belongs to locked exclusively
2406				 */
2407				if (object != cur_object) {
2408				        vm_object_unlock(object);
2409
2410					if (cur_object_lock_type == OBJECT_LOCK_SHARED) {
2411
2412					        cur_object_lock_type = OBJECT_LOCK_EXCLUSIVE;
2413
2414						if (vm_object_lock_upgrade(cur_object) == FALSE) {
2415						        /*
2416							 * couldn't upgrade so go do a full retry
2417							 * immediately since we've already dropped
2418							 * the top object lock associated with this page
2419							 * and the current one got dropped due to the
2420							 * failed upgrade... the state is no longer valid
2421							 */
2422						        vm_map_unlock_read(map);
2423							if (real_map != map)
2424							        vm_map_unlock(real_map);
2425
2426							goto RetryFault;
2427						}
2428					}
2429				} else if (object_lock_type == OBJECT_LOCK_SHARED) {
2430
2431				        object_lock_type = OBJECT_LOCK_EXCLUSIVE;
2432
2433					if (vm_object_lock_upgrade(object) == FALSE) {
2434					        /*
2435						 * couldn't upgrade, so explictly take the lock
2436						 * exclusively and go relookup the page since we
2437						 * will have dropped the object lock and
2438						 * a different thread could have inserted
2439						 * a page at this offset
2440						 * no need for a full retry since we're
2441						 * at the top level of the object chain
2442						 */
2443					        vm_object_lock(object);
2444
2445						continue;
2446					}
2447				}
2448				vm_map_unlock_read(map);
2449				if (real_map != map)
2450				        vm_map_unlock(real_map);
2451
2452				result = PAGE_ASSERT_WAIT(m, interruptible);
2453
2454				vm_object_unlock(cur_object);
2455
2456				if (result == THREAD_WAITING) {
2457				        result = thread_block(THREAD_CONTINUE_NULL);
2458
2459					counter(c_vm_fault_page_block_busy_kernel++);
2460				}
2461				if (result == THREAD_AWAKENED || result == THREAD_RESTART)
2462				        goto RetryFault;
2463
2464				kr = KERN_ABORTED;
2465				goto done;
2466			}
2467			if (m->phys_page == vm_page_guard_addr) {
2468				/*
2469				 * Guard page: let the slow path deal with it
2470				 */
2471				break;
2472			}
2473			if (m->unusual && (m->error || m->restart || m->private || m->absent)) {
2474			        /*
2475				 * Unusual case... let the slow path deal with it
2476				 */
2477				break;
2478			}
2479			if (m->encrypted) {
2480				/*
2481				 * ENCRYPTED SWAP:
2482				 * We've soft-faulted (because it's not in the page
2483				 * table) on an encrypted page.
2484				 * Keep the page "busy" so that no one messes with
2485				 * it during the decryption.
2486				 * Release the extra locks we're holding, keep only
2487				 * the page's VM object lock.
2488				 *
2489				 * in order to set 'busy' on 'm', we must
2490				 * have object that 'm' belongs to locked exclusively
2491				 */
2492			        if (object != cur_object) {
2493					vm_object_unlock(object);
2494
2495					if (cur_object_lock_type == OBJECT_LOCK_SHARED) {
2496
2497					        cur_object_lock_type = OBJECT_LOCK_EXCLUSIVE;
2498
2499						if (vm_object_lock_upgrade(cur_object) == FALSE) {
2500						        /*
2501							 * couldn't upgrade so go do a full retry
2502							 * immediately since we've already dropped
2503							 * the top object lock associated with this page
2504							 * and the current one got dropped due to the
2505							 * failed upgrade... the state is no longer valid
2506							 */
2507						        vm_map_unlock_read(map);
2508							if (real_map != map)
2509							        vm_map_unlock(real_map);
2510
2511							goto RetryFault;
2512						}
2513					}
2514				} else if (object_lock_type == OBJECT_LOCK_SHARED) {
2515
2516				        object_lock_type = OBJECT_LOCK_EXCLUSIVE;
2517
2518					if (vm_object_lock_upgrade(object) == FALSE) {
2519					        /*
2520						 * couldn't upgrade, so explictly take the lock
2521						 * exclusively and go relookup the page since we
2522						 * will have dropped the object lock and
2523						 * a different thread could have inserted
2524						 * a page at this offset
2525						 * no need for a full retry since we're
2526						 * at the top level of the object chain
2527						 */
2528					        vm_object_lock(object);
2529
2530						continue;
2531					}
2532				}
2533				m->busy = TRUE;
2534
2535				vm_map_unlock_read(map);
2536				if (real_map != map)
2537					vm_map_unlock(real_map);
2538
2539				vm_page_decrypt(m, 0);
2540
2541				assert(m->busy);
2542				PAGE_WAKEUP_DONE(m);
2543
2544				vm_object_unlock(cur_object);
2545				/*
2546				 * Retry from the top, in case anything
2547				 * changed while we were decrypting...
2548				 */
2549				goto RetryFault;
2550			}
2551			ASSERT_PAGE_DECRYPTED(m);
2552
2553			if (VM_FAULT_NEED_CS_VALIDATION(map->pmap, m)) {
2554				/*
2555				 * We might need to validate this page
2556				 * against its code signature, so we
2557				 * want to hold the VM object exclusively.
2558				 */
2559			        if (object != cur_object) {
2560					if (cur_object_lock_type == OBJECT_LOCK_SHARED) {
2561						vm_object_unlock(object);
2562						vm_object_unlock(cur_object);
2563
2564					        cur_object_lock_type = OBJECT_LOCK_EXCLUSIVE;
2565
2566						vm_map_unlock_read(map);
2567						if (real_map != map)
2568							vm_map_unlock(real_map);
2569
2570						goto RetryFault;
2571					}
2572
2573				} else if (object_lock_type == OBJECT_LOCK_SHARED) {
2574
2575				        object_lock_type = OBJECT_LOCK_EXCLUSIVE;
2576
2577					if (vm_object_lock_upgrade(object) == FALSE) {
2578					        /*
2579						 * couldn't upgrade, so explictly take the lock
2580						 * exclusively and go relookup the page since we
2581						 * will have dropped the object lock and
2582						 * a different thread could have inserted
2583						 * a page at this offset
2584						 * no need for a full retry since we're
2585						 * at the top level of the object chain
2586						 */
2587					        vm_object_lock(object);
2588
2589						continue;
2590					}
2591				}
2592			}
2593			/*
2594			 *	Two cases of map in faults:
2595			 *	    - At top level w/o copy object.
2596			 *	    - Read fault anywhere.
2597			 *		--> must disallow write.
2598			 */
2599
2600			if (object == cur_object && object->copy == VM_OBJECT_NULL) {
2601				if ((fault_type & VM_PROT_WRITE) == 0) {
2602					/*
2603					 * This is not a "write" fault, so we
2604					 * might not have taken the object lock
2605					 * exclusively and we might not be able
2606					 * to update the "wpmapped" bit in
2607					 * vm_fault_enter().
2608					 * Let's just grant read access to
2609					 * the page for now and we'll
2610					 * soft-fault again if we need write
2611					 * access later...
2612					 */
2613					prot &= ~VM_PROT_WRITE;
2614				}
2615				goto FastPmapEnter;
2616			}
2617
2618			if ((fault_type & VM_PROT_WRITE) == 0) {
2619
2620				prot &= ~VM_PROT_WRITE;
2621
2622			  	if (object != cur_object) {
2623				        /*
2624					 * We still need to hold the top object
2625					 * lock here to prevent a race between
2626					 * a read fault (taking only "shared"
2627					 * locks) and a write fault (taking
2628					 * an "exclusive" lock on the top
2629					 * object.
2630					 * Otherwise, as soon as we release the
2631					 * top lock, the write fault could
2632					 * proceed and actually complete before
2633					 * the read fault, and the copied page's
2634					 * translation could then be overwritten
2635					 * by the read fault's translation for
2636					 * the original page.
2637					 *
2638					 * Let's just record what the top object
2639					 * is and we'll release it later.
2640					 */
2641					top_object = object;
2642
2643					/*
2644					 * switch to the object that has the new page
2645					 */
2646					object = cur_object;
2647					object_lock_type = cur_object_lock_type;
2648				}
2649FastPmapEnter:
2650				/*
2651				 * prepare for the pmap_enter...
2652				 * object and map are both locked
2653				 * m contains valid data
2654				 * object == m->object
2655				 * cur_object == NULL or it's been unlocked
2656				 * no paging references on either object or cur_object
2657				 */
2658#if	MACH_KDB
2659				if (db_watchpoint_list && (fault_type & VM_PROT_WRITE) == 0)
2660					prot &= ~VM_PROT_WRITE;
2661#endif
2662				if (caller_pmap) {
2663				        kr = vm_fault_enter(m,
2664							    caller_pmap,
2665							    caller_pmap_addr,
2666							    prot,
2667							    wired,
2668							    change_wiring,
2669							    fault_info.no_cache,
2670							    &type_of_fault);
2671				} else {
2672				        kr = vm_fault_enter(m,
2673							    pmap,
2674							    vaddr,
2675							    prot,
2676							    wired,
2677							    change_wiring,
2678							    fault_info.no_cache,
2679							    &type_of_fault);
2680				}
2681
2682				if (top_object != VM_OBJECT_NULL) {
2683					/*
2684					 * It's safe to drop the top object
2685					 * now that we've done our
2686					 * vm_fault_enter().  Any other fault
2687					 * in progress for that virtual
2688					 * address will either find our page
2689					 * and translation or put in a new page
2690					 * and translation.
2691					 */
2692					vm_object_unlock(top_object);
2693					top_object = VM_OBJECT_NULL;
2694				}
2695
2696				if (need_collapse == TRUE)
2697				        vm_object_collapse(object, offset, TRUE);
2698
2699				if (type_of_fault == DBG_PAGEIN_FAULT) {
2700				        /*
2701					 * evaluate access pattern and update state
2702					 * vm_fault_deactivate_behind depends on the
2703					 * state being up to date
2704					 */
2705				        vm_fault_is_sequential(object, cur_offset, fault_info.behavior);
2706
2707					vm_fault_deactivate_behind(object, cur_offset, fault_info.behavior);
2708				}
2709				/*
2710				 * That's it, clean up and return.
2711				 */
2712				if (m->busy)
2713				        PAGE_WAKEUP_DONE(m);
2714
2715				vm_object_unlock(object);
2716
2717				vm_map_unlock_read(map);
2718				if (real_map != map)
2719					vm_map_unlock(real_map);
2720
2721				goto done;
2722			}
2723			/*
2724			 * COPY ON WRITE FAULT
2725			 *
2726			 * If objects match, then
2727			 * object->copy must not be NULL (else control
2728			 * would be in previous code block), and we
2729			 * have a potential push into the copy object
2730			 * with which we can't cope with here.
2731			 */
2732			if (cur_object == object) {
2733			        /*
2734				 * must take the slow path to
2735				 * deal with the copy push
2736				 */
2737				break;
2738			}
2739			assert(object_lock_type == OBJECT_LOCK_EXCLUSIVE);
2740
2741			/*
2742			 * This is now a shadow based copy on write
2743			 * fault -- it requires a copy up the shadow
2744			 * chain.
2745			 *
2746			 * Allocate a page in the original top level
2747			 * object. Give up if allocate fails.  Also
2748			 * need to remember current page, as it's the
2749			 * source of the copy.
2750			 *
2751			 * at this point we hold locks on both
2752			 * object and cur_object... no need to take
2753			 * paging refs or mark pages BUSY since
2754			 * we don't drop either object lock until
2755			 * the page has been copied and inserted
2756			 */
2757			cur_m = m;
2758			m = vm_page_grab();
2759
2760			if (m == VM_PAGE_NULL) {
2761			        /*
2762				 * no free page currently available...
2763				 * must take the slow path
2764				 */
2765				break;
2766			}
2767			/*
2768			 * Now do the copy.  Mark the source page busy...
2769			 *
2770			 *	NOTE: This code holds the map lock across
2771			 *	the page copy.
2772			 */
2773			vm_page_copy(cur_m, m);
2774			vm_page_insert(m, object, offset);
2775			m->dirty = TRUE;
2776
2777			/*
2778			 * Now cope with the source page and object
2779			 */
2780			if (object->ref_count > 1 && cur_m->pmapped)
2781			        pmap_disconnect(cur_m->phys_page);
2782
2783			need_collapse = TRUE;
2784
2785			if (!cur_object->internal &&
2786			    cur_object->copy_strategy == MEMORY_OBJECT_COPY_DELAY) {
2787			        /*
2788				 * The object from which we've just
2789				 * copied a page is most probably backed
2790				 * by a vnode.  We don't want to waste too
2791				 * much time trying to collapse the VM objects
2792				 * and create a bottleneck when several tasks
2793				 * map the same file.
2794				 */
2795			        if (cur_object->copy == object) {
2796				        /*
2797					 * Shared mapping or no COW yet.
2798					 * We can never collapse a copy
2799					 * object into its backing object.
2800					 */
2801				        need_collapse = FALSE;
2802				} else if (cur_object->copy == object->shadow &&
2803					   object->shadow->resident_page_count == 0) {
2804				        /*
2805					 * Shared mapping after a COW occurred.
2806					 */
2807				        need_collapse = FALSE;
2808				}
2809			}
2810			vm_object_unlock(cur_object);
2811
2812			if (need_collapse == FALSE)
2813			        vm_fault_collapse_skipped++;
2814			vm_fault_collapse_total++;
2815
2816			type_of_fault = DBG_COW_FAULT;
2817			VM_STAT_INCR(cow_faults);
2818			DTRACE_VM2(cow_fault, int, 1, (uint64_t *), NULL);
2819			current_task()->cow_faults++;
2820
2821			goto FastPmapEnter;
2822
2823		} else {
2824			/*
2825			 * No page at cur_object, cur_offset... m == NULL
2826			 */
2827			if (cur_object->pager_created) {
2828			        if (MUST_ASK_PAGER(cur_object, cur_offset) == TRUE) {
2829				        /*
2830					 * May have to talk to a pager...
2831					 * take the slow path.
2832					 */
2833				        break;
2834				}
2835				/*
2836				 * existence map present and indicates
2837				 * that the pager doesn't have this page
2838				 */
2839			}
2840			if (cur_object->shadow == VM_OBJECT_NULL) {
2841				/*
2842				 * Zero fill fault.  Page gets
2843				 * inserted into the original object.
2844				 */
2845				if (cur_object->shadow_severed) {
2846
2847					if (object != cur_object)
2848					        vm_object_unlock(cur_object);
2849					vm_object_unlock(object);
2850
2851					vm_map_unlock_read(map);
2852					if (real_map != map)
2853						vm_map_unlock(real_map);
2854
2855					kr = KERN_MEMORY_ERROR;
2856					goto done;
2857				}
2858				if (VM_PAGE_ZFILL_THROTTLED()) {
2859					/*
2860					 * drop all of our locks...
2861					 * wait until the free queue is
2862					 * pumped back up and then
2863					 * redrive the fault
2864					 */
2865					if (object != cur_object)
2866						vm_object_unlock(cur_object);
2867					vm_object_unlock(object);
2868					vm_map_unlock_read(map);
2869					if (real_map != map)
2870						vm_map_unlock(real_map);
2871
2872					if (vm_page_wait((change_wiring) ?
2873							 THREAD_UNINT :
2874							 THREAD_ABORTSAFE))
2875						goto RetryFault;
2876
2877					kr = KERN_ABORTED;
2878					goto done;
2879				}
2880				if (vm_backing_store_low) {
2881				        /*
2882					 * we are protecting the system from
2883					 * backing store exhaustion...
2884					 * must take the slow path if we're
2885					 * not privileged
2886					 */
2887					if (!(current_task()->priv_flags & VM_BACKING_STORE_PRIV))
2888					        break;
2889				}
2890			  	if (cur_object != object) {
2891					vm_object_unlock(cur_object);
2892
2893					cur_object = object;
2894				}
2895				if (object_lock_type == OBJECT_LOCK_SHARED) {
2896
2897				        object_lock_type = OBJECT_LOCK_EXCLUSIVE;
2898
2899					if (vm_object_lock_upgrade(object) == FALSE) {
2900					        /*
2901						 * couldn't upgrade so do a full retry on the fault
2902						 * since we dropped the object lock which
2903						 * could allow another thread to insert
2904						 * a page at this offset
2905						 */
2906					        vm_map_unlock_read(map);
2907						if (real_map != map)
2908						        vm_map_unlock(real_map);
2909
2910						goto RetryFault;
2911					}
2912				}
2913				m = vm_page_alloc(object, offset);
2914
2915				if (m == VM_PAGE_NULL) {
2916				        /*
2917					 * no free page currently available...
2918					 * must take the slow path
2919					 */
2920					break;
2921				}
2922
2923				/*
2924				 * Now zero fill page...
2925				 * the page is probably going to
2926				 * be written soon, so don't bother
2927				 * to clear the modified bit
2928				 *
2929				 *   NOTE: This code holds the map
2930				 *   lock across the zero fill.
2931				 */
2932				type_of_fault = vm_fault_zero_page(m, map->no_zero_fill);
2933
2934				goto FastPmapEnter;
2935		        }
2936			/*
2937			 * On to the next level in the shadow chain
2938			 */
2939			cur_offset += cur_object->shadow_offset;
2940			new_object = cur_object->shadow;
2941
2942			/*
2943			 * take the new_object's lock with the indicated state
2944			 */
2945			if (cur_object_lock_type == OBJECT_LOCK_SHARED)
2946			        vm_object_lock_shared(new_object);
2947			else
2948			        vm_object_lock(new_object);
2949
2950			if (cur_object != object)
2951				vm_object_unlock(cur_object);
2952
2953			cur_object = new_object;
2954
2955			continue;
2956		}
2957	}
2958	/*
2959	 * Cleanup from fast fault failure.  Drop any object
2960	 * lock other than original and drop map lock.
2961	 */
2962	if (object != cur_object)
2963		vm_object_unlock(cur_object);
2964
2965	/*
2966	 * must own the object lock exclusively at this point
2967	 */
2968	if (object_lock_type == OBJECT_LOCK_SHARED) {
2969	        object_lock_type = OBJECT_LOCK_EXCLUSIVE;
2970
2971		if (vm_object_lock_upgrade(object) == FALSE) {
2972		        /*
2973			 * couldn't upgrade, so explictly
2974			 * take the lock exclusively
2975			 * no need to retry the fault at this
2976			 * point since "vm_fault_page" will
2977			 * completely re-evaluate the state
2978			 */
2979		        vm_object_lock(object);
2980		}
2981	}
2982
2983handle_copy_delay:
2984	vm_map_unlock_read(map);
2985	if (real_map != map)
2986		vm_map_unlock(real_map);
2987
2988   	/*
2989	 * Make a reference to this object to
2990	 * prevent its disposal while we are messing with
2991	 * it.  Once we have the reference, the map is free
2992	 * to be diddled.  Since objects reference their
2993	 * shadows (and copies), they will stay around as well.
2994	 */
2995	vm_object_reference_locked(object);
2996	vm_object_paging_begin(object);
2997
2998	XPR(XPR_VM_FAULT,"vm_fault -> vm_fault_page\n",0,0,0,0,0);
2999
3000	error_code = 0;
3001
3002	kr = vm_fault_page(object, offset, fault_type,
3003			   (change_wiring && !wired),
3004			   &prot, &result_page, &top_page,
3005			   &type_of_fault,
3006			   &error_code, map->no_zero_fill,
3007			   FALSE, &fault_info);
3008
3009	/*
3010	 * if kr != VM_FAULT_SUCCESS, then the paging reference
3011	 * has been dropped and the object unlocked... the ref_count
3012	 * is still held
3013	 *
3014	 * if kr == VM_FAULT_SUCCESS, then the paging reference
3015	 * is still held along with the ref_count on the original object
3016	 *
3017	 *	if m != NULL, then the object it belongs to
3018	 *	is returned locked with a paging reference
3019	 *
3020	 *	if top_page != NULL, then it's BUSY and the
3021	 *	object it belongs to has a paging reference
3022	 *	but is returned unlocked
3023	 */
3024	if (kr != VM_FAULT_SUCCESS) {
3025	        /*
3026		 * we didn't succeed, lose the object reference immediately.
3027		 */
3028		vm_object_deallocate(object);
3029
3030		/*
3031		 * See why we failed, and take corrective action.
3032		 */
3033		switch (kr) {
3034		case VM_FAULT_MEMORY_SHORTAGE:
3035			if (vm_page_wait((change_wiring) ?
3036					 THREAD_UNINT :
3037					 THREAD_ABORTSAFE))
3038				goto RetryFault;
3039			/*
3040			 * fall thru
3041			 */
3042		case VM_FAULT_INTERRUPTED:
3043			kr = KERN_ABORTED;
3044			goto done;
3045		case VM_FAULT_RETRY:
3046			goto RetryFault;
3047		case VM_FAULT_MEMORY_ERROR:
3048			if (error_code)
3049				kr = error_code;
3050			else
3051				kr = KERN_MEMORY_ERROR;
3052			goto done;
3053		}
3054	}
3055	m = result_page;
3056
3057	if (m != VM_PAGE_NULL) {
3058		assert((change_wiring && !wired) ?
3059	   	    (top_page == VM_PAGE_NULL) :
3060	   	    ((top_page == VM_PAGE_NULL) == (m->object == object)));
3061	}
3062
3063	/*
3064	 * What to do with the resulting page from vm_fault_page
3065	 * if it doesn't get entered into the physical map:
3066	 */
3067#define RELEASE_PAGE(m)					\
3068	MACRO_BEGIN					\
3069	PAGE_WAKEUP_DONE(m);				\
3070	vm_page_lockspin_queues();			\
3071	if (!m->active && !m->inactive && !m->throttled)\
3072		vm_page_activate(m);			\
3073	vm_page_unlock_queues();			\
3074	MACRO_END
3075
3076	/*
3077	 * We must verify that the maps have not changed
3078	 * since our last lookup.
3079	 */
3080	if (m != VM_PAGE_NULL) {
3081		old_copy_object = m->object->copy;
3082		vm_object_unlock(m->object);
3083	} else
3084		old_copy_object = VM_OBJECT_NULL;
3085
3086	/*
3087	 * no object locks are held at this point
3088	 */
3089	if ((map != original_map) || !vm_map_verify(map, &version)) {
3090		vm_object_t		retry_object;
3091		vm_object_offset_t	retry_offset;
3092		vm_prot_t		retry_prot;
3093
3094		/*
3095		 * To avoid trying to write_lock the map while another
3096		 * thread has it read_locked (in vm_map_pageable), we
3097		 * do not try for write permission.  If the page is
3098		 * still writable, we will get write permission.  If it
3099		 * is not, or has been marked needs_copy, we enter the
3100		 * mapping without write permission, and will merely
3101		 * take another fault.
3102		 */
3103		map = original_map;
3104		vm_map_lock_read(map);
3105
3106		kr = vm_map_lookup_locked(&map, vaddr,
3107					  fault_type & ~VM_PROT_WRITE,
3108					  OBJECT_LOCK_EXCLUSIVE, &version,
3109					  &retry_object, &retry_offset, &retry_prot,
3110					  &wired,
3111					  &fault_info,
3112					  &real_map);
3113		pmap = real_map->pmap;
3114
3115		if (kr != KERN_SUCCESS) {
3116			vm_map_unlock_read(map);
3117
3118			if (m != VM_PAGE_NULL) {
3119			        /*
3120				 * retake the lock so that
3121				 * we can drop the paging reference
3122				 * in vm_fault_cleanup and do the
3123				 * PAGE_WAKEUP_DONE in RELEASE_PAGE
3124				 */
3125				vm_object_lock(m->object);
3126
3127				RELEASE_PAGE(m);
3128
3129				vm_fault_cleanup(m->object, top_page);
3130			} else {
3131			        /*
3132				 * retake the lock so that
3133				 * we can drop the paging reference
3134				 * in vm_fault_cleanup
3135				 */
3136			        vm_object_lock(object);
3137
3138			        vm_fault_cleanup(object, top_page);
3139			}
3140			vm_object_deallocate(object);
3141
3142			goto done;
3143		}
3144		vm_object_unlock(retry_object);
3145
3146		if ((retry_object != object) || (retry_offset != offset)) {
3147
3148			vm_map_unlock_read(map);
3149			if (real_map != map)
3150				vm_map_unlock(real_map);
3151
3152			if (m != VM_PAGE_NULL) {
3153			        /*
3154				 * retake the lock so that
3155				 * we can drop the paging reference
3156				 * in vm_fault_cleanup and do the
3157				 * PAGE_WAKEUP_DONE in RELEASE_PAGE
3158				 */
3159			        vm_object_lock(m->object);
3160
3161				RELEASE_PAGE(m);
3162
3163				vm_fault_cleanup(m->object, top_page);
3164			} else {
3165			        /*
3166				 * retake the lock so that
3167				 * we can drop the paging reference
3168				 * in vm_fault_cleanup
3169				 */
3170			        vm_object_lock(object);
3171
3172			        vm_fault_cleanup(object, top_page);
3173			}
3174			vm_object_deallocate(object);
3175
3176			goto RetryFault;
3177		}
3178		/*
3179		 * Check whether the protection has changed or the object
3180		 * has been copied while we left the map unlocked.
3181		 */
3182		prot &= retry_prot;
3183	}
3184	if (m != VM_PAGE_NULL) {
3185		vm_object_lock(m->object);
3186
3187		if (m->object->copy != old_copy_object) {
3188		        /*
3189			 * The copy object changed while the top-level object
3190			 * was unlocked, so take away write permission.
3191			 */
3192			prot &= ~VM_PROT_WRITE;
3193		}
3194	} else
3195		vm_object_lock(object);
3196
3197	/*
3198	 * If we want to wire down this page, but no longer have
3199	 * adequate permissions, we must start all over.
3200	 */
3201	if (wired && (fault_type != (prot | VM_PROT_WRITE))) {
3202
3203		vm_map_verify_done(map, &version);
3204		if (real_map != map)
3205			vm_map_unlock(real_map);
3206
3207		if (m != VM_PAGE_NULL) {
3208			RELEASE_PAGE(m);
3209
3210			vm_fault_cleanup(m->object, top_page);
3211		} else
3212		        vm_fault_cleanup(object, top_page);
3213
3214		vm_object_deallocate(object);
3215
3216		goto RetryFault;
3217	}
3218	if (m != VM_PAGE_NULL) {
3219		/*
3220		 * Put this page into the physical map.
3221		 * We had to do the unlock above because pmap_enter
3222		 * may cause other faults.  The page may be on
3223		 * the pageout queues.  If the pageout daemon comes
3224		 * across the page, it will remove it from the queues.
3225		 */
3226		if (caller_pmap) {
3227			kr = vm_fault_enter(m,
3228					    caller_pmap,
3229					    caller_pmap_addr,
3230					    prot,
3231					    wired,
3232					    change_wiring,
3233					    fault_info.no_cache,
3234					    &type_of_fault);
3235		} else {
3236			kr = vm_fault_enter(m,
3237					    pmap,
3238					    vaddr,
3239					    prot,
3240					    wired,
3241					    change_wiring,
3242					    fault_info.no_cache,
3243					    &type_of_fault);
3244		}
3245		if (kr != KERN_SUCCESS) {
3246			/* abort this page fault */
3247			vm_map_verify_done(map, &version);
3248			if (real_map != map)
3249				vm_map_unlock(real_map);
3250			PAGE_WAKEUP_DONE(m);
3251			vm_fault_cleanup(m->object, top_page);
3252			vm_object_deallocate(object);
3253			goto done;
3254		}
3255	} else {
3256
3257		vm_map_entry_t		entry;
3258		vm_map_offset_t		laddr;
3259		vm_map_offset_t		ldelta, hdelta;
3260
3261		/*
3262		 * do a pmap block mapping from the physical address
3263		 * in the object
3264		 */
3265
3266#ifdef ppc
3267		/* While we do not worry about execution protection in   */
3268		/* general, certian pages may have instruction execution */
3269		/* disallowed.  We will check here, and if not allowed   */
3270		/* to execute, we return with a protection failure.      */
3271
3272		if ((fault_type & VM_PROT_EXECUTE) &&
3273			(!pmap_eligible_for_execute((ppnum_t)(object->shadow_offset >> 12)))) {
3274
3275			vm_map_verify_done(map, &version);
3276
3277			if (real_map != map)
3278				vm_map_unlock(real_map);
3279
3280			vm_fault_cleanup(object, top_page);
3281			vm_object_deallocate(object);
3282
3283			kr = KERN_PROTECTION_FAILURE;
3284			goto done;
3285		}
3286#endif	/* ppc */
3287
3288		if (real_map != map)
3289			vm_map_unlock(real_map);
3290
3291		if (original_map != map) {
3292			vm_map_unlock_read(map);
3293			vm_map_lock_read(original_map);
3294			map = original_map;
3295		}
3296		real_map = map;
3297
3298		laddr = vaddr;
3299		hdelta = 0xFFFFF000;
3300		ldelta = 0xFFFFF000;
3301
3302		while (vm_map_lookup_entry(map, laddr, &entry)) {
3303			if (ldelta > (laddr - entry->vme_start))
3304				ldelta = laddr - entry->vme_start;
3305			if (hdelta > (entry->vme_end - laddr))
3306				hdelta = entry->vme_end - laddr;
3307			if (entry->is_sub_map) {
3308
3309				laddr = (laddr - entry->vme_start)
3310							+ entry->offset;
3311				vm_map_lock_read(entry->object.sub_map);
3312
3313				if (map != real_map)
3314					vm_map_unlock_read(map);
3315				if (entry->use_pmap) {
3316					vm_map_unlock_read(real_map);
3317					real_map = entry->object.sub_map;
3318				}
3319				map = entry->object.sub_map;
3320
3321			} else {
3322				break;
3323			}
3324		}
3325
3326		if (vm_map_lookup_entry(map, laddr, &entry) &&
3327					(entry->object.vm_object != NULL) &&
3328					(entry->object.vm_object == object)) {
3329
3330			if (caller_pmap) {
3331				/*
3332				 * Set up a block mapped area
3333				 */
3334				pmap_map_block(caller_pmap,
3335					       (addr64_t)(caller_pmap_addr - ldelta),
3336					       (((vm_map_offset_t) (entry->object.vm_object->shadow_offset)) +
3337						entry->offset + (laddr - entry->vme_start) - ldelta) >> 12,
3338					       ((ldelta + hdelta) >> 12), prot,
3339					       (VM_WIMG_MASK & (int)object->wimg_bits), 0);
3340			} else {
3341				/*
3342				 * Set up a block mapped area
3343				 */
3344				pmap_map_block(real_map->pmap,
3345					       (addr64_t)(vaddr - ldelta),
3346					       (((vm_map_offset_t)(entry->object.vm_object->shadow_offset)) +
3347						entry->offset + (laddr - entry->vme_start) - ldelta) >> 12,
3348					       ((ldelta + hdelta) >> 12), prot,
3349					       (VM_WIMG_MASK & (int)object->wimg_bits), 0);
3350			}
3351		}
3352	}
3353
3354	/*
3355	 * Unlock everything, and return
3356	 */
3357	vm_map_verify_done(map, &version);
3358	if (real_map != map)
3359		vm_map_unlock(real_map);
3360
3361	if (m != VM_PAGE_NULL) {
3362		PAGE_WAKEUP_DONE(m);
3363
3364		vm_fault_cleanup(m->object, top_page);
3365	} else
3366	        vm_fault_cleanup(object, top_page);
3367
3368	vm_object_deallocate(object);
3369
3370#undef	RELEASE_PAGE
3371
3372	kr = KERN_SUCCESS;
3373done:
3374	thread_interrupt_level(interruptible_state);
3375
3376	KERNEL_DEBUG_CONSTANT((MACHDBG_CODE(DBG_MACH_VM, 2)) | DBG_FUNC_END,
3377			      (int)((uint64_t)vaddr >> 32),
3378			      (int)vaddr,
3379			      kr,
3380			      type_of_fault,
3381			      0);
3382
3383	return (kr);
3384}
3385
3386/*
3387 *	vm_fault_wire:
3388 *
3389 *	Wire down a range of virtual addresses in a map.
3390 */
3391kern_return_t
3392vm_fault_wire(
3393	vm_map_t	map,
3394	vm_map_entry_t	entry,
3395	pmap_t		pmap,
3396	vm_map_offset_t	pmap_addr)
3397{
3398
3399	register vm_map_offset_t	va;
3400	register vm_map_offset_t	end_addr = entry->vme_end;
3401	register kern_return_t	rc;
3402
3403	assert(entry->in_transition);
3404
3405	if ((entry->object.vm_object != NULL) &&
3406			!entry->is_sub_map &&
3407			entry->object.vm_object->phys_contiguous) {
3408		return KERN_SUCCESS;
3409	}
3410
3411	/*
3412	 *	Inform the physical mapping system that the
3413	 *	range of addresses may not fault, so that
3414	 *	page tables and such can be locked down as well.
3415	 */
3416
3417	pmap_pageable(pmap, pmap_addr,
3418		pmap_addr + (end_addr - entry->vme_start), FALSE);
3419
3420	/*
3421	 *	We simulate a fault to get the page and enter it
3422	 *	in the physical map.
3423	 */
3424
3425	for (va = entry->vme_start; va < end_addr; va += PAGE_SIZE) {
3426		if ((rc = vm_fault_wire_fast(
3427			map, va, entry, pmap,
3428			pmap_addr + (va - entry->vme_start)
3429			)) != KERN_SUCCESS) {
3430			rc = vm_fault(map, va, VM_PROT_NONE, TRUE,
3431			  	(pmap == kernel_pmap) ?
3432					THREAD_UNINT : THREAD_ABORTSAFE,
3433				pmap, pmap_addr + (va - entry->vme_start));
3434			DTRACE_VM2(softlock, int, 1, (uint64_t *), NULL);
3435		}
3436
3437		if (rc != KERN_SUCCESS) {
3438			struct vm_map_entry	tmp_entry = *entry;
3439
3440			/* unwire wired pages */
3441			tmp_entry.vme_end = va;
3442			vm_fault_unwire(map,
3443				&tmp_entry, FALSE, pmap, pmap_addr);
3444
3445			return rc;
3446		}
3447	}
3448	return KERN_SUCCESS;
3449}
3450
3451/*
3452 *	vm_fault_unwire:
3453 *
3454 *	Unwire a range of virtual addresses in a map.
3455 */
3456void
3457vm_fault_unwire(
3458	vm_map_t	map,
3459	vm_map_entry_t	entry,
3460	boolean_t	deallocate,
3461	pmap_t		pmap,
3462	vm_map_offset_t	pmap_addr)
3463{
3464	register vm_map_offset_t	va;
3465	register vm_map_offset_t	end_addr = entry->vme_end;
3466	vm_object_t		object;
3467	struct vm_object_fault_info fault_info;
3468
3469	object = (entry->is_sub_map)
3470			? VM_OBJECT_NULL : entry->object.vm_object;
3471
3472	/*
3473	 * If it's marked phys_contiguous, then vm_fault_wire() didn't actually
3474	 * do anything since such memory is wired by default.  So we don't have
3475	 * anything to undo here.
3476	 */
3477
3478	if (object != VM_OBJECT_NULL && object->phys_contiguous)
3479		return;
3480
3481	fault_info.interruptible = THREAD_UNINT;
3482	fault_info.behavior = entry->behavior;
3483	fault_info.user_tag = entry->alias;
3484	fault_info.lo_offset = entry->offset;
3485	fault_info.hi_offset = (entry->vme_end - entry->vme_start) + entry->offset;
3486	fault_info.no_cache = entry->no_cache;
3487
3488	/*
3489	 *	Since the pages are wired down, we must be able to
3490	 *	get their mappings from the physical map system.
3491	 */
3492
3493	for (va = entry->vme_start; va < end_addr; va += PAGE_SIZE) {
3494
3495		if (object == VM_OBJECT_NULL) {
3496			if (pmap) {
3497				pmap_change_wiring(pmap,
3498						   pmap_addr + (va - entry->vme_start), FALSE);
3499			}
3500			(void) vm_fault(map, va, VM_PROT_NONE,
3501					TRUE, THREAD_UNINT, pmap, pmap_addr);
3502		} else {
3503		 	vm_prot_t	prot;
3504			vm_page_t	result_page;
3505			vm_page_t	top_page;
3506			vm_object_t	result_object;
3507			vm_fault_return_t result;
3508
3509			fault_info.cluster_size = end_addr - va;
3510
3511			do {
3512				prot = VM_PROT_NONE;
3513
3514				vm_object_lock(object);
3515				vm_object_paging_begin(object);
3516				XPR(XPR_VM_FAULT,
3517					"vm_fault_unwire -> vm_fault_page\n",
3518					0,0,0,0,0);
3519			 	result = vm_fault_page(
3520					object,
3521					entry->offset + (va - entry->vme_start),
3522					VM_PROT_NONE, TRUE,
3523					&prot, &result_page, &top_page,
3524					(int *)0,
3525					NULL, map->no_zero_fill,
3526					FALSE, &fault_info);
3527			} while (result == VM_FAULT_RETRY);
3528
3529			/*
3530			 * If this was a mapping to a file on a device that has been forcibly
3531			 * unmounted, then we won't get a page back from vm_fault_page().  Just
3532			 * move on to the next one in case the remaining pages are mapped from
3533			 * different objects.  During a forced unmount, the object is terminated
3534			 * so the alive flag will be false if this happens.  A forced unmount will
3535			 * will occur when an external disk is unplugged before the user does an
3536			 * eject, so we don't want to panic in that situation.
3537			 */
3538
3539			if (result == VM_FAULT_MEMORY_ERROR && !object->alive)
3540				continue;
3541
3542			if (result != VM_FAULT_SUCCESS)
3543				panic("vm_fault_unwire: failure");
3544
3545			result_object = result_page->object;
3546
3547			if ((pmap) && (result_page->phys_page != vm_page_guard_addr)) {
3548				pmap_change_wiring(pmap,
3549						   pmap_addr + (va - entry->vme_start), FALSE);
3550			}
3551			if (deallocate) {
3552				assert(result_page->phys_page !=
3553				       vm_page_fictitious_addr);
3554				pmap_disconnect(result_page->phys_page);
3555				VM_PAGE_FREE(result_page);
3556			} else {
3557				vm_page_lockspin_queues();
3558				vm_page_unwire(result_page);
3559				vm_page_unlock_queues();
3560				PAGE_WAKEUP_DONE(result_page);
3561			}
3562			vm_fault_cleanup(result_object, top_page);
3563		}
3564	}
3565
3566	/*
3567	 *	Inform the physical mapping system that the range
3568	 *	of addresses may fault, so that page tables and
3569	 *	such may be unwired themselves.
3570	 */
3571
3572	pmap_pageable(pmap, pmap_addr,
3573		pmap_addr + (end_addr - entry->vme_start), TRUE);
3574
3575}
3576
3577/*
3578 *	vm_fault_wire_fast:
3579 *
3580 *	Handle common case of a wire down page fault at the given address.
3581 *	If successful, the page is inserted into the associated physical map.
3582 *	The map entry is passed in to avoid the overhead of a map lookup.
3583 *
3584 *	NOTE: the given address should be truncated to the
3585 *	proper page address.
3586 *
3587 *	KERN_SUCCESS is returned if the page fault is handled; otherwise,
3588 *	a standard error specifying why the fault is fatal is returned.
3589 *
3590 *	The map in question must be referenced, and remains so.
3591 *	Caller has a read lock on the map.
3592 *
3593 *	This is a stripped version of vm_fault() for wiring pages.  Anything
3594 *	other than the common case will return KERN_FAILURE, and the caller
3595 *	is expected to call vm_fault().
3596 */
3597kern_return_t
3598vm_fault_wire_fast(
3599	__unused vm_map_t	map,
3600	vm_map_offset_t	va,
3601	vm_map_entry_t	entry,
3602	pmap_t			pmap,
3603	vm_map_offset_t	pmap_addr)
3604{
3605	vm_object_t		object;
3606	vm_object_offset_t	offset;
3607	register vm_page_t	m;
3608	vm_prot_t		prot;
3609	thread_t           	thread = current_thread();
3610	int			type_of_fault;
3611	kern_return_t		kr;
3612
3613	VM_STAT_INCR(faults);
3614
3615	if (thread != THREAD_NULL && thread->task != TASK_NULL)
3616	  thread->task->faults++;
3617
3618/*
3619 *	Recovery actions
3620 */
3621
3622#undef	RELEASE_PAGE
3623#define RELEASE_PAGE(m)	{				\
3624	PAGE_WAKEUP_DONE(m);				\
3625	vm_page_lockspin_queues();			\
3626	vm_page_unwire(m);				\
3627	vm_page_unlock_queues();			\
3628}
3629
3630
3631#undef	UNLOCK_THINGS
3632#define UNLOCK_THINGS	{				\
3633	vm_object_paging_end(object);			   \
3634	vm_object_unlock(object);			   \
3635}
3636
3637#undef	UNLOCK_AND_DEALLOCATE
3638#define UNLOCK_AND_DEALLOCATE	{			\
3639	UNLOCK_THINGS;					\
3640	vm_object_deallocate(object);			\
3641}
3642/*
3643 *	Give up and have caller do things the hard way.
3644 */
3645
3646#define GIVE_UP {					\
3647	UNLOCK_AND_DEALLOCATE;				\
3648	return(KERN_FAILURE);				\
3649}
3650
3651
3652	/*
3653	 *	If this entry is not directly to a vm_object, bail out.
3654	 */
3655	if (entry->is_sub_map)
3656		return(KERN_FAILURE);
3657
3658	/*
3659	 *	Find the backing store object and offset into it.
3660	 */
3661
3662	object = entry->object.vm_object;
3663	offset = (va - entry->vme_start) + entry->offset;
3664	prot = entry->protection;
3665
3666   	/*
3667	 *	Make a reference to this object to prevent its
3668	 *	disposal while we are messing with it.
3669	 */
3670
3671	vm_object_lock(object);
3672	vm_object_reference_locked(object);
3673	vm_object_paging_begin(object);
3674
3675	/*
3676	 *	INVARIANTS (through entire routine):
3677	 *
3678	 *	1)	At all times, we must either have the object
3679	 *		lock or a busy page in some object to prevent
3680	 *		some other thread from trying to bring in
3681	 *		the same page.
3682	 *
3683	 *	2)	Once we have a busy page, we must remove it from
3684	 *		the pageout queues, so that the pageout daemon
3685	 *		will not grab it away.
3686	 *
3687	 */
3688
3689	/*
3690	 *	Look for page in top-level object.  If it's not there or
3691	 *	there's something going on, give up.
3692	 * ENCRYPTED SWAP: use the slow fault path, since we'll need to
3693	 * decrypt the page before wiring it down.
3694	 */
3695	m = vm_page_lookup(object, offset);
3696	if ((m == VM_PAGE_NULL) || (m->busy) || (m->encrypted) ||
3697	    (m->unusual && ( m->error || m->restart || m->absent))) {
3698
3699		GIVE_UP;
3700	}
3701	ASSERT_PAGE_DECRYPTED(m);
3702
3703	if (m->fictitious &&
3704	    m->phys_page == vm_page_guard_addr) {
3705		/*
3706		 * Guard pages are fictitious pages and are never
3707		 * entered into a pmap, so let's say it's been wired...
3708		 */
3709		kr = KERN_SUCCESS;
3710		goto done;
3711	}
3712
3713	/*
3714	 *	Wire the page down now.  All bail outs beyond this
3715	 *	point must unwire the page.
3716	 */
3717
3718	vm_page_lockspin_queues();
3719	vm_page_wire(m);
3720	vm_page_unlock_queues();
3721
3722	/*
3723	 *	Mark page busy for other threads.
3724	 */
3725	assert(!m->busy);
3726	m->busy = TRUE;
3727	assert(!m->absent);
3728
3729	/*
3730	 *	Give up if the page is being written and there's a copy object
3731	 */
3732	if ((object->copy != VM_OBJECT_NULL) && (prot & VM_PROT_WRITE)) {
3733		RELEASE_PAGE(m);
3734		GIVE_UP;
3735	}
3736
3737	/*
3738	 *	Put this page into the physical map.
3739	 */
3740	type_of_fault = DBG_CACHE_HIT_FAULT;
3741	kr = vm_fault_enter(m,
3742			    pmap,
3743			    pmap_addr,
3744			    prot,
3745			    TRUE,
3746			    FALSE,
3747			    FALSE,
3748			    &type_of_fault);
3749
3750done:
3751	/*
3752	 *	Unlock everything, and return
3753	 */
3754
3755	PAGE_WAKEUP_DONE(m);
3756	UNLOCK_AND_DEALLOCATE;
3757
3758	return kr;
3759
3760}
3761
3762/*
3763 *	Routine:	vm_fault_copy_cleanup
3764 *	Purpose:
3765 *		Release a page used by vm_fault_copy.
3766 */
3767
3768void
3769vm_fault_copy_cleanup(
3770	vm_page_t	page,
3771	vm_page_t	top_page)
3772{
3773	vm_object_t	object = page->object;
3774
3775	vm_object_lock(object);
3776	PAGE_WAKEUP_DONE(page);
3777	vm_page_lockspin_queues();
3778	if (!page->active && !page->inactive && !page->throttled)
3779		vm_page_activate(page);
3780	vm_page_unlock_queues();
3781	vm_fault_cleanup(object, top_page);
3782}
3783
3784void
3785vm_fault_copy_dst_cleanup(
3786	vm_page_t	page)
3787{
3788	vm_object_t	object;
3789
3790	if (page != VM_PAGE_NULL) {
3791		object = page->object;
3792		vm_object_lock(object);
3793		vm_page_lockspin_queues();
3794		vm_page_unwire(page);
3795		vm_page_unlock_queues();
3796		vm_object_paging_end(object);
3797		vm_object_unlock(object);
3798	}
3799}
3800
3801/*
3802 *	Routine:	vm_fault_copy
3803 *
3804 *	Purpose:
3805 *		Copy pages from one virtual memory object to another --
3806 *		neither the source nor destination pages need be resident.
3807 *
3808 *		Before actually copying a page, the version associated with
3809 *		the destination address map wil be verified.
3810 *
3811 *	In/out conditions:
3812 *		The caller must hold a reference, but not a lock, to
3813 *		each of the source and destination objects and to the
3814 *		destination map.
3815 *
3816 *	Results:
3817 *		Returns KERN_SUCCESS if no errors were encountered in
3818 *		reading or writing the data.  Returns KERN_INTERRUPTED if
3819 *		the operation was interrupted (only possible if the
3820 *		"interruptible" argument is asserted).  Other return values
3821 *		indicate a permanent error in copying the data.
3822 *
3823 *		The actual amount of data copied will be returned in the
3824 *		"copy_size" argument.  In the event that the destination map
3825 *		verification failed, this amount may be less than the amount
3826 *		requested.
3827 */
3828kern_return_t
3829vm_fault_copy(
3830	vm_object_t		src_object,
3831	vm_object_offset_t	src_offset,
3832	vm_map_size_t		*copy_size,		/* INOUT */
3833	vm_object_t		dst_object,
3834	vm_object_offset_t	dst_offset,
3835	vm_map_t		dst_map,
3836	vm_map_version_t	 *dst_version,
3837	int			interruptible)
3838{
3839	vm_page_t		result_page;
3840
3841	vm_page_t		src_page;
3842	vm_page_t		src_top_page;
3843	vm_prot_t		src_prot;
3844
3845	vm_page_t		dst_page;
3846	vm_page_t		dst_top_page;
3847	vm_prot_t		dst_prot;
3848
3849	vm_map_size_t		amount_left;
3850	vm_object_t		old_copy_object;
3851	kern_return_t		error = 0;
3852
3853	vm_map_size_t		part_size;
3854	struct vm_object_fault_info fault_info_src;
3855	struct vm_object_fault_info fault_info_dst;
3856
3857	/*
3858	 * In order not to confuse the clustered pageins, align
3859	 * the different offsets on a page boundary.
3860	 */
3861
3862#define	RETURN(x)					\
3863	MACRO_BEGIN					\
3864	*copy_size -= amount_left;			\
3865	MACRO_RETURN(x);				\
3866	MACRO_END
3867
3868	amount_left = *copy_size;
3869
3870	fault_info_src.interruptible = interruptible;
3871	fault_info_src.behavior = VM_BEHAVIOR_SEQUENTIAL;
3872	fault_info_src.user_tag  = 0;
3873	fault_info_src.lo_offset = vm_object_trunc_page(src_offset);
3874	fault_info_src.hi_offset = fault_info_src.lo_offset + amount_left;
3875	fault_info_src.no_cache   = FALSE;
3876
3877	fault_info_dst.interruptible = interruptible;
3878	fault_info_dst.behavior = VM_BEHAVIOR_SEQUENTIAL;
3879	fault_info_dst.user_tag  = 0;
3880	fault_info_dst.lo_offset = vm_object_trunc_page(dst_offset);
3881	fault_info_dst.hi_offset = fault_info_dst.lo_offset + amount_left;
3882	fault_info_dst.no_cache   = FALSE;
3883
3884	do { /* while (amount_left > 0) */
3885		/*
3886		 * There may be a deadlock if both source and destination
3887		 * pages are the same. To avoid this deadlock, the copy must
3888		 * start by getting the destination page in order to apply
3889		 * COW semantics if any.
3890		 */
3891
3892	RetryDestinationFault: ;
3893
3894		dst_prot = VM_PROT_WRITE|VM_PROT_READ;
3895
3896		vm_object_lock(dst_object);
3897		vm_object_paging_begin(dst_object);
3898
3899		fault_info_dst.cluster_size = amount_left;
3900
3901		XPR(XPR_VM_FAULT,"vm_fault_copy -> vm_fault_page\n",0,0,0,0,0);
3902		switch (vm_fault_page(dst_object,
3903				      vm_object_trunc_page(dst_offset),
3904				      VM_PROT_WRITE|VM_PROT_READ,
3905				      FALSE,
3906				      &dst_prot, &dst_page, &dst_top_page,
3907				      (int *)0,
3908				      &error,
3909				      dst_map->no_zero_fill,
3910				      FALSE, &fault_info_dst)) {
3911		case VM_FAULT_SUCCESS:
3912			break;
3913		case VM_FAULT_RETRY:
3914			goto RetryDestinationFault;
3915		case VM_FAULT_MEMORY_SHORTAGE:
3916			if (vm_page_wait(interruptible))
3917				goto RetryDestinationFault;
3918			/* fall thru */
3919		case VM_FAULT_INTERRUPTED:
3920			RETURN(MACH_SEND_INTERRUPTED);
3921		case VM_FAULT_MEMORY_ERROR:
3922			if (error)
3923				return (error);
3924			else
3925				return(KERN_MEMORY_ERROR);
3926		}
3927		assert ((dst_prot & VM_PROT_WRITE) != VM_PROT_NONE);
3928
3929		old_copy_object = dst_page->object->copy;
3930
3931		/*
3932		 * There exists the possiblity that the source and
3933		 * destination page are the same.  But we can't
3934		 * easily determine that now.  If they are the
3935		 * same, the call to vm_fault_page() for the
3936		 * destination page will deadlock.  To prevent this we
3937		 * wire the page so we can drop busy without having
3938		 * the page daemon steal the page.  We clean up the
3939		 * top page  but keep the paging reference on the object
3940		 * holding the dest page so it doesn't go away.
3941		 */
3942
3943		vm_page_lockspin_queues();
3944		vm_page_wire(dst_page);
3945		vm_page_unlock_queues();
3946		PAGE_WAKEUP_DONE(dst_page);
3947		vm_object_unlock(dst_page->object);
3948
3949		if (dst_top_page != VM_PAGE_NULL) {
3950			vm_object_lock(dst_object);
3951			VM_PAGE_FREE(dst_top_page);
3952			vm_object_paging_end(dst_object);
3953			vm_object_unlock(dst_object);
3954		}
3955
3956	RetrySourceFault: ;
3957
3958		if (src_object == VM_OBJECT_NULL) {
3959			/*
3960			 *	No source object.  We will just
3961			 *	zero-fill the page in dst_object.
3962			 */
3963			src_page = VM_PAGE_NULL;
3964			result_page = VM_PAGE_NULL;
3965		} else {
3966			vm_object_lock(src_object);
3967			src_page = vm_page_lookup(src_object,
3968						  vm_object_trunc_page(src_offset));
3969			if (src_page == dst_page) {
3970				src_prot = dst_prot;
3971				result_page = VM_PAGE_NULL;
3972			} else {
3973				src_prot = VM_PROT_READ;
3974				vm_object_paging_begin(src_object);
3975
3976				fault_info_src.cluster_size = amount_left;
3977
3978				XPR(XPR_VM_FAULT,
3979					"vm_fault_copy(2) -> vm_fault_page\n",
3980					0,0,0,0,0);
3981				switch (vm_fault_page(
3982						src_object,
3983						vm_object_trunc_page(src_offset),
3984						VM_PROT_READ, FALSE,
3985						&src_prot,
3986						&result_page, &src_top_page,
3987						(int *)0, &error, FALSE,
3988						FALSE, &fault_info_src)) {
3989
3990				case VM_FAULT_SUCCESS:
3991					break;
3992				case VM_FAULT_RETRY:
3993					goto RetrySourceFault;
3994				case VM_FAULT_MEMORY_SHORTAGE:
3995					if (vm_page_wait(interruptible))
3996						goto RetrySourceFault;
3997					/* fall thru */
3998				case VM_FAULT_INTERRUPTED:
3999					vm_fault_copy_dst_cleanup(dst_page);
4000					RETURN(MACH_SEND_INTERRUPTED);
4001				case VM_FAULT_MEMORY_ERROR:
4002					vm_fault_copy_dst_cleanup(dst_page);
4003					if (error)
4004						return (error);
4005					else
4006						return(KERN_MEMORY_ERROR);
4007				}
4008
4009
4010				assert((src_top_page == VM_PAGE_NULL) ==
4011				       (result_page->object == src_object));
4012			}
4013			assert ((src_prot & VM_PROT_READ) != VM_PROT_NONE);
4014			vm_object_unlock(result_page->object);
4015		}
4016
4017		if (!vm_map_verify(dst_map, dst_version)) {
4018			if (result_page != VM_PAGE_NULL && src_page != dst_page)
4019				vm_fault_copy_cleanup(result_page, src_top_page);
4020			vm_fault_copy_dst_cleanup(dst_page);
4021			break;
4022		}
4023
4024		vm_object_lock(dst_page->object);
4025
4026		if (dst_page->object->copy != old_copy_object) {
4027			vm_object_unlock(dst_page->object);
4028			vm_map_verify_done(dst_map, dst_version);
4029			if (result_page != VM_PAGE_NULL && src_page != dst_page)
4030				vm_fault_copy_cleanup(result_page, src_top_page);
4031			vm_fault_copy_dst_cleanup(dst_page);
4032			break;
4033		}
4034		vm_object_unlock(dst_page->object);
4035
4036		/*
4037		 *	Copy the page, and note that it is dirty
4038		 *	immediately.
4039		 */
4040
4041		if (!page_aligned(src_offset) ||
4042			!page_aligned(dst_offset) ||
4043			!page_aligned(amount_left)) {
4044
4045			vm_object_offset_t	src_po,
4046						dst_po;
4047
4048			src_po = src_offset - vm_object_trunc_page(src_offset);
4049			dst_po = dst_offset - vm_object_trunc_page(dst_offset);
4050
4051			if (dst_po > src_po) {
4052				part_size = PAGE_SIZE - dst_po;
4053			} else {
4054				part_size = PAGE_SIZE - src_po;
4055			}
4056			if (part_size > (amount_left)){
4057				part_size = amount_left;
4058			}
4059
4060			if (result_page == VM_PAGE_NULL) {
4061				vm_page_part_zero_fill(dst_page,
4062							dst_po, part_size);
4063			} else {
4064				vm_page_part_copy(result_page, src_po,
4065					dst_page, dst_po, part_size);
4066				if(!dst_page->dirty){
4067					vm_object_lock(dst_object);
4068					dst_page->dirty = TRUE;
4069					vm_object_unlock(dst_page->object);
4070				}
4071
4072			}
4073		} else {
4074			part_size = PAGE_SIZE;
4075
4076			if (result_page == VM_PAGE_NULL)
4077				vm_page_zero_fill(dst_page);
4078			else{
4079				vm_page_copy(result_page, dst_page);
4080				if(!dst_page->dirty){
4081					vm_object_lock(dst_object);
4082					dst_page->dirty = TRUE;
4083					vm_object_unlock(dst_page->object);
4084				}
4085			}
4086
4087		}
4088
4089		/*
4090		 *	Unlock everything, and return
4091		 */
4092
4093		vm_map_verify_done(dst_map, dst_version);
4094
4095		if (result_page != VM_PAGE_NULL && src_page != dst_page)
4096			vm_fault_copy_cleanup(result_page, src_top_page);
4097		vm_fault_copy_dst_cleanup(dst_page);
4098
4099		amount_left -= part_size;
4100		src_offset += part_size;
4101		dst_offset += part_size;
4102	} while (amount_left > 0);
4103
4104	RETURN(KERN_SUCCESS);
4105#undef	RETURN
4106
4107	/*NOTREACHED*/
4108}
4109
4110#if	VM_FAULT_CLASSIFY
4111/*
4112 *	Temporary statistics gathering support.
4113 */
4114
4115/*
4116 *	Statistics arrays:
4117 */
4118#define VM_FAULT_TYPES_MAX	5
4119#define	VM_FAULT_LEVEL_MAX	8
4120
4121int	vm_fault_stats[VM_FAULT_TYPES_MAX][VM_FAULT_LEVEL_MAX];
4122
4123#define	VM_FAULT_TYPE_ZERO_FILL	0
4124#define	VM_FAULT_TYPE_MAP_IN	1
4125#define	VM_FAULT_TYPE_PAGER	2
4126#define	VM_FAULT_TYPE_COPY	3
4127#define	VM_FAULT_TYPE_OTHER	4
4128
4129
4130void
4131vm_fault_classify(vm_object_t		object,
4132		  vm_object_offset_t	offset,
4133		  vm_prot_t		fault_type)
4134{
4135	int		type, level = 0;
4136	vm_page_t	m;
4137
4138	while (TRUE) {
4139		m = vm_page_lookup(object, offset);
4140		if (m != VM_PAGE_NULL) {
4141		        if (m->busy || m->error || m->restart || m->absent) {
4142				type = VM_FAULT_TYPE_OTHER;
4143				break;
4144			}
4145			if (((fault_type & VM_PROT_WRITE) == 0) ||
4146			    ((level == 0) && object->copy == VM_OBJECT_NULL)) {
4147				type = VM_FAULT_TYPE_MAP_IN;
4148				break;
4149			}
4150			type = VM_FAULT_TYPE_COPY;
4151			break;
4152		}
4153		else {
4154			if (object->pager_created) {
4155				type = VM_FAULT_TYPE_PAGER;
4156				break;
4157			}
4158			if (object->shadow == VM_OBJECT_NULL) {
4159				type = VM_FAULT_TYPE_ZERO_FILL;
4160				break;
4161		        }
4162
4163			offset += object->shadow_offset;
4164			object = object->shadow;
4165			level++;
4166			continue;
4167		}
4168	}
4169
4170	if (level > VM_FAULT_LEVEL_MAX)
4171		level = VM_FAULT_LEVEL_MAX;
4172
4173	vm_fault_stats[type][level] += 1;
4174
4175	return;
4176}
4177
4178/* cleanup routine to call from debugger */
4179
4180void
4181vm_fault_classify_init(void)
4182{
4183	int type, level;
4184
4185	for (type = 0; type < VM_FAULT_TYPES_MAX; type++) {
4186		for (level = 0; level < VM_FAULT_LEVEL_MAX; level++) {
4187			vm_fault_stats[type][level] = 0;
4188		}
4189	}
4190
4191	return;
4192}
4193#endif	/* VM_FAULT_CLASSIFY */
4194
4195
4196extern int cs_validation;
4197
4198void
4199vm_page_validate_cs_mapped(
4200	vm_page_t	page,
4201	const void 	*kaddr)
4202{
4203	vm_object_t		object;
4204	vm_object_offset_t	offset;
4205	kern_return_t		kr;
4206	memory_object_t		pager;
4207	void			*blobs;
4208	boolean_t		validated, tainted;
4209
4210	assert(page->busy);
4211	vm_object_lock_assert_exclusive(page->object);
4212
4213	if (!cs_validation) {
4214		return;
4215	}
4216
4217	if (page->wpmapped && !page->cs_tainted) {
4218		/*
4219		 * This page was mapped for "write" access sometime in the
4220		 * past and could still be modifiable in the future.
4221		 * Consider it tainted.
4222		 * [ If the page was already found to be "tainted", no
4223		 * need to re-validate. ]
4224		 */
4225		page->cs_validated = TRUE;
4226		page->cs_tainted = TRUE;
4227		if (cs_debug) {
4228			printf("CODESIGNING: vm_page_validate_cs: "
4229			       "page %p obj %p off 0x%llx "
4230			       "was modified\n",
4231			       page, page->object, page->offset);
4232		}
4233		vm_cs_validated_dirtied++;
4234	}
4235
4236	if (page->cs_validated) {
4237		return;
4238	}
4239
4240	vm_cs_validates++;
4241
4242	object = page->object;
4243	assert(object->code_signed);
4244	offset = page->offset;
4245
4246	if (!object->alive || object->terminating || object->pager == NULL) {
4247		/*
4248		 * The object is terminating and we don't have its pager
4249		 * so we can't validate the data...
4250		 */
4251		return;
4252	}
4253	/*
4254	 * Since we get here to validate a page that was brought in by
4255	 * the pager, we know that this pager is all setup and ready
4256	 * by now.
4257	 */
4258	assert(!object->internal);
4259	assert(object->pager != NULL);
4260	assert(object->pager_ready);
4261
4262	pager = object->pager;
4263
4264	kr = vnode_pager_get_object_cs_blobs(pager, &blobs);
4265	if (kr != KERN_SUCCESS) {
4266		blobs = NULL;
4267	}
4268
4269	/* verify the SHA1 hash for this page */
4270	validated = cs_validate_page(blobs,
4271				     offset + object->paging_offset,
4272				     (const void *)kaddr,
4273				     &tainted);
4274
4275	page->cs_validated = validated;
4276	if (validated) {
4277		page->cs_tainted = tainted;
4278	}
4279}
4280
4281void
4282vm_page_validate_cs(
4283	vm_page_t	page)
4284{
4285	vm_object_t		object;
4286	vm_object_offset_t	offset;
4287	vm_map_offset_t		koffset;
4288	vm_map_size_t		ksize;
4289	vm_offset_t		kaddr;
4290	kern_return_t		kr;
4291	boolean_t		busy_page;
4292
4293	vm_object_lock_assert_held(page->object);
4294
4295	if (!cs_validation) {
4296		return;
4297	}
4298
4299	if (page->wpmapped && !page->cs_tainted) {
4300		vm_object_lock_assert_exclusive(page->object);
4301
4302		/*
4303		 * This page was mapped for "write" access sometime in the
4304		 * past and could still be modifiable in the future.
4305		 * Consider it tainted.
4306		 * [ If the page was already found to be "tainted", no
4307		 * need to re-validate. ]
4308		 */
4309		page->cs_validated = TRUE;
4310		page->cs_tainted = TRUE;
4311		if (cs_debug) {
4312			printf("CODESIGNING: vm_page_validate_cs: "
4313			       "page %p obj %p off 0x%llx "
4314			       "was modified\n",
4315			       page, page->object, page->offset);
4316		}
4317		vm_cs_validated_dirtied++;
4318	}
4319
4320	if (page->cs_validated) {
4321		return;
4322	}
4323
4324	vm_object_lock_assert_exclusive(page->object);
4325
4326	object = page->object;
4327	assert(object->code_signed);
4328	offset = page->offset;
4329
4330	busy_page = page->busy;
4331	if (!busy_page) {
4332		/* keep page busy while we map (and unlock) the VM object */
4333		page->busy = TRUE;
4334	}
4335
4336	/*
4337	 * Take a paging reference on the VM object
4338	 * to protect it from collapse or bypass,
4339	 * and keep it from disappearing too.
4340	 */
4341	vm_object_paging_begin(object);
4342
4343	/* map the page in the kernel address space */
4344	koffset = 0;
4345	ksize = PAGE_SIZE_64;
4346	kr = vm_paging_map_object(&koffset,
4347				  page,
4348				  object,
4349				  offset,
4350				  &ksize,
4351				  VM_PROT_READ,
4352				  FALSE); /* can't unlock object ! */
4353	if (kr != KERN_SUCCESS) {
4354		panic("vm_page_validate_cs: could not map page: 0x%x\n", kr);
4355	}
4356	kaddr = CAST_DOWN(vm_offset_t, koffset);
4357
4358	/* validate the mapped page */
4359	vm_page_validate_cs_mapped(page, (const void *) kaddr);
4360
4361	assert(page->busy);
4362	assert(object == page->object);
4363	vm_object_lock_assert_exclusive(object);
4364
4365	if (!busy_page) {
4366		PAGE_WAKEUP_DONE(page);
4367	}
4368	if (koffset != 0) {
4369		/* unmap the map from the kernel address space */
4370		vm_paging_unmap_object(object, koffset, koffset + ksize);
4371		koffset = 0;
4372		ksize = 0;
4373		kaddr = 0;
4374	}
4375	vm_object_paging_end(object);
4376}
4377