1/*
2 * Copyright (c) 2000-2009 Apple Inc. All rights reserved.
3 *
4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5 *
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. The rights granted to you under the License
10 * may not be used to create, or enable the creation or redistribution of,
11 * unlawful or unlicensed copies of an Apple operating system, or to
12 * circumvent, violate, or enable the circumvention or violation of, any
13 * terms of an Apple operating system software license agreement.
14 *
15 * Please obtain a copy of the License at
16 * http://www.opensource.apple.com/apsl/ and read it before using this file.
17 *
18 * The Original Code and all software distributed under the License are
19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23 * Please see the License for the specific language governing rights and
24 * limitations under the License.
25 *
26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27 */
28/*
29 * @OSF_COPYRIGHT@
30 */
31/*
32 * Mach Operating System
33 * Copyright (c) 1991,1990,1989,1988,1987 Carnegie Mellon University
34 * All Rights Reserved.
35 *
36 * Permission to use, copy, modify and distribute this software and its
37 * documentation is hereby granted, provided that both the copyright
38 * notice and this permission notice appear in all copies of the
39 * software, derivative works or modified versions, and any portions
40 * thereof, and that both notices appear in supporting documentation.
41 *
42 * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
43 * CONDITION.  CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND FOR
44 * ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
45 *
46 * Carnegie Mellon requests users of this software to return to
47 *
48 *  Software Distribution Coordinator  or  Software.Distribution@CS.CMU.EDU
49 *  School of Computer Science
50 *  Carnegie Mellon University
51 *  Pittsburgh PA 15213-3890
52 *
53 * any improvements or extensions that they make and grant Carnegie Mellon
54 * the rights to redistribute these changes.
55 */
56/*
57 */
58/*
59 *	File:	vm/vm_map.c
60 *	Author:	Avadis Tevanian, Jr., Michael Wayne Young
61 *	Date:	1985
62 *
63 *	Virtual memory mapping module.
64 */
65
66#include <task_swapper.h>
67#include <mach_assert.h>
68#include <libkern/OSAtomic.h>
69
70#include <mach/kern_return.h>
71#include <mach/port.h>
72#include <mach/vm_attributes.h>
73#include <mach/vm_param.h>
74#include <mach/vm_behavior.h>
75#include <mach/vm_statistics.h>
76#include <mach/memory_object.h>
77#include <mach/mach_vm.h>
78#include <machine/cpu_capabilities.h>
79#include <mach/sdt.h>
80
81#include <kern/assert.h>
82#include <kern/counters.h>
83#include <kern/kalloc.h>
84#include <kern/zalloc.h>
85
86#include <vm/cpm.h>
87#include <vm/vm_init.h>
88#include <vm/vm_fault.h>
89#include <vm/vm_map.h>
90#include <vm/vm_object.h>
91#include <vm/vm_page.h>
92#include <vm/vm_pageout.h>
93#include <vm/vm_kern.h>
94#include <ipc/ipc_port.h>
95#include <kern/sched_prim.h>
96#include <kern/misc_protos.h>
97#include <kern/xpr.h>
98
99#include <mach/vm_map_server.h>
100#include <mach/mach_host_server.h>
101#include <vm/vm_protos.h>
102#include <vm/vm_purgeable_internal.h>
103
105#include <vm/vm_shared_region.h>
106#include <vm/vm_map_store.h>
107
108extern u_int32_t random(void);	/* from <libkern/libkern.h> */
109/* Internal prototypes
110 */
111
112static void vm_map_simplify_range(
113	vm_map_t	map,
114	vm_map_offset_t	start,
115	vm_map_offset_t	end);	/* forward */
116
117static boolean_t	vm_map_range_check(
118	vm_map_t	map,
119	vm_map_offset_t	start,
120	vm_map_offset_t	end,
121	vm_map_entry_t	*entry);
122
123static vm_map_entry_t	_vm_map_entry_create(
124	struct vm_map_header	*map_header, boolean_t map_locked);
125
126static void		_vm_map_entry_dispose(
127	struct vm_map_header	*map_header,
128	vm_map_entry_t		entry);
129
130static void		vm_map_pmap_enter(
131	vm_map_t		map,
132	vm_map_offset_t 	addr,
133	vm_map_offset_t		end_addr,
134	vm_object_t 		object,
135	vm_object_offset_t	offset,
136	vm_prot_t		protection);
137
138static void		_vm_map_clip_end(
139	struct vm_map_header	*map_header,
140	vm_map_entry_t		entry,
141	vm_map_offset_t		end);
142
143static void		_vm_map_clip_start(
144	struct vm_map_header	*map_header,
145	vm_map_entry_t		entry,
146	vm_map_offset_t		start);
147
148static void		vm_map_entry_delete(
149	vm_map_t	map,
150	vm_map_entry_t	entry);
151
152static kern_return_t	vm_map_delete(
153	vm_map_t	map,
154	vm_map_offset_t	start,
155	vm_map_offset_t	end,
156	int		flags,
157	vm_map_t	zap_map);
158
159static kern_return_t	vm_map_copy_overwrite_unaligned(
160	vm_map_t	dst_map,
161	vm_map_entry_t	entry,
162	vm_map_copy_t	copy,
163	vm_map_address_t start);
164
165static kern_return_t	vm_map_copy_overwrite_aligned(
166	vm_map_t	dst_map,
167	vm_map_entry_t	tmp_entry,
168	vm_map_copy_t	copy,
169	vm_map_offset_t start,
170	pmap_t		pmap);
171
172static kern_return_t	vm_map_copyin_kernel_buffer(
173	vm_map_t	src_map,
174	vm_map_address_t src_addr,
175	vm_map_size_t	len,
176	boolean_t	src_destroy,
177	vm_map_copy_t	*copy_result);  /* OUT */
178
179static kern_return_t	vm_map_copyout_kernel_buffer(
180	vm_map_t	map,
181	vm_map_address_t *addr,	/* IN/OUT */
182	vm_map_copy_t	copy,
183	boolean_t	overwrite);
184
185static void		vm_map_fork_share(
186	vm_map_t	old_map,
187	vm_map_entry_t	old_entry,
188	vm_map_t	new_map);
189
190static boolean_t	vm_map_fork_copy(
191	vm_map_t	old_map,
192	vm_map_entry_t	*old_entry_p,
193	vm_map_t	new_map);
194
195void		vm_map_region_top_walk(
196	vm_map_entry_t		   entry,
197	vm_region_top_info_t       top);
198
199void		vm_map_region_walk(
200	vm_map_t		   map,
201	vm_map_offset_t		   va,
202	vm_map_entry_t		   entry,
203	vm_object_offset_t	   offset,
204	vm_object_size_t	   range,
205	vm_region_extended_info_t  extended,
206	boolean_t		   look_for_pages);
207
208static kern_return_t	vm_map_wire_nested(
209	vm_map_t		   map,
210	vm_map_offset_t		   start,
211	vm_map_offset_t		   end,
212	vm_prot_t		   access_type,
213	boolean_t		   user_wire,
214	pmap_t			   map_pmap,
215	vm_map_offset_t		   pmap_addr);
216
217static kern_return_t	vm_map_unwire_nested(
218	vm_map_t		   map,
219	vm_map_offset_t		   start,
220	vm_map_offset_t		   end,
221	boolean_t		   user_wire,
222	pmap_t			   map_pmap,
223	vm_map_offset_t		   pmap_addr);
224
225static kern_return_t	vm_map_overwrite_submap_recurse(
226	vm_map_t		   dst_map,
227	vm_map_offset_t		   dst_addr,
228	vm_map_size_t		   dst_size);
229
230static kern_return_t	vm_map_copy_overwrite_nested(
231	vm_map_t		   dst_map,
232	vm_map_offset_t		   dst_addr,
233	vm_map_copy_t		   copy,
234	boolean_t		   interruptible,
235	pmap_t			   pmap,
236	boolean_t		   discard_on_success);
237
238static kern_return_t	vm_map_remap_extract(
239	vm_map_t		map,
240	vm_map_offset_t		addr,
241	vm_map_size_t		size,
242	boolean_t		copy,
243	struct vm_map_header 	*map_header,
244	vm_prot_t		*cur_protection,
245	vm_prot_t		*max_protection,
246	vm_inherit_t		inheritance,
247	boolean_t		pageable);
248
249static kern_return_t	vm_map_remap_range_allocate(
250	vm_map_t		map,
251	vm_map_address_t	*address,
252	vm_map_size_t		size,
253	vm_map_offset_t		mask,
254	int			flags,
255	vm_map_entry_t		*map_entry);
256
257static void		vm_map_region_look_for_page(
258	vm_map_t		   map,
259	vm_map_offset_t            va,
260	vm_object_t		   object,
261	vm_object_offset_t	   offset,
262	int                        max_refcnt,
263	int                        depth,
264	vm_region_extended_info_t  extended);
265
266static int		vm_map_region_count_obj_refs(
267	vm_map_entry_t    	   entry,
268	vm_object_t       	   object);
269
270
271static kern_return_t	vm_map_willneed(
272	vm_map_t	map,
273	vm_map_offset_t	start,
274	vm_map_offset_t	end);
275
276static kern_return_t	vm_map_reuse_pages(
277	vm_map_t	map,
278	vm_map_offset_t	start,
279	vm_map_offset_t	end);
280
281static kern_return_t	vm_map_reusable_pages(
282	vm_map_t	map,
283	vm_map_offset_t	start,
284	vm_map_offset_t	end);
285
286static kern_return_t	vm_map_can_reuse(
287	vm_map_t	map,
288	vm_map_offset_t	start,
289	vm_map_offset_t	end);
290
291
292/*
 * Macros to copy a vm_map_entry. We must be careful to correctly
 * manage the wired page count. vm_map_entry_copy() creates a new
 * map entry that refers to the same memory - the wired count in the
 * new entry must be set to zero. vm_map_entry_copy_full() creates a
 * new entry that is identical to the old entry.  This preserves the
 * wire count; it's used for map splitting and zone changing in
 * vm_map_copyout.
300 */
301
302#define vm_map_entry_copy(NEW,OLD)	\
303MACRO_BEGIN				\
304boolean_t _vmec_reserved = (NEW)->from_reserved_zone;	\
305	*(NEW) = *(OLD);                \
306	(NEW)->is_shared = FALSE;	\
307	(NEW)->needs_wakeup = FALSE;    \
308	(NEW)->in_transition = FALSE;   \
309	(NEW)->wired_count = 0;         \
310	(NEW)->user_wired_count = 0;    \
311	(NEW)->permanent = FALSE;	\
312	(NEW)->used_for_jit = FALSE;	\
313	(NEW)->from_reserved_zone = _vmec_reserved;			\
314MACRO_END
315
316#define vm_map_entry_copy_full(NEW,OLD)			\
317MACRO_BEGIN						\
318boolean_t _vmecf_reserved = (NEW)->from_reserved_zone;	\
319(*(NEW) = *(OLD));					\
320(NEW)->from_reserved_zone = _vmecf_reserved;			\
321MACRO_END
322
323/*
324 *	Decide if we want to allow processes to execute from their data or stack areas.
325 *	override_nx() returns true if we do.  Data/stack execution can be enabled independently
326 *	for 32 and 64 bit processes.  Set the VM_ABI_32 or VM_ABI_64 flags in allow_data_exec
327 *	or allow_stack_exec to enable data execution for that type of data area for that particular
328 *	ABI (or both by or'ing the flags together).  These are initialized in the architecture
329 *	specific pmap files since the default behavior varies according to architecture.  The
330 *	main reason it varies is because of the need to provide binary compatibility with old
331 *	applications that were written before these restrictions came into being.  In the old
332 *	days, an app could execute anything it could read, but this has slowly been tightened
333 *	up over time.  The default behavior is:
334 *
335 *	32-bit PPC apps		may execute from both stack and data areas
 *	32-bit Intel apps	may execute from data areas but not stack
337 *	64-bit PPC/Intel apps	may not execute from either data or stack
338 *
339 *	An application on any architecture may override these defaults by explicitly
340 *	adding PROT_EXEC permission to the page in question with the mprotect(2)
341 *	system call.  This code here just determines what happens when an app tries to
342 * 	execute from a page that lacks execute permission.
343 *
344 *	Note that allow_data_exec or allow_stack_exec may also be modified by sysctl to change the
345 *	default behavior for both 32 and 64 bit apps on a system-wide basis. Furthermore,
346 *	a Mach-O header flag bit (MH_NO_HEAP_EXECUTION) can be used to forcibly disallow
347 *	execution from data areas for a particular binary even if the arch normally permits it. As
348 *	a final wrinkle, a posix_spawn attribute flag can be used to negate this opt-in header bit
349 *	to support some complicated use cases, notably browsers with out-of-process plugins that
350 *	are not all NX-safe.
351 */
352
353extern int allow_data_exec, allow_stack_exec;
354
355int
356override_nx(vm_map_t map, uint32_t user_tag) /* map unused on arm */
357{
358	int current_abi;
359
360	/*
361	 * Determine if the app is running in 32 or 64 bit mode.
362	 */
363
364	if (vm_map_is_64bit(map))
365		current_abi = VM_ABI_64;
366	else
367		current_abi = VM_ABI_32;
368
369	/*
370	 * Determine if we should allow the execution based on whether it's a
371	 * stack or data area and the current architecture.
372	 */
373
374	if (user_tag == VM_MEMORY_STACK)
375		return allow_stack_exec & current_abi;
376
377	return (allow_data_exec & current_abi) && (map->map_disallow_data_exec == FALSE);
378}
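/*
 * Illustrative sketch (not compiled): a fault path could consult
 * override_nx() when execute permission is missing from a mapping.  The
 * helper below and its arguments are simplified assumptions for
 * illustration, not the actual vm_fault() logic.
 */
#if 0
static kern_return_t
example_handle_exec_fault(
	vm_map_t	map,
	vm_prot_t	entry_prot,
	vm_prot_t	fault_type,
	uint32_t	user_tag)
{
	if ((fault_type & VM_PROT_EXECUTE) &&
	    !(entry_prot & VM_PROT_EXECUTE)) {
		/* honor the per-ABI data/stack execution defaults */
		if (!override_nx(map, user_tag))
			return KERN_PROTECTION_FAILURE;
	}
	return KERN_SUCCESS;
}
#endif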
379
380
381/*
382 *	Virtual memory maps provide for the mapping, protection,
383 *	and sharing of virtual memory objects.  In addition,
384 *	this module provides for an efficient virtual copy of
385 *	memory from one map to another.
386 *
387 *	Synchronization is required prior to most operations.
388 *
389 *	Maps consist of an ordered doubly-linked list of simple
390 *	entries; a single hint is used to speed up lookups.
391 *
392 *	Sharing maps have been deleted from this version of Mach.
393 *	All shared objects are now mapped directly into the respective
394 *	maps.  This requires a change in the copy on write strategy;
395 *	the asymmetric (delayed) strategy is used for shared temporary
396 *	objects instead of the symmetric (shadow) strategy.  All maps
397 *	are now "top level" maps (either task map, kernel map or submap
398 *	of the kernel map).
399 *
 *	Since portions of maps are specified by start/end addresses,
401 *	which may not align with existing map entries, all
402 *	routines merely "clip" entries to these start/end values.
403 *	[That is, an entry is split into two, bordering at a
404 *	start or end value.]  Note that these clippings may not
405 *	always be necessary (as the two resulting entries are then
406 *	not changed); however, the clipping is done for convenience.
407 *	No attempt is currently made to "glue back together" two
408 *	abutting entries.
409 *
410 *	The symmetric (shadow) copy strategy implements virtual copy
411 *	by copying VM object references from one map to
412 *	another, and then marking both regions as copy-on-write.
413 *	It is important to note that only one writeable reference
414 *	to a VM object region exists in any map when this strategy
415 *	is used -- this means that shadow object creation can be
 *	delayed until a write operation occurs.  The asymmetric (delayed)
417 *	strategy allows multiple maps to have writeable references to
418 *	the same region of a vm object, and hence cannot delay creating
419 *	its copy objects.  See vm_object_copy_quickly() in vm_object.c.
420 *	Copying of permanent objects is completely different; see
421 *	vm_object_copy_strategically() in vm_object.c.
422 */
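/*
 * Illustrative sketch (not compiled): the "clipping" described above.
 * A range operation on [start, end) first splits any entry straddling a
 * boundary so that whole entries can then be modified.  This is a
 * simplified outline of the pattern used throughout this file; the
 * helper name is made up, and real callers keep the map locked across
 * the subsequent modification as well.
 */
#if 0
static void
example_clip_range(
	vm_map_t	map,
	vm_map_offset_t	start,
	vm_map_offset_t	end)
{
	vm_map_entry_t	entry;

	vm_map_lock(map);
	if (vm_map_lookup_entry(map, start, &entry)) {
		/* split the entry straddling "start" so it begins there */
		vm_map_clip_start(map, entry, start);
		if (entry->vme_end > end) {
			/* likewise split at "end" if the entry runs past it */
			vm_map_clip_end(map, entry, end);
		}
	}
	vm_map_unlock(map);
}
#endif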
423
424static zone_t	vm_map_zone;		/* zone for vm_map structures */
425static zone_t	vm_map_entry_zone;	/* zone for vm_map_entry structures */
426static zone_t	vm_map_entry_reserved_zone;	/* zone with reserve for non-blocking
427					 * allocations */
428static zone_t	vm_map_copy_zone;	/* zone for vm_map_copy structures */
429
430
431/*
432 *	Placeholder object for submap operations.  This object is dropped
433 *	into the range by a call to vm_map_find, and removed when
434 *	vm_map_submap creates the submap.
435 */
436
437vm_object_t	vm_submap_object;
438
439static void		*map_data;
440static vm_size_t	map_data_size;
441static void		*kentry_data;
442static vm_size_t	kentry_data_size;
443
444#if CONFIG_EMBEDDED
445#define		NO_COALESCE_LIMIT  0
446#else
447#define         NO_COALESCE_LIMIT  ((1024 * 128) - 1)
448#endif
449
450/* Skip acquiring locks if we're in the midst of a kernel core dump */
451unsigned int not_in_kdp = 1;
452
453unsigned int vm_map_set_cache_attr_count = 0;
454
455kern_return_t
456vm_map_set_cache_attr(
457	vm_map_t	map,
458	vm_map_offset_t	va)
459{
460	vm_map_entry_t	map_entry;
461	vm_object_t	object;
462	kern_return_t	kr = KERN_SUCCESS;
463
464	vm_map_lock_read(map);
465
466	if (!vm_map_lookup_entry(map, va, &map_entry) ||
467	    map_entry->is_sub_map) {
468		/*
469		 * that memory is not properly mapped
470		 */
471		kr = KERN_INVALID_ARGUMENT;
472		goto done;
473	}
474	object = map_entry->object.vm_object;
475
476	if (object == VM_OBJECT_NULL) {
477		/*
478		 * there should be a VM object here at this point
479		 */
480		kr = KERN_INVALID_ARGUMENT;
481		goto done;
482	}
483	vm_object_lock(object);
484	object->set_cache_attr = TRUE;
485	vm_object_unlock(object);
486
487	vm_map_set_cache_attr_count++;
488done:
489	vm_map_unlock_read(map);
490
491	return kr;
492}
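/*
 * Illustrative sketch (not compiled): a kernel caller that has a buffer
 * mapped at "va" in "user_map" could flag the backing object with
 * vm_map_set_cache_attr().  The names here are placeholders.
 */
#if 0
static void
example_mark_cache_attr(vm_map_t user_map, vm_map_offset_t va)
{
	kern_return_t	kr;

	kr = vm_map_set_cache_attr(user_map, va);
	if (kr != KERN_SUCCESS) {
		/* "va" is unmapped, backed by a submap, or has no object yet */
	}
}
#endif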
493
494
495#if CONFIG_CODE_DECRYPTION
496/*
497 * vm_map_apple_protected:
498 * This remaps the requested part of the object with an object backed by
499 * the decrypting pager.
500 * crypt_info contains entry points and session data for the crypt module.
501 * The crypt_info block will be copied by vm_map_apple_protected. The data structures
502 * referenced in crypt_info must remain valid until crypt_info->crypt_end() is called.
503 */
504kern_return_t
505vm_map_apple_protected(
506	vm_map_t	map,
507	vm_map_offset_t	start,
508	vm_map_offset_t	end,
509	struct pager_crypt_info *crypt_info)
510{
511	boolean_t	map_locked;
512	kern_return_t	kr;
513	vm_map_entry_t	map_entry;
514	memory_object_t	protected_mem_obj;
515	vm_object_t	protected_object;
516	vm_map_offset_t	map_addr;
517
518	vm_map_lock_read(map);
519	map_locked = TRUE;
520
521	/* lookup the protected VM object */
522	if (!vm_map_lookup_entry(map,
523				 start,
524				 &map_entry) ||
525	    map_entry->vme_end < end ||
526	    map_entry->is_sub_map) {
527		/* that memory is not properly mapped */
528		kr = KERN_INVALID_ARGUMENT;
529		goto done;
530	}
531	protected_object = map_entry->object.vm_object;
532	if (protected_object == VM_OBJECT_NULL) {
533		/* there should be a VM object here at this point */
534		kr = KERN_INVALID_ARGUMENT;
535		goto done;
536	}
537
538	/* make sure protected object stays alive while map is unlocked */
539	vm_object_reference(protected_object);
540
541	vm_map_unlock_read(map);
542	map_locked = FALSE;
543
544	/*
545	 * Lookup (and create if necessary) the protected memory object
546	 * matching that VM object.
547	 * If successful, this also grabs a reference on the memory object,
548	 * to guarantee that it doesn't go away before we get a chance to map
549	 * it.
550	 */
551	protected_mem_obj = apple_protect_pager_setup(protected_object, crypt_info);
552
553	/* release extra ref on protected object */
554	vm_object_deallocate(protected_object);
555
556	if (protected_mem_obj == NULL) {
557		kr = KERN_FAILURE;
558		goto done;
559	}
560
561	/* map this memory object in place of the current one */
562	map_addr = start;
563	kr = vm_map_enter_mem_object(map,
564				     &map_addr,
565				     end - start,
566				     (mach_vm_offset_t) 0,
567				     VM_FLAGS_FIXED | VM_FLAGS_OVERWRITE,
568				     (ipc_port_t) protected_mem_obj,
569				     (map_entry->offset +
570				      (start - map_entry->vme_start)),
571				     TRUE,
572				     map_entry->protection,
573				     map_entry->max_protection,
574				     map_entry->inheritance);
575	assert(map_addr == start);
576	/*
577	 * Release the reference obtained by apple_protect_pager_setup().
578	 * The mapping (if it succeeded) is now holding a reference on the
579	 * memory object.
580	 */
581	memory_object_deallocate(protected_mem_obj);
582
583done:
584	if (map_locked) {
585		vm_map_unlock_read(map);
586	}
587	return kr;
588}
589#endif	/* CONFIG_CODE_DECRYPTION */
590
591
592lck_grp_t		vm_map_lck_grp;
593lck_grp_attr_t	vm_map_lck_grp_attr;
594lck_attr_t		vm_map_lck_attr;
595
596
597/*
598 *	vm_map_init:
599 *
600 *	Initialize the vm_map module.  Must be called before
601 *	any other vm_map routines.
602 *
603 *	Map and entry structures are allocated from zones -- we must
604 *	initialize those zones.
605 *
606 *	There are three zones of interest:
607 *
608 *	vm_map_zone:		used to allocate maps.
609 *	vm_map_entry_zone:	used to allocate map entries.
610 *	vm_map_entry_reserved_zone:	fallback zone for kernel map entries
611 *
612 *	The kernel allocates map entries from a special zone that is initially
613 *	"crammed" with memory.  It would be difficult (perhaps impossible) for
 *	the kernel to allocate more memory to an entry zone when it became
 *	empty, since the very act of allocating memory implies the creation
616 *	of a new entry.
617 */
618void
619vm_map_init(
620	void)
621{
622	vm_size_t entry_zone_alloc_size;
623	const char *mez_name = "VM map entries";
624
625	vm_map_zone = zinit((vm_map_size_t) sizeof(struct _vm_map), 40*1024,
626			    PAGE_SIZE, "maps");
627	zone_change(vm_map_zone, Z_NOENCRYPT, TRUE);
628#if	defined(__LP64__)
629	entry_zone_alloc_size = PAGE_SIZE * 5;
630#else
631	entry_zone_alloc_size = PAGE_SIZE * 6;
632#endif
633	vm_map_entry_zone = zinit((vm_map_size_t) sizeof(struct vm_map_entry),
634				  1024*1024, entry_zone_alloc_size,
635				  mez_name);
636	zone_change(vm_map_entry_zone, Z_NOENCRYPT, TRUE);
637	zone_change(vm_map_entry_zone, Z_NOCALLOUT, TRUE);
638	zone_change(vm_map_entry_zone, Z_GZALLOC_EXEMPT, TRUE);
639
640	vm_map_entry_reserved_zone = zinit((vm_map_size_t) sizeof(struct vm_map_entry),
641				   kentry_data_size * 64, kentry_data_size,
642				   "Reserved VM map entries");
643	zone_change(vm_map_entry_reserved_zone, Z_NOENCRYPT, TRUE);
644
645	vm_map_copy_zone = zinit((vm_map_size_t) sizeof(struct vm_map_copy),
646				 16*1024, PAGE_SIZE, "VM map copies");
647	zone_change(vm_map_copy_zone, Z_NOENCRYPT, TRUE);
648
649	/*
650	 *	Cram the map and kentry zones with initial data.
651	 *	Set reserved_zone non-collectible to aid zone_gc().
652	 */
653	zone_change(vm_map_zone, Z_COLLECT, FALSE);
654
655	zone_change(vm_map_entry_reserved_zone, Z_COLLECT, FALSE);
656	zone_change(vm_map_entry_reserved_zone, Z_EXPAND, FALSE);
657	zone_change(vm_map_entry_reserved_zone, Z_FOREIGN, TRUE);
658	zone_change(vm_map_entry_reserved_zone, Z_NOCALLOUT, TRUE);
659	zone_change(vm_map_entry_reserved_zone, Z_CALLERACCT, FALSE); /* don't charge caller */
660	zone_change(vm_map_copy_zone, Z_CALLERACCT, FALSE); /* don't charge caller */
661	zone_change(vm_map_entry_reserved_zone, Z_GZALLOC_EXEMPT, TRUE);
662
663	zcram(vm_map_zone, (vm_offset_t)map_data, map_data_size);
664	zcram(vm_map_entry_reserved_zone, (vm_offset_t)kentry_data, kentry_data_size);
665
666	lck_grp_attr_setdefault(&vm_map_lck_grp_attr);
667	lck_grp_init(&vm_map_lck_grp, "vm_map", &vm_map_lck_grp_attr);
668	lck_attr_setdefault(&vm_map_lck_attr);
669
670#if CONFIG_FREEZE
671	default_freezer_init();
672#endif /* CONFIG_FREEZE */
673}
674
675void
676vm_map_steal_memory(
677	void)
678{
679	uint32_t kentry_initial_pages;
680
681	map_data_size = round_page(10 * sizeof(struct _vm_map));
682	map_data = pmap_steal_memory(map_data_size);
683
684	/*
685	 * kentry_initial_pages corresponds to the number of kernel map entries
686	 * required during bootstrap until the asynchronous replenishment
687	 * scheme is activated and/or entries are available from the general
688	 * map entry pool.
689	 */
690#if	defined(__LP64__)
691	kentry_initial_pages = 10;
692#else
693	kentry_initial_pages = 6;
694#endif
695
696#if CONFIG_GZALLOC
697	/* If using the guard allocator, reserve more memory for the kernel
698	 * reserved map entry pool.
699	*/
700	if (gzalloc_enabled())
701		kentry_initial_pages *= 1024;
702#endif
703
704	kentry_data_size = kentry_initial_pages * PAGE_SIZE;
705	kentry_data = pmap_steal_memory(kentry_data_size);
706}
707
708void vm_kernel_reserved_entry_init(void) {
709	zone_prio_refill_configure(vm_map_entry_reserved_zone, (6*PAGE_SIZE)/sizeof(struct vm_map_entry));
710}
711
712/*
713 *	vm_map_create:
714 *
715 *	Creates and returns a new empty VM map with
716 *	the given physical map structure, and having
717 *	the given lower and upper address bounds.
718 */
719vm_map_t
720vm_map_create(
721	pmap_t			pmap,
722	vm_map_offset_t	min,
723	vm_map_offset_t	max,
724	boolean_t		pageable)
725{
726	static int		color_seed = 0;
727	register vm_map_t	result;
728
729	result = (vm_map_t) zalloc(vm_map_zone);
730	if (result == VM_MAP_NULL)
731		panic("vm_map_create");
732
733	vm_map_first_entry(result) = vm_map_to_entry(result);
734	vm_map_last_entry(result)  = vm_map_to_entry(result);
735	result->hdr.nentries = 0;
736	result->hdr.entries_pageable = pageable;
737
738	vm_map_store_init( &(result->hdr) );
739
740	result->size = 0;
741	result->user_wire_limit = MACH_VM_MAX_ADDRESS;	/* default limit is unlimited */
742	result->user_wire_size  = 0;
743	result->ref_count = 1;
744#if	TASK_SWAPPER
745	result->res_count = 1;
746	result->sw_state = MAP_SW_IN;
747#endif	/* TASK_SWAPPER */
748	result->pmap = pmap;
749	result->min_offset = min;
750	result->max_offset = max;
751	result->wiring_required = FALSE;
752	result->no_zero_fill = FALSE;
753	result->mapped_in_other_pmaps = FALSE;
754	result->wait_for_space = FALSE;
755	result->switch_protect = FALSE;
756	result->disable_vmentry_reuse = FALSE;
757	result->map_disallow_data_exec = FALSE;
758	result->highest_entry_end = 0;
759	result->first_free = vm_map_to_entry(result);
760	result->hint = vm_map_to_entry(result);
761	result->color_rr = (color_seed++) & vm_color_mask;
762 	result->jit_entry_exists = FALSE;
763#if CONFIG_FREEZE
764	result->default_freezer_handle = NULL;
765#endif
766	vm_map_lock_init(result);
767	lck_mtx_init_ext(&result->s_lock, &result->s_lock_ext, &vm_map_lck_grp, &vm_map_lck_attr);
768
769	return(result);
770}
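/*
 * Illustrative sketch (not compiled): creating a pageable submap that
 * covers [min, max).  Callers such as kmem_suballoc() typically pass
 * the parent map's pmap here; the parameter names are placeholders.
 */
#if 0
static vm_map_t
example_create_submap(
	pmap_t		shared_pmap,
	vm_map_offset_t	min,
	vm_map_offset_t	max)
{
	/* new, empty map: no entries yet, entries allocated pageable */
	return vm_map_create(shared_pmap, min, max, TRUE);
}
#endif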
771
772/*
773 *	vm_map_entry_create:	[ internal use only ]
774 *
775 *	Allocates a VM map entry for insertion in the
776 *	given map (or map copy).  No fields are filled.
777 */
778#define	vm_map_entry_create(map, map_locked)	_vm_map_entry_create(&(map)->hdr, map_locked)
779
780#define	vm_map_copy_entry_create(copy, map_locked)					\
781	_vm_map_entry_create(&(copy)->cpy_hdr, map_locked)
782unsigned reserved_zalloc_count, nonreserved_zalloc_count;
783
784static vm_map_entry_t
785_vm_map_entry_create(
786	struct vm_map_header	*map_header, boolean_t __unused map_locked)
787{
788	zone_t	zone;
789	vm_map_entry_t	entry;
790
791	zone = vm_map_entry_zone;
792
793	assert(map_header->entries_pageable ? !map_locked : TRUE);
794
795	if (map_header->entries_pageable) {
796		entry = (vm_map_entry_t) zalloc(zone);
797	}
798	else {
799		entry = (vm_map_entry_t) zalloc_canblock(zone, FALSE);
800
801		if (entry == VM_MAP_ENTRY_NULL) {
802			zone = vm_map_entry_reserved_zone;
803			entry = (vm_map_entry_t) zalloc(zone);
804			OSAddAtomic(1, &reserved_zalloc_count);
805		} else
806			OSAddAtomic(1, &nonreserved_zalloc_count);
807	}
808
809	if (entry == VM_MAP_ENTRY_NULL)
810		panic("vm_map_entry_create");
811	entry->from_reserved_zone = (zone == vm_map_entry_reserved_zone);
812
813	vm_map_store_update( (vm_map_t) NULL, entry, VM_MAP_ENTRY_CREATE);
814#if	MAP_ENTRY_CREATION_DEBUG
815	fastbacktrace(&entry->vme_bt[0], (sizeof(entry->vme_bt)/sizeof(uintptr_t)));
816#endif
817	return(entry);
818}
819
820/*
821 *	vm_map_entry_dispose:	[ internal use only ]
822 *
823 *	Inverse of vm_map_entry_create.
824 *
 *	The write map lock is held, so there is no need to
 *	do anything special to ensure correctness
 *	of the stores.
828 */
829#define	vm_map_entry_dispose(map, entry)			\
830	_vm_map_entry_dispose(&(map)->hdr, (entry))
831
832#define	vm_map_copy_entry_dispose(map, entry) \
833	_vm_map_entry_dispose(&(copy)->cpy_hdr, (entry))
834
835static void
836_vm_map_entry_dispose(
837	register struct vm_map_header	*map_header,
838	register vm_map_entry_t		entry)
839{
840	register zone_t		zone;
841
842	if (map_header->entries_pageable || !(entry->from_reserved_zone))
843		zone = vm_map_entry_zone;
844	else
845		zone = vm_map_entry_reserved_zone;
846
847	if (!map_header->entries_pageable) {
848		if (zone == vm_map_entry_zone)
849			OSAddAtomic(-1, &nonreserved_zalloc_count);
850		else
851			OSAddAtomic(-1, &reserved_zalloc_count);
852	}
853
854	zfree(zone, entry);
855}
856
857#if MACH_ASSERT
858static boolean_t first_free_check = FALSE;
859boolean_t
860first_free_is_valid(
861	vm_map_t	map)
862{
863	if (!first_free_check)
864		return TRUE;
865
866	return( first_free_is_valid_store( map ));
867}
868#endif /* MACH_ASSERT */
869
870
871#define vm_map_copy_entry_link(copy, after_where, entry)		\
872	_vm_map_store_entry_link(&(copy)->cpy_hdr, after_where, (entry))
873
874#define vm_map_copy_entry_unlink(copy, entry)				\
875	_vm_map_store_entry_unlink(&(copy)->cpy_hdr, (entry))
876
877#if	MACH_ASSERT && TASK_SWAPPER
878/*
879 *	vm_map_res_reference:
880 *
881 *	Adds another valid residence count to the given map.
882 *
883 *	Map is locked so this function can be called from
884 *	vm_map_swapin.
885 *
886 */
887void vm_map_res_reference(register vm_map_t map)
888{
889	/* assert map is locked */
890	assert(map->res_count >= 0);
891	assert(map->ref_count >= map->res_count);
892	if (map->res_count == 0) {
893		lck_mtx_unlock(&map->s_lock);
894		vm_map_lock(map);
895		vm_map_swapin(map);
896		lck_mtx_lock(&map->s_lock);
897		++map->res_count;
898		vm_map_unlock(map);
899	} else
900		++map->res_count;
901}
902
903/*
904 *	vm_map_reference_swap:
905 *
906 *	Adds valid reference and residence counts to the given map.
907 *
908 *	The map may not be in memory (i.e. zero residence count).
909 *
910 */
911void vm_map_reference_swap(register vm_map_t map)
912{
913	assert(map != VM_MAP_NULL);
914	lck_mtx_lock(&map->s_lock);
915	assert(map->res_count >= 0);
916	assert(map->ref_count >= map->res_count);
917	map->ref_count++;
918	vm_map_res_reference(map);
919	lck_mtx_unlock(&map->s_lock);
920}
921
922/*
923 *	vm_map_res_deallocate:
924 *
925 *	Decrement residence count on a map; possibly causing swapout.
926 *
927 *	The map must be in memory (i.e. non-zero residence count).
928 *
929 *	The map is locked, so this function is callable from vm_map_deallocate.
930 *
931 */
932void vm_map_res_deallocate(register vm_map_t map)
933{
934	assert(map->res_count > 0);
935	if (--map->res_count == 0) {
936		lck_mtx_unlock(&map->s_lock);
937		vm_map_lock(map);
938		vm_map_swapout(map);
939		vm_map_unlock(map);
940		lck_mtx_lock(&map->s_lock);
941	}
942	assert(map->ref_count >= map->res_count);
943}
944#endif	/* MACH_ASSERT && TASK_SWAPPER */
945
946/*
947 *	vm_map_destroy:
948 *
949 *	Actually destroy a map.
950 */
951void
952vm_map_destroy(
953	vm_map_t	map,
954	int		flags)
955{
956	vm_map_lock(map);
957
958	/* clean up regular map entries */
959	(void) vm_map_delete(map, map->min_offset, map->max_offset,
960			     flags, VM_MAP_NULL);
961	/* clean up leftover special mappings (commpage, etc...) */
962	(void) vm_map_delete(map, 0x0, 0xFFFFFFFFFFFFF000ULL,
963			     flags, VM_MAP_NULL);
964
965#if CONFIG_FREEZE
966	if (map->default_freezer_handle) {
967		default_freezer_handle_deallocate(map->default_freezer_handle);
968		map->default_freezer_handle = NULL;
969	}
970#endif
971	vm_map_unlock(map);
972
973	assert(map->hdr.nentries == 0);
974
975	if(map->pmap)
976		pmap_destroy(map->pmap);
977
978	zfree(vm_map_zone, map);
979}
980
981#if	TASK_SWAPPER
982/*
983 * vm_map_swapin/vm_map_swapout
984 *
985 * Swap a map in and out, either referencing or releasing its resources.
986 * These functions are internal use only; however, they must be exported
987 * because they may be called from macros, which are exported.
988 *
989 * In the case of swapout, there could be races on the residence count,
990 * so if the residence count is up, we return, assuming that a
991 * vm_map_deallocate() call in the near future will bring us back.
992 *
993 * Locking:
994 *	-- We use the map write lock for synchronization among races.
995 *	-- The map write lock, and not the simple s_lock, protects the
996 *	   swap state of the map.
997 *	-- If a map entry is a share map, then we hold both locks, in
998 *	   hierarchical order.
999 *
1000 * Synchronization Notes:
1001 *	1) If a vm_map_swapin() call happens while swapout in progress, it
1002 *	will block on the map lock and proceed when swapout is through.
1003 *	2) A vm_map_reference() call at this time is illegal, and will
1004 *	cause a panic.  vm_map_reference() is only allowed on resident
1005 *	maps, since it refuses to block.
1006 *	3) A vm_map_swapin() call during a swapin will block, and
 *	proceed when the first swapin is done, turning into a nop.
1008 *	This is the reason the res_count is not incremented until
1009 *	after the swapin is complete.
1010 *	4) There is a timing hole after the checks of the res_count, before
1011 *	the map lock is taken, during which a swapin may get the lock
1012 *	before a swapout about to happen.  If this happens, the swapin
1013 *	will detect the state and increment the reference count, causing
1014 *	the swapout to be a nop, thereby delaying it until a later
1015 *	vm_map_deallocate.  If the swapout gets the lock first, then
1016 *	the swapin will simply block until the swapout is done, and
1017 *	then proceed.
1018 *
1019 * Because vm_map_swapin() is potentially an expensive operation, it
1020 * should be used with caution.
1021 *
1022 * Invariants:
1023 *	1) A map with a residence count of zero is either swapped, or
1024 *	   being swapped.
1025 *	2) A map with a non-zero residence count is either resident,
1026 *	   or being swapped in.
1027 */
1028
1029int vm_map_swap_enable = 1;
1030
1031void vm_map_swapin (vm_map_t map)
1032{
1033	register vm_map_entry_t entry;
1034
1035	if (!vm_map_swap_enable)	/* debug */
1036		return;
1037
1038	/*
1039	 * Map is locked
1040	 * First deal with various races.
1041	 */
1042	if (map->sw_state == MAP_SW_IN)
1043		/*
1044		 * we raced with swapout and won.  Returning will incr.
1045		 * the res_count, turning the swapout into a nop.
1046		 */
1047		return;
1048
1049	/*
1050	 * The residence count must be zero.  If we raced with another
1051	 * swapin, the state would have been IN; if we raced with a
1052	 * swapout (after another competing swapin), we must have lost
1053	 * the race to get here (see above comment), in which case
1054	 * res_count is still 0.
1055	 */
1056	assert(map->res_count == 0);
1057
1058	/*
1059	 * There are no intermediate states of a map going out or
1060	 * coming in, since the map is locked during the transition.
1061	 */
1062	assert(map->sw_state == MAP_SW_OUT);
1063
1064	/*
1065	 * We now operate upon each map entry.  If the entry is a sub-
1066	 * or share-map, we call vm_map_res_reference upon it.
1067	 * If the entry is an object, we call vm_object_res_reference
1068	 * (this may iterate through the shadow chain).
1069	 * Note that we hold the map locked the entire time,
1070	 * even if we get back here via a recursive call in
1071	 * vm_map_res_reference.
1072	 */
1073	entry = vm_map_first_entry(map);
1074
1075	while (entry != vm_map_to_entry(map)) {
1076		if (entry->object.vm_object != VM_OBJECT_NULL) {
1077			if (entry->is_sub_map) {
1078				vm_map_t lmap = entry->object.sub_map;
1079				lck_mtx_lock(&lmap->s_lock);
1080				vm_map_res_reference(lmap);
1081				lck_mtx_unlock(&lmap->s_lock);
1082			} else {
1083				vm_object_t object = entry->object.vm_object;
1084				vm_object_lock(object);
1085				/*
1086				 * This call may iterate through the
1087				 * shadow chain.
1088				 */
1089				vm_object_res_reference(object);
1090				vm_object_unlock(object);
1091			}
1092		}
1093		entry = entry->vme_next;
1094	}
1095	assert(map->sw_state == MAP_SW_OUT);
1096	map->sw_state = MAP_SW_IN;
1097}
1098
1099void vm_map_swapout(vm_map_t map)
1100{
1101	register vm_map_entry_t entry;
1102
1103	/*
1104	 * Map is locked
1105	 * First deal with various races.
1106	 * If we raced with a swapin and lost, the residence count
1107	 * will have been incremented to 1, and we simply return.
1108	 */
1109	lck_mtx_lock(&map->s_lock);
1110	if (map->res_count != 0) {
1111		lck_mtx_unlock(&map->s_lock);
1112		return;
1113	}
1114	lck_mtx_unlock(&map->s_lock);
1115
1116	/*
1117	 * There are no intermediate states of a map going out or
1118	 * coming in, since the map is locked during the transition.
1119	 */
1120	assert(map->sw_state == MAP_SW_IN);
1121
1122	if (!vm_map_swap_enable)
1123		return;
1124
1125	/*
1126	 * We now operate upon each map entry.  If the entry is a sub-
1127	 * or share-map, we call vm_map_res_deallocate upon it.
1128	 * If the entry is an object, we call vm_object_res_deallocate
1129	 * (this may iterate through the shadow chain).
1130	 * Note that we hold the map locked the entire time,
1131	 * even if we get back here via a recursive call in
1132	 * vm_map_res_deallocate.
1133	 */
1134	entry = vm_map_first_entry(map);
1135
1136	while (entry != vm_map_to_entry(map)) {
1137		if (entry->object.vm_object != VM_OBJECT_NULL) {
1138			if (entry->is_sub_map) {
1139				vm_map_t lmap = entry->object.sub_map;
1140				lck_mtx_lock(&lmap->s_lock);
1141				vm_map_res_deallocate(lmap);
1142				lck_mtx_unlock(&lmap->s_lock);
1143			} else {
1144				vm_object_t object = entry->object.vm_object;
1145				vm_object_lock(object);
1146				/*
1147				 * This call may take a long time,
1148				 * since it could actively push
1149				 * out pages (if we implement it
1150				 * that way).
1151				 */
1152				vm_object_res_deallocate(object);
1153				vm_object_unlock(object);
1154			}
1155		}
1156		entry = entry->vme_next;
1157	}
1158	assert(map->sw_state == MAP_SW_IN);
1159	map->sw_state = MAP_SW_OUT;
1160}
1161
1162#endif	/* TASK_SWAPPER */
1163
1164/*
1165 *	vm_map_lookup_entry:	[ internal use only ]
1166 *
1167 *	Calls into the vm map store layer to find the map
1168 *	entry containing (or immediately preceding) the
1169 *	specified address in the given map; the entry is returned
1170 *	in the "entry" parameter.  The boolean
1171 *	result indicates whether the address is
1172 *	actually contained in the map.
1173 */
1174boolean_t
1175vm_map_lookup_entry(
1176	register vm_map_t		map,
1177	register vm_map_offset_t	address,
1178	vm_map_entry_t		*entry)		/* OUT */
1179{
1180	return ( vm_map_store_lookup_entry( map, address, entry ));
1181}
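/*
 * Illustrative sketch (not compiled): the usual lookup pattern.  The
 * caller must hold the map lock (read or write) across the lookup and
 * across any use of the returned entry, as vm_map_set_cache_attr()
 * does above.  The helper name is a placeholder.
 */
#if 0
static boolean_t
example_address_is_mapped(
	vm_map_t	map,
	vm_map_offset_t	addr)
{
	vm_map_entry_t	entry;
	boolean_t	mapped;

	vm_map_lock_read(map);
	mapped = vm_map_lookup_entry(map, addr, &entry);
	/* "entry" must not be used once the lock is dropped */
	vm_map_unlock_read(map);

	return mapped;
}
#endif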
1182
1183/*
1184 *	Routine:	vm_map_find_space
1185 *	Purpose:
1186 *		Allocate a range in the specified virtual address map,
1187 *		returning the entry allocated for that range.
1188 *		Used by kmem_alloc, etc.
1189 *
 *		The map must NOT be locked. It will be returned locked
1191 *		on KERN_SUCCESS, unlocked on failure.
1192 *
1193 *		If an entry is allocated, the object/offset fields
1194 *		are initialized to zero.
1195 */
1196kern_return_t
1197vm_map_find_space(
1198	register vm_map_t	map,
1199	vm_map_offset_t		*address,	/* OUT */
1200	vm_map_size_t		size,
1201	vm_map_offset_t		mask,
1202	int			flags,
1203	vm_map_entry_t		*o_entry)	/* OUT */
1204{
1205	register vm_map_entry_t	entry, new_entry;
1206	register vm_map_offset_t	start;
1207	register vm_map_offset_t	end;
1208
1209	if (size == 0) {
1210		*address = 0;
1211		return KERN_INVALID_ARGUMENT;
1212	}
1213
1214	if (flags & VM_FLAGS_GUARD_AFTER) {
1215		/* account for the back guard page in the size */
1216		size += PAGE_SIZE_64;
1217	}
1218
1219	new_entry = vm_map_entry_create(map, FALSE);
1220
1221	/*
1222	 *	Look for the first possible address; if there's already
1223	 *	something at this address, we have to start after it.
1224	 */
1225
1226	vm_map_lock(map);
1227
1228	if( map->disable_vmentry_reuse == TRUE) {
1229		VM_MAP_HIGHEST_ENTRY(map, entry, start);
1230	} else {
1231		assert(first_free_is_valid(map));
1232		if ((entry = map->first_free) == vm_map_to_entry(map))
1233			start = map->min_offset;
1234		else
1235			start = entry->vme_end;
1236	}
1237
1238	/*
1239	 *	In any case, the "entry" always precedes
1240	 *	the proposed new region throughout the loop:
1241	 */
1242
1243	while (TRUE) {
1244		register vm_map_entry_t	next;
1245
1246		/*
1247		 *	Find the end of the proposed new region.
1248		 *	Be sure we didn't go beyond the end, or
1249		 *	wrap around the address.
1250		 */
1251
1252		if (flags & VM_FLAGS_GUARD_BEFORE) {
1253			/* reserve space for the front guard page */
1254			start += PAGE_SIZE_64;
1255		}
1256		end = ((start + mask) & ~mask);
1257
1258		if (end < start) {
1259			vm_map_entry_dispose(map, new_entry);
1260			vm_map_unlock(map);
1261			return(KERN_NO_SPACE);
1262		}
1263		start = end;
1264		end += size;
1265
1266		if ((end > map->max_offset) || (end < start)) {
1267			vm_map_entry_dispose(map, new_entry);
1268			vm_map_unlock(map);
1269			return(KERN_NO_SPACE);
1270		}
1271
1272		/*
1273		 *	If there are no more entries, we must win.
1274		 */
1275
1276		next = entry->vme_next;
1277		if (next == vm_map_to_entry(map))
1278			break;
1279
1280		/*
1281		 *	If there is another entry, it must be
1282		 *	after the end of the potential new region.
1283		 */
1284
1285		if (next->vme_start >= end)
1286			break;
1287
1288		/*
1289		 *	Didn't fit -- move to the next entry.
1290		 */
1291
1292		entry = next;
1293		start = entry->vme_end;
1294	}
1295
1296	/*
1297	 *	At this point,
1298	 *		"start" and "end" should define the endpoints of the
1299	 *			available new range, and
1300	 *		"entry" should refer to the region before the new
1301	 *			range, and
1302	 *
1303	 *		the map should be locked.
1304	 */
1305
1306	if (flags & VM_FLAGS_GUARD_BEFORE) {
1307		/* go back for the front guard page */
1308		start -= PAGE_SIZE_64;
1309	}
1310	*address = start;
1311
1312	assert(start < end);
1313	new_entry->vme_start = start;
1314	new_entry->vme_end = end;
1315	assert(page_aligned(new_entry->vme_start));
1316	assert(page_aligned(new_entry->vme_end));
1317
1318	new_entry->is_shared = FALSE;
1319	new_entry->is_sub_map = FALSE;
1320	new_entry->use_pmap = FALSE;
1321	new_entry->object.vm_object = VM_OBJECT_NULL;
1322	new_entry->offset = (vm_object_offset_t) 0;
1323
1324	new_entry->needs_copy = FALSE;
1325
1326	new_entry->inheritance = VM_INHERIT_DEFAULT;
1327	new_entry->protection = VM_PROT_DEFAULT;
1328	new_entry->max_protection = VM_PROT_ALL;
1329	new_entry->behavior = VM_BEHAVIOR_DEFAULT;
1330	new_entry->wired_count = 0;
1331	new_entry->user_wired_count = 0;
1332
1333	new_entry->in_transition = FALSE;
1334	new_entry->needs_wakeup = FALSE;
1335	new_entry->no_cache = FALSE;
1336	new_entry->permanent = FALSE;
1337	new_entry->superpage_size = 0;
1338
1339	new_entry->used_for_jit = 0;
1340
1341	new_entry->alias = 0;
1342	new_entry->zero_wired_pages = FALSE;
1343
1344	VM_GET_FLAGS_ALIAS(flags, new_entry->alias);
1345
1346	/*
1347	 *	Insert the new entry into the list
1348	 */
1349
1350	vm_map_store_entry_link(map, entry, new_entry);
1351
1352	map->size += size;
1353
1354	/*
1355	 *	Update the lookup hint
1356	 */
1357	SAVE_HINT_MAP_WRITE(map, new_entry);
1358
1359	*o_entry = new_entry;
1360	return(KERN_SUCCESS);
1361}
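/*
 * Illustrative sketch (not compiled): the kmem_alloc()-style calling
 * pattern for vm_map_find_space().  On KERN_SUCCESS the map comes back
 * locked with a fresh, object-less entry; the caller installs the
 * backing object and then unlocks.  Error handling and the object
 * setup are simplified assumptions here.
 */
#if 0
static kern_return_t
example_reserve_range(
	vm_map_t	map,
	vm_map_size_t	size,
	vm_map_offset_t	*addrp)		/* OUT */
{
	vm_map_entry_t	entry;
	kern_return_t	kr;

	kr = vm_map_find_space(map, addrp, size,
			       (vm_map_offset_t) 0, 0, &entry);
	if (kr != KERN_SUCCESS)
		return kr;			/* map left unlocked on failure */

	/* attach backing memory before anything can fault on the range */
	entry->object.vm_object = vm_object_allocate(size);
	entry->offset = (vm_object_offset_t) 0;

	vm_map_unlock(map);			/* map was returned locked */
	return KERN_SUCCESS;
}
#endif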
1362
int vm_map_pmap_enter_print = FALSE;
int vm_map_pmap_enter_enable = FALSE;
1365
1366/*
1367 *	Routine:	vm_map_pmap_enter [internal only]
1368 *
1369 *	Description:
1370 *		Force pages from the specified object to be entered into
1371 *		the pmap at the specified address if they are present.
 *		As soon as a page is not found in the object, the scan ends.
1373 *
1374 *	Returns:
1375 *		Nothing.
1376 *
1377 *	In/out conditions:
1378 *		The source map should not be locked on entry.
1379 */
1380static void
1381vm_map_pmap_enter(
1382	vm_map_t		map,
1383	register vm_map_offset_t 	addr,
1384	register vm_map_offset_t	end_addr,
1385	register vm_object_t 	object,
1386	vm_object_offset_t	offset,
1387	vm_prot_t		protection)
1388{
1389	int			type_of_fault;
1390	kern_return_t		kr;
1391
1392	if(map->pmap == 0)
1393		return;
1394
1395	while (addr < end_addr) {
1396		register vm_page_t	m;
1397
1398		vm_object_lock(object);
1399
1400		m = vm_page_lookup(object, offset);
1401		/*
1402		 * ENCRYPTED SWAP:
1403		 * The user should never see encrypted data, so do not
1404		 * enter an encrypted page in the page table.
1405		 */
1406		if (m == VM_PAGE_NULL || m->busy || m->encrypted ||
1407		    m->fictitious ||
1408		    (m->unusual && ( m->error || m->restart || m->absent))) {
1409			vm_object_unlock(object);
1410			return;
1411		}
1412
		if (vm_map_pmap_enter_print) {
			kprintf("vm_map_pmap_enter:");
			kprintf("map: %p, addr: %llx, object: %p, offset: %llx\n",
			       map, (unsigned long long)addr, object, (unsigned long long)offset);
		}
1418		type_of_fault = DBG_CACHE_HIT_FAULT;
1419		kr = vm_fault_enter(m, map->pmap, addr, protection, protection,
1420				    VM_PAGE_WIRED(m), FALSE, FALSE, FALSE, NULL,
1421				    &type_of_fault);
1422
1423		vm_object_unlock(object);
1424
1425		offset += PAGE_SIZE_64;
1426		addr += PAGE_SIZE;
1427	}
1428}
1429
1430boolean_t vm_map_pmap_is_empty(
1431	vm_map_t	map,
1432	vm_map_offset_t	start,
1433	vm_map_offset_t end);
1434boolean_t vm_map_pmap_is_empty(
1435	vm_map_t	map,
1436	vm_map_offset_t	start,
1437	vm_map_offset_t	end)
1438{
1439#ifdef MACHINE_PMAP_IS_EMPTY
1440	return pmap_is_empty(map->pmap, start, end);
1441#else 	/* MACHINE_PMAP_IS_EMPTY */
1442	vm_map_offset_t	offset;
1443	ppnum_t		phys_page;
1444
1445	if (map->pmap == NULL) {
1446		return TRUE;
1447	}
1448
1449	for (offset = start;
1450	     offset < end;
1451	     offset += PAGE_SIZE) {
1452		phys_page = pmap_find_phys(map->pmap, offset);
1453		if (phys_page) {
1454			kprintf("vm_map_pmap_is_empty(%p,0x%llx,0x%llx): "
1455				"page %d at 0x%llx\n",
1456				map, (long long)start, (long long)end,
1457				phys_page, (long long)offset);
1458			return FALSE;
1459		}
1460	}
1461	return TRUE;
1462#endif	/* MACHINE_PMAP_IS_EMPTY */
1463}
1464
1465#define MAX_TRIES_TO_GET_RANDOM_ADDRESS	1000
1466kern_return_t
1467vm_map_random_address_for_size(
1468	vm_map_t	map,
1469	vm_map_offset_t	*address,
1470	vm_map_size_t	size)
1471{
1472	kern_return_t	kr = KERN_SUCCESS;
1473	int		tries = 0;
1474	vm_map_offset_t	random_addr = 0;
1475	vm_map_offset_t hole_end;
1476
1477	vm_map_entry_t	next_entry = VM_MAP_ENTRY_NULL;
1478	vm_map_entry_t	prev_entry = VM_MAP_ENTRY_NULL;
1479	vm_map_size_t	vm_hole_size = 0;
1480	vm_map_size_t	addr_space_size;
1481
1482	addr_space_size = vm_map_max(map) - vm_map_min(map);
1483
1484	assert(page_aligned(size));
1485
1486	while (tries < MAX_TRIES_TO_GET_RANDOM_ADDRESS) {
1487		random_addr = ((vm_map_offset_t)random()) << PAGE_SHIFT;
1488		random_addr = trunc_page(vm_map_min(map) +
1489					 (random_addr % addr_space_size));
1490
1491		if (vm_map_lookup_entry(map, random_addr, &prev_entry) == FALSE) {
1492			if (prev_entry == vm_map_to_entry(map)) {
1493				next_entry = vm_map_first_entry(map);
1494			} else {
1495				next_entry = prev_entry->vme_next;
1496			}
1497			if (next_entry == vm_map_to_entry(map)) {
1498				hole_end = vm_map_max(map);
1499			} else {
1500				hole_end = next_entry->vme_start;
1501			}
1502			vm_hole_size = hole_end - random_addr;
1503			if (vm_hole_size >= size) {
1504				*address = random_addr;
1505				break;
1506			}
1507		}
1508		tries++;
1509	}
1510
1511	if (tries == MAX_TRIES_TO_GET_RANDOM_ADDRESS) {
1512		kr = KERN_NO_SPACE;
1513	}
1514	return kr;
1515}
1516
1517/*
1518 *	Routine:	vm_map_enter
1519 *
1520 *	Description:
1521 *		Allocate a range in the specified virtual address map.
1522 *		The resulting range will refer to memory defined by
1523 *		the given memory object and offset into that object.
1524 *
1525 *		Arguments are as defined in the vm_map call.
1526 */
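/*
 * Illustrative sketch (not compiled): an anonymous, zero-fill,
 * "anywhere" allocation through vm_map_enter(), comparable to what a
 * mach_vm_allocate() request reaches after flag translation.  The
 * protections and helper name are placeholder assumptions.
 */
#if 0
static kern_return_t
example_allocate_anywhere(
	vm_map_t	map,
	vm_map_size_t	size,
	vm_map_offset_t	*addrp)		/* OUT */
{
	*addrp = 0;
	return vm_map_enter(map,
			    addrp,			/* IN/OUT: chosen address */
			    vm_map_round_page(size),
			    (vm_map_offset_t) 0,	/* no alignment mask */
			    VM_FLAGS_ANYWHERE,
			    VM_OBJECT_NULL,		/* zero-fill, allocated lazily */
			    (vm_object_offset_t) 0,
			    FALSE,			/* needs_copy */
			    VM_PROT_DEFAULT,
			    VM_PROT_ALL,
			    VM_INHERIT_DEFAULT);
}
#endif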
1527int _map_enter_debug = 0;
1528static unsigned int vm_map_enter_restore_successes = 0;
1529static unsigned int vm_map_enter_restore_failures = 0;
1530kern_return_t
1531vm_map_enter(
1532	vm_map_t		map,
1533	vm_map_offset_t		*address,	/* IN/OUT */
1534	vm_map_size_t		size,
1535	vm_map_offset_t		mask,
1536	int			flags,
1537	vm_object_t		object,
1538	vm_object_offset_t	offset,
1539	boolean_t		needs_copy,
1540	vm_prot_t		cur_protection,
1541	vm_prot_t		max_protection,
1542	vm_inherit_t		inheritance)
1543{
1544	vm_map_entry_t		entry, new_entry;
1545	vm_map_offset_t		start, tmp_start, tmp_offset;
1546	vm_map_offset_t		end, tmp_end;
1547	vm_map_offset_t		tmp2_start, tmp2_end;
1548	vm_map_offset_t		step;
1549	kern_return_t		result = KERN_SUCCESS;
1550	vm_map_t		zap_old_map = VM_MAP_NULL;
1551	vm_map_t		zap_new_map = VM_MAP_NULL;
1552	boolean_t		map_locked = FALSE;
1553	boolean_t		pmap_empty = TRUE;
1554	boolean_t		new_mapping_established = FALSE;
1555	boolean_t		anywhere = ((flags & VM_FLAGS_ANYWHERE) != 0);
1556	boolean_t		purgable = ((flags & VM_FLAGS_PURGABLE) != 0);
1557	boolean_t		overwrite = ((flags & VM_FLAGS_OVERWRITE) != 0);
1558	boolean_t		no_cache = ((flags & VM_FLAGS_NO_CACHE) != 0);
1559	boolean_t		is_submap = ((flags & VM_FLAGS_SUBMAP) != 0);
1560	boolean_t		permanent = ((flags & VM_FLAGS_PERMANENT) != 0);
1561	boolean_t		entry_for_jit = ((flags & VM_FLAGS_MAP_JIT) != 0);
1562	unsigned int		superpage_size = ((flags & VM_FLAGS_SUPERPAGE_MASK) >> VM_FLAGS_SUPERPAGE_SHIFT);
1563	char			alias;
1564	vm_map_offset_t		effective_min_offset, effective_max_offset;
1565	kern_return_t		kr;
1566
1567#if 0
1568    kprintf("vm_map_enter: pmap -> <0x%08x>, 0x%08x (%08x, %08x)\n", map, map->pmap, *address, size);
1569#endif
1570
1571	if (superpage_size) {
1572		switch (superpage_size) {
1573			/*
1574			 * Note that the current implementation only supports
1575			 * a single size for superpages, SUPERPAGE_SIZE, per
			 * architecture.  As soon as more sizes are to be
			 * supported, SUPERPAGE_SIZE has to be replaced with a
			 * lookup of the size depending on superpage_size.
1579			 */
1580#ifdef __x86_64__
1581			case SUPERPAGE_SIZE_ANY:
1582				/* handle it like 2 MB and round up to page size */
1583				size = (size + 2*1024*1024 - 1) & ~(2*1024*1024 - 1);
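				/* FALLTHROUGH: treated like 2 MB from here on */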
1584			case SUPERPAGE_SIZE_2MB:
1585				break;
1586#endif
1587			default:
1588				return KERN_INVALID_ARGUMENT;
1589		}
1590		mask = SUPERPAGE_SIZE-1;
1591		if (size & (SUPERPAGE_SIZE-1))
1592			return KERN_INVALID_ARGUMENT;
1593		inheritance = VM_INHERIT_NONE;	/* fork() children won't inherit superpages */
1594	}
1595
1596
1597#if CONFIG_EMBEDDED
1598	if (cur_protection & VM_PROT_WRITE){
1599		if ((cur_protection & VM_PROT_EXECUTE) && !entry_for_jit){
1600			printf("EMBEDDED: %s curprot cannot be write+execute. turning off execute\n", __PRETTY_FUNCTION__);
1601			cur_protection &= ~VM_PROT_EXECUTE;
1602		}
1603	}
1604#endif /* CONFIG_EMBEDDED */
1605
1606	if (is_submap) {
1607		if (purgable) {
1608			/* submaps can not be purgeable */
1609			return KERN_INVALID_ARGUMENT;
1610		}
1611		if (object == VM_OBJECT_NULL) {
1612			/* submaps can not be created lazily */
1613			return KERN_INVALID_ARGUMENT;
1614		}
1615	}
1616	if (flags & VM_FLAGS_ALREADY) {
1617		/*
1618		 * VM_FLAGS_ALREADY says that it's OK if the same mapping
		 * is already present.  For it to be meaningful, the requested
		 * mapping has to be at a fixed address (!VM_FLAGS_ANYWHERE) and
		 * we shouldn't try to remove what was mapped there first
1622		 * (!VM_FLAGS_OVERWRITE).
1623		 */
1624		if ((flags & VM_FLAGS_ANYWHERE) ||
1625		    (flags & VM_FLAGS_OVERWRITE)) {
1626			return KERN_INVALID_ARGUMENT;
1627		}
1628	}
1629
1630	effective_min_offset = map->min_offset;
1631
1632	if (flags & VM_FLAGS_BEYOND_MAX) {
1633		/*
1634		 * Allow an insertion beyond the map's max offset.
1635		 */
1636		if (vm_map_is_64bit(map))
1637			effective_max_offset = 0xFFFFFFFFFFFFF000ULL;
1638		else
1639			effective_max_offset = 0x00000000FFFFF000ULL;
1640	} else {
1641		effective_max_offset = map->max_offset;
1642	}
1643
1644	if (size == 0 ||
1645	    (offset & PAGE_MASK_64) != 0) {
1646		*address = 0;
1647		return KERN_INVALID_ARGUMENT;
1648	}
1649
1650	VM_GET_FLAGS_ALIAS(flags, alias);
1651
1652#define	RETURN(value)	{ result = value; goto BailOut; }
1653
1654	assert(page_aligned(*address));
1655	assert(page_aligned(size));
1656
1657	/*
1658	 * Only zero-fill objects are allowed to be purgable.
1659	 * LP64todo - limit purgable objects to 32-bits for now
1660	 */
1661	if (purgable &&
1662	    (offset != 0 ||
1663	     (object != VM_OBJECT_NULL &&
1664	      (object->vo_size != size ||
1665	       object->purgable == VM_PURGABLE_DENY))
1666	     || size > ANON_MAX_SIZE)) /* LP64todo: remove when dp capable */
1667		return KERN_INVALID_ARGUMENT;
1668
1669	if (!anywhere && overwrite) {
1670		/*
1671		 * Create a temporary VM map to hold the old mappings in the
1672		 * affected area while we create the new one.
1673		 * This avoids releasing the VM map lock in
1674		 * vm_map_entry_delete() and allows atomicity
1675		 * when we want to replace some mappings with a new one.
1676		 * It also allows us to restore the old VM mappings if the
1677		 * new mapping fails.
1678		 */
1679		zap_old_map = vm_map_create(PMAP_NULL,
1680					    *address,
1681					    *address + size,
1682					    map->hdr.entries_pageable);
1683	}
1684
1685StartAgain: ;
1686
1687	start = *address;
1688
1689	if (anywhere) {
1690		vm_map_lock(map);
1691		map_locked = TRUE;
1692
1693		if (entry_for_jit) {
1694			if (map->jit_entry_exists) {
1695				result = KERN_INVALID_ARGUMENT;
1696				goto BailOut;
1697			}
1698			/*
1699			 * Get a random start address.
1700			 */
1701			result = vm_map_random_address_for_size(map, address, size);
1702			if (result != KERN_SUCCESS) {
1703				goto BailOut;
1704			}
1705			start = *address;
1706		}
1707
1708
1709		/*
1710		 *	Calculate the first possible address.
1711		 */
1712		if (start < effective_min_offset)
1713			start = effective_min_offset;
1714		if (start > effective_max_offset)
1715			RETURN(KERN_NO_SPACE);
1716
1717		/*
1718		 *	Look for the first possible address;
1719		 *	if there's already something at this
1720		 *	address, we have to start after it.
1721		 */
1722
1723		if( map->disable_vmentry_reuse == TRUE) {
1724			VM_MAP_HIGHEST_ENTRY(map, entry, start);
1725		} else {
1726			assert(first_free_is_valid(map));
1727
1728			entry = map->first_free;
1729
1730			if (entry == vm_map_to_entry(map)) {
1731				entry = NULL;
1732			} else {
1733			       if (entry->vme_next == vm_map_to_entry(map)){
1734				       /*
1735					* Hole at the end of the map.
1736					*/
1737					entry = NULL;
1738			       } else {
1739					if (start < (entry->vme_next)->vme_start ) {
1740						start = entry->vme_end;
1741					} else {
1742						/*
1743						 * Need to do a lookup.
1744						 */
1745						entry = NULL;
1746					}
1747			       }
1748			}
1749
1750			if (entry == NULL) {
1751				vm_map_entry_t	tmp_entry;
1752				if (vm_map_lookup_entry(map, start, &tmp_entry)) {
1753					assert(!entry_for_jit);
1754					start = tmp_entry->vme_end;
1755				}
1756				entry = tmp_entry;
1757			}
1758		}
1759
1760		/*
1761		 *	In any case, the "entry" always precedes
1762		 *	the proposed new region throughout the
1763		 *	loop:
1764		 */
1765
1766		while (TRUE) {
1767			register vm_map_entry_t	next;
1768
1769			/*
1770			 *	Find the end of the proposed new region.
1771			 *	Be sure we didn't go beyond the end, or
1772			 *	wrap around the address.
1773			 */
1774
1775			end = ((start + mask) & ~mask);
1776			if (end < start)
1777				RETURN(KERN_NO_SPACE);
1778			start = end;
1779			end += size;
1780
1781
1782
1783			if ((end > effective_max_offset) || (end < start)) {
1784				if (map->wait_for_space) {
1785					if (size <= (effective_max_offset -
1786						     effective_min_offset)) {
1787						assert_wait((event_t)map,
1788							    THREAD_ABORTSAFE);
1789						vm_map_unlock(map);
1790						map_locked = FALSE;
1791						thread_block(THREAD_CONTINUE_NULL);
1792						goto StartAgain;
1793					}
1794				}
				RETURN(KERN_NO_SPACE);
1797			}
1798
1799			/*
1800			 *	If there are no more entries, we must win.
1801			 */
1802
1803			next = entry->vme_next;
1804			if (next == vm_map_to_entry(map))
1805				break;
1806
1807			/*
1808			 *	If there is another entry, it must be
1809			 *	after the end of the potential new region.
1810			 */
1811
1812			if (next->vme_start >= end)
1813				break;
1814
1815			/*
1816			 *	Didn't fit -- move to the next entry.
1817			 */
1818
1819			entry = next;
1820			start = entry->vme_end;
1821		}
1822		*address = start;
1823	} else {
1824		/*
1825		 *	Verify that:
1826		 *		the address doesn't itself violate
1827		 *		the mask requirement.
1828		 */
1829
1830		vm_map_lock(map);
1831		map_locked = TRUE;
1832		if ((start & mask) != 0)
1833			RETURN(KERN_NO_SPACE);
1834
1835		/*
1836		 *	...	the address is within bounds
1837		 */
1838
1839		end = start + size;
1840
1841		if ((start < effective_min_offset) ||
1842		    (end > effective_max_offset) ||
1843		    (start >= end)) {
1844			RETURN(KERN_INVALID_ADDRESS);
1845		}
1846
1847		if (overwrite && zap_old_map != VM_MAP_NULL) {
1848			/*
1849			 * Fixed mapping and "overwrite" flag: attempt to
1850			 * remove all existing mappings in the specified
1851			 * address range, saving them in our "zap_old_map".
1852			 */
1853			(void) vm_map_delete(map, start, end,
1854					     VM_MAP_REMOVE_SAVE_ENTRIES,
1855					     zap_old_map);
1856		}
1857
1858		/*
1859		 *	...	the starting address isn't allocated
1860		 */
1861
1862		if (vm_map_lookup_entry(map, start, &entry)) {
1863			if (! (flags & VM_FLAGS_ALREADY)) {
1864				RETURN(KERN_NO_SPACE);
1865			}
1866			/*
1867			 * Check if what's already there is what we want.
1868			 */
1869			tmp_start = start;
1870			tmp_offset = offset;
1871			if (entry->vme_start < start) {
1872				tmp_start -= start - entry->vme_start;
1873				tmp_offset -= start - entry->vme_start;
1874
1875			}
1876			for (; entry->vme_start < end;
1877			     entry = entry->vme_next) {
1878				/*
1879				 * Check if the mapping's attributes
1880				 * match the existing map entry.
1881				 */
1882				if (entry == vm_map_to_entry(map) ||
1883				    entry->vme_start != tmp_start ||
1884				    entry->is_sub_map != is_submap ||
1885				    entry->offset != tmp_offset ||
1886				    entry->needs_copy != needs_copy ||
1887				    entry->protection != cur_protection ||
1888				    entry->max_protection != max_protection ||
1889				    entry->inheritance != inheritance ||
1890				    entry->alias != alias) {
1891					/* not the same mapping ! */
1892					RETURN(KERN_NO_SPACE);
1893				}
1894				/*
1895				 * Check if the same object is being mapped.
1896				 */
1897				if (is_submap) {
1898					if (entry->object.sub_map !=
1899					    (vm_map_t) object) {
1900						/* not the same submap */
1901						RETURN(KERN_NO_SPACE);
1902					}
1903				} else {
1904					if (entry->object.vm_object != object) {
1905						/* not the same VM object... */
1906						vm_object_t obj2;
1907
1908						obj2 = entry->object.vm_object;
1909						if ((obj2 == VM_OBJECT_NULL ||
1910						     obj2->internal) &&
1911						    (object == VM_OBJECT_NULL ||
1912						     object->internal)) {
1913							/*
1914							 * ... but both are
1915							 * anonymous memory,
1916							 * so equivalent.
1917							 */
1918						} else {
1919							RETURN(KERN_NO_SPACE);
1920						}
1921					}
1922				}
1923
1924				tmp_offset += entry->vme_end - entry->vme_start;
1925				tmp_start += entry->vme_end - entry->vme_start;
1926				if (entry->vme_end >= end) {
1927					/* reached the end of our mapping */
1928					break;
1929				}
1930			}
1931			/* it all matches:  let's use what's already there ! */
1932			RETURN(KERN_MEMORY_PRESENT);
1933		}
1934
1935		/*
1936		 *	...	the next region doesn't overlap the
1937		 *		end point.
1938		 */
1939
1940		if ((entry->vme_next != vm_map_to_entry(map)) &&
1941		    (entry->vme_next->vme_start < end))
1942			RETURN(KERN_NO_SPACE);
1943	}
1944
1945	/*
1946	 *	At this point,
1947	 *		"start" and "end" should define the endpoints of the
1948	 *			available new range, and
1949	 *		"entry" should refer to the region before the new
1950	 *			range, and
1951	 *
1952	 *		the map should be locked.
1953	 */
1954
1955	/*
1956	 *	See whether we can avoid creating a new entry (and object) by
1957	 *	extending one of our neighbors.  [So far, we only attempt to
1958	 *	extend from below.]  Note that we can never extend/join
1959	 *	purgable objects because they need to remain distinct
1960	 *	entities in order to implement their "volatile object"
1961	 *	semantics.
1962	 */
1963
1964	if (purgable || entry_for_jit) {
1965		if (object == VM_OBJECT_NULL) {
1966			object = vm_object_allocate(size);
1967			object->copy_strategy = MEMORY_OBJECT_COPY_NONE;
1968			if (purgable) {
1969				object->purgable = VM_PURGABLE_NONVOLATILE;
1970			}
1971			offset = (vm_object_offset_t)0;
1972		}
1973	} else if ((is_submap == FALSE) &&
1974		   (object == VM_OBJECT_NULL) &&
1975		   (entry != vm_map_to_entry(map)) &&
1976		   (entry->vme_end == start) &&
1977		   (!entry->is_shared) &&
1978		   (!entry->is_sub_map) &&
1979		   ((alias == VM_MEMORY_REALLOC) || (entry->alias == alias)) &&
1980		   (entry->inheritance == inheritance) &&
1981		   (entry->protection == cur_protection) &&
1982		   (entry->max_protection == max_protection) &&
1983		   (entry->behavior == VM_BEHAVIOR_DEFAULT) &&
1984		   (entry->in_transition == 0) &&
1985		   (entry->no_cache == no_cache) &&
1986		   ((entry->vme_end - entry->vme_start) + size <=
1987		    (alias == VM_MEMORY_REALLOC ?
1988		     ANON_CHUNK_SIZE :
1989		     NO_COALESCE_LIMIT)) &&
1990		   (entry->wired_count == 0)) { /* implies user_wired_count == 0 */
1991		if (vm_object_coalesce(entry->object.vm_object,
1992				       VM_OBJECT_NULL,
1993				       entry->offset,
1994				       (vm_object_offset_t) 0,
1995				       (vm_map_size_t)(entry->vme_end - entry->vme_start),
1996				       (vm_map_size_t)(end - entry->vme_end))) {
1997			/*
1998			 *	Coalesced the two objects - can extend
1999			 *	the previous map entry to include the
2000			 *	new range.
2001			 */
2002			map->size += (end - entry->vme_end);
2003			assert(entry->vme_start < end);
2004			entry->vme_end = end;
2005			vm_map_store_update_first_free(map, map->first_free);
2006			RETURN(KERN_SUCCESS);
2007		}
2008	}
2009
2010	step = superpage_size ? SUPERPAGE_SIZE : (end - start);
2011	new_entry = NULL;
2012
2013	for (tmp2_start = start; tmp2_start<end; tmp2_start += step) {
2014		tmp2_end = tmp2_start + step;
2015		/*
2016		 *	Create a new entry
2017		 *	LP64todo - for now, we can only allocate 4GB internal objects
2018		 *	because the default pager can't page bigger ones.  Remove this
2019		 *	when it can.
2020		 *
2021		 * XXX FBDP
2022		 * The reserved "page zero" in each process's address space can
2023		 * be arbitrarily large.  Splitting it into separate 4GB objects and
2024		 * therefore different VM map entries serves no purpose and just
2025		 * slows down operations on the VM map, so let's not split the
2026		 * allocation into 4GB chunks if the max protection is NONE.  That
2027		 * memory should never be accessible, so it will never get to the
2028		 * default pager.
2029		 */
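		/*
		 * Illustrative example (not normative, assuming ANON_CHUNK_SIZE
		 * is the 4GB limit the comment above refers to): a 12GB
		 * anonymous mapping with a max protection other than
		 * VM_PROT_NONE would be carved by the loop below into three
		 * map entries, each eventually backed by its own internal
		 * object, while the same request with max_protection ==
		 * VM_PROT_NONE stays a single entry.
		 */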
2030		tmp_start = tmp2_start;
2031		if (object == VM_OBJECT_NULL &&
2032		    size > (vm_map_size_t)ANON_CHUNK_SIZE &&
2033		    max_protection != VM_PROT_NONE &&
2034		    superpage_size == 0)
2035			tmp_end = tmp_start + (vm_map_size_t)ANON_CHUNK_SIZE;
2036		else
2037			tmp_end = tmp2_end;
2038		do {
2039			new_entry = vm_map_entry_insert(map, entry, tmp_start, tmp_end,
2040							object,	offset, needs_copy,
2041							FALSE, FALSE,
2042							cur_protection, max_protection,
2043							VM_BEHAVIOR_DEFAULT,
2044							(entry_for_jit)? VM_INHERIT_NONE: inheritance,
2045							0, no_cache,
2046							permanent, superpage_size);
2047			new_entry->alias = alias;
2048			if (entry_for_jit){
2049				if (!(map->jit_entry_exists)){
2050					new_entry->used_for_jit = TRUE;
2051					map->jit_entry_exists = TRUE;
2052				}
2053			}
2054
2055			if (is_submap) {
2056				vm_map_t	submap;
2057				boolean_t	submap_is_64bit;
2058				boolean_t	use_pmap;
2059
2060				new_entry->is_sub_map = TRUE;
2061				submap = (vm_map_t) object;
2062				submap_is_64bit = vm_map_is_64bit(submap);
2063				use_pmap = (alias == VM_MEMORY_SHARED_PMAP);
2064
2065	#ifndef NO_NESTED_PMAP
2066				if (use_pmap && submap->pmap == NULL) {
2067					ledger_t ledger = map->pmap->ledger;
2068					/* we need a sub pmap to nest... */
2069					submap->pmap = pmap_create(ledger, 0,
2070					    submap_is_64bit);
2071					if (submap->pmap == NULL) {
2072						/* let's proceed without nesting... */
2073					}
2074				}
2075				if (use_pmap && submap->pmap != NULL) {
2076					kr = pmap_nest(map->pmap,
2077						       submap->pmap,
2078						       tmp_start,
2079						       tmp_start,
2080						       tmp_end - tmp_start);
2081					if (kr != KERN_SUCCESS) {
2082						printf("vm_map_enter: "
2083						       "pmap_nest(0x%llx,0x%llx) "
2084						       "error 0x%x\n",
2085						       (long long)tmp_start,
2086						       (long long)tmp_end,
2087						       kr);
2088					} else {
2089						/* we're now nested ! */
2090						new_entry->use_pmap = TRUE;
2091						pmap_empty = FALSE;
2092					}
2093				}
2094	#endif /* NO_NESTED_PMAP */
2095			}
2096			entry = new_entry;
2097
2098			if (superpage_size) {
2099				vm_page_t pages, m;
2100				vm_object_t sp_object;
2101
2102				entry->offset = 0;
2103
2104				/* allocate one superpage */
2105				kr = cpm_allocate(SUPERPAGE_SIZE, &pages, 0, SUPERPAGE_NBASEPAGES-1, TRUE, 0);
2106				if (kr != KERN_SUCCESS) {
2107					new_mapping_established = TRUE; /* will cause deallocation of whole range */
2108					RETURN(kr);
2109				}
2110
2111				/* create one vm_object per superpage */
2112				sp_object = vm_object_allocate((vm_map_size_t)(entry->vme_end - entry->vme_start));
2113				sp_object->phys_contiguous = TRUE;
2114				sp_object->vo_shadow_offset = (vm_object_offset_t)pages->phys_page*PAGE_SIZE;
2115				entry->object.vm_object = sp_object;
2116
2117				/* enter the base pages into the object */
2118				vm_object_lock(sp_object);
2119				for (offset = 0; offset < SUPERPAGE_SIZE; offset += PAGE_SIZE) {
2120					m = pages;
2121					pmap_zero_page(m->phys_page);
2122					pages = NEXT_PAGE(m);
2123					*(NEXT_PAGE_PTR(m)) = VM_PAGE_NULL;
2124					vm_page_insert(m, sp_object, offset);
2125				}
2126				vm_object_unlock(sp_object);
2127			}
2128		} while (tmp_end != tmp2_end &&
2129			 (tmp_start = tmp_end) &&
2130			 (tmp_end = (tmp2_end - tmp_end > (vm_map_size_t)ANON_CHUNK_SIZE) ?
2131			  tmp_end + (vm_map_size_t)ANON_CHUNK_SIZE : tmp2_end));
2132	}
2133
2134	vm_map_unlock(map);
2135	map_locked = FALSE;
2136	new_mapping_established = TRUE;
2137	/*	Wire down the new entry if the user
2138	 *	requested all new map entries be wired.
2139	 */
2140	if ((map->wiring_required)||(superpage_size)) {
2141		pmap_empty = FALSE; /* pmap won't be empty */
2142		kr = vm_map_wire(map, start, end,
2143				     new_entry->protection, TRUE);
2144		RETURN(kr);
2145	}
2146
2147	if ((object != VM_OBJECT_NULL) &&
2148	    (vm_map_pmap_enter_enable) &&
2149	    (!anywhere)	 &&
2150	    (!needs_copy) &&
2151	    (size < (128*1024))) {
2152		pmap_empty = FALSE; /* pmap won't be empty */
2153
2154		if (override_nx(map, alias) && cur_protection)
2155		        cur_protection |= VM_PROT_EXECUTE;
2156
2157		vm_map_pmap_enter(map, start, end,
2158				  object, offset, cur_protection);
2159	}
2160
2161BailOut: ;
2162	if (result == KERN_SUCCESS) {
2163		vm_prot_t pager_prot;
2164		memory_object_t pager;
2165
2166		if (pmap_empty &&
2167		    !(flags & VM_FLAGS_NO_PMAP_CHECK)) {
2168			assert(vm_map_pmap_is_empty(map,
2169						    *address,
2170						    *address+size));
2171		}
2172
2173		/*
2174		 * For "named" VM objects, let the pager know that the
2175		 * memory object is being mapped.  Some pagers need to keep
2176		 * track of this, to know when they can reclaim the memory
2177		 * object, for example.
2178		 * VM calls memory_object_map() for each mapping (specifying
2179		 * the protection of each mapping) and calls
2180		 * memory_object_last_unmap() when all the mappings are gone.
2181		 */
2182		pager_prot = max_protection;
2183		if (needs_copy) {
2184			/*
2185			 * Copy-On-Write mapping: won't modify
2186			 * the memory object.
2187			 */
2188			pager_prot &= ~VM_PROT_WRITE;
2189		}
2190		if (!is_submap &&
2191		    object != VM_OBJECT_NULL &&
2192		    object->named &&
2193		    object->pager != MEMORY_OBJECT_NULL) {
2194			vm_object_lock(object);
2195			pager = object->pager;
2196			if (object->named &&
2197			    pager != MEMORY_OBJECT_NULL) {
2198				assert(object->pager_ready);
2199				vm_object_mapping_wait(object, THREAD_UNINT);
2200				vm_object_mapping_begin(object);
2201				vm_object_unlock(object);
2202
2203				kr = memory_object_map(pager, pager_prot);
2204				assert(kr == KERN_SUCCESS);
2205
2206				vm_object_lock(object);
2207				vm_object_mapping_end(object);
2208			}
2209			vm_object_unlock(object);
2210		}
2211	} else {
2212		if (new_mapping_established) {
2213			/*
2214			 * We have to get rid of the new mappings since we
2215			 * won't make them available to the user.
2216			 * Try to do that atomically, to minimize the risk
2217			 * that someone else creates new mappings in that range.
2218			 */
2219			zap_new_map = vm_map_create(PMAP_NULL,
2220						    *address,
2221						    *address + size,
2222						    map->hdr.entries_pageable);
2223			if (!map_locked) {
2224				vm_map_lock(map);
2225				map_locked = TRUE;
2226			}
2227			(void) vm_map_delete(map, *address, *address+size,
2228					     VM_MAP_REMOVE_SAVE_ENTRIES,
2229					     zap_new_map);
2230		}
2231		if (zap_old_map != VM_MAP_NULL &&
2232		    zap_old_map->hdr.nentries != 0) {
2233			vm_map_entry_t	entry1, entry2;
2234
2235			/*
2236			 * The new mapping failed.  Attempt to restore
2237			 * the old mappings, saved in the "zap_old_map".
2238			 */
2239			if (!map_locked) {
2240				vm_map_lock(map);
2241				map_locked = TRUE;
2242			}
2243
2244			/* first check if the coast is still clear */
2245			start = vm_map_first_entry(zap_old_map)->vme_start;
2246			end = vm_map_last_entry(zap_old_map)->vme_end;
2247			if (vm_map_lookup_entry(map, start, &entry1) ||
2248			    vm_map_lookup_entry(map, end, &entry2) ||
2249			    entry1 != entry2) {
2250				/*
2251				 * Part of that range has already been
2252				 * re-mapped:  we can't restore the old
2253				 * mappings...
2254				 */
2255				vm_map_enter_restore_failures++;
2256			} else {
2257				/*
2258				 * Transfer the saved map entries from
2259				 * "zap_old_map" to the original "map",
2260				 * inserting them all after "entry1".
2261				 */
2262				for (entry2 = vm_map_first_entry(zap_old_map);
2263				     entry2 != vm_map_to_entry(zap_old_map);
2264				     entry2 = vm_map_first_entry(zap_old_map)) {
2265					vm_map_size_t entry_size;
2266
2267					entry_size = (entry2->vme_end -
2268						      entry2->vme_start);
2269					vm_map_store_entry_unlink(zap_old_map,
2270							    entry2);
2271					zap_old_map->size -= entry_size;
2272					vm_map_store_entry_link(map, entry1, entry2);
2273					map->size += entry_size;
2274					entry1 = entry2;
2275				}
2276				if (map->wiring_required) {
2277					/*
2278					 * XXX TODO: we should rewire the
2279					 * old pages here...
2280					 */
2281				}
2282				vm_map_enter_restore_successes++;
2283			}
2284		}
2285	}
2286
2287	if (map_locked) {
2288		vm_map_unlock(map);
2289	}
2290
2291	/*
2292	 * Get rid of the "zap_maps" and all the map entries that
2293	 * they may still contain.
2294	 */
2295	if (zap_old_map != VM_MAP_NULL) {
2296		vm_map_destroy(zap_old_map, VM_MAP_REMOVE_NO_PMAP_CLEANUP);
2297		zap_old_map = VM_MAP_NULL;
2298	}
2299	if (zap_new_map != VM_MAP_NULL) {
2300		vm_map_destroy(zap_new_map, VM_MAP_REMOVE_NO_PMAP_CLEANUP);
2301		zap_new_map = VM_MAP_NULL;
2302	}
2303
2304	return result;
2305
2306#undef	RETURN
2307}
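
/*
 *	Illustrative sketch (not part of the build): a minimal anonymous,
 *	pageable allocation through vm_map_enter(), assuming the caller
 *	holds a reference on "map".  The variable names are hypothetical;
 *	only the vm_map_enter() argument order follows the calls used in
 *	this file.
 *
 *		vm_map_offset_t	addr = 0;
 *		kern_return_t	kr;
 *
 *		kr = vm_map_enter(map,
 *				  &addr,			// out: chosen address
 *				  (vm_map_size_t)PAGE_SIZE,
 *				  (vm_map_offset_t)0,		// alignment mask
 *				  VM_FLAGS_ANYWHERE,
 *				  VM_OBJECT_NULL,		// anonymous memory
 *				  (vm_object_offset_t)0,
 *				  FALSE,			// needs_copy
 *				  VM_PROT_DEFAULT,
 *				  VM_PROT_ALL,
 *				  VM_INHERIT_DEFAULT);
 */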
2308
2309kern_return_t
2310vm_map_enter_mem_object(
2311	vm_map_t		target_map,
2312	vm_map_offset_t		*address,
2313	vm_map_size_t		initial_size,
2314	vm_map_offset_t		mask,
2315	int			flags,
2316	ipc_port_t		port,
2317	vm_object_offset_t	offset,
2318	boolean_t		copy,
2319	vm_prot_t		cur_protection,
2320	vm_prot_t		max_protection,
2321	vm_inherit_t		inheritance)
2322{
2323	vm_map_address_t	map_addr;
2324	vm_map_size_t		map_size;
2325	vm_object_t		object;
2326	vm_object_size_t	size;
2327	kern_return_t		result;
2328	boolean_t		mask_cur_protection, mask_max_protection;
2329
2330	mask_cur_protection = cur_protection & VM_PROT_IS_MASK;
2331	mask_max_protection = max_protection & VM_PROT_IS_MASK;
2332	cur_protection &= ~VM_PROT_IS_MASK;
2333	max_protection &= ~VM_PROT_IS_MASK;
2334
2335	/*
2336	 * Check arguments for validity
2337	 */
2338	if ((target_map == VM_MAP_NULL) ||
2339	    (cur_protection & ~VM_PROT_ALL) ||
2340	    (max_protection & ~VM_PROT_ALL) ||
2341	    (inheritance > VM_INHERIT_LAST_VALID) ||
2342	    initial_size == 0)
2343		return KERN_INVALID_ARGUMENT;
2344
2345	map_addr = vm_map_trunc_page(*address);
2346	map_size = vm_map_round_page(initial_size);
2347	size = vm_object_round_page(initial_size);
2348
2349	/*
2350	 * Find the vm object (if any) corresponding to this port.
2351	 */
2352	if (!IP_VALID(port)) {
2353		object = VM_OBJECT_NULL;
2354		offset = 0;
2355		copy = FALSE;
2356	} else if (ip_kotype(port) == IKOT_NAMED_ENTRY) {
2357		vm_named_entry_t	named_entry;
2358
2359		named_entry = (vm_named_entry_t) port->ip_kobject;
2360		/* a few checks to make sure user is obeying rules */
2361		if (size == 0) {
2362			if (offset >= named_entry->size)
2363				return KERN_INVALID_RIGHT;
2364			size = named_entry->size - offset;
2365		}
2366		if (mask_max_protection) {
2367			max_protection &= named_entry->protection;
2368		}
2369		if (mask_cur_protection) {
2370			cur_protection &= named_entry->protection;
2371		}
2372		if ((named_entry->protection & max_protection) !=
2373		    max_protection)
2374			return KERN_INVALID_RIGHT;
2375		if ((named_entry->protection & cur_protection) !=
2376		    cur_protection)
2377			return KERN_INVALID_RIGHT;
2378		if (named_entry->size < (offset + size))
2379			return KERN_INVALID_ARGUMENT;
2380
2381		/* the caller's "offset" parameter is relative to the start */
2382		/* of the named entry; convert it into an offset in the object */
2383		offset = offset + named_entry->offset;
2384
2385		named_entry_lock(named_entry);
2386		if (named_entry->is_sub_map) {
2387			vm_map_t		submap;
2388
2389			submap = named_entry->backing.map;
2390			vm_map_lock(submap);
2391			vm_map_reference(submap);
2392			vm_map_unlock(submap);
2393			named_entry_unlock(named_entry);
2394
2395			result = vm_map_enter(target_map,
2396					      &map_addr,
2397					      map_size,
2398					      mask,
2399					      flags | VM_FLAGS_SUBMAP,
2400					      (vm_object_t) submap,
2401					      offset,
2402					      copy,
2403					      cur_protection,
2404					      max_protection,
2405					      inheritance);
2406			if (result != KERN_SUCCESS) {
2407				vm_map_deallocate(submap);
2408			} else {
2409				/*
2410				 * No need to lock "submap" just to check its
2411				 * "mapped" flag: that flag is never reset
2412				 * once it's been set and if we race, we'll
2413				 * just end up setting it twice, which is OK.
2414				 */
2415				if (submap->mapped_in_other_pmaps == FALSE &&
2416				    vm_map_pmap(submap) != PMAP_NULL &&
2417				    vm_map_pmap(submap) !=
2418				    vm_map_pmap(target_map)) {
2419					/*
2420					 * This submap is being mapped in a map
2421					 * that uses a different pmap.
2422					 * Set its "mapped_in_other_pmaps" flag
2423					 * to indicate that we now need to
2424					 * remove mappings from all pmaps rather
2425					 * than just the submap's pmap.
2426					 */
2427					vm_map_lock(submap);
2428					submap->mapped_in_other_pmaps = TRUE;
2429					vm_map_unlock(submap);
2430				}
2431				*address = map_addr;
2432			}
2433			return result;
2434
2435		} else if (named_entry->is_pager) {
2436			unsigned int	access;
2437			vm_prot_t	protections;
2438			unsigned int	wimg_mode;
2439
2440			protections = named_entry->protection & VM_PROT_ALL;
2441			access = GET_MAP_MEM(named_entry->protection);
2442
2443			object = vm_object_enter(named_entry->backing.pager,
2444						 named_entry->size,
2445						 named_entry->internal,
2446						 FALSE,
2447						 FALSE);
2448			if (object == VM_OBJECT_NULL) {
2449				named_entry_unlock(named_entry);
2450				return KERN_INVALID_OBJECT;
2451			}
2452
2453			/* JMM - drop reference on pager here */
2454
2455			/* create an extra ref for the named entry */
2456			vm_object_lock(object);
2457			vm_object_reference_locked(object);
2458			named_entry->backing.object = object;
2459			named_entry->is_pager = FALSE;
2460			named_entry_unlock(named_entry);
2461
2462			wimg_mode = object->wimg_bits;
2463
2464			if (access == MAP_MEM_IO) {
2465				wimg_mode = VM_WIMG_IO;
2466			} else if (access == MAP_MEM_COPYBACK) {
2467				wimg_mode = VM_WIMG_USE_DEFAULT;
2468			} else if (access == MAP_MEM_INNERWBACK) {
2469				wimg_mode = VM_WIMG_INNERWBACK;
2470			} else if (access == MAP_MEM_WTHRU) {
2471				wimg_mode = VM_WIMG_WTHRU;
2472			} else if (access == MAP_MEM_WCOMB) {
2473				wimg_mode = VM_WIMG_WCOMB;
2474			}
2475
2476			/* wait for object (if any) to be ready */
2477			if (!named_entry->internal) {
2478				while (!object->pager_ready) {
2479					vm_object_wait(
2480						object,
2481						VM_OBJECT_EVENT_PAGER_READY,
2482						THREAD_UNINT);
2483					vm_object_lock(object);
2484				}
2485			}
2486
2487			if (object->wimg_bits != wimg_mode)
2488				vm_object_change_wimg_mode(object, wimg_mode);
2489
2490			object->true_share = TRUE;
2491
2492			if (object->copy_strategy == MEMORY_OBJECT_COPY_SYMMETRIC)
2493				object->copy_strategy = MEMORY_OBJECT_COPY_DELAY;
2494			vm_object_unlock(object);
2495		} else {
2496			/* This is the case where we are going to map  */
2497			/* an already mapped object.  If the object is */
2498			/* not ready, it is internal.  An external     */
2499			/* object cannot be mapped until it is ready,  */
2500			/* so we can avoid the ready check in this     */
2501			/* case.  */
2502			object = named_entry->backing.object;
2503			assert(object != VM_OBJECT_NULL);
2504			named_entry_unlock(named_entry);
2505			vm_object_reference(object);
2506		}
2507	} else if (ip_kotype(port) == IKOT_MEMORY_OBJECT) {
2508		/*
2509		 * JMM - This is temporary until we unify named entries
2510		 * and raw memory objects.
2511		 *
2512		 * Detected fake ip_kotype for a memory object.  In
2513		 * this case, the port isn't really a port at all, but
2514		 * instead is just a raw memory object.
2515		 */
2516
2517		object = vm_object_enter((memory_object_t)port,
2518					 size, FALSE, FALSE, FALSE);
2519		if (object == VM_OBJECT_NULL)
2520			return KERN_INVALID_OBJECT;
2521
2522		/* wait for object (if any) to be ready */
2523		if (object != VM_OBJECT_NULL) {
2524			if (object == kernel_object) {
2525				printf("Warning: Attempt to map kernel object"
2526					" by a non-private kernel entity\n");
2527				return KERN_INVALID_OBJECT;
2528			}
2529			if (!object->pager_ready) {
2530				vm_object_lock(object);
2531
2532				while (!object->pager_ready) {
2533					vm_object_wait(object,
2534						       VM_OBJECT_EVENT_PAGER_READY,
2535						       THREAD_UNINT);
2536					vm_object_lock(object);
2537				}
2538				vm_object_unlock(object);
2539			}
2540		}
2541	} else {
2542		return KERN_INVALID_OBJECT;
2543	}
2544
2545	if (object != VM_OBJECT_NULL &&
2546	    object->named &&
2547	    object->pager != MEMORY_OBJECT_NULL &&
2548	    object->copy_strategy != MEMORY_OBJECT_COPY_NONE) {
2549		memory_object_t pager;
2550		vm_prot_t	pager_prot;
2551		kern_return_t	kr;
2552
2553		/*
2554		 * For "named" VM objects, let the pager know that the
2555		 * memory object is being mapped.  Some pagers need to keep
2556		 * track of this, to know when they can reclaim the memory
2557		 * object, for example.
2558		 * VM calls memory_object_map() for each mapping (specifying
2559		 * the protection of each mapping) and calls
2560		 * memory_object_last_unmap() when all the mappings are gone.
2561		 */
2562		pager_prot = max_protection;
2563		if (copy) {
2564			/*
2565			 * Copy-On-Write mapping: won't modify the
2566			 * memory object.
2567			 */
2568			pager_prot &= ~VM_PROT_WRITE;
2569		}
2570		vm_object_lock(object);
2571		pager = object->pager;
2572		if (object->named &&
2573		    pager != MEMORY_OBJECT_NULL &&
2574		    object->copy_strategy != MEMORY_OBJECT_COPY_NONE) {
2575			assert(object->pager_ready);
2576			vm_object_mapping_wait(object, THREAD_UNINT);
2577			vm_object_mapping_begin(object);
2578			vm_object_unlock(object);
2579
2580			kr = memory_object_map(pager, pager_prot);
2581			assert(kr == KERN_SUCCESS);
2582
2583			vm_object_lock(object);
2584			vm_object_mapping_end(object);
2585		}
2586		vm_object_unlock(object);
2587	}
2588
2589	/*
2590	 *	Perform the copy if requested
2591	 */
2592
2593	if (copy) {
2594		vm_object_t		new_object;
2595		vm_object_offset_t	new_offset;
2596
2597		result = vm_object_copy_strategically(object, offset, size,
2598						      &new_object, &new_offset,
2599						      &copy);
2600
2601
2602		if (result == KERN_MEMORY_RESTART_COPY) {
2603			boolean_t success;
2604			boolean_t src_needs_copy;
2605
2606			/*
2607			 * XXX
2608			 * We currently ignore src_needs_copy.
2609			 * This really is the issue of how to make
2610			 * MEMORY_OBJECT_COPY_SYMMETRIC safe for
2611			 * non-kernel users to use. Solution forthcoming.
2612			 * In the meantime, since we don't allow non-kernel
2613			 * memory managers to specify symmetric copy,
2614			 * we won't run into problems here.
2615			 */
2616			new_object = object;
2617			new_offset = offset;
2618			success = vm_object_copy_quickly(&new_object,
2619							 new_offset, size,
2620							 &src_needs_copy,
2621							 &copy);
2622			assert(success);
2623			result = KERN_SUCCESS;
2624		}
2625		/*
2626		 *	Throw away the reference to the
2627		 *	original object, as it won't be mapped.
2628		 */
2629
2630		vm_object_deallocate(object);
2631
2632		if (result != KERN_SUCCESS)
2633			return result;
2634
2635		object = new_object;
2636		offset = new_offset;
2637	}
2638
2639	result = vm_map_enter(target_map,
2640			      &map_addr, map_size,
2641			      (vm_map_offset_t)mask,
2642			      flags,
2643			      object, offset,
2644			      copy,
2645			      cur_protection, max_protection, inheritance);
2646	if (result != KERN_SUCCESS)
2647		vm_object_deallocate(object);
2648	*address = map_addr;
2649	return result;
2650}
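
/*
 *	Illustrative sketch (not part of the build): mapping a named-entry
 *	port into a task's map with vm_map_enter_mem_object().  "mem_port"
 *	is assumed to be a valid memory-entry port obtained elsewhere; the
 *	other names are hypothetical.
 *
 *		vm_map_offset_t	addr = 0;
 *		kern_return_t	kr;
 *
 *		kr = vm_map_enter_mem_object(target_map,
 *					     &addr,
 *					     size,
 *					     (vm_map_offset_t)0,
 *					     VM_FLAGS_ANYWHERE,
 *					     mem_port,
 *					     (vm_object_offset_t)0,
 *					     FALSE,		// copy
 *					     VM_PROT_READ,
 *					     VM_PROT_READ,
 *					     VM_INHERIT_NONE);
 */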
2651
2652
2653
2654
2655kern_return_t
2656vm_map_enter_mem_object_control(
2657	vm_map_t		target_map,
2658	vm_map_offset_t		*address,
2659	vm_map_size_t		initial_size,
2660	vm_map_offset_t		mask,
2661	int			flags,
2662	memory_object_control_t	control,
2663	vm_object_offset_t	offset,
2664	boolean_t		copy,
2665	vm_prot_t		cur_protection,
2666	vm_prot_t		max_protection,
2667	vm_inherit_t		inheritance)
2668{
2669	vm_map_address_t	map_addr;
2670	vm_map_size_t		map_size;
2671	vm_object_t		object;
2672	vm_object_size_t	size;
2673	kern_return_t		result;
2674	memory_object_t		pager;
2675	vm_prot_t		pager_prot;
2676	kern_return_t		kr;
2677
2678	/*
2679	 * Check arguments for validity
2680	 */
2681	if ((target_map == VM_MAP_NULL) ||
2682	    (cur_protection & ~VM_PROT_ALL) ||
2683	    (max_protection & ~VM_PROT_ALL) ||
2684	    (inheritance > VM_INHERIT_LAST_VALID) ||
2685	    initial_size == 0)
2686		return KERN_INVALID_ARGUMENT;
2687
2688	map_addr = vm_map_trunc_page(*address);
2689	map_size = vm_map_round_page(initial_size);
2690	size = vm_object_round_page(initial_size);
2691
2692	object = memory_object_control_to_vm_object(control);
2693
2694	if (object == VM_OBJECT_NULL)
2695		return KERN_INVALID_OBJECT;
2696
2697	if (object == kernel_object) {
2698		printf("Warning: Attempt to map kernel object"
2699		       " by a non-private kernel entity\n");
2700		return KERN_INVALID_OBJECT;
2701	}
2702
2703	vm_object_lock(object);
2704	object->ref_count++;
2705	vm_object_res_reference(object);
2706
2707	/*
2708	 * For "named" VM objects, let the pager know that the
2709	 * memory object is being mapped.  Some pagers need to keep
2710	 * track of this, to know when they can reclaim the memory
2711	 * object, for example.
2712	 * VM calls memory_object_map() for each mapping (specifying
2713	 * the protection of each mapping) and calls
2714	 * memory_object_last_unmap() when all the mappings are gone.
2715	 */
2716	pager_prot = max_protection;
2717	if (copy) {
2718		pager_prot &= ~VM_PROT_WRITE;
2719	}
2720	pager = object->pager;
2721	if (object->named &&
2722	    pager != MEMORY_OBJECT_NULL &&
2723	    object->copy_strategy != MEMORY_OBJECT_COPY_NONE) {
2724		assert(object->pager_ready);
2725		vm_object_mapping_wait(object, THREAD_UNINT);
2726		vm_object_mapping_begin(object);
2727		vm_object_unlock(object);
2728
2729		kr = memory_object_map(pager, pager_prot);
2730		assert(kr == KERN_SUCCESS);
2731
2732		vm_object_lock(object);
2733		vm_object_mapping_end(object);
2734	}
2735	vm_object_unlock(object);
2736
2737	/*
2738	 *	Perform the copy if requested
2739	 */
2740
2741	if (copy) {
2742		vm_object_t		new_object;
2743		vm_object_offset_t	new_offset;
2744
2745		result = vm_object_copy_strategically(object, offset, size,
2746						      &new_object, &new_offset,
2747						      &copy);
2748
2749
2750		if (result == KERN_MEMORY_RESTART_COPY) {
2751			boolean_t success;
2752			boolean_t src_needs_copy;
2753
2754			/*
2755			 * XXX
2756			 * We currently ignore src_needs_copy.
2757			 * This really is the issue of how to make
2758			 * MEMORY_OBJECT_COPY_SYMMETRIC safe for
2759			 * non-kernel users to use. Solution forthcoming.
2760			 * In the meantime, since we don't allow non-kernel
2761			 * memory managers to specify symmetric copy,
2762			 * we won't run into problems here.
2763			 */
2764			new_object = object;
2765			new_offset = offset;
2766			success = vm_object_copy_quickly(&new_object,
2767							 new_offset, size,
2768							 &src_needs_copy,
2769							 &copy);
2770			assert(success);
2771			result = KERN_SUCCESS;
2772		}
2773		/*
2774		 *	Throw away the reference to the
2775		 *	original object, as it won't be mapped.
2776		 */
2777
2778		vm_object_deallocate(object);
2779
2780		if (result != KERN_SUCCESS)
2781			return result;
2782
2783		object = new_object;
2784		offset = new_offset;
2785	}
2786
2787	result = vm_map_enter(target_map,
2788			      &map_addr, map_size,
2789			      (vm_map_offset_t)mask,
2790			      flags,
2791			      object, offset,
2792			      copy,
2793			      cur_protection, max_protection, inheritance);
2794	if (result != KERN_SUCCESS)
2795		vm_object_deallocate(object);
2796	*address = map_addr;
2797
2798	return result;
2799}
2800
2801
2802#if	VM_CPM
2803
2804#ifdef MACH_ASSERT
2805extern pmap_paddr_t	avail_start, avail_end;
2806#endif
2807
2808/*
2809 *	Allocate memory in the specified map, with the caveat that
2810 *	the memory is physically contiguous.  This call may fail
2811 *	if the system can't find sufficient contiguous memory.
2812 *	This call may cause or lead to heart-stopping amounts of
2813 *	paging activity.
2814 *
2815 *	Memory obtained from this call should be freed in the
2816 *	normal way, viz., via vm_deallocate.
2817 */
2818kern_return_t
2819vm_map_enter_cpm(
2820	vm_map_t		map,
2821	vm_map_offset_t	*addr,
2822	vm_map_size_t		size,
2823	int			flags)
2824{
2825	vm_object_t		cpm_obj;
2826	pmap_t			pmap;
2827	vm_page_t		m, pages;
2828	kern_return_t		kr;
2829	vm_map_offset_t		va, start, end, offset;
2830#if	MACH_ASSERT
2831	vm_map_offset_t		prev_addr = 0;
2832#endif	/* MACH_ASSERT */
2833
2834	boolean_t		anywhere = ((VM_FLAGS_ANYWHERE & flags) != 0);
2835
2836	if (size == 0) {
2837		*addr = 0;
2838		return KERN_SUCCESS;
2839	}
2840	if (anywhere)
2841		*addr = vm_map_min(map);
2842	else
2843		*addr = vm_map_trunc_page(*addr);
2844	size = vm_map_round_page(size);
2845
2846	/*
2847	 * LP64todo - cpm_allocate should probably allow
2848	 * allocations of >4GB, but not with the current
2849	 * algorithm, so just cast down the size for now.
2850	 */
2851	if (size > VM_MAX_ADDRESS)
2852		return KERN_RESOURCE_SHORTAGE;
2853	if ((kr = cpm_allocate(CAST_DOWN(vm_size_t, size),
2854			       &pages, 0, 0, TRUE, flags)) != KERN_SUCCESS)
2855		return kr;
2856
2857	cpm_obj = vm_object_allocate((vm_object_size_t)size);
2858	assert(cpm_obj != VM_OBJECT_NULL);
2859	assert(cpm_obj->internal);
2860	assert(cpm_obj->vo_size == (vm_object_size_t)size);
2861	assert(cpm_obj->can_persist == FALSE);
2862	assert(cpm_obj->pager_created == FALSE);
2863	assert(cpm_obj->pageout == FALSE);
2864	assert(cpm_obj->shadow == VM_OBJECT_NULL);
2865
2866	/*
2867	 *	Insert pages into object.
2868	 */
2869
2870	vm_object_lock(cpm_obj);
2871	for (offset = 0; offset < size; offset += PAGE_SIZE) {
2872		m = pages;
2873		pages = NEXT_PAGE(m);
2874		*(NEXT_PAGE_PTR(m)) = VM_PAGE_NULL;
2875
2876		assert(!m->gobbled);
2877		assert(!m->wanted);
2878		assert(!m->pageout);
2879		assert(!m->tabled);
2880		assert(VM_PAGE_WIRED(m));
2881		/*
2882		 * ENCRYPTED SWAP:
2883		 * "m" is not supposed to be pageable, so it
2884		 * should not be encrypted.  It wouldn't be safe
2885		 * to enter it in a new VM object while encrypted.
2886		 */
2887		ASSERT_PAGE_DECRYPTED(m);
2888		assert(m->busy);
2889		assert(m->phys_page>=(avail_start>>PAGE_SHIFT) && m->phys_page<=(avail_end>>PAGE_SHIFT));
2890
2891		m->busy = FALSE;
2892		vm_page_insert(m, cpm_obj, offset);
2893	}
2894	assert(cpm_obj->resident_page_count == size / PAGE_SIZE);
2895	vm_object_unlock(cpm_obj);
2896
2897	/*
2898	 *	Hang onto a reference on the object in case a
2899	 *	multi-threaded application for some reason decides
2900	 *	to deallocate the portion of the address space into
2901	 *	which we will insert this object.
2902	 *
2903	 *	Unfortunately, we must insert the object now before
2904	 *	we can talk to the pmap module about which addresses
2905	 *	must be wired down.  Hence, the race with a multi-
2906	 *	threaded app.
2907	 */
2908	vm_object_reference(cpm_obj);
2909
2910	/*
2911	 *	Insert object into map.
2912	 */
2913
2914	kr = vm_map_enter(
2915		map,
2916		addr,
2917		size,
2918		(vm_map_offset_t)0,
2919		flags,
2920		cpm_obj,
2921		(vm_object_offset_t)0,
2922		FALSE,
2923		VM_PROT_ALL,
2924		VM_PROT_ALL,
2925		VM_INHERIT_DEFAULT);
2926
2927	if (kr != KERN_SUCCESS) {
2928		/*
2929		 *	A CPM object doesn't have can_persist set,
2930		 *	so all we have to do is deallocate it to
2931		 *	free up these pages.
2932		 */
2933		assert(cpm_obj->pager_created == FALSE);
2934		assert(cpm_obj->can_persist == FALSE);
2935		assert(cpm_obj->pageout == FALSE);
2936		assert(cpm_obj->shadow == VM_OBJECT_NULL);
2937		vm_object_deallocate(cpm_obj); /* kill acquired ref */
2938		vm_object_deallocate(cpm_obj); /* kill creation ref */
2939	}
2940
2941	/*
2942	 *	Inform the physical mapping system that the
2943	 *	range of addresses may not fault, so that
2944	 *	page tables and such can be locked down as well.
2945	 */
2946	start = *addr;
2947	end = start + size;
2948	pmap = vm_map_pmap(map);
2949	pmap_pageable(pmap, start, end, FALSE);
2950
2951	/*
2952	 *	Enter each page into the pmap, to avoid faults.
2953	 *	Note that this loop could be coded more efficiently,
2954	 *	if the need arose, rather than looking up each page
2955	 *	again.
2956	 */
2957	for (offset = 0, va = start; offset < size;
2958	     va += PAGE_SIZE, offset += PAGE_SIZE) {
2959	        int type_of_fault;
2960
2961		vm_object_lock(cpm_obj);
2962		m = vm_page_lookup(cpm_obj, (vm_object_offset_t)offset);
2963		assert(m != VM_PAGE_NULL);
2964
2965		vm_page_zero_fill(m);
2966
2967		type_of_fault = DBG_ZERO_FILL_FAULT;
2968
2969		vm_fault_enter(m, pmap, va, VM_PROT_ALL, VM_PROT_WRITE,
2970			       VM_PAGE_WIRED(m), FALSE, FALSE, FALSE, NULL,
2971			       &type_of_fault);
2972
2973		vm_object_unlock(cpm_obj);
2974	}
2975
2976#if	MACH_ASSERT
2977	/*
2978	 *	Verify ordering in address space.
2979	 */
2980	for (offset = 0; offset < size; offset += PAGE_SIZE) {
2981		vm_object_lock(cpm_obj);
2982		m = vm_page_lookup(cpm_obj, (vm_object_offset_t)offset);
2983		vm_object_unlock(cpm_obj);
2984		if (m == VM_PAGE_NULL)
2985			panic("vm_allocate_cpm:  obj %p off 0x%llx no page",
2986			      cpm_obj, (uint64_t)offset);
2987		assert(m->tabled);
2988		assert(!m->busy);
2989		assert(!m->wanted);
2990		assert(!m->fictitious);
2991		assert(!m->private);
2992		assert(!m->absent);
2993		assert(!m->error);
2994		assert(!m->cleaning);
2995		assert(!m->laundry);
2996		assert(!m->precious);
2997		assert(!m->clustered);
2998		if (offset != 0) {
2999			if (m->phys_page != prev_addr + 1) {
3000				printf("start 0x%llx end 0x%llx va 0x%llx\n",
3001				       (uint64_t)start, (uint64_t)end, (uint64_t)va);
3002				printf("obj %p off 0x%llx\n", cpm_obj, (uint64_t)offset);
3003				printf("m %p prev_address 0x%llx\n", m, (uint64_t)prev_addr);
3004				panic("vm_allocate_cpm:  pages not contig!");
3005			}
3006		}
3007		prev_addr = m->phys_page;
3008	}
3009#endif	/* MACH_ASSERT */
3010
3011	vm_object_deallocate(cpm_obj); /* kill extra ref */
3012
3013	return kr;
3014}
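
/*
 *	Illustrative sketch (not part of the build): a physically contiguous
 *	allocation via vm_map_enter_cpm(), released with vm_deallocate() as
 *	the comment above prescribes.  Assumes a VM_CPM kernel; names other
 *	than the two calls are hypothetical.
 *
 *		vm_map_offset_t	addr = 0;
 *		vm_map_size_t	size = 4 * PAGE_SIZE;
 *		kern_return_t	kr;
 *
 *		kr = vm_map_enter_cpm(map, &addr, size, VM_FLAGS_ANYWHERE);
 *		if (kr == KERN_SUCCESS) {
 *			// ... use [addr, addr + size) ...
 *			(void) vm_deallocate(map, addr, size);
 *		}
 */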
3015
3016
3017#else	/* VM_CPM */
3018
3019/*
3020 *	Interface is defined in all cases, but unless the kernel
3021 *	is built explicitly for this option, the interface does
3022 *	nothing.
3023 */
3024
3025kern_return_t
3026vm_map_enter_cpm(
3027	__unused vm_map_t	map,
3028	__unused vm_map_offset_t	*addr,
3029	__unused vm_map_size_t	size,
3030	__unused int		flags)
3031{
3032	return KERN_FAILURE;
3033}
3034#endif /* VM_CPM */
3035
3036/* Not used without nested pmaps */
3037#ifndef NO_NESTED_PMAP
3038/*
3039 * Clip and unnest a portion of a nested submap mapping.
3040 */
3041
3042
3043static void
3044vm_map_clip_unnest(
3045	vm_map_t	map,
3046	vm_map_entry_t	entry,
3047	vm_map_offset_t	start_unnest,
3048	vm_map_offset_t	end_unnest)
3049{
3050	vm_map_offset_t old_start_unnest = start_unnest;
3051	vm_map_offset_t old_end_unnest = end_unnest;
3052
3053	assert(entry->is_sub_map);
3054	assert(entry->object.sub_map != NULL);
3055
3056	/*
3057	 * Query the platform for the optimal unnest range.
3058	 * DRK: There's some duplication of effort here, since
3059	 * callers may have adjusted the range to some extent. This
3060	 * routine was introduced to support 1GiB subtree nesting
3061	 * for x86 platforms, which can also nest on 2MiB boundaries
3062	 * depending on size/alignment.
3063	 */
3064	if (pmap_adjust_unnest_parameters(map->pmap, &start_unnest, &end_unnest)) {
3065		log_unnest_badness(map, old_start_unnest, old_end_unnest);
3066	}
3067
3068	if (entry->vme_start > start_unnest ||
3069	    entry->vme_end < end_unnest) {
3070		panic("vm_map_clip_unnest(0x%llx,0x%llx): "
3071		      "bad nested entry: start=0x%llx end=0x%llx\n",
3072		      (long long)start_unnest, (long long)end_unnest,
3073		      (long long)entry->vme_start, (long long)entry->vme_end);
3074	}
3075
3076	if (start_unnest > entry->vme_start) {
3077		_vm_map_clip_start(&map->hdr,
3078				   entry,
3079				   start_unnest);
3080		vm_map_store_update_first_free(map, map->first_free);
3081	}
3082	if (entry->vme_end > end_unnest) {
3083		_vm_map_clip_end(&map->hdr,
3084				 entry,
3085				 end_unnest);
3086		vm_map_store_update_first_free(map, map->first_free);
3087	}
3088
3089	pmap_unnest(map->pmap,
3090		    entry->vme_start,
3091		    entry->vme_end - entry->vme_start);
3092	if ((map->mapped_in_other_pmaps) && (map->ref_count)) {
3093		/* clean up parent map/maps */
3094		vm_map_submap_pmap_clean(
3095			map, entry->vme_start,
3096			entry->vme_end,
3097			entry->object.sub_map,
3098			entry->offset);
3099	}
3100	entry->use_pmap = FALSE;
3101	if (entry->alias == VM_MEMORY_SHARED_PMAP) {
3102		entry->alias = VM_MEMORY_UNSHARED_PMAP;
3103	}
3104}
3105#endif	/* NO_NESTED_PMAP */
3106
3107/*
3108 *	vm_map_clip_start:	[ internal use only ]
3109 *
3110 *	Asserts that the given entry begins at or after
3111 *	the specified address; if necessary,
3112 *	it splits the entry into two.
3113 */
3114void
3115vm_map_clip_start(
3116	vm_map_t	map,
3117	vm_map_entry_t	entry,
3118	vm_map_offset_t	startaddr)
3119{
3120#ifndef NO_NESTED_PMAP
3121	if (entry->use_pmap &&
3122	    startaddr >= entry->vme_start) {
3123		vm_map_offset_t	start_unnest, end_unnest;
3124
3125		/*
3126		 * Make sure "startaddr" is no longer in a nested range
3127		 * before we clip.  Unnest only the minimum range the platform
3128		 * can handle.
3129		 * vm_map_clip_unnest may perform additional adjustments to
3130		 * the unnest range.
3131		 */
3132		start_unnest = startaddr & ~(pmap_nesting_size_min - 1);
3133		end_unnest = start_unnest + pmap_nesting_size_min;
3134		vm_map_clip_unnest(map, entry, start_unnest, end_unnest);
3135	}
3136#endif /* NO_NESTED_PMAP */
3137	if (startaddr > entry->vme_start) {
3138		if (entry->object.vm_object &&
3139		    !entry->is_sub_map &&
3140		    entry->object.vm_object->phys_contiguous) {
3141			pmap_remove(map->pmap,
3142				    (addr64_t)(entry->vme_start),
3143				    (addr64_t)(entry->vme_end));
3144		}
3145		_vm_map_clip_start(&map->hdr, entry, startaddr);
3146		vm_map_store_update_first_free(map, map->first_free);
3147	}
3148}
3149
3150
3151#define vm_map_copy_clip_start(copy, entry, startaddr) \
3152	MACRO_BEGIN \
3153	if ((startaddr) > (entry)->vme_start) \
3154		_vm_map_clip_start(&(copy)->cpy_hdr,(entry),(startaddr)); \
3155	MACRO_END
3156
3157/*
3158 *	This routine is called only when it is known that
3159 *	the entry must be split.
3160 */
3161static void
3162_vm_map_clip_start(
3163	register struct vm_map_header	*map_header,
3164	register vm_map_entry_t		entry,
3165	register vm_map_offset_t		start)
3166{
3167	register vm_map_entry_t	new_entry;
3168
3169	/*
3170	 *	Split off the front portion --
3171	 *	note that we must insert the new
3172	 *	entry BEFORE this one, so that
3173	 *	this entry has the specified starting
3174	 *	address.
3175	 */
3176
3177	new_entry = _vm_map_entry_create(map_header, !map_header->entries_pageable);
3178	vm_map_entry_copy_full(new_entry, entry);
3179
3180	new_entry->vme_end = start;
3181	assert(new_entry->vme_start < new_entry->vme_end);
3182	entry->offset += (start - entry->vme_start);
3183	assert(start < entry->vme_end);
3184	entry->vme_start = start;
3185
3186	_vm_map_store_entry_link(map_header, entry->vme_prev, new_entry);
3187
3188	if (entry->is_sub_map)
3189		vm_map_reference(new_entry->object.sub_map);
3190	else
3191		vm_object_reference(new_entry->object.vm_object);
3192}
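
/*
 *	Illustrative effect of _vm_map_clip_start() at address S, derived
 *	from the code above:
 *
 *	before:	entry      [vme_start ................. vme_end)
 *	after:	new_entry  [vme_start ...... S)
 *		entry                        [S ......... vme_end)
 *
 *	"new_entry" is linked in front of "entry", and "entry"'s offset is
 *	advanced by (S - old vme_start) so it still maps the same backing
 *	object pages.
 */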
3193
3194
3195/*
3196 *	vm_map_clip_end:	[ internal use only ]
3197 *
3198 *	Asserts that the given entry ends at or before
3199 *	the specified address; if necessary,
3200 *	it splits the entry into two.
3201 */
3202void
3203vm_map_clip_end(
3204	vm_map_t	map,
3205	vm_map_entry_t	entry,
3206	vm_map_offset_t	endaddr)
3207{
3208	if (endaddr > entry->vme_end) {
3209		/*
3210		 * Within the scope of this clipping, limit "endaddr" to
3211		 * the end of this map entry...
3212		 */
3213		endaddr = entry->vme_end;
3214	}
3215#ifndef NO_NESTED_PMAP
3216	if (entry->use_pmap) {
3217		vm_map_offset_t	start_unnest, end_unnest;
3218
3219		/*
3220		 * Make sure the range between the start of this entry and
3221		 * the new "endaddr" is no longer nested before we clip.
3222		 * Unnest only the minimum range the platform can handle.
3223		 * vm_map_clip_unnest may perform additional adjustments to
3224		 * the unnest range.
3225		 */
3226		start_unnest = entry->vme_start;
3227		end_unnest =
3228			(endaddr + pmap_nesting_size_min - 1) &
3229			~(pmap_nesting_size_min - 1);
3230		vm_map_clip_unnest(map, entry, start_unnest, end_unnest);
3231	}
3232#endif /* NO_NESTED_PMAP */
3233	if (endaddr < entry->vme_end) {
3234		if (entry->object.vm_object &&
3235		    !entry->is_sub_map &&
3236		    entry->object.vm_object->phys_contiguous) {
3237			pmap_remove(map->pmap,
3238				    (addr64_t)(entry->vme_start),
3239				    (addr64_t)(entry->vme_end));
3240		}
3241		_vm_map_clip_end(&map->hdr, entry, endaddr);
3242		vm_map_store_update_first_free(map, map->first_free);
3243	}
3244}
3245
3246
3247#define vm_map_copy_clip_end(copy, entry, endaddr) \
3248	MACRO_BEGIN \
3249	if ((endaddr) < (entry)->vme_end) \
3250		_vm_map_clip_end(&(copy)->cpy_hdr,(entry),(endaddr)); \
3251	MACRO_END
3252
3253/*
3254 *	This routine is called only when it is known that
3255 *	the entry must be split.
3256 */
3257static void
3258_vm_map_clip_end(
3259	register struct vm_map_header	*map_header,
3260	register vm_map_entry_t		entry,
3261	register vm_map_offset_t	end)
3262{
3263	register vm_map_entry_t	new_entry;
3264
3265	/*
3266	 *	Create a new entry and insert it
3267	 *	AFTER the specified entry
3268	 */
3269
3270	new_entry = _vm_map_entry_create(map_header, !map_header->entries_pageable);
3271	vm_map_entry_copy_full(new_entry, entry);
3272
3273	assert(entry->vme_start < end);
3274	new_entry->vme_start = entry->vme_end = end;
3275	new_entry->offset += (end - entry->vme_start);
3276	assert(new_entry->vme_start < new_entry->vme_end);
3277
3278	_vm_map_store_entry_link(map_header, entry, new_entry);
3279
3280	if (entry->is_sub_map)
3281		vm_map_reference(new_entry->object.sub_map);
3282	else
3283		vm_object_reference(new_entry->object.vm_object);
3284}
3285
3286
3287/*
3288 *	VM_MAP_RANGE_CHECK:	[ internal use only ]
3289 *
3290 *	Asserts that the starting and ending region
3291 *	addresses fall within the valid range of the map.
3292 */
3293#define	VM_MAP_RANGE_CHECK(map, start, end)	\
3294	MACRO_BEGIN				\
3295	if (start < vm_map_min(map))		\
3296		start = vm_map_min(map);	\
3297	if (end > vm_map_max(map))		\
3298		end = vm_map_max(map);		\
3299	if (start > end)			\
3300		start = end;			\
3301	MACRO_END
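
/*
 *	Worked example (illustrative values only): if vm_map_min(map) is
 *	0x1000 and vm_map_max(map) is 0x80000000, a request of
 *	[0x0, 0x90000000) is clamped by VM_MAP_RANGE_CHECK to
 *	[0x1000, 0x80000000), while a fully out-of-range request such as
 *	[0x90000000, 0xa0000000) collapses to the empty range
 *	[0x80000000, 0x80000000).
 */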
3302
3303/*
3304 *	vm_map_range_check:	[ internal use only ]
3305 *
3306 *	Check that the region defined by the specified start and
3307 *	end addresses is wholly contained within a single map
3308 *	entry or set of adjacent map entries of the specified map,
3309 *	i.e. the specified region contains no unmapped space.
3310 *	If any or all of the region is unmapped, FALSE is returned.
3311 *	Otherwise, TRUE is returned and if the output argument 'entry'
3312 *	is not NULL it points to the map entry containing the start
3313 *	of the region.
3314 *
3315 *	The map is locked for reading on entry and is left locked.
3316 */
3317static boolean_t
3318vm_map_range_check(
3319	register vm_map_t	map,
3320	register vm_map_offset_t	start,
3321	register vm_map_offset_t	end,
3322	vm_map_entry_t		*entry)
3323{
3324	vm_map_entry_t		cur;
3325	register vm_map_offset_t	prev;
3326
3327	/*
3328	 * 	Basic sanity checks first
3329	 */
3330	if (start < vm_map_min(map) || end > vm_map_max(map) || start > end)
3331		return (FALSE);
3332
3333	/*
3334	 * 	Check first if the region starts within a valid
3335	 *	mapping for the map.
3336	 */
3337	if (!vm_map_lookup_entry(map, start, &cur))
3338		return (FALSE);
3339
3340	/*
3341	 *	Optimize for the case that the region is contained
3342	 *	in a single map entry.
3343	 */
3344	if (entry != (vm_map_entry_t *) NULL)
3345		*entry = cur;
3346	if (end <= cur->vme_end)
3347		return (TRUE);
3348
3349	/*
3350	 * 	If the region is not wholly contained within a
3351	 * 	single entry, walk the entries looking for holes.
3352	 */
3353	prev = cur->vme_end;
3354	cur = cur->vme_next;
3355	while ((cur != vm_map_to_entry(map)) && (prev == cur->vme_start)) {
3356		if (end <= cur->vme_end)
3357			return (TRUE);
3358		prev = cur->vme_end;
3359		cur = cur->vme_next;
3360	}
3361	return (FALSE);
3362}
3363
3364/*
3365 *	vm_map_submap:		[ kernel use only ]
3366 *
3367 *	Mark the given range as handled by a subordinate map.
3368 *
3369 *	This range must have been created with vm_map_find using
3370 *	the vm_submap_object, and no other operations may have been
3371 *	performed on this range prior to calling vm_map_submap.
3372 *
3373 *	Only a limited number of operations can be performed
3374 *	within this range after calling vm_map_submap:
3375 *		vm_fault
3376 *	[Don't try vm_map_copyin!]
3377 *
3378 *	To remove a submapping, one must first remove the
3379 *	range from the superior map, and then destroy the
3380 *	submap (if desired).  [Better yet, don't try it.]
3381 */
3382kern_return_t
3383vm_map_submap(
3384	vm_map_t		map,
3385	vm_map_offset_t	start,
3386	vm_map_offset_t	end,
3387	vm_map_t		submap,
3388	vm_map_offset_t	offset,
3389#ifdef NO_NESTED_PMAP
3390	__unused
3391#endif	/* NO_NESTED_PMAP */
3392	boolean_t		use_pmap)
3393{
3394	vm_map_entry_t		entry;
3395	register kern_return_t	result = KERN_INVALID_ARGUMENT;
3396	register vm_object_t	object;
3397
3398	vm_map_lock(map);
3399
3400	if (! vm_map_lookup_entry(map, start, &entry)) {
3401		entry = entry->vme_next;
3402	}
3403
3404	if (entry == vm_map_to_entry(map) ||
3405	    entry->is_sub_map) {
3406		vm_map_unlock(map);
3407		return KERN_INVALID_ARGUMENT;
3408	}
3409
3410	assert(!entry->use_pmap); /* we don't want to unnest anything here */
3411	vm_map_clip_start(map, entry, start);
3412	vm_map_clip_end(map, entry, end);
3413
3414	if ((entry->vme_start == start) && (entry->vme_end == end) &&
3415	    (!entry->is_sub_map) &&
3416	    ((object = entry->object.vm_object) == vm_submap_object) &&
3417	    (object->resident_page_count == 0) &&
3418	    (object->copy == VM_OBJECT_NULL) &&
3419	    (object->shadow == VM_OBJECT_NULL) &&
3420	    (!object->pager_created)) {
3421		entry->offset = (vm_object_offset_t)offset;
3422		entry->object.vm_object = VM_OBJECT_NULL;
3423		vm_object_deallocate(object);
3424		entry->is_sub_map = TRUE;
3425		entry->object.sub_map = submap;
3426		vm_map_reference(submap);
3427		if (submap->mapped_in_other_pmaps == FALSE &&
3428		    vm_map_pmap(submap) != PMAP_NULL &&
3429		    vm_map_pmap(submap) != vm_map_pmap(map)) {
3430			/*
3431			 * This submap is being mapped in a map
3432			 * that uses a different pmap.
3433			 * Set its "mapped_in_other_pmaps" flag
3434			 * to indicate that we now need to
3435			 * remove mappings from all pmaps rather
3436			 * than just the submap's pmap.
3437			 */
3438			submap->mapped_in_other_pmaps = TRUE;
3439		}
3440
3441#ifndef NO_NESTED_PMAP
3442		if (use_pmap) {
3443			/* nest if platform code will allow */
3444			if(submap->pmap == NULL) {
3445				ledger_t ledger = map->pmap->ledger;
3446				submap->pmap = pmap_create(ledger,
3447						(vm_map_size_t) 0, FALSE);
3448				if(submap->pmap == PMAP_NULL) {
3449					vm_map_unlock(map);
3450					return(KERN_NO_SPACE);
3451				}
3452			}
3453			result = pmap_nest(map->pmap,
3454					   (entry->object.sub_map)->pmap,
3455					   (addr64_t)start,
3456					   (addr64_t)start,
3457					   (uint64_t)(end - start));
3458			if(result)
3459				panic("vm_map_submap: pmap_nest failed, rc = %08X\n", result);
3460			entry->use_pmap = TRUE;
3461		}
3462#else	/* NO_NESTED_PMAP */
3463		pmap_remove(map->pmap, (addr64_t)start, (addr64_t)end);
3464#endif	/* NO_NESTED_PMAP */
3465		result = KERN_SUCCESS;
3466	}
3467	vm_map_unlock(map);
3468
3469	return(result);
3470}
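
/*
 *	Illustrative sketch (not part of the build): marking an already
 *	reserved range as a submap.  This assumes [start, end) was set up
 *	with vm_submap_object as described in the comment above; the names
 *	and the submap's own bounds are hypothetical.
 *
 *		vm_map_t	submap;
 *		kern_return_t	kr;
 *
 *		submap = vm_map_create(PMAP_NULL,
 *				       (vm_map_offset_t)0,
 *				       (vm_map_offset_t)(end - start),
 *				       map->hdr.entries_pageable);
 *		kr = vm_map_submap(map, start, end, submap,
 *				   (vm_map_offset_t)0,	// offset in submap
 *				   FALSE);		// use_pmap: no pmap nesting
 */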
3471
3472/*
3473 *	vm_map_protect:
3474 *
3475 *	Sets the protection of the specified address
3476 *	region in the target map.  If "set_max" is
3477 *	specified, the maximum protection is to be set;
3478 *	otherwise, only the current protection is affected.
3479 */
3480kern_return_t
3481vm_map_protect(
3482	register vm_map_t	map,
3483	register vm_map_offset_t	start,
3484	register vm_map_offset_t	end,
3485	register vm_prot_t	new_prot,
3486	register boolean_t	set_max)
3487{
3488	register vm_map_entry_t		current;
3489	register vm_map_offset_t	prev;
3490	vm_map_entry_t			entry;
3491	vm_prot_t			new_max;
3492
3493	XPR(XPR_VM_MAP,
3494	    "vm_map_protect, 0x%X start 0x%X end 0x%X, new 0x%X %d",
3495	    map, start, end, new_prot, set_max);
3496
3497	vm_map_lock(map);
3498
3499	/* LP64todo - remove this check when vm_map_commpage64()
3500	 * no longer has to stuff in a map_entry for the commpage
3501	 * above the map's max_offset.
3502	 */
3503	if (start >= map->max_offset) {
3504		vm_map_unlock(map);
3505		return(KERN_INVALID_ADDRESS);
3506	}
3507
3508	while(1) {
3509		/*
3510		 * 	Lookup the entry.  If it doesn't start in a valid
3511		 *	entry, return an error.
3512		 */
3513		if (! vm_map_lookup_entry(map, start, &entry)) {
3514			vm_map_unlock(map);
3515			return(KERN_INVALID_ADDRESS);
3516		}
3517
3518		if (entry->superpage_size && (start & (SUPERPAGE_SIZE-1))) { /* extend request to whole entry */
3519			start = SUPERPAGE_ROUND_DOWN(start);
3520			continue;
3521		}
3522		break;
3523	}
3524	if (entry->superpage_size)
3525		end = SUPERPAGE_ROUND_UP(end);
3526
3527	/*
3528	 *	Make a first pass to check for protection and address
3529	 *	violations.
3530	 */
3531
3532	current = entry;
3533	prev = current->vme_start;
3534	while ((current != vm_map_to_entry(map)) &&
3535	       (current->vme_start < end)) {
3536
3537		/*
3538		 * If there is a hole, return an error.
3539		 */
3540		if (current->vme_start != prev) {
3541			vm_map_unlock(map);
3542			return(KERN_INVALID_ADDRESS);
3543		}
3544
3545		new_max = current->max_protection;
3546		if(new_prot & VM_PROT_COPY) {
3547			new_max |= VM_PROT_WRITE;
3548			if ((new_prot & (new_max | VM_PROT_COPY)) != new_prot) {
3549				vm_map_unlock(map);
3550				return(KERN_PROTECTION_FAILURE);
3551			}
3552		} else {
3553			if ((new_prot & new_max) != new_prot) {
3554				vm_map_unlock(map);
3555				return(KERN_PROTECTION_FAILURE);
3556			}
3557		}
3558
3559#if CONFIG_EMBEDDED
3560		if (new_prot & VM_PROT_WRITE) {
3561			if ((new_prot & VM_PROT_EXECUTE) && !(current->used_for_jit)) {
3562				printf("EMBEDDED: %s can't have both write and exec at the same time\n", __FUNCTION__);
3563				new_prot &= ~VM_PROT_EXECUTE;
3564			}
3565		}
3566#endif
3567
3568		prev = current->vme_end;
3569		current = current->vme_next;
3570	}
3571	if (end > prev) {
3572		vm_map_unlock(map);
3573		return(KERN_INVALID_ADDRESS);
3574	}
3575
3576	/*
3577	 *	Go back and fix up protections.
3578	 *	Clip to start here if the range starts within
3579	 *	the entry.
3580	 */
3581
3582	current = entry;
3583	if (current != vm_map_to_entry(map)) {
3584		/* clip and unnest if necessary */
3585		vm_map_clip_start(map, current, start);
3586	}
3587
3588	while ((current != vm_map_to_entry(map)) &&
3589	       (current->vme_start < end)) {
3590
3591		vm_prot_t	old_prot;
3592
3593		vm_map_clip_end(map, current, end);
3594
3595		assert(!current->use_pmap); /* clipping did unnest if needed */
3596
3597		old_prot = current->protection;
3598
3599		if(new_prot & VM_PROT_COPY) {
3600			/* The caller is asking specifically to copy the    */
3601			/* mapped data; this implies that max protection    */
3602			/* will include write.  The caller must be prepared */
3603			/* for loss of shared memory communication in the   */
3604			/* target area after taking this step.              */
3605
3606			if (current->is_sub_map == FALSE && current->object.vm_object == VM_OBJECT_NULL){
3607				current->object.vm_object = vm_object_allocate((vm_map_size_t)(current->vme_end - current->vme_start));
3608				current->offset = 0;
3609			}
3610			current->needs_copy = TRUE;
3611			current->max_protection |= VM_PROT_WRITE;
3612		}
3613
3614		if (set_max)
3615			current->protection =
3616				(current->max_protection =
3617				 new_prot & ~VM_PROT_COPY) &
3618				old_prot;
3619		else
3620			current->protection = new_prot & ~VM_PROT_COPY;
3621
3622		/*
3623		 *	Update physical map if necessary.
3624		 *	If the request is to turn off write protection,
3625		 *	we won't do it for real (in pmap). This is because
3626		 *	it would cause copy-on-write to fail.  We've already
3627		 *	set the new protection in the map, so if a
3628		 *	write-protect fault occurs, it will be fixed up
3629		 *	properly, COW or not.
3630		 */
3631		if (current->protection != old_prot) {
3632			/* Look one level in; we support nested pmaps   */
3633			/* from mapped submaps which are direct entries */
3634			/* in our map.                                  */
3635
3636			vm_prot_t prot;
3637
3638			prot = current->protection & ~VM_PROT_WRITE;
3639
3640			if (override_nx(map, current->alias) && prot)
3641			        prot |= VM_PROT_EXECUTE;
3642
3643			if (current->is_sub_map && current->use_pmap) {
3644				pmap_protect(current->object.sub_map->pmap,
3645					     current->vme_start,
3646					     current->vme_end,
3647					     prot);
3648			} else {
3649				pmap_protect(map->pmap,
3650					     current->vme_start,
3651					     current->vme_end,
3652					     prot);
3653			}
3654		}
3655		current = current->vme_next;
3656	}
3657
3658	current = entry;
3659	while ((current != vm_map_to_entry(map)) &&
3660	       (current->vme_start <= end)) {
3661		vm_map_simplify_entry(map, current);
3662		current = current->vme_next;
3663	}
3664
3665	vm_map_unlock(map);
3666	return(KERN_SUCCESS);
3667}
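
/*
 *	Illustrative sketch (not part of the build): dropping write access on
 *	a range with vm_map_protect().  With set_max == FALSE only the current
 *	protection changes; passing TRUE would also lower the maximum
 *	protection, which normally cannot be raised again later.
 *
 *		kr = vm_map_protect(map, start, end, VM_PROT_READ, FALSE);
 */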
3668
3669/*
3670 *	vm_map_inherit:
3671 *
3672 *	Sets the inheritance of the specified address
3673 *	range in the target map.  Inheritance
3674 *	affects how the map will be shared with
3675 *	child maps at the time of vm_map_fork.
3676 */
3677kern_return_t
3678vm_map_inherit(
3679	register vm_map_t	map,
3680	register vm_map_offset_t	start,
3681	register vm_map_offset_t	end,
3682	register vm_inherit_t	new_inheritance)
3683{
3684	register vm_map_entry_t	entry;
3685	vm_map_entry_t	temp_entry;
3686
3687	vm_map_lock(map);
3688
3689	VM_MAP_RANGE_CHECK(map, start, end);
3690
3691	if (vm_map_lookup_entry(map, start, &temp_entry)) {
3692		entry = temp_entry;
3693	}
3694	else {
3695		temp_entry = temp_entry->vme_next;
3696		entry = temp_entry;
3697	}
3698
3699	/* first check entire range for submaps which can't support the */
3700	/* given inheritance. */
3701	while ((entry != vm_map_to_entry(map)) && (entry->vme_start < end)) {
3702		if(entry->is_sub_map) {
3703			if(new_inheritance == VM_INHERIT_COPY) {
3704				vm_map_unlock(map);
3705				return(KERN_INVALID_ARGUMENT);
3706			}
3707		}
3708
3709		entry = entry->vme_next;
3710	}
3711
3712	entry = temp_entry;
3713	if (entry != vm_map_to_entry(map)) {
3714		/* clip and unnest if necessary */
3715		vm_map_clip_start(map, entry, start);
3716	}
3717
3718	while ((entry != vm_map_to_entry(map)) && (entry->vme_start < end)) {
3719		vm_map_clip_end(map, entry, end);
3720		assert(!entry->use_pmap); /* clip did unnest if needed */
3721
3722		entry->inheritance = new_inheritance;
3723
3724		entry = entry->vme_next;
3725	}
3726
3727	vm_map_unlock(map);
3728	return(KERN_SUCCESS);
3729}
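
/*
 *	Editor's example (illustrative sketch, not part of the original
 *	source):  a caller that wants a range of the current task's map to
 *	be private across vm_map_fork() could mark it VM_INHERIT_NONE.
 *	The range below is an assumption.
 */
#if 0	/* example only -- not compiled */
static void
example_make_range_uninherited(
	vm_map_offset_t	start,
	vm_map_offset_t	end)
{
	kern_return_t	kr;

	/* child maps created later by vm_map_fork() will not see this range */
	kr = vm_map_inherit(current_map(), start, end, VM_INHERIT_NONE);
	assert(kr == KERN_SUCCESS);
}
#endif	/* example only */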
3730
3731/*
3732 * Update the accounting for the amount of wired memory in this map.  If the user has
3733 * exceeded the defined limits, then we fail.  Wiring on behalf of the kernel never fails.
3734 */
3735
3736static kern_return_t
3737add_wire_counts(
3738	vm_map_t	map,
3739	vm_map_entry_t	entry,
3740	boolean_t	user_wire)
3741{
3742	vm_map_size_t	size;
3743
3744	if (user_wire) {
3745		unsigned int total_wire_count =  vm_page_wire_count + vm_lopage_free_count;
3746
3747		/*
3748		 * We're wiring memory at the request of the user.  Check if this is the first time the user is wiring
3749		 * this map entry.
3750		 */
3751
3752		if (entry->user_wired_count == 0) {
3753			size = entry->vme_end - entry->vme_start;
3754
			/*
			 * Since this is the first time the user is wiring this map entry, check to see if we're
			 * exceeding the user wire limits.  There is a per-map limit, which is the smaller of the
			 * process's rlimit and the global vm_user_wire_limit.  There is also a system-wide limit
			 * on the amount of memory all users can wire.  If the user is over either limit, we fail.
			 */
3762
3763			if(size + map->user_wire_size > MIN(map->user_wire_limit, vm_user_wire_limit) ||
3764			   size + ptoa_64(total_wire_count) > vm_global_user_wire_limit ||
3765		    	   size + ptoa_64(total_wire_count) > max_mem - vm_global_no_user_wire_amount)
3766				return KERN_RESOURCE_SHORTAGE;
3767
3768			/*
3769			 * The first time the user wires an entry, we also increment the wired_count and add this to
3770			 * the total that has been wired in the map.
3771			 */
3772
3773			if (entry->wired_count >= MAX_WIRE_COUNT)
3774				return KERN_FAILURE;
3775
3776			entry->wired_count++;
3777			map->user_wire_size += size;
3778		}
3779
3780		if (entry->user_wired_count >= MAX_WIRE_COUNT)
3781			return KERN_FAILURE;
3782
3783		entry->user_wired_count++;
3784
3785	} else {
3786
		/*
		 * The kernel is wiring the memory.  Just bump the count and continue.
		 */
3790
3791		if (entry->wired_count >= MAX_WIRE_COUNT)
3792			panic("vm_map_wire: too many wirings");
3793
3794		entry->wired_count++;
3795	}
3796
3797	return KERN_SUCCESS;
3798}
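
/*
 *	Editor's note:  the limit check above combines a per-map cap (the
 *	smaller of the map's user_wire_limit and the global
 *	vm_user_wire_limit) with two system-wide caps on total wired
 *	memory.  A minimal sketch of that predicate, using a hypothetical
 *	helper name:
 */
#if 0	/* example only -- not compiled */
static boolean_t
example_user_wire_fits(
	vm_map_t	map,
	vm_map_size_t	size)
{
	unsigned int total_wire_count = vm_page_wire_count + vm_lopage_free_count;

	if (size + map->user_wire_size > MIN(map->user_wire_limit, vm_user_wire_limit))
		return FALSE;	/* per-map limit exceeded */
	if (size + ptoa_64(total_wire_count) > vm_global_user_wire_limit)
		return FALSE;	/* system-wide user wire limit exceeded */
	if (size + ptoa_64(total_wire_count) > max_mem - vm_global_no_user_wire_amount)
		return FALSE;	/* would leave too little unwired memory */
	return TRUE;
}
#endif	/* example only */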
3799
3800/*
3801 * Update the memory wiring accounting now that the given map entry is being unwired.
3802 */
3803
3804static void
3805subtract_wire_counts(
3806	vm_map_t	map,
3807	vm_map_entry_t	entry,
3808	boolean_t	user_wire)
3809{
3810
3811	if (user_wire) {
3812
3813		/*
3814		 * We're unwiring memory at the request of the user.  See if we're removing the last user wire reference.
3815		 */
3816
3817		if (entry->user_wired_count == 1) {
3818
3819			/*
3820			 * We're removing the last user wire reference.  Decrement the wired_count and the total
3821			 * user wired memory for this map.
3822			 */
3823
3824			assert(entry->wired_count >= 1);
3825			entry->wired_count--;
3826			map->user_wire_size -= entry->vme_end - entry->vme_start;
3827		}
3828
3829		assert(entry->user_wired_count >= 1);
3830		entry->user_wired_count--;
3831
3832	} else {
3833
3834		/*
3835		 * The kernel is unwiring the memory.   Just update the count.
3836		 */
3837
3838		assert(entry->wired_count >= 1);
3839		entry->wired_count--;
3840	}
3841}
3842
3843/*
3844 *	vm_map_wire:
3845 *
3846 *	Sets the pageability of the specified address range in the
3847 *	target map as wired.  Regions specified as not pageable require
3848 *	locked-down physical memory and physical page maps.  The
3849 *	access_type variable indicates types of accesses that must not
3850 *	generate page faults.  This is checked against protection of
3851 *	memory being locked-down.
3852 *
3853 *	The map must not be locked, but a reference must remain to the
3854 *	map throughout the call.
3855 */
3856static kern_return_t
3857vm_map_wire_nested(
3858	register vm_map_t	map,
3859	register vm_map_offset_t	start,
3860	register vm_map_offset_t	end,
3861	register vm_prot_t	access_type,
3862	boolean_t		user_wire,
3863	pmap_t			map_pmap,
3864	vm_map_offset_t		pmap_addr)
3865{
3866	register vm_map_entry_t	entry;
3867	struct vm_map_entry	*first_entry, tmp_entry;
3868	vm_map_t		real_map;
3869	register vm_map_offset_t	s,e;
3870	kern_return_t		rc;
3871	boolean_t		need_wakeup;
3872	boolean_t		main_map = FALSE;
3873	wait_interrupt_t	interruptible_state;
3874	thread_t		cur_thread;
3875	unsigned int		last_timestamp;
3876	vm_map_size_t		size;
3877
3878	vm_map_lock(map);
3879	if(map_pmap == NULL)
3880		main_map = TRUE;
3881	last_timestamp = map->timestamp;
3882
3883	VM_MAP_RANGE_CHECK(map, start, end);
3884	assert(page_aligned(start));
3885	assert(page_aligned(end));
3886	if (start == end) {
3887		/* We wired what the caller asked for, zero pages */
3888		vm_map_unlock(map);
3889		return KERN_SUCCESS;
3890	}
3891
3892	need_wakeup = FALSE;
3893	cur_thread = current_thread();
3894
3895	s = start;
3896	rc = KERN_SUCCESS;
3897
3898	if (vm_map_lookup_entry(map, s, &first_entry)) {
3899		entry = first_entry;
3900		/*
3901		 * vm_map_clip_start will be done later.
3902		 * We don't want to unnest any nested submaps here !
3903		 */
3904	} else {
3905		/* Start address is not in map */
3906		rc = KERN_INVALID_ADDRESS;
3907		goto done;
3908	}
3909
3910	while ((entry != vm_map_to_entry(map)) && (s < end)) {
3911		/*
3912		 * At this point, we have wired from "start" to "s".
3913		 * We still need to wire from "s" to "end".
3914		 *
3915		 * "entry" hasn't been clipped, so it could start before "s"
3916		 * and/or end after "end".
3917		 */
3918
3919		/* "e" is how far we want to wire in this entry */
3920		e = entry->vme_end;
3921		if (e > end)
3922			e = end;
3923
3924		/*
3925		 * If another thread is wiring/unwiring this entry then
3926		 * block after informing other thread to wake us up.
3927		 */
3928		if (entry->in_transition) {
3929			wait_result_t wait_result;
3930
3931			/*
3932			 * We have not clipped the entry.  Make sure that
3933			 * the start address is in range so that the lookup
3934			 * below will succeed.
3935			 * "s" is the current starting point: we've already
3936			 * wired from "start" to "s" and we still have
3937			 * to wire from "s" to "end".
3938			 */
3939
3940			entry->needs_wakeup = TRUE;
3941
3942			/*
3943			 * wake up anybody waiting on entries that we have
3944			 * already wired.
3945			 */
3946			if (need_wakeup) {
3947				vm_map_entry_wakeup(map);
3948				need_wakeup = FALSE;
3949			}
3950			/*
3951			 * User wiring is interruptible
3952			 */
3953			wait_result = vm_map_entry_wait(map,
3954							(user_wire) ? THREAD_ABORTSAFE :
3955							THREAD_UNINT);
3956			if (user_wire && wait_result ==	THREAD_INTERRUPTED) {
3957				/*
3958				 * undo the wirings we have done so far
3959				 * We do not clear the needs_wakeup flag,
3960				 * because we cannot tell if we were the
3961				 * only one waiting.
3962				 */
3963				rc = KERN_FAILURE;
3964				goto done;
3965			}
3966
			/*
			 * Cannot avoid a lookup here.  Reset the timestamp.
			 */
3970			last_timestamp = map->timestamp;
3971
			/*
			 * The entry could have been clipped, so look it up again.
			 * The worst that can happen is that it no longer exists.
			 */
3976			if (!vm_map_lookup_entry(map, s, &first_entry)) {
				/*
				 * User: undo everything up to the previous
				 * entry.  Let vm_map_unwire worry about
				 * checking the validity of the range.
				 */
3982				rc = KERN_FAILURE;
3983				goto done;
3984			}
3985			entry = first_entry;
3986			continue;
3987		}
3988
3989		if (entry->is_sub_map) {
3990			vm_map_offset_t	sub_start;
3991			vm_map_offset_t	sub_end;
3992			vm_map_offset_t	local_start;
3993			vm_map_offset_t	local_end;
3994			pmap_t		pmap;
3995
3996			vm_map_clip_start(map, entry, s);
3997			vm_map_clip_end(map, entry, end);
3998
3999			sub_start = entry->offset;
4000			sub_end = entry->vme_end;
4001			sub_end += entry->offset - entry->vme_start;
4002
4003			local_end = entry->vme_end;
4004			if(map_pmap == NULL) {
4005				vm_object_t		object;
4006				vm_object_offset_t	offset;
4007				vm_prot_t		prot;
4008				boolean_t		wired;
4009				vm_map_entry_t		local_entry;
4010				vm_map_version_t	 version;
4011				vm_map_t		lookup_map;
4012
4013				if(entry->use_pmap) {
4014					pmap = entry->object.sub_map->pmap;
					/* The ppc implementation requires  */
					/* that submap pmap address ranges  */
					/* line up with the parent map      */
4018#ifdef notdef
4019					pmap_addr = sub_start;
4020#endif
4021					pmap_addr = s;
4022				} else {
4023					pmap = map->pmap;
4024					pmap_addr = s;
4025				}
4026
4027				if (entry->wired_count) {
4028					if ((rc = add_wire_counts(map, entry, user_wire)) != KERN_SUCCESS)
4029						goto done;
4030
4031					/*
4032					 * The map was not unlocked:
4033					 * no need to goto re-lookup.
4034					 * Just go directly to next entry.
4035					 */
4036					entry = entry->vme_next;
4037					s = entry->vme_start;
4038					continue;
4039
4040				}
4041
				/* call vm_map_lookup_locked to  */
				/* cause any needs_copy to be    */
				/* evaluated                     */
4045				local_start = entry->vme_start;
4046				lookup_map = map;
4047				vm_map_lock_write_to_read(map);
4048				if(vm_map_lookup_locked(
4049					   &lookup_map, local_start,
4050					   access_type,
4051					   OBJECT_LOCK_EXCLUSIVE,
4052					   &version, &object,
4053					   &offset, &prot, &wired,
4054					   NULL,
4055					   &real_map)) {
4056
4057					vm_map_unlock_read(lookup_map);
4058					vm_map_unwire(map, start,
4059						      s, user_wire);
4060					return(KERN_FAILURE);
4061				}
4062				vm_object_unlock(object);
4063				if(real_map != lookup_map)
4064					vm_map_unlock(real_map);
4065				vm_map_unlock_read(lookup_map);
4066				vm_map_lock(map);
4067
4068				/* we unlocked, so must re-lookup */
4069				if (!vm_map_lookup_entry(map,
4070							 local_start,
4071							 &local_entry)) {
4072					rc = KERN_FAILURE;
4073					goto done;
4074				}
4075
4076				/*
4077				 * entry could have been "simplified",
4078				 * so re-clip
4079				 */
4080				entry = local_entry;
4081				assert(s == local_start);
4082				vm_map_clip_start(map, entry, s);
4083				vm_map_clip_end(map, entry, end);
4084				/* re-compute "e" */
4085				e = entry->vme_end;
4086				if (e > end)
4087					e = end;
4088
4089				/* did we have a change of type? */
4090				if (!entry->is_sub_map) {
4091					last_timestamp = map->timestamp;
4092					continue;
4093				}
4094			} else {
4095				local_start = entry->vme_start;
4096				pmap = map_pmap;
4097			}
4098
4099			if ((rc = add_wire_counts(map, entry, user_wire)) != KERN_SUCCESS)
4100				goto done;
4101
4102			entry->in_transition = TRUE;
4103
4104			vm_map_unlock(map);
4105			rc = vm_map_wire_nested(entry->object.sub_map,
4106						sub_start, sub_end,
4107						access_type,
4108						user_wire, pmap, pmap_addr);
4109			vm_map_lock(map);
4110
4111			/*
4112			 * Find the entry again.  It could have been clipped
4113			 * after we unlocked the map.
4114			 */
4115			if (!vm_map_lookup_entry(map, local_start,
4116						 &first_entry))
4117				panic("vm_map_wire: re-lookup failed");
4118			entry = first_entry;
4119
4120			assert(local_start == s);
4121			/* re-compute "e" */
4122			e = entry->vme_end;
4123			if (e > end)
4124				e = end;
4125
4126			last_timestamp = map->timestamp;
4127			while ((entry != vm_map_to_entry(map)) &&
4128			       (entry->vme_start < e)) {
4129				assert(entry->in_transition);
4130				entry->in_transition = FALSE;
4131				if (entry->needs_wakeup) {
4132					entry->needs_wakeup = FALSE;
4133					need_wakeup = TRUE;
4134				}
4135				if (rc != KERN_SUCCESS) {/* from vm_*_wire */
4136					subtract_wire_counts(map, entry, user_wire);
4137				}
4138				entry = entry->vme_next;
4139			}
4140			if (rc != KERN_SUCCESS) {	/* from vm_*_wire */
4141				goto done;
4142			}
4143
4144			/* no need to relookup again */
4145			s = entry->vme_start;
4146			continue;
4147		}
4148
4149		/*
4150		 * If this entry is already wired then increment
4151		 * the appropriate wire reference count.
4152		 */
4153		if (entry->wired_count) {
4154			/*
4155			 * entry is already wired down, get our reference
4156			 * after clipping to our range.
4157			 */
4158			vm_map_clip_start(map, entry, s);
4159			vm_map_clip_end(map, entry, end);
4160
4161			if ((rc = add_wire_counts(map, entry, user_wire)) != KERN_SUCCESS)
4162				goto done;
4163
4164			/* map was not unlocked: no need to relookup */
4165			entry = entry->vme_next;
4166			s = entry->vme_start;
4167			continue;
4168		}
4169
4170		/*
4171		 * Unwired entry or wire request transmitted via submap
4172		 */
4173
4174
4175		/*
4176		 * Perform actions of vm_map_lookup that need the write
4177		 * lock on the map: create a shadow object for a
4178		 * copy-on-write region, or an object for a zero-fill
4179		 * region.
4180		 */
4181		size = entry->vme_end - entry->vme_start;
4182		/*
4183		 * If wiring a copy-on-write page, we need to copy it now
4184		 * even if we're only (currently) requesting read access.
4185		 * This is aggressive, but once it's wired we can't move it.
4186		 */
4187		if (entry->needs_copy) {
4188			vm_object_shadow(&entry->object.vm_object,
4189					 &entry->offset, size);
4190			entry->needs_copy = FALSE;
4191		} else if (entry->object.vm_object == VM_OBJECT_NULL) {
4192			entry->object.vm_object = vm_object_allocate(size);
4193			entry->offset = (vm_object_offset_t)0;
4194		}
4195
4196		vm_map_clip_start(map, entry, s);
4197		vm_map_clip_end(map, entry, end);
4198
4199		/* re-compute "e" */
4200		e = entry->vme_end;
4201		if (e > end)
4202			e = end;
4203
4204		/*
4205		 * Check for holes and protection mismatch.
4206		 * Holes: Next entry should be contiguous unless this
4207		 *	  is the end of the region.
4208		 * Protection: Access requested must be allowed, unless
4209		 *	wiring is by protection class
4210		 */
4211		if ((entry->vme_end < end) &&
4212		    ((entry->vme_next == vm_map_to_entry(map)) ||
4213		     (entry->vme_next->vme_start > entry->vme_end))) {
4214			/* found a hole */
4215			rc = KERN_INVALID_ADDRESS;
4216			goto done;
4217		}
4218		if ((entry->protection & access_type) != access_type) {
4219			/* found a protection problem */
4220			rc = KERN_PROTECTION_FAILURE;
4221			goto done;
4222		}
4223
4224		assert(entry->wired_count == 0 && entry->user_wired_count == 0);
4225
4226		if ((rc = add_wire_counts(map, entry, user_wire)) != KERN_SUCCESS)
4227			goto done;
4228
4229		entry->in_transition = TRUE;
4230
4231		/*
4232		 * This entry might get split once we unlock the map.
4233		 * In vm_fault_wire(), we need the current range as
4234		 * defined by this entry.  In order for this to work
4235		 * along with a simultaneous clip operation, we make a
4236		 * temporary copy of this entry and use that for the
4237		 * wiring.  Note that the underlying objects do not
4238		 * change during a clip.
4239		 */
4240		tmp_entry = *entry;
4241
		/*
		 * The in_transition state guarantees that the entry
		 * (or entries for this range, if a split occurred) will be
		 * there when the map lock is acquired for the second time.
		 */
4247		vm_map_unlock(map);
4248
4249		if (!user_wire && cur_thread != THREAD_NULL)
4250			interruptible_state = thread_interrupt_level(THREAD_UNINT);
4251		else
4252			interruptible_state = THREAD_UNINT;
4253
4254		if(map_pmap)
4255			rc = vm_fault_wire(map,
4256					   &tmp_entry, map_pmap, pmap_addr);
4257		else
4258			rc = vm_fault_wire(map,
4259					   &tmp_entry, map->pmap,
4260					   tmp_entry.vme_start);
4261
4262		if (!user_wire && cur_thread != THREAD_NULL)
4263			thread_interrupt_level(interruptible_state);
4264
4265		vm_map_lock(map);
4266
4267		if (last_timestamp+1 != map->timestamp) {
4268			/*
4269			 * Find the entry again.  It could have been clipped
4270			 * after we unlocked the map.
4271			 */
4272			if (!vm_map_lookup_entry(map, tmp_entry.vme_start,
4273						 &first_entry))
4274				panic("vm_map_wire: re-lookup failed");
4275
4276			entry = first_entry;
4277		}
4278
4279		last_timestamp = map->timestamp;
4280
4281		while ((entry != vm_map_to_entry(map)) &&
4282		       (entry->vme_start < tmp_entry.vme_end)) {
4283			assert(entry->in_transition);
4284			entry->in_transition = FALSE;
4285			if (entry->needs_wakeup) {
4286				entry->needs_wakeup = FALSE;
4287				need_wakeup = TRUE;
4288			}
4289			if (rc != KERN_SUCCESS) {	/* from vm_*_wire */
4290				subtract_wire_counts(map, entry, user_wire);
4291			}
4292			entry = entry->vme_next;
4293		}
4294
4295		if (rc != KERN_SUCCESS) {		/* from vm_*_wire */
4296			goto done;
4297		}
4298
4299		s = entry->vme_start;
4300	} /* end while loop through map entries */
4301
4302done:
4303	if (rc == KERN_SUCCESS) {
4304		/* repair any damage we may have made to the VM map */
4305		vm_map_simplify_range(map, start, end);
4306	}
4307
4308	vm_map_unlock(map);
4309
4310	/*
4311	 * wake up anybody waiting on entries we wired.
4312	 */
4313	if (need_wakeup)
4314		vm_map_entry_wakeup(map);
4315
4316	if (rc != KERN_SUCCESS) {
4317		/* undo what has been wired so far */
4318		vm_map_unwire(map, start, s, user_wire);
4319	}
4320
4321	return rc;
4322
4323}
4324
4325kern_return_t
4326vm_map_wire(
4327	register vm_map_t	map,
4328	register vm_map_offset_t	start,
4329	register vm_map_offset_t	end,
4330	register vm_prot_t	access_type,
4331	boolean_t		user_wire)
4332{
4333
4334	kern_return_t	kret;
4335
4336	kret = vm_map_wire_nested(map, start, end, access_type,
4337				  user_wire, (pmap_t)NULL, 0);
4338	return kret;
4339}
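
/*
 *	Editor's example (illustrative sketch, not part of the original
 *	source):  wiring a page-aligned range of the current task's map
 *	for read/write access.  The access_type must be permitted by the
 *	range's protection or vm_map_wire() fails with
 *	KERN_PROTECTION_FAILURE.  The addresses are assumptions.
 */
#if 0	/* example only -- not compiled */
static kern_return_t
example_wire_user_buffer(
	vm_map_offset_t	start,
	vm_map_size_t	size)
{
	vm_map_t	map = current_map();

	return vm_map_wire(map,
			   vm_map_trunc_page(start),
			   vm_map_round_page(start + size),
			   VM_PROT_READ | VM_PROT_WRITE,
			   TRUE);	/* user wire: counted against the wire limits */
}
#endif	/* example only */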
4340
4341/*
4342 *	vm_map_unwire:
4343 *
 *	Sets the pageability of the specified address range in the target
 *	map as pageable.  Regions specified must have been wired previously.
 *
 *	The map must not be locked, but a reference must remain to the map
 *	throughout the call.
 *
 *	The kernel will panic on failures.  User unwire ignores holes and
 *	unwired and in-transition entries to avoid losing memory by leaving
 *	it unwired.
4353 */
4354static kern_return_t
4355vm_map_unwire_nested(
4356	register vm_map_t	map,
4357	register vm_map_offset_t	start,
4358	register vm_map_offset_t	end,
4359	boolean_t		user_wire,
4360	pmap_t			map_pmap,
4361	vm_map_offset_t		pmap_addr)
4362{
4363	register vm_map_entry_t	entry;
4364	struct vm_map_entry	*first_entry, tmp_entry;
4365	boolean_t		need_wakeup;
4366	boolean_t		main_map = FALSE;
4367	unsigned int		last_timestamp;
4368
4369	vm_map_lock(map);
4370	if(map_pmap == NULL)
4371		main_map = TRUE;
4372	last_timestamp = map->timestamp;
4373
4374	VM_MAP_RANGE_CHECK(map, start, end);
4375	assert(page_aligned(start));
4376	assert(page_aligned(end));
4377
4378	if (start == end) {
4379		/* We unwired what the caller asked for: zero pages */
4380		vm_map_unlock(map);
4381		return KERN_SUCCESS;
4382	}
4383
4384	if (vm_map_lookup_entry(map, start, &first_entry)) {
4385		entry = first_entry;
4386		/*
4387		 * vm_map_clip_start will be done later.
4388		 * We don't want to unnest any nested sub maps here !
4389		 */
4390	}
4391	else {
4392		if (!user_wire) {
4393			panic("vm_map_unwire: start not found");
4394		}
4395		/*	Start address is not in map. */
4396		vm_map_unlock(map);
4397		return(KERN_INVALID_ADDRESS);
4398	}
4399
4400	if (entry->superpage_size) {
4401		/* superpages are always wired */
4402		vm_map_unlock(map);
4403		return KERN_INVALID_ADDRESS;
4404	}
4405
4406	need_wakeup = FALSE;
4407	while ((entry != vm_map_to_entry(map)) && (entry->vme_start < end)) {
4408		if (entry->in_transition) {
			/*
			 * 1)
			 * Another thread is wiring down this entry.  Note
			 * that if it were not for the other thread we would
			 * be unwiring an unwired entry.  This is not
			 * permitted.  If we wait, we will be unwiring memory
			 * we did not wire.
			 *
			 * 2)
			 * Another thread is unwiring this entry.  We did not
			 * have a reference to it, because if we did, this
			 * entry would not be getting unwired now.
			 */
4422			if (!user_wire) {
4423				/*
4424				 * XXX FBDP
4425				 * This could happen:  there could be some
4426				 * overlapping vslock/vsunlock operations
4427				 * going on.
4428				 * We should probably just wait and retry,
4429				 * but then we have to be careful that this
4430				 * entry could get "simplified" after
4431				 * "in_transition" gets unset and before
4432				 * we re-lookup the entry, so we would
4433				 * have to re-clip the entry to avoid
4434				 * re-unwiring what we have already unwired...
4435				 * See vm_map_wire_nested().
4436				 *
				 * Or we could just ignore "in_transition"
				 * here and proceed to decrement the wired
				 * count(s) on this entry.  That should be fine
4440				 * as long as "wired_count" doesn't drop all
4441				 * the way to 0 (and we should panic if THAT
4442				 * happens).
4443				 */
4444				panic("vm_map_unwire: in_transition entry");
4445			}
4446
4447			entry = entry->vme_next;
4448			continue;
4449		}
4450
4451		if (entry->is_sub_map) {
4452			vm_map_offset_t	sub_start;
4453			vm_map_offset_t	sub_end;
4454			vm_map_offset_t	local_end;
4455			pmap_t		pmap;
4456
4457			vm_map_clip_start(map, entry, start);
4458			vm_map_clip_end(map, entry, end);
4459
4460			sub_start = entry->offset;
4461			sub_end = entry->vme_end - entry->vme_start;
4462			sub_end += entry->offset;
4463			local_end = entry->vme_end;
4464			if(map_pmap == NULL) {
4465				if(entry->use_pmap) {
4466					pmap = entry->object.sub_map->pmap;
4467					pmap_addr = sub_start;
4468				} else {
4469					pmap = map->pmap;
4470					pmap_addr = start;
4471				}
4472				if (entry->wired_count == 0 ||
4473				    (user_wire && entry->user_wired_count == 0)) {
4474					if (!user_wire)
4475						panic("vm_map_unwire: entry is unwired");
4476					entry = entry->vme_next;
4477					continue;
4478				}
4479
4480				/*
4481				 * Check for holes
4482				 * Holes: Next entry should be contiguous unless
4483				 * this is the end of the region.
4484				 */
4485				if (((entry->vme_end < end) &&
4486				     ((entry->vme_next == vm_map_to_entry(map)) ||
4487				      (entry->vme_next->vme_start
4488				       > entry->vme_end)))) {
4489					if (!user_wire)
4490						panic("vm_map_unwire: non-contiguous region");
4491/*
4492					entry = entry->vme_next;
4493					continue;
4494*/
4495				}
4496
4497				subtract_wire_counts(map, entry, user_wire);
4498
4499				if (entry->wired_count != 0) {
4500					entry = entry->vme_next;
4501					continue;
4502				}
4503
4504				entry->in_transition = TRUE;
				tmp_entry = *entry;	/* see comment in vm_map_wire() */
4506
				/*
				 * We can unlock the map now.  The in_transition state
				 * guarantees existence of the entry.
				 */
4511				vm_map_unlock(map);
4512				vm_map_unwire_nested(entry->object.sub_map,
4513						     sub_start, sub_end, user_wire, pmap, pmap_addr);
4514				vm_map_lock(map);
4515
4516				if (last_timestamp+1 != map->timestamp) {
4517					/*
4518					 * Find the entry again.  It could have been
4519					 * clipped or deleted after we unlocked the map.
4520					 */
4521					if (!vm_map_lookup_entry(map,
4522								 tmp_entry.vme_start,
4523								 &first_entry)) {
4524						if (!user_wire)
4525							panic("vm_map_unwire: re-lookup failed");
4526						entry = first_entry->vme_next;
4527					} else
4528						entry = first_entry;
4529				}
4530				last_timestamp = map->timestamp;
4531
4532				/*
4533				 * clear transition bit for all constituent entries
4534				 * that were in the original entry (saved in
4535				 * tmp_entry).  Also check for waiters.
4536				 */
4537				while ((entry != vm_map_to_entry(map)) &&
4538				       (entry->vme_start < tmp_entry.vme_end)) {
4539					assert(entry->in_transition);
4540					entry->in_transition = FALSE;
4541					if (entry->needs_wakeup) {
4542						entry->needs_wakeup = FALSE;
4543						need_wakeup = TRUE;
4544					}
4545					entry = entry->vme_next;
4546				}
4547				continue;
4548			} else {
4549				vm_map_unlock(map);
4550				vm_map_unwire_nested(entry->object.sub_map,
4551						     sub_start, sub_end, user_wire, map_pmap,
4552						     pmap_addr);
4553				vm_map_lock(map);
4554
4555				if (last_timestamp+1 != map->timestamp) {
4556					/*
4557					 * Find the entry again.  It could have been
4558					 * clipped or deleted after we unlocked the map.
4559					 */
4560					if (!vm_map_lookup_entry(map,
4561								 tmp_entry.vme_start,
4562								 &first_entry)) {
4563						if (!user_wire)
4564							panic("vm_map_unwire: re-lookup failed");
4565						entry = first_entry->vme_next;
4566					} else
4567						entry = first_entry;
4568				}
4569				last_timestamp = map->timestamp;
4570			}
4571		}
4572
4573
4574		if ((entry->wired_count == 0) ||
4575		    (user_wire && entry->user_wired_count == 0)) {
4576			if (!user_wire)
4577				panic("vm_map_unwire: entry is unwired");
4578
4579			entry = entry->vme_next;
4580			continue;
4581		}
4582
4583		assert(entry->wired_count > 0 &&
4584		       (!user_wire || entry->user_wired_count > 0));
4585
4586		vm_map_clip_start(map, entry, start);
4587		vm_map_clip_end(map, entry, end);
4588
4589		/*
4590		 * Check for holes
4591		 * Holes: Next entry should be contiguous unless
4592		 *	  this is the end of the region.
4593		 */
4594		if (((entry->vme_end < end) &&
4595		     ((entry->vme_next == vm_map_to_entry(map)) ||
4596		      (entry->vme_next->vme_start > entry->vme_end)))) {
4597
4598			if (!user_wire)
4599				panic("vm_map_unwire: non-contiguous region");
4600			entry = entry->vme_next;
4601			continue;
4602		}
4603
4604		subtract_wire_counts(map, entry, user_wire);
4605
4606		if (entry->wired_count != 0) {
4607			entry = entry->vme_next;
4608			continue;
4609		}
4610
4611		if(entry->zero_wired_pages) {
4612			entry->zero_wired_pages = FALSE;
4613		}
4614
4615		entry->in_transition = TRUE;
4616		tmp_entry = *entry;	/* see comment in vm_map_wire() */
4617
		/*
		 * We can unlock the map now.  The in_transition state
		 * guarantees existence of the entry.
		 */
4622		vm_map_unlock(map);
4623		if(map_pmap) {
4624			vm_fault_unwire(map,
4625					&tmp_entry, FALSE, map_pmap, pmap_addr);
4626		} else {
4627			vm_fault_unwire(map,
4628					&tmp_entry, FALSE, map->pmap,
4629					tmp_entry.vme_start);
4630		}
4631		vm_map_lock(map);
4632
4633		if (last_timestamp+1 != map->timestamp) {
4634			/*
4635			 * Find the entry again.  It could have been clipped
4636			 * or deleted after we unlocked the map.
4637			 */
4638			if (!vm_map_lookup_entry(map, tmp_entry.vme_start,
4639						 &first_entry)) {
4640				if (!user_wire)
4641					panic("vm_map_unwire: re-lookup failed");
4642				entry = first_entry->vme_next;
4643			} else
4644				entry = first_entry;
4645		}
4646		last_timestamp = map->timestamp;
4647
4648		/*
4649		 * clear transition bit for all constituent entries that
4650		 * were in the original entry (saved in tmp_entry).  Also
4651		 * check for waiters.
4652		 */
4653		while ((entry != vm_map_to_entry(map)) &&
4654		       (entry->vme_start < tmp_entry.vme_end)) {
4655			assert(entry->in_transition);
4656			entry->in_transition = FALSE;
4657			if (entry->needs_wakeup) {
4658				entry->needs_wakeup = FALSE;
4659				need_wakeup = TRUE;
4660			}
4661			entry = entry->vme_next;
4662		}
4663	}
4664
4665	/*
4666	 * We might have fragmented the address space when we wired this
4667	 * range of addresses.  Attempt to re-coalesce these VM map entries
4668	 * with their neighbors now that they're no longer wired.
4669	 * Under some circumstances, address space fragmentation can
4670	 * prevent VM object shadow chain collapsing, which can cause
4671	 * swap space leaks.
4672	 */
4673	vm_map_simplify_range(map, start, end);
4674
4675	vm_map_unlock(map);
4676	/*
4677	 * wake up anybody waiting on entries that we have unwired.
4678	 */
4679	if (need_wakeup)
4680		vm_map_entry_wakeup(map);
4681	return(KERN_SUCCESS);
4682
4683}
4684
4685kern_return_t
4686vm_map_unwire(
4687	register vm_map_t	map,
4688	register vm_map_offset_t	start,
4689	register vm_map_offset_t	end,
4690	boolean_t		user_wire)
4691{
4692	return vm_map_unwire_nested(map, start, end,
4693				    user_wire, (pmap_t)NULL, 0);
4694}
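
/*
 *	Editor's example (illustrative sketch, not part of the original
 *	source):  the unwire call must be symmetric with the wire call --
 *	same map, same page-aligned range, same user_wire flag -- or the
 *	wired counts will not balance.  The addresses are assumptions.
 */
#if 0	/* example only -- not compiled */
static void
example_unwire_user_buffer(
	vm_map_offset_t	start,
	vm_map_size_t	size)
{
	(void) vm_map_unwire(current_map(),
			     vm_map_trunc_page(start),
			     vm_map_round_page(start + size),
			     TRUE);	/* must match the user_wire flag used to wire */
}
#endif	/* example only */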
4695
4696
4697/*
4698 *	vm_map_entry_delete:	[ internal use only ]
4699 *
4700 *	Deallocate the given entry from the target map.
4701 */
4702static void
4703vm_map_entry_delete(
4704	register vm_map_t	map,
4705	register vm_map_entry_t	entry)
4706{
4707	register vm_map_offset_t	s, e;
4708	register vm_object_t	object;
4709	register vm_map_t	submap;
4710
4711	s = entry->vme_start;
4712	e = entry->vme_end;
4713	assert(page_aligned(s));
4714	assert(page_aligned(e));
4715	assert(entry->wired_count == 0);
4716	assert(entry->user_wired_count == 0);
4717	assert(!entry->permanent);
4718
4719	if (entry->is_sub_map) {
4720		object = NULL;
4721		submap = entry->object.sub_map;
4722	} else {
4723		submap = NULL;
4724		object = entry->object.vm_object;
4725	}
4726
4727	vm_map_store_entry_unlink(map, entry);
4728	map->size -= e - s;
4729
4730	vm_map_entry_dispose(map, entry);
4731
4732	vm_map_unlock(map);
4733	/*
4734	 *	Deallocate the object only after removing all
4735	 *	pmap entries pointing to its pages.
4736	 */
4737	if (submap)
4738		vm_map_deallocate(submap);
4739	else
4740		vm_object_deallocate(object);
4741
4742}
4743
4744void
4745vm_map_submap_pmap_clean(
4746	vm_map_t	map,
4747	vm_map_offset_t	start,
4748	vm_map_offset_t	end,
4749	vm_map_t	sub_map,
4750	vm_map_offset_t	offset)
4751{
4752	vm_map_offset_t	submap_start;
4753	vm_map_offset_t	submap_end;
4754	vm_map_size_t	remove_size;
4755	vm_map_entry_t	entry;
4756
4757	submap_end = offset + (end - start);
4758	submap_start = offset;
4759
4760	vm_map_lock_read(sub_map);
4761	if(vm_map_lookup_entry(sub_map, offset, &entry)) {
4762
4763		remove_size = (entry->vme_end - entry->vme_start);
4764		if(offset > entry->vme_start)
4765			remove_size -= offset - entry->vme_start;
4766
4767
4768		if(submap_end < entry->vme_end) {
4769			remove_size -=
4770				entry->vme_end - submap_end;
4771		}
4772		if(entry->is_sub_map) {
4773			vm_map_submap_pmap_clean(
4774				sub_map,
4775				start,
4776				start + remove_size,
4777				entry->object.sub_map,
4778				entry->offset);
4779		} else {
4780
4781			if((map->mapped_in_other_pmaps) && (map->ref_count)
4782			   && (entry->object.vm_object != NULL)) {
4783				vm_object_pmap_protect(
4784					entry->object.vm_object,
4785					entry->offset+(offset-entry->vme_start),
4786					remove_size,
4787					PMAP_NULL,
4788					entry->vme_start,
4789					VM_PROT_NONE);
4790			} else {
4791				pmap_remove(map->pmap,
4792					    (addr64_t)start,
4793					    (addr64_t)(start + remove_size));
4794			}
4795		}
4796	}
4797
4798	entry = entry->vme_next;
4799
4800	while((entry != vm_map_to_entry(sub_map))
4801	      && (entry->vme_start < submap_end)) {
4802		remove_size = (entry->vme_end - entry->vme_start);
4803		if(submap_end < entry->vme_end) {
4804			remove_size -= entry->vme_end - submap_end;
4805		}
4806		if(entry->is_sub_map) {
4807			vm_map_submap_pmap_clean(
4808				sub_map,
4809				(start + entry->vme_start) - offset,
4810				((start + entry->vme_start) - offset) + remove_size,
4811				entry->object.sub_map,
4812				entry->offset);
4813		} else {
4814			if((map->mapped_in_other_pmaps) && (map->ref_count)
4815			   && (entry->object.vm_object != NULL)) {
4816				vm_object_pmap_protect(
4817					entry->object.vm_object,
4818					entry->offset,
4819					remove_size,
4820					PMAP_NULL,
4821					entry->vme_start,
4822					VM_PROT_NONE);
4823			} else {
4824				pmap_remove(map->pmap,
4825					    (addr64_t)((start + entry->vme_start)
4826						       - offset),
4827					    (addr64_t)(((start + entry->vme_start)
4828							- offset) + remove_size));
4829			}
4830		}
4831		entry = entry->vme_next;
4832	}
4833	vm_map_unlock_read(sub_map);
4834	return;
4835}
4836
4837/*
4838 *	vm_map_delete:	[ internal use only ]
4839 *
4840 *	Deallocates the given address range from the target map.
4841 *	Removes all user wirings. Unwires one kernel wiring if
4842 *	VM_MAP_REMOVE_KUNWIRE is set.  Waits for kernel wirings to go
4843 *	away if VM_MAP_REMOVE_WAIT_FOR_KWIRE is set.  Sleeps
4844 *	interruptibly if VM_MAP_REMOVE_INTERRUPTIBLE is set.
4845 *
4846 *	This routine is called with map locked and leaves map locked.
4847 */
4848static kern_return_t
4849vm_map_delete(
4850	vm_map_t		map,
4851	vm_map_offset_t		start,
4852	vm_map_offset_t		end,
4853	int			flags,
4854	vm_map_t		zap_map)
4855{
4856	vm_map_entry_t		entry, next;
4857	struct	 vm_map_entry	*first_entry, tmp_entry;
4858	register vm_map_offset_t s;
4859	register vm_object_t	object;
4860	boolean_t		need_wakeup;
4861	unsigned int		last_timestamp = ~0; /* unlikely value */
4862	int			interruptible;
4863
4864	interruptible = (flags & VM_MAP_REMOVE_INTERRUPTIBLE) ?
4865		THREAD_ABORTSAFE : THREAD_UNINT;
4866
4867	/*
4868	 * All our DMA I/O operations in IOKit are currently done by
4869	 * wiring through the map entries of the task requesting the I/O.
4870	 * Because of this, we must always wait for kernel wirings
4871	 * to go away on the entries before deleting them.
4872	 *
4873	 * Any caller who wants to actually remove a kernel wiring
4874	 * should explicitly set the VM_MAP_REMOVE_KUNWIRE flag to
4875	 * properly remove one wiring instead of blasting through
4876	 * them all.
4877	 */
4878	flags |= VM_MAP_REMOVE_WAIT_FOR_KWIRE;
4879
4880	while(1) {
4881		/*
4882		 *	Find the start of the region, and clip it
4883		 */
4884		if (vm_map_lookup_entry(map, start, &first_entry)) {
4885			entry = first_entry;
			if (entry->superpage_size && (start & ~SUPERPAGE_MASK)) { /* extend request to whole entry */
				start = SUPERPAGE_ROUND_DOWN(start);
4888				continue;
4889			}
4890			if (start == entry->vme_start) {
4891				/*
4892				 * No need to clip.  We don't want to cause
4893				 * any unnecessary unnesting in this case...
4894				 */
4895			} else {
4896				vm_map_clip_start(map, entry, start);
4897			}
4898
4899			/*
4900			 *	Fix the lookup hint now, rather than each
4901			 *	time through the loop.
4902			 */
4903			SAVE_HINT_MAP_WRITE(map, entry->vme_prev);
4904		} else {
4905			entry = first_entry->vme_next;
4906		}
4907		break;
4908	}
4909	if (entry->superpage_size)
4910		end = SUPERPAGE_ROUND_UP(end);
4911
4912	need_wakeup = FALSE;
4913	/*
4914	 *	Step through all entries in this region
4915	 */
4916	s = entry->vme_start;
4917	while ((entry != vm_map_to_entry(map)) && (s < end)) {
4918		/*
4919		 * At this point, we have deleted all the memory entries
4920		 * between "start" and "s".  We still need to delete
4921		 * all memory entries between "s" and "end".
4922		 * While we were blocked and the map was unlocked, some
4923		 * new memory entries could have been re-allocated between
4924		 * "start" and "s" and we don't want to mess with those.
4925		 * Some of those entries could even have been re-assembled
4926		 * with an entry after "s" (in vm_map_simplify_entry()), so
4927		 * we may have to vm_map_clip_start() again.
4928		 */
4929
4930		if (entry->vme_start >= s) {
4931			/*
4932			 * This entry starts on or after "s"
4933			 * so no need to clip its start.
4934			 */
4935		} else {
4936			/*
4937			 * This entry has been re-assembled by a
4938			 * vm_map_simplify_entry().  We need to
4939			 * re-clip its start.
4940			 */
4941			vm_map_clip_start(map, entry, s);
4942		}
4943		if (entry->vme_end <= end) {
4944			/*
4945			 * This entry is going away completely, so no need
4946			 * to clip and possibly cause an unnecessary unnesting.
4947			 */
4948		} else {
4949			vm_map_clip_end(map, entry, end);
4950		}
4951
4952		if (entry->permanent) {
4953			panic("attempt to remove permanent VM map entry "
4954			      "%p [0x%llx:0x%llx]\n",
4955			      entry, (uint64_t) s, (uint64_t) end);
4956		}
4957
4958
4959		if (entry->in_transition) {
4960			wait_result_t wait_result;
4961
4962			/*
4963			 * Another thread is wiring/unwiring this entry.
4964			 * Let the other thread know we are waiting.
4965			 */
4966			assert(s == entry->vme_start);
4967			entry->needs_wakeup = TRUE;
4968
4969			/*
4970			 * wake up anybody waiting on entries that we have
4971			 * already unwired/deleted.
4972			 */
4973			if (need_wakeup) {
4974				vm_map_entry_wakeup(map);
4975				need_wakeup = FALSE;
4976			}
4977
4978			wait_result = vm_map_entry_wait(map, interruptible);
4979
4980			if (interruptible &&
4981			    wait_result == THREAD_INTERRUPTED) {
4982				/*
4983				 * We do not clear the needs_wakeup flag,
4984				 * since we cannot tell if we were the only one.
4985				 */
4986				vm_map_unlock(map);
4987				return KERN_ABORTED;
4988			}
4989
4990			/*
4991			 * The entry could have been clipped or it
4992			 * may not exist anymore.  Look it up again.
4993			 */
4994			if (!vm_map_lookup_entry(map, s, &first_entry)) {
4995				assert((map != kernel_map) &&
4996				       (!entry->is_sub_map));
4997				/*
4998				 * User: use the next entry
4999				 */
5000				entry = first_entry->vme_next;
5001				s = entry->vme_start;
5002			} else {
5003				entry = first_entry;
5004				SAVE_HINT_MAP_WRITE(map, entry->vme_prev);
5005			}
5006			last_timestamp = map->timestamp;
5007			continue;
5008		} /* end in_transition */
5009
5010		if (entry->wired_count) {
5011			boolean_t	user_wire;
5012
5013			user_wire = entry->user_wired_count > 0;
5014
5015			/*
5016			 * 	Remove a kernel wiring if requested
5017			 */
5018			if (flags & VM_MAP_REMOVE_KUNWIRE) {
5019				entry->wired_count--;
5020			}
5021
5022			/*
5023			 *	Remove all user wirings for proper accounting
5024			 */
5025			if (entry->user_wired_count > 0) {
5026				while (entry->user_wired_count)
5027					subtract_wire_counts(map, entry, user_wire);
5028			}
5029
5030			if (entry->wired_count != 0) {
5031				assert(map != kernel_map);
				/*
				 * Cannot continue.  The typical case is when
				 * a user thread has physical I/O pending on
				 * this page.  Either wait for the kernel
				 * wiring to go away or return an error.
				 */
5039				if (flags & VM_MAP_REMOVE_WAIT_FOR_KWIRE) {
5040					wait_result_t wait_result;
5041
5042					assert(s == entry->vme_start);
5043					entry->needs_wakeup = TRUE;
5044					wait_result = vm_map_entry_wait(map,
5045									interruptible);
5046
5047					if (interruptible &&
5048					    wait_result == THREAD_INTERRUPTED) {
5049						/*
5050						 * We do not clear the
5051						 * needs_wakeup flag, since we
5052						 * cannot tell if we were the
5053						 * only one.
5054						 */
5055						vm_map_unlock(map);
5056						return KERN_ABORTED;
5057					}
5058
5059					/*
5060					 * The entry could have been clipped or
5061					 * it may not exist anymore.  Look it
5062					 * up again.
5063					 */
5064					if (!vm_map_lookup_entry(map, s,
5065								 &first_entry)) {
5066						assert(map != kernel_map);
5067						/*
5068						 * User: use the next entry
5069						 */
5070						entry = first_entry->vme_next;
5071						s = entry->vme_start;
5072					} else {
5073						entry = first_entry;
5074						SAVE_HINT_MAP_WRITE(map, entry->vme_prev);
5075					}
5076					last_timestamp = map->timestamp;
5077					continue;
5078				}
5079				else {
5080					return KERN_FAILURE;
5081				}
5082			}
5083
5084			entry->in_transition = TRUE;
5085			/*
5086			 * copy current entry.  see comment in vm_map_wire()
5087			 */
5088			tmp_entry = *entry;
5089			assert(s == entry->vme_start);
5090
			/*
			 * We can unlock the map now.  The in_transition
			 * state guarantees existence of the entry.
			 */
5095			vm_map_unlock(map);
5096
5097			if (tmp_entry.is_sub_map) {
5098				vm_map_t sub_map;
5099				vm_map_offset_t sub_start, sub_end;
5100				pmap_t pmap;
5101				vm_map_offset_t pmap_addr;
5102
5103
5104				sub_map = tmp_entry.object.sub_map;
5105				sub_start = tmp_entry.offset;
5106				sub_end = sub_start + (tmp_entry.vme_end -
5107						       tmp_entry.vme_start);
5108				if (tmp_entry.use_pmap) {
5109					pmap = sub_map->pmap;
5110					pmap_addr = tmp_entry.vme_start;
5111				} else {
5112					pmap = map->pmap;
5113					pmap_addr = tmp_entry.vme_start;
5114				}
5115				(void) vm_map_unwire_nested(sub_map,
5116							    sub_start, sub_end,
5117							    user_wire,
5118							    pmap, pmap_addr);
5119			} else {
5120
5121				vm_fault_unwire(map, &tmp_entry,
5122						tmp_entry.object.vm_object == kernel_object,
5123						map->pmap, tmp_entry.vme_start);
5124			}
5125
5126			vm_map_lock(map);
5127
5128			if (last_timestamp+1 != map->timestamp) {
5129				/*
5130				 * Find the entry again.  It could have
5131				 * been clipped after we unlocked the map.
5132				 */
5133				if (!vm_map_lookup_entry(map, s, &first_entry)){
5134					assert((map != kernel_map) &&
5135					       (!entry->is_sub_map));
5136					first_entry = first_entry->vme_next;
5137					s = first_entry->vme_start;
5138				} else {
5139					SAVE_HINT_MAP_WRITE(map, entry->vme_prev);
5140				}
5141			} else {
5142				SAVE_HINT_MAP_WRITE(map, entry->vme_prev);
5143				first_entry = entry;
5144			}
5145
5146			last_timestamp = map->timestamp;
5147
5148			entry = first_entry;
5149			while ((entry != vm_map_to_entry(map)) &&
5150			       (entry->vme_start < tmp_entry.vme_end)) {
5151				assert(entry->in_transition);
5152				entry->in_transition = FALSE;
5153				if (entry->needs_wakeup) {
5154					entry->needs_wakeup = FALSE;
5155					need_wakeup = TRUE;
5156				}
5157				entry = entry->vme_next;
5158			}
5159			/*
5160			 * We have unwired the entry(s).  Go back and
5161			 * delete them.
5162			 */
5163			entry = first_entry;
5164			continue;
5165		}
5166
5167		/* entry is unwired */
5168		assert(entry->wired_count == 0);
5169		assert(entry->user_wired_count == 0);
5170
5171		assert(s == entry->vme_start);
5172
5173		if (flags & VM_MAP_REMOVE_NO_PMAP_CLEANUP) {
5174			/*
5175			 * XXX with the VM_MAP_REMOVE_SAVE_ENTRIES flag to
5176			 * vm_map_delete(), some map entries might have been
5177			 * transferred to a "zap_map", which doesn't have a
5178			 * pmap.  The original pmap has already been flushed
5179			 * in the vm_map_delete() call targeting the original
5180			 * map, but when we get to destroying the "zap_map",
5181			 * we don't have any pmap to flush, so let's just skip
5182			 * all this.
5183			 */
5184		} else if (entry->is_sub_map) {
5185			if (entry->use_pmap) {
5186#ifndef NO_NESTED_PMAP
5187				pmap_unnest(map->pmap,
5188					    (addr64_t)entry->vme_start,
5189					    entry->vme_end - entry->vme_start);
5190#endif	/* NO_NESTED_PMAP */
5191				if ((map->mapped_in_other_pmaps) && (map->ref_count)) {
5192					/* clean up parent map/maps */
5193					vm_map_submap_pmap_clean(
5194						map, entry->vme_start,
5195						entry->vme_end,
5196						entry->object.sub_map,
5197						entry->offset);
5198				}
5199			} else {
5200				vm_map_submap_pmap_clean(
5201					map, entry->vme_start, entry->vme_end,
5202					entry->object.sub_map,
5203					entry->offset);
5204			}
5205		} else if (entry->object.vm_object != kernel_object) {
5206			object = entry->object.vm_object;
5207			if((map->mapped_in_other_pmaps) && (map->ref_count)) {
5208				vm_object_pmap_protect(
5209					object, entry->offset,
5210					entry->vme_end - entry->vme_start,
5211					PMAP_NULL,
5212					entry->vme_start,
5213					VM_PROT_NONE);
5214			} else {
5215				pmap_remove(map->pmap,
5216					    (addr64_t)entry->vme_start,
5217					    (addr64_t)entry->vme_end);
5218			}
5219		}
5220
5221		/*
5222		 * All pmap mappings for this map entry must have been
5223		 * cleared by now.
5224		 */
5225		assert(vm_map_pmap_is_empty(map,
5226					    entry->vme_start,
5227					    entry->vme_end));
5228
5229		next = entry->vme_next;
5230		s = next->vme_start;
5231		last_timestamp = map->timestamp;
5232
5233		if ((flags & VM_MAP_REMOVE_SAVE_ENTRIES) &&
5234		    zap_map != VM_MAP_NULL) {
5235			vm_map_size_t entry_size;
5236			/*
5237			 * The caller wants to save the affected VM map entries
5238			 * into the "zap_map".  The caller will take care of
5239			 * these entries.
5240			 */
5241			/* unlink the entry from "map" ... */
5242			vm_map_store_entry_unlink(map, entry);
5243			/* ... and add it to the end of the "zap_map" */
5244			vm_map_store_entry_link(zap_map,
5245					  vm_map_last_entry(zap_map),
5246					  entry);
5247			entry_size = entry->vme_end - entry->vme_start;
5248			map->size -= entry_size;
5249			zap_map->size += entry_size;
5250			/* we didn't unlock the map, so no timestamp increase */
5251			last_timestamp--;
5252		} else {
5253			vm_map_entry_delete(map, entry);
5254			/* vm_map_entry_delete unlocks the map */
5255			vm_map_lock(map);
5256		}
5257
5258		entry = next;
5259
5260		if(entry == vm_map_to_entry(map)) {
5261			break;
5262		}
5263		if (last_timestamp+1 != map->timestamp) {
			/*
			 * We are responsible for deleting everything
			 * in the given space.  If someone has interfered,
			 * we pick up where we left off.  Back-fills should
			 * be all right for anyone except map_delete, and
			 * we have to assume that the task has been fully
			 * disabled before we get here.
			 */
			if (!vm_map_lookup_entry(map, s, &entry)) {
				entry = entry->vme_next;
				s = entry->vme_start;
			} else {
				SAVE_HINT_MAP_WRITE(map, entry->vme_prev);
			}
			/*
			 * Others can not only allocate behind us, we can
			 * also see entries coalesce while we don't hold the map lock.
			 */
5282			if(entry == vm_map_to_entry(map)) {
5283				break;
5284			}
5285		}
5286		last_timestamp = map->timestamp;
5287	}
5288
5289	if (map->wait_for_space)
5290		thread_wakeup((event_t) map);
5291	/*
5292	 * wake up anybody waiting on entries that we have already deleted.
5293	 */
5294	if (need_wakeup)
5295		vm_map_entry_wakeup(map);
5296
5297	return KERN_SUCCESS;
5298}
5299
5300/*
5301 *	vm_map_remove:
5302 *
5303 *	Remove the given address range from the target map.
5304 *	This is the exported form of vm_map_delete.
5305 */
5306kern_return_t
5307vm_map_remove(
5308	register vm_map_t	map,
5309	register vm_map_offset_t	start,
5310	register vm_map_offset_t	end,
5311	register boolean_t	flags)
5312{
5313	register kern_return_t	result;
5314
5315	vm_map_lock(map);
5316	VM_MAP_RANGE_CHECK(map, start, end);
5317	result = vm_map_delete(map, start, end, flags, VM_MAP_NULL);
5318	vm_map_unlock(map);
5319
5320	return(result);
5321}
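
/*
 *	Editor's example (illustrative sketch, not part of the original
 *	source):  removing a range from a map.  VM_MAP_NO_FLAGS removes
 *	user wirings but waits for kernel wirings to go away; a caller
 *	that owns a kernel wiring on the range would pass
 *	VM_MAP_REMOVE_KUNWIRE instead.
 */
#if 0	/* example only -- not compiled */
static void
example_remove_range(
	vm_map_t	map,
	vm_map_offset_t	start,
	vm_map_offset_t	end)
{
	kern_return_t	kr;

	kr = vm_map_remove(map, start, end, VM_MAP_NO_FLAGS);
	assert(kr == KERN_SUCCESS);
}
#endif	/* example only */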
5322
5323
5324/*
5325 *	Routine:	vm_map_copy_discard
5326 *
5327 *	Description:
5328 *		Dispose of a map copy object (returned by
5329 *		vm_map_copyin).
5330 */
5331void
5332vm_map_copy_discard(
5333	vm_map_copy_t	copy)
5334{
5335	if (copy == VM_MAP_COPY_NULL)
5336		return;
5337
5338	switch (copy->type) {
5339	case VM_MAP_COPY_ENTRY_LIST:
5340		while (vm_map_copy_first_entry(copy) !=
5341		       vm_map_copy_to_entry(copy)) {
5342			vm_map_entry_t	entry = vm_map_copy_first_entry(copy);
5343
5344			vm_map_copy_entry_unlink(copy, entry);
5345			vm_object_deallocate(entry->object.vm_object);
5346			vm_map_copy_entry_dispose(copy, entry);
5347		}
5348		break;
	case VM_MAP_COPY_OBJECT:
5350		vm_object_deallocate(copy->cpy_object);
5351		break;
5352	case VM_MAP_COPY_KERNEL_BUFFER:
5353
5354		/*
5355		 * The vm_map_copy_t and possibly the data buffer were
5356		 * allocated by a single call to kalloc(), i.e. the
5357		 * vm_map_copy_t was not allocated out of the zone.
5358		 */
5359		kfree(copy, copy->cpy_kalloc_size);
5360		return;
5361	}
5362	zfree(vm_map_copy_zone, copy);
5363}
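
/*
 *	Editor's example (illustrative sketch, not part of the original
 *	source):  a copy object returned by vm_map_copyin() that will not
 *	be copied out (e.g. on an error path) must be discarded, or its
 *	entries and objects are leaked.
 */
#if 0	/* example only -- not compiled */
static void
example_copyin_then_abandon(
	vm_map_t	src_map,
	vm_map_offset_t	src_addr,
	vm_map_size_t	len)
{
	vm_map_copy_t	copy;

	if (vm_map_copyin(src_map, src_addr, len,
			  FALSE,	/* don't destroy the source */
			  &copy) != KERN_SUCCESS)
		return;

	/* ... decide not to use the data after all ... */
	vm_map_copy_discard(copy);
}
#endif	/* example only */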
5364
5365/*
5366 *	Routine:	vm_map_copy_copy
5367 *
5368 *	Description:
5369 *			Move the information in a map copy object to
5370 *			a new map copy object, leaving the old one
5371 *			empty.
5372 *
5373 *			This is used by kernel routines that need
5374 *			to look at out-of-line data (in copyin form)
5375 *			before deciding whether to return SUCCESS.
5376 *			If the routine returns FAILURE, the original
5377 *			copy object will be deallocated; therefore,
5378 *			these routines must make a copy of the copy
5379 *			object and leave the original empty so that
5380 *			deallocation will not fail.
5381 */
5382vm_map_copy_t
5383vm_map_copy_copy(
5384	vm_map_copy_t	copy)
5385{
5386	vm_map_copy_t	new_copy;
5387
5388	if (copy == VM_MAP_COPY_NULL)
5389		return VM_MAP_COPY_NULL;
5390
5391	/*
5392	 * Allocate a new copy object, and copy the information
5393	 * from the old one into it.
5394	 */
5395
5396	new_copy = (vm_map_copy_t) zalloc(vm_map_copy_zone);
5397	*new_copy = *copy;
5398
5399	if (copy->type == VM_MAP_COPY_ENTRY_LIST) {
5400		/*
5401		 * The links in the entry chain must be
5402		 * changed to point to the new copy object.
5403		 */
5404		vm_map_copy_first_entry(copy)->vme_prev
5405			= vm_map_copy_to_entry(new_copy);
5406		vm_map_copy_last_entry(copy)->vme_next
5407			= vm_map_copy_to_entry(new_copy);
5408	}
5409
5410	/*
5411	 * Change the old copy object into one that contains
5412	 * nothing to be deallocated.
5413	 */
5414	copy->type = VM_MAP_COPY_OBJECT;
5415	copy->cpy_object = VM_OBJECT_NULL;
5416
5417	/*
5418	 * Return the new object.
5419	 */
5420	return new_copy;
5421}
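
/*
 *	Editor's example (illustrative sketch) of the pattern described
 *	above:  a routine that inspects out-of-line data before deciding
 *	success moves the contents into a new copy object, so that the
 *	original -- which the caller will discard on failure -- is left
 *	empty.  The validity check below is a hypothetical helper.
 */
#if 0	/* example only -- not compiled */
static kern_return_t
example_consume_copy(
	vm_map_copy_t	copy,
	vm_map_copy_t	*kept)
{
	vm_map_copy_t	new_copy;

	/*
	 * Move the contents out of "copy" first: if we later return
	 * failure, the caller's vm_map_copy_discard(copy) then frees
	 * only an empty shell.
	 */
	new_copy = vm_map_copy_copy(copy);

	if (!example_data_looks_valid(new_copy)) {	/* hypothetical check */
		vm_map_copy_discard(new_copy);
		return KERN_FAILURE;
	}

	*kept = new_copy;
	return KERN_SUCCESS;
}
#endif	/* example only */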
5422
5423static kern_return_t
5424vm_map_overwrite_submap_recurse(
5425	vm_map_t	dst_map,
5426	vm_map_offset_t	dst_addr,
5427	vm_map_size_t	dst_size)
5428{
5429	vm_map_offset_t	dst_end;
5430	vm_map_entry_t	tmp_entry;
5431	vm_map_entry_t	entry;
5432	kern_return_t	result;
5433	boolean_t	encountered_sub_map = FALSE;
5434
5435
5436
5437	/*
5438	 *	Verify that the destination is all writeable
5439	 *	initially.  We have to trunc the destination
5440	 *	address and round the copy size or we'll end up
5441	 *	splitting entries in strange ways.
5442	 */
5443
5444	dst_end = vm_map_round_page(dst_addr + dst_size);
5445	vm_map_lock(dst_map);
5446
5447start_pass_1:
5448	if (!vm_map_lookup_entry(dst_map, dst_addr, &tmp_entry)) {
5449		vm_map_unlock(dst_map);
5450		return(KERN_INVALID_ADDRESS);
5451	}
5452
5453	vm_map_clip_start(dst_map, tmp_entry, vm_map_trunc_page(dst_addr));
5454	assert(!tmp_entry->use_pmap); /* clipping did unnest if needed */
5455
5456	for (entry = tmp_entry;;) {
5457		vm_map_entry_t	next;
5458
5459		next = entry->vme_next;
5460		while(entry->is_sub_map) {
5461			vm_map_offset_t	sub_start;
5462			vm_map_offset_t	sub_end;
5463			vm_map_offset_t	local_end;
5464
5465			if (entry->in_transition) {
5466				/*
5467				 * Say that we are waiting, and wait for entry.
5468				 */
5469                        	entry->needs_wakeup = TRUE;
5470                        	vm_map_entry_wait(dst_map, THREAD_UNINT);
5471
5472				goto start_pass_1;
5473			}
5474
5475			encountered_sub_map = TRUE;
5476			sub_start = entry->offset;
5477
5478			if(entry->vme_end < dst_end)
5479				sub_end = entry->vme_end;
5480			else
5481				sub_end = dst_end;
5482			sub_end -= entry->vme_start;
5483			sub_end += entry->offset;
5484			local_end = entry->vme_end;
5485			vm_map_unlock(dst_map);
5486
5487			result = vm_map_overwrite_submap_recurse(
5488				entry->object.sub_map,
5489				sub_start,
5490				sub_end - sub_start);
5491
5492			if(result != KERN_SUCCESS)
5493				return result;
5494			if (dst_end <= entry->vme_end)
5495				return KERN_SUCCESS;
5496			vm_map_lock(dst_map);
5497			if(!vm_map_lookup_entry(dst_map, local_end,
5498						&tmp_entry)) {
5499				vm_map_unlock(dst_map);
5500				return(KERN_INVALID_ADDRESS);
5501			}
5502			entry = tmp_entry;
5503			next = entry->vme_next;
5504		}
5505
5506		if ( ! (entry->protection & VM_PROT_WRITE)) {
5507			vm_map_unlock(dst_map);
5508			return(KERN_PROTECTION_FAILURE);
5509		}
5510
5511		/*
5512		 *	If the entry is in transition, we must wait
5513		 *	for it to exit that state.  Anything could happen
5514		 *	when we unlock the map, so start over.
5515		 */
5516                if (entry->in_transition) {
5517
5518                        /*
5519                         * Say that we are waiting, and wait for entry.
5520                         */
5521                        entry->needs_wakeup = TRUE;
5522                        vm_map_entry_wait(dst_map, THREAD_UNINT);
5523
5524			goto start_pass_1;
5525		}
5526
5527/*
5528 *		our range is contained completely within this map entry
5529 */
5530		if (dst_end <= entry->vme_end) {
5531			vm_map_unlock(dst_map);
5532			return KERN_SUCCESS;
5533		}
5534/*
5535 *		check that range specified is contiguous region
5536 */
5537		if ((next == vm_map_to_entry(dst_map)) ||
5538		    (next->vme_start != entry->vme_end)) {
5539			vm_map_unlock(dst_map);
5540			return(KERN_INVALID_ADDRESS);
5541		}
5542
5543		/*
5544		 *	Check for permanent objects in the destination.
5545		 */
5546		if ((entry->object.vm_object != VM_OBJECT_NULL) &&
5547		    ((!entry->object.vm_object->internal) ||
5548		     (entry->object.vm_object->true_share))) {
5549			if(encountered_sub_map) {
5550				vm_map_unlock(dst_map);
5551				return(KERN_FAILURE);
5552			}
5553		}
5554
5555
5556		entry = next;
5557	}/* for */
5558	vm_map_unlock(dst_map);
5559	return(KERN_SUCCESS);
5560}
5561
5562/*
5563 *	Routine:	vm_map_copy_overwrite
5564 *
5565 *	Description:
5566 *		Copy the memory described by the map copy
5567 *		object (copy; returned by vm_map_copyin) onto
5568 *		the specified destination region (dst_map, dst_addr).
5569 *		The destination must be writeable.
5570 *
5571 *		Unlike vm_map_copyout, this routine actually
5572 *		writes over previously-mapped memory.  If the
5573 *		previous mapping was to a permanent (user-supplied)
5574 *		memory object, it is preserved.
5575 *
5576 *		The attributes (protection and inheritance) of the
5577 *		destination region are preserved.
5578 *
5579 *		If successful, consumes the copy object.
5580 *		Otherwise, the caller is responsible for it.
5581 *
5582 *	Implementation notes:
5583 *		To overwrite aligned temporary virtual memory, it is
5584 *		sufficient to remove the previous mapping and insert
5585 *		the new copy.  This replacement is done either on
5586 *		the whole region (if no permanent virtual memory
5587 *		objects are embedded in the destination region) or
5588 *		in individual map entries.
5589 *
 *		To overwrite permanent virtual memory, it is necessary
5591 *		to copy each page, as the external memory management
5592 *		interface currently does not provide any optimizations.
5593 *
5594 *		Unaligned memory also has to be copied.  It is possible
5595 *		to use 'vm_trickery' to copy the aligned data.  This is
5596 *		not done but not hard to implement.
5597 *
5598 *		Once a page of permanent memory has been overwritten,
5599 *		it is impossible to interrupt this function; otherwise,
5600 *		the call would be neither atomic nor location-independent.
5601 *		The kernel-state portion of a user thread must be
5602 *		interruptible.
5603 *
5604 *		It may be expensive to forward all requests that might
5605 *		overwrite permanent memory (vm_write, vm_copy) to
5606 *		uninterruptible kernel threads.  This routine may be
5607 *		called by interruptible threads; however, success is
5608 *		not guaranteed -- if the request cannot be performed
5609 *		atomically and interruptibly, an error indication is
5610 *		returned.
5611 */
5612
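/*
 *	Editor's example (illustrative sketch, not part of the original
 *	source):  overwriting an existing, writeable destination range
 *	with data captured by vm_map_copyin().  On success the copy
 *	object is consumed; on failure the caller still owns it and must
 *	discard it.
 */
#if 0	/* example only -- not compiled */
static kern_return_t
example_overwrite_range(
	vm_map_t	src_map,
	vm_map_offset_t	src_addr,
	vm_map_t	dst_map,
	vm_map_offset_t	dst_addr,
	vm_map_size_t	len)
{
	vm_map_copy_t	copy;
	kern_return_t	kr;

	kr = vm_map_copyin(src_map, src_addr, len, FALSE, &copy);
	if (kr != KERN_SUCCESS)
		return kr;

	kr = vm_map_copy_overwrite(dst_map, dst_addr, copy,
				   FALSE);	/* not interruptible */
	if (kr != KERN_SUCCESS)
		vm_map_copy_discard(copy);	/* failure: copy was not consumed */
	return kr;
}
#endif	/* example only */
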
5613static kern_return_t
5614vm_map_copy_overwrite_nested(
5615	vm_map_t		dst_map,
5616	vm_map_address_t	dst_addr,
5617	vm_map_copy_t		copy,
5618	boolean_t		interruptible,
5619	pmap_t			pmap,
5620	boolean_t		discard_on_success)
5621{
5622	vm_map_offset_t		dst_end;
5623	vm_map_entry_t		tmp_entry;
5624	vm_map_entry_t		entry;
5625	kern_return_t		kr;
5626	boolean_t		aligned = TRUE;
5627	boolean_t		contains_permanent_objects = FALSE;
5628	boolean_t		encountered_sub_map = FALSE;
5629	vm_map_offset_t		base_addr;
5630	vm_map_size_t		copy_size;
5631	vm_map_size_t		total_size;
5632
5633
5634	/*
5635	 *	Check for null copy object.
5636	 */
5637
5638	if (copy == VM_MAP_COPY_NULL)
5639		return(KERN_SUCCESS);
5640
5641	/*
5642	 *	Check for special kernel buffer allocated
5643	 *	by new_ipc_kmsg_copyin.
5644	 */
5645
5646	if (copy->type == VM_MAP_COPY_KERNEL_BUFFER) {
5647		return(vm_map_copyout_kernel_buffer(
5648			       dst_map, &dst_addr,
5649			       copy, TRUE));
5650	}
5651
5652	/*
5653	 *      Only works for entry lists at the moment.  Will
5654	 *	support page lists later.
5655	 */
5656
5657	assert(copy->type == VM_MAP_COPY_ENTRY_LIST);
5658
5659	if (copy->size == 0) {
5660		if (discard_on_success)
5661			vm_map_copy_discard(copy);
5662		return(KERN_SUCCESS);
5663	}
5664
5665	/*
5666	 *	Verify that the destination is all writeable
5667	 *	initially.  We have to trunc the destination
5668	 *	address and round the copy size or we'll end up
5669	 *	splitting entries in strange ways.
5670	 */
5671
5672	if (!page_aligned(copy->size) ||
5673	    !page_aligned (copy->offset) ||
5674	    !page_aligned (dst_addr))
5675	{
5676		aligned = FALSE;
5677		dst_end = vm_map_round_page(dst_addr + copy->size);
5678	} else {
5679		dst_end = dst_addr + copy->size;
5680	}
5681
5682	vm_map_lock(dst_map);
5683
5684	/* LP64todo - remove this check when vm_map_commpage64()
5685	 * no longer has to stuff in a map_entry for the commpage
5686	 * above the map's max_offset.
5687	 */
5688	if (dst_addr >= dst_map->max_offset) {
5689		vm_map_unlock(dst_map);
5690		return(KERN_INVALID_ADDRESS);
5691	}
5692
5693start_pass_1:
5694	if (!vm_map_lookup_entry(dst_map, dst_addr, &tmp_entry)) {
5695		vm_map_unlock(dst_map);
5696		return(KERN_INVALID_ADDRESS);
5697	}
5698	vm_map_clip_start(dst_map, tmp_entry, vm_map_trunc_page(dst_addr));
5699	for (entry = tmp_entry;;) {
5700		vm_map_entry_t	next = entry->vme_next;
5701
5702		while(entry->is_sub_map) {
5703			vm_map_offset_t	sub_start;
5704			vm_map_offset_t	sub_end;
5705			vm_map_offset_t	local_end;
5706
5707                	if (entry->in_transition) {
5708
5709				/*
5710				 * Say that we are waiting, and wait for entry.
5711				 */
5712                        	entry->needs_wakeup = TRUE;
5713                        	vm_map_entry_wait(dst_map, THREAD_UNINT);
5714
5715				goto start_pass_1;
5716			}
5717
5718			local_end = entry->vme_end;
5719		        if (!(entry->needs_copy)) {
				/* If needs_copy is set, this is a COW  */
				/* submap; in that case we just replace */
				/* the entry, so the following check is */
				/* not needed.                          */
5724				encountered_sub_map = TRUE;
5725				sub_start = entry->offset;
5726
5727				if(entry->vme_end < dst_end)
5728					sub_end = entry->vme_end;
5729				else
5730					sub_end = dst_end;
5731				sub_end -= entry->vme_start;
5732				sub_end += entry->offset;
5733				vm_map_unlock(dst_map);
5734
5735				kr = vm_map_overwrite_submap_recurse(
5736					entry->object.sub_map,
5737					sub_start,
5738					sub_end - sub_start);
5739				if(kr != KERN_SUCCESS)
5740					return kr;
5741				vm_map_lock(dst_map);
5742			}
5743
5744			if (dst_end <= entry->vme_end)
5745				goto start_overwrite;
5746			if(!vm_map_lookup_entry(dst_map, local_end,
5747						&entry)) {
5748				vm_map_unlock(dst_map);
5749				return(KERN_INVALID_ADDRESS);
5750			}
5751			next = entry->vme_next;
5752		}
5753
5754		if ( ! (entry->protection & VM_PROT_WRITE)) {
5755			vm_map_unlock(dst_map);
5756			return(KERN_PROTECTION_FAILURE);
5757		}
5758
5759		/*
5760		 *	If the entry is in transition, we must wait
5761		 *	for it to exit that state.  Anything could happen
5762		 *	when we unlock the map, so start over.
5763		 */
5764                if (entry->in_transition) {
5765
5766                        /*
5767                         * Say that we are waiting, and wait for entry.
5768                         */
5769                        entry->needs_wakeup = TRUE;
5770                        vm_map_entry_wait(dst_map, THREAD_UNINT);
5771
5772			goto start_pass_1;
5773		}
5774
5775/*
5776 *		our range is contained completely within this map entry
5777 */
5778		if (dst_end <= entry->vme_end)
5779			break;
5780/*
5781 *		check that range specified is contiguous region
5782 */
5783		if ((next == vm_map_to_entry(dst_map)) ||
5784		    (next->vme_start != entry->vme_end)) {
5785			vm_map_unlock(dst_map);
5786			return(KERN_INVALID_ADDRESS);
5787		}
5788
5789
5790		/*
5791		 *	Check for permanent objects in the destination.
5792		 */
5793		if ((entry->object.vm_object != VM_OBJECT_NULL) &&
5794		    ((!entry->object.vm_object->internal) ||
5795		     (entry->object.vm_object->true_share))) {
5796			contains_permanent_objects = TRUE;
5797		}
5798
5799		entry = next;
5800	}/* for */
5801
5802start_overwrite:
5803	/*
5804	 *	If there are permanent objects in the destination, then
5805	 *	the copy cannot be interrupted.
5806	 */
5807
5808	if (interruptible && contains_permanent_objects) {
5809		vm_map_unlock(dst_map);
5810		return(KERN_FAILURE);	/* XXX */
5811	}
5812
5813	/*
5814 	 *
5815	 *	Make a second pass, overwriting the data
5816	 *	At the beginning of each loop iteration,
5817	 *	the next entry to be overwritten is "tmp_entry"
5818	 *	(initially, the value returned from the lookup above),
5819	 *	and the starting address expected in that entry
5820	 *	is "start".
5821	 */
5822
5823	total_size = copy->size;
5824	if(encountered_sub_map) {
5825		copy_size = 0;
5826		/* re-calculate tmp_entry since we've had the map */
5827		/* unlocked */
5828		if (!vm_map_lookup_entry( dst_map, dst_addr, &tmp_entry)) {
5829			vm_map_unlock(dst_map);
5830			return(KERN_INVALID_ADDRESS);
5831		}
5832	} else {
5833		copy_size = copy->size;
5834	}
5835
5836	base_addr = dst_addr;
5837	while(TRUE) {
		/* deconstruct the copy object and do it in parts */
		/* only in the sub_map, interruptible case */
5840		vm_map_entry_t	copy_entry;
5841		vm_map_entry_t	previous_prev = VM_MAP_ENTRY_NULL;
5842		vm_map_entry_t	next_copy = VM_MAP_ENTRY_NULL;
5843		int		nentries;
5844		int		remaining_entries = 0;
5845		vm_map_offset_t	new_offset = 0;
5846
5847		for (entry = tmp_entry; copy_size == 0;) {
5848			vm_map_entry_t	next;
5849
5850			next = entry->vme_next;
5851
			/* tmp_entry and the base address are moved along */
			/* each time we encounter a sub-map.  Otherwise */
			/* entry can outpace tmp_entry, and copy_size */
			/* may reflect the distance between them. */
			/* If the current entry is found to be in transition, */
			/* we restart either at the beginning or at the last */
			/* submap encountered, as dictated by base_addr, */
			/* and zero copy_size accordingly. */
5860			if (entry->in_transition) {
5861                       		/*
5862                       		 * Say that we are waiting, and wait for entry.
5863                       		 */
5864                       		entry->needs_wakeup = TRUE;
5865                       		vm_map_entry_wait(dst_map, THREAD_UNINT);
5866
5867				if(!vm_map_lookup_entry(dst_map, base_addr,
5868							&tmp_entry)) {
5869					vm_map_unlock(dst_map);
5870					return(KERN_INVALID_ADDRESS);
5871				}
5872				copy_size = 0;
5873				entry = tmp_entry;
5874				continue;
5875			}
5876			if(entry->is_sub_map) {
5877				vm_map_offset_t	sub_start;
5878				vm_map_offset_t	sub_end;
5879				vm_map_offset_t	local_end;
5880
5881		        	if (entry->needs_copy) {
					/* if this is a COW submap, */
					/* just back the range with an */
					/* anonymous entry */
5885					if(entry->vme_end < dst_end)
5886						sub_end = entry->vme_end;
5887					else
5888						sub_end = dst_end;
5889					if(entry->vme_start < base_addr)
5890						sub_start = base_addr;
5891					else
5892						sub_start = entry->vme_start;
5893					vm_map_clip_end(
5894						dst_map, entry, sub_end);
5895					vm_map_clip_start(
5896						dst_map, entry, sub_start);
5897					assert(!entry->use_pmap);
5898					entry->is_sub_map = FALSE;
5899					vm_map_deallocate(
5900						entry->object.sub_map);
5901					entry->object.sub_map = NULL;
5902					entry->is_shared = FALSE;
5903					entry->needs_copy = FALSE;
5904					entry->offset = 0;
5905					/*
5906					 * XXX FBDP
5907					 * We should propagate the protections
5908					 * of the submap entry here instead
5909					 * of forcing them to VM_PROT_ALL...
5910					 * Or better yet, we should inherit
5911					 * the protection of the copy_entry.
5912					 */
5913					entry->protection = VM_PROT_ALL;
5914					entry->max_protection = VM_PROT_ALL;
5915					entry->wired_count = 0;
5916					entry->user_wired_count = 0;
5917					if(entry->inheritance
5918					   == VM_INHERIT_SHARE)
5919						entry->inheritance = VM_INHERIT_COPY;
5920					continue;
5921				}
5922				/* first take care of any non-sub_map */
5923				/* entries to send */
5924				if(base_addr < entry->vme_start) {
5925					/* stuff to send */
5926					copy_size =
5927						entry->vme_start - base_addr;
5928					break;
5929				}
5930				sub_start = entry->offset;
5931
5932				if(entry->vme_end < dst_end)
5933					sub_end = entry->vme_end;
5934				else
5935					sub_end = dst_end;
5936				sub_end -= entry->vme_start;
5937				sub_end += entry->offset;
5938				local_end = entry->vme_end;
5939				vm_map_unlock(dst_map);
5940				copy_size = sub_end - sub_start;
5941
5942				/* adjust the copy object */
5943				if (total_size > copy_size) {
5944					vm_map_size_t	local_size = 0;
5945					vm_map_size_t	entry_size;
5946
5947					nentries = 1;
5948					new_offset = copy->offset;
5949					copy_entry = vm_map_copy_first_entry(copy);
5950					while(copy_entry !=
5951					      vm_map_copy_to_entry(copy)){
5952						entry_size = copy_entry->vme_end -
5953							copy_entry->vme_start;
5954						if((local_size < copy_size) &&
5955						   ((local_size + entry_size)
5956						    >= copy_size)) {
5957							vm_map_copy_clip_end(copy,
5958									     copy_entry,
5959									     copy_entry->vme_start +
5960									     (copy_size - local_size));
5961							entry_size = copy_entry->vme_end -
5962								copy_entry->vme_start;
5963							local_size += entry_size;
5964							new_offset += entry_size;
5965						}
5966						if(local_size >= copy_size) {
5967							next_copy = copy_entry->vme_next;
5968							copy_entry->vme_next =
5969								vm_map_copy_to_entry(copy);
5970							previous_prev =
5971								copy->cpy_hdr.links.prev;
5972							copy->cpy_hdr.links.prev = copy_entry;
5973							copy->size = copy_size;
5974							remaining_entries =
5975								copy->cpy_hdr.nentries;
5976							remaining_entries -= nentries;
5977							copy->cpy_hdr.nentries = nentries;
5978							break;
5979						} else {
5980							local_size += entry_size;
5981							new_offset += entry_size;
5982							nentries++;
5983						}
5984						copy_entry = copy_entry->vme_next;
5985					}
5986				}
5987
5988				if((entry->use_pmap) && (pmap == NULL)) {
5989					kr = vm_map_copy_overwrite_nested(
5990						entry->object.sub_map,
5991						sub_start,
5992						copy,
5993						interruptible,
5994						entry->object.sub_map->pmap,
5995						TRUE);
5996				} else if (pmap != NULL) {
5997					kr = vm_map_copy_overwrite_nested(
5998						entry->object.sub_map,
5999						sub_start,
6000						copy,
6001						interruptible, pmap,
6002						TRUE);
6003				} else {
6004					kr = vm_map_copy_overwrite_nested(
6005						entry->object.sub_map,
6006						sub_start,
6007						copy,
6008						interruptible,
6009						dst_map->pmap,
6010						TRUE);
6011				}
6012				if(kr != KERN_SUCCESS) {
6013					if(next_copy != NULL) {
6014						copy->cpy_hdr.nentries +=
6015							remaining_entries;
6016						copy->cpy_hdr.links.prev->vme_next =
6017							next_copy;
6018						copy->cpy_hdr.links.prev
6019							= previous_prev;
6020						copy->size = total_size;
6021					}
6022					return kr;
6023				}
6024				if (dst_end <= local_end) {
6025					return(KERN_SUCCESS);
6026				}
				/* otherwise the copy no longer exists; it was */
				/* destroyed after a successful copy_overwrite */
6029			        copy = (vm_map_copy_t)
6030					zalloc(vm_map_copy_zone);
6031				vm_map_copy_first_entry(copy) =
6032					vm_map_copy_last_entry(copy) =
6033					vm_map_copy_to_entry(copy);
6034				copy->type = VM_MAP_COPY_ENTRY_LIST;
6035				copy->offset = new_offset;
6036
6037				/*
6038				 * XXX FBDP
6039				 * this does not seem to deal with
				 * the VM map store (red-black tree)
6041				 */
6042
6043				total_size -= copy_size;
6044				copy_size = 0;
6045				/* put back remainder of copy in container */
6046				if(next_copy != NULL) {
6047					copy->cpy_hdr.nentries = remaining_entries;
6048					copy->cpy_hdr.links.next = next_copy;
6049					copy->cpy_hdr.links.prev = previous_prev;
6050					copy->size = total_size;
6051					next_copy->vme_prev =
6052						vm_map_copy_to_entry(copy);
6053					next_copy = NULL;
6054				}
6055				base_addr = local_end;
6056				vm_map_lock(dst_map);
6057				if(!vm_map_lookup_entry(dst_map,
6058							local_end, &tmp_entry)) {
6059					vm_map_unlock(dst_map);
6060					return(KERN_INVALID_ADDRESS);
6061				}
6062				entry = tmp_entry;
6063				continue;
6064			}
6065			if (dst_end <= entry->vme_end) {
6066				copy_size = dst_end - base_addr;
6067				break;
6068			}
6069
6070			if ((next == vm_map_to_entry(dst_map)) ||
6071			    (next->vme_start != entry->vme_end)) {
6072				vm_map_unlock(dst_map);
6073				return(KERN_INVALID_ADDRESS);
6074			}
6075
6076			entry = next;
6077		}/* for */
6078
6079		next_copy = NULL;
6080		nentries = 1;
6081
6082		/* adjust the copy object */
6083		if (total_size > copy_size) {
6084			vm_map_size_t	local_size = 0;
6085			vm_map_size_t	entry_size;
6086
6087			new_offset = copy->offset;
6088			copy_entry = vm_map_copy_first_entry(copy);
6089			while(copy_entry != vm_map_copy_to_entry(copy)) {
6090				entry_size = copy_entry->vme_end -
6091					copy_entry->vme_start;
6092				if((local_size < copy_size) &&
6093				   ((local_size + entry_size)
6094				    >= copy_size)) {
6095					vm_map_copy_clip_end(copy, copy_entry,
6096							     copy_entry->vme_start +
6097							     (copy_size - local_size));
6098					entry_size = copy_entry->vme_end -
6099						copy_entry->vme_start;
6100					local_size += entry_size;
6101					new_offset += entry_size;
6102				}
6103				if(local_size >= copy_size) {
6104					next_copy = copy_entry->vme_next;
6105					copy_entry->vme_next =
6106						vm_map_copy_to_entry(copy);
6107					previous_prev =
6108						copy->cpy_hdr.links.prev;
6109					copy->cpy_hdr.links.prev = copy_entry;
6110					copy->size = copy_size;
6111					remaining_entries =
6112						copy->cpy_hdr.nentries;
6113					remaining_entries -= nentries;
6114					copy->cpy_hdr.nentries = nentries;
6115					break;
6116				} else {
6117					local_size += entry_size;
6118					new_offset += entry_size;
6119					nentries++;
6120				}
6121				copy_entry = copy_entry->vme_next;
6122			}
6123		}
6124
6125		if (aligned) {
6126			pmap_t	local_pmap;
6127
6128			if(pmap)
6129				local_pmap = pmap;
6130			else
6131				local_pmap = dst_map->pmap;
6132
6133			if ((kr =  vm_map_copy_overwrite_aligned(
6134				     dst_map, tmp_entry, copy,
6135				     base_addr, local_pmap)) != KERN_SUCCESS) {
6136				if(next_copy != NULL) {
6137					copy->cpy_hdr.nentries +=
6138						remaining_entries;
6139				        copy->cpy_hdr.links.prev->vme_next =
6140						next_copy;
6141			       		copy->cpy_hdr.links.prev =
6142						previous_prev;
6143					copy->size += copy_size;
6144				}
6145				return kr;
6146			}
6147			vm_map_unlock(dst_map);
6148		} else {
6149			/*
6150			 * Performance gain:
6151			 *
6152			 * if the copy and dst address are misaligned but the same
6153			 * offset within the page we can copy_not_aligned the
6154			 * misaligned parts and copy aligned the rest.  If they are
6155			 * aligned but len is unaligned we simply need to copy
6156			 * the end bit unaligned.  We'll need to split the misaligned
6157			 * bits of the region in this case !
6158			 */
6159			/* ALWAYS UNLOCKS THE dst_map MAP */
6160			if ((kr =  vm_map_copy_overwrite_unaligned( dst_map,
6161								    tmp_entry, copy, base_addr)) != KERN_SUCCESS) {
6162				if(next_copy != NULL) {
6163					copy->cpy_hdr.nentries +=
6164						remaining_entries;
6165			       		copy->cpy_hdr.links.prev->vme_next =
6166						next_copy;
6167			       		copy->cpy_hdr.links.prev =
6168						previous_prev;
6169					copy->size += copy_size;
6170				}
6171				return kr;
6172			}
6173		}
6174		total_size -= copy_size;
6175		if(total_size == 0)
6176			break;
6177		base_addr += copy_size;
6178		copy_size = 0;
6179		copy->offset = new_offset;
6180		if(next_copy != NULL) {
6181			copy->cpy_hdr.nentries = remaining_entries;
6182			copy->cpy_hdr.links.next = next_copy;
6183			copy->cpy_hdr.links.prev = previous_prev;
6184			next_copy->vme_prev = vm_map_copy_to_entry(copy);
6185			copy->size = total_size;
6186		}
6187		vm_map_lock(dst_map);
6188		while(TRUE) {
6189			if (!vm_map_lookup_entry(dst_map,
6190						 base_addr, &tmp_entry)) {
6191				vm_map_unlock(dst_map);
6192				return(KERN_INVALID_ADDRESS);
6193			}
6194                	if (tmp_entry->in_transition) {
6195                       		entry->needs_wakeup = TRUE;
6196                       		vm_map_entry_wait(dst_map, THREAD_UNINT);
6197			} else {
6198				break;
6199			}
6200		}
6201		vm_map_clip_start(dst_map, tmp_entry, vm_map_trunc_page(base_addr));
6202
6203		entry = tmp_entry;
6204	} /* while */
6205
6206	/*
6207	 *	Throw away the vm_map_copy object
6208	 */
6209	if (discard_on_success)
6210		vm_map_copy_discard(copy);
6211
6212	return(KERN_SUCCESS);
6213}/* vm_map_copy_overwrite */
6214
6215kern_return_t
6216vm_map_copy_overwrite(
6217	vm_map_t	dst_map,
6218	vm_map_offset_t	dst_addr,
6219	vm_map_copy_t	copy,
6220	boolean_t	interruptible)
6221{
6222	vm_map_size_t	head_size, tail_size;
6223	vm_map_copy_t	head_copy, tail_copy;
6224	vm_map_offset_t	head_addr, tail_addr;
6225	vm_map_entry_t	entry;
6226	kern_return_t	kr;
6227
6228	head_size = 0;
6229	tail_size = 0;
6230	head_copy = NULL;
6231	tail_copy = NULL;
6232	head_addr = 0;
6233	tail_addr = 0;
6234
6235	if (interruptible ||
6236	    copy == VM_MAP_COPY_NULL ||
6237	    copy->type != VM_MAP_COPY_ENTRY_LIST) {
6238		/*
6239		 * We can't split the "copy" map if we're interruptible
6240		 * or if we don't have a "copy" map...
6241		 */
6242	blunt_copy:
6243		return vm_map_copy_overwrite_nested(dst_map,
6244						    dst_addr,
6245						    copy,
6246						    interruptible,
6247						    (pmap_t) NULL,
6248						    TRUE);
6249	}
6250
6251	if (copy->size < 3 * PAGE_SIZE) {
6252		/*
6253		 * Too small to bother with optimizing...
6254		 */
6255		goto blunt_copy;
6256	}
6257
6258	if ((dst_addr & PAGE_MASK) != (copy->offset & PAGE_MASK)) {
6259		/*
6260		 * Incompatible mis-alignment of source and destination...
6261		 */
6262		goto blunt_copy;
6263	}
6264
6265	/*
6266	 * Proper alignment or identical mis-alignment at the beginning.
6267	 * Let's try and do a small unaligned copy first (if needed)
6268	 * and then an aligned copy for the rest.
6269	 */
6270	if (!page_aligned(dst_addr)) {
6271		head_addr = dst_addr;
6272		head_size = PAGE_SIZE - (copy->offset & PAGE_MASK);
6273	}
6274	if (!page_aligned(copy->offset + copy->size)) {
6275		/*
6276		 * Mis-alignment at the end.
6277		 * Do an aligned copy up to the last page and
6278		 * then an unaligned copy for the remaining bytes.
6279		 */
6280		tail_size = (copy->offset + copy->size) & PAGE_MASK;
6281		tail_addr = dst_addr + copy->size - tail_size;
6282	}
6283
6284	if (head_size + tail_size == copy->size) {
6285		/*
6286		 * It's all unaligned, no optimization possible...
6287		 */
6288		goto blunt_copy;
6289	}
6290
6291	/*
6292	 * Can't optimize if there are any submaps in the
6293	 * destination due to the way we free the "copy" map
6294	 * progressively in vm_map_copy_overwrite_nested()
6295	 * in that case.
6296	 */
6297	vm_map_lock_read(dst_map);
6298	if (! vm_map_lookup_entry(dst_map, dst_addr, &entry)) {
6299		vm_map_unlock_read(dst_map);
6300		goto blunt_copy;
6301	}
6302	for (;
	     (entry != vm_map_to_entry(dst_map) &&
6304	      entry->vme_start < dst_addr + copy->size);
6305	     entry = entry->vme_next) {
6306		if (entry->is_sub_map) {
6307			vm_map_unlock_read(dst_map);
6308			goto blunt_copy;
6309		}
6310	}
6311	vm_map_unlock_read(dst_map);
6312
6313	if (head_size) {
6314		/*
6315		 * Unaligned copy of the first "head_size" bytes, to reach
6316		 * a page boundary.
6317		 */
6318
6319		/*
6320		 * Extract "head_copy" out of "copy".
6321		 */
6322		head_copy = (vm_map_copy_t) zalloc(vm_map_copy_zone);
6323		vm_map_copy_first_entry(head_copy) =
6324			vm_map_copy_to_entry(head_copy);
6325		vm_map_copy_last_entry(head_copy) =
6326			vm_map_copy_to_entry(head_copy);
6327		head_copy->type = VM_MAP_COPY_ENTRY_LIST;
6328		head_copy->cpy_hdr.nentries = 0;
6329		head_copy->cpy_hdr.entries_pageable =
6330			copy->cpy_hdr.entries_pageable;
6331		vm_map_store_init(&head_copy->cpy_hdr);
6332
6333		head_copy->offset = copy->offset;
6334		head_copy->size = head_size;
6335
6336		copy->offset += head_size;
6337		copy->size -= head_size;
6338
6339		entry = vm_map_copy_first_entry(copy);
6340		vm_map_copy_clip_end(copy, entry, copy->offset);
6341		vm_map_copy_entry_unlink(copy, entry);
6342		vm_map_copy_entry_link(head_copy,
6343				       vm_map_copy_to_entry(head_copy),
6344				       entry);
6345
6346		/*
6347		 * Do the unaligned copy.
6348		 */
6349		kr = vm_map_copy_overwrite_nested(dst_map,
6350						  head_addr,
6351						  head_copy,
6352						  interruptible,
6353						  (pmap_t) NULL,
6354						  FALSE);
6355		if (kr != KERN_SUCCESS)
6356			goto done;
6357	}
6358
6359	if (tail_size) {
6360		/*
6361		 * Extract "tail_copy" out of "copy".
6362		 */
6363		tail_copy = (vm_map_copy_t) zalloc(vm_map_copy_zone);
6364		vm_map_copy_first_entry(tail_copy) =
6365			vm_map_copy_to_entry(tail_copy);
6366		vm_map_copy_last_entry(tail_copy) =
6367			vm_map_copy_to_entry(tail_copy);
6368		tail_copy->type = VM_MAP_COPY_ENTRY_LIST;
6369		tail_copy->cpy_hdr.nentries = 0;
6370		tail_copy->cpy_hdr.entries_pageable =
6371			copy->cpy_hdr.entries_pageable;
6372		vm_map_store_init(&tail_copy->cpy_hdr);
6373
6374		tail_copy->offset = copy->offset + copy->size - tail_size;
6375		tail_copy->size = tail_size;
6376
6377		copy->size -= tail_size;
6378
6379		entry = vm_map_copy_last_entry(copy);
6380		vm_map_copy_clip_start(copy, entry, tail_copy->offset);
6381		entry = vm_map_copy_last_entry(copy);
6382		vm_map_copy_entry_unlink(copy, entry);
6383		vm_map_copy_entry_link(tail_copy,
6384				       vm_map_copy_last_entry(tail_copy),
6385				       entry);
6386	}
6387
6388	/*
6389	 * Copy most (or possibly all) of the data.
6390	 */
6391	kr = vm_map_copy_overwrite_nested(dst_map,
6392					  dst_addr + head_size,
6393					  copy,
6394					  interruptible,
6395					  (pmap_t) NULL,
6396					  FALSE);
6397	if (kr != KERN_SUCCESS) {
6398		goto done;
6399	}
6400
6401	if (tail_size) {
6402		kr = vm_map_copy_overwrite_nested(dst_map,
6403						  tail_addr,
6404						  tail_copy,
6405						  interruptible,
6406						  (pmap_t) NULL,
6407						  FALSE);
6408	}
6409
6410done:
6411	assert(copy->type == VM_MAP_COPY_ENTRY_LIST);
6412	if (kr == KERN_SUCCESS) {
6413		/*
6414		 * Discard all the copy maps.
6415		 */
6416		if (head_copy) {
6417			vm_map_copy_discard(head_copy);
6418			head_copy = NULL;
6419		}
6420		vm_map_copy_discard(copy);
6421		if (tail_copy) {
6422			vm_map_copy_discard(tail_copy);
6423			tail_copy = NULL;
6424		}
6425	} else {
6426		/*
6427		 * Re-assemble the original copy map.
6428		 */
6429		if (head_copy) {
6430			entry = vm_map_copy_first_entry(head_copy);
6431			vm_map_copy_entry_unlink(head_copy, entry);
6432			vm_map_copy_entry_link(copy,
6433					       vm_map_copy_to_entry(copy),
6434					       entry);
6435			copy->offset -= head_size;
6436			copy->size += head_size;
6437			vm_map_copy_discard(head_copy);
6438			head_copy = NULL;
6439		}
6440		if (tail_copy) {
6441			entry = vm_map_copy_last_entry(tail_copy);
6442			vm_map_copy_entry_unlink(tail_copy, entry);
6443			vm_map_copy_entry_link(copy,
6444					       vm_map_copy_last_entry(copy),
6445					       entry);
6446			copy->size += tail_size;
6447			vm_map_copy_discard(tail_copy);
6448			tail_copy = NULL;
6449		}
6450	}
6451	return kr;
6452}
6453
6454
6455/*
6456 *	Routine: vm_map_copy_overwrite_unaligned	[internal use only]
6457 *
6458 *	Decription:
6459 *	Physically copy unaligned data
6460 *
6461 *	Implementation:
6462 *	Unaligned parts of pages have to be physically copied.  We use
6463 *	a modified form of vm_fault_copy (which understands none-aligned
6464 *	page offsets and sizes) to do the copy.  We attempt to copy as
6465 *	much memory in one go as possibly, however vm_fault_copy copies
6466 *	within 1 memory object so we have to find the smaller of "amount left"
6467 *	"source object data size" and "target object data size".  With
6468 *	unaligned data we don't need to split regions, therefore the source
6469 *	(copy) object should be one map entry, the target range may be split
6470 *	over multiple map entries however.  In any event we are pessimistic
6471 *	about these assumptions.
6472 *
6473 *	Assumptions:
6474 *	dst_map is locked on entry and is return locked on success,
6475 *	unlocked on error.
6476 */
6477
6478static kern_return_t
6479vm_map_copy_overwrite_unaligned(
6480	vm_map_t	dst_map,
6481	vm_map_entry_t	entry,
6482	vm_map_copy_t	copy,
6483	vm_map_offset_t	start)
6484{
6485	vm_map_entry_t		copy_entry = vm_map_copy_first_entry(copy);
6486	vm_map_version_t	version;
6487	vm_object_t		dst_object;
6488	vm_object_offset_t	dst_offset;
6489	vm_object_offset_t	src_offset;
6490	vm_object_offset_t	entry_offset;
6491	vm_map_offset_t		entry_end;
6492	vm_map_size_t		src_size,
6493				dst_size,
6494				copy_size,
6495				amount_left;
6496	kern_return_t		kr = KERN_SUCCESS;
6497
6498	vm_map_lock_write_to_read(dst_map);
6499
6500	src_offset = copy->offset - vm_object_trunc_page(copy->offset);
6501	amount_left = copy->size;
6502/*
6503 *	unaligned so we never clipped this entry, we need the offset into
6504 *	the vm_object not just the data.
6505 */
6506	while (amount_left > 0) {
6507
6508		if (entry == vm_map_to_entry(dst_map)) {
6509			vm_map_unlock_read(dst_map);
6510			return KERN_INVALID_ADDRESS;
6511		}
6512
6513		/* "start" must be within the current map entry */
6514		assert ((start>=entry->vme_start) && (start<entry->vme_end));
6515
6516		dst_offset = start - entry->vme_start;
6517
6518		dst_size = entry->vme_end - start;
6519
6520		src_size = copy_entry->vme_end -
6521			(copy_entry->vme_start + src_offset);
6522
6523		if (dst_size < src_size) {
6524/*
6525 *			we can only copy dst_size bytes before
6526 *			we have to get the next destination entry
6527 */
6528			copy_size = dst_size;
6529		} else {
6530/*
6531 *			we can only copy src_size bytes before
6532 *			we have to get the next source copy entry
6533 */
6534			copy_size = src_size;
6535		}
6536
6537		if (copy_size > amount_left) {
6538			copy_size = amount_left;
6539		}
6540/*
6541 *		Entry needs copy, create a shadow shadow object for
6542 *		Copy on write region.
6543 */
6544		if (entry->needs_copy &&
6545		    ((entry->protection & VM_PROT_WRITE) != 0))
6546		{
6547			if (vm_map_lock_read_to_write(dst_map)) {
6548				vm_map_lock_read(dst_map);
6549				goto RetryLookup;
6550			}
6551			vm_object_shadow(&entry->object.vm_object,
6552					 &entry->offset,
6553					 (vm_map_size_t)(entry->vme_end
6554							 - entry->vme_start));
6555			entry->needs_copy = FALSE;
6556			vm_map_lock_write_to_read(dst_map);
6557		}
6558		dst_object = entry->object.vm_object;
6559/*
6560 *		unlike with the virtual (aligned) copy we're going
6561 *		to fault on it therefore we need a target object.
6562 */
6563                if (dst_object == VM_OBJECT_NULL) {
6564			if (vm_map_lock_read_to_write(dst_map)) {
6565				vm_map_lock_read(dst_map);
6566				goto RetryLookup;
6567			}
6568			dst_object = vm_object_allocate((vm_map_size_t)
6569							entry->vme_end - entry->vme_start);
6570			entry->object.vm_object = dst_object;
6571			entry->offset = 0;
6572			vm_map_lock_write_to_read(dst_map);
6573		}
6574/*
6575 *		Take an object reference and unlock map. The "entry" may
6576 *		disappear or change when the map is unlocked.
6577 */
6578		vm_object_reference(dst_object);
6579		version.main_timestamp = dst_map->timestamp;
6580		entry_offset = entry->offset;
6581		entry_end = entry->vme_end;
6582		vm_map_unlock_read(dst_map);
6583/*
6584 *		Copy as much as possible in one pass
6585 */
6586		kr = vm_fault_copy(
6587			copy_entry->object.vm_object,
6588			copy_entry->offset + src_offset,
6589			&copy_size,
6590			dst_object,
6591			entry_offset + dst_offset,
6592			dst_map,
6593			&version,
6594			THREAD_UNINT );
6595
6596		start += copy_size;
6597		src_offset += copy_size;
6598		amount_left -= copy_size;
6599/*
6600 *		Release the object reference
6601 */
6602		vm_object_deallocate(dst_object);
6603/*
6604 *		If a hard error occurred, return it now
6605 */
6606		if (kr != KERN_SUCCESS)
6607			return kr;
6608
6609		if ((copy_entry->vme_start + src_offset) == copy_entry->vme_end
6610		    || amount_left == 0)
6611		{
6612/*
6613 *			all done with this copy entry, dispose.
6614 */
6615			vm_map_copy_entry_unlink(copy, copy_entry);
6616			vm_object_deallocate(copy_entry->object.vm_object);
6617			vm_map_copy_entry_dispose(copy, copy_entry);
6618
6619			if ((copy_entry = vm_map_copy_first_entry(copy))
6620			    == vm_map_copy_to_entry(copy) && amount_left) {
6621/*
6622 *				not finished copying but run out of source
6623 */
6624				return KERN_INVALID_ADDRESS;
6625			}
6626			src_offset = 0;
6627		}
6628
6629		if (amount_left == 0)
6630			return KERN_SUCCESS;
6631
6632		vm_map_lock_read(dst_map);
6633		if (version.main_timestamp == dst_map->timestamp) {
6634			if (start == entry_end) {
6635/*
6636 *				destination region is split.  Use the version
6637 *				information to avoid a lookup in the normal
6638 *				case.
6639 */
6640				entry = entry->vme_next;
6641/*
6642 *				should be contiguous. Fail if we encounter
6643 *				a hole in the destination.
6644 */
6645				if (start != entry->vme_start) {
6646					vm_map_unlock_read(dst_map);
6647					return KERN_INVALID_ADDRESS ;
6648				}
6649			}
6650		} else {
6651/*
6652 *			Map version check failed.
6653 *			we must lookup the entry because somebody
6654 *			might have changed the map behind our backs.
6655 */
6656		RetryLookup:
6657			if (!vm_map_lookup_entry(dst_map, start, &entry))
6658			{
6659				vm_map_unlock_read(dst_map);
6660				return KERN_INVALID_ADDRESS ;
6661			}
6662		}
6663	}/* while */
6664
6665	return KERN_SUCCESS;
6666}/* vm_map_copy_overwrite_unaligned */
6667
6668/*
6669 *	Routine: vm_map_copy_overwrite_aligned	[internal use only]
6670 *
6671 *	Description:
6672 *	Does all the vm_trickery possible for whole pages.
6673 *
6674 *	Implementation:
6675 *
6676 *	If there are no permanent objects in the destination,
6677 *	and the source and destination map entry zones match,
6678 *	and the destination map entry is not shared,
6679 *	then the map entries can be deleted and replaced
6680 *	with those from the copy.  The following code is the
6681 *	basic idea of what to do, but there are lots of annoying
6682 *	little details about getting protection and inheritance
6683 *	right.  Should add protection, inheritance, and sharing checks
6684 *	to the above pass and make sure that no wiring is involved.
6685 */
6686
6687int vm_map_copy_overwrite_aligned_src_not_internal = 0;
6688int vm_map_copy_overwrite_aligned_src_not_symmetric = 0;
6689int vm_map_copy_overwrite_aligned_src_large = 0;
6690
6691static kern_return_t
6692vm_map_copy_overwrite_aligned(
6693	vm_map_t	dst_map,
6694	vm_map_entry_t	tmp_entry,
6695	vm_map_copy_t	copy,
6696	vm_map_offset_t	start,
6697	__unused pmap_t	pmap)
6698{
6699	vm_object_t	object;
6700	vm_map_entry_t	copy_entry;
6701	vm_map_size_t	copy_size;
6702	vm_map_size_t	size;
6703	vm_map_entry_t	entry;
6704
6705	while ((copy_entry = vm_map_copy_first_entry(copy))
6706	       != vm_map_copy_to_entry(copy))
6707	{
6708		copy_size = (copy_entry->vme_end - copy_entry->vme_start);
6709
6710		entry = tmp_entry;
6711		assert(!entry->use_pmap); /* unnested when clipped earlier */
6712		if (entry == vm_map_to_entry(dst_map)) {
6713			vm_map_unlock(dst_map);
6714			return KERN_INVALID_ADDRESS;
6715		}
6716		size = (entry->vme_end - entry->vme_start);
6717		/*
6718		 *	Make sure that no holes popped up in the
6719		 *	address map, and that the protection is
6720		 *	still valid, in case the map was unlocked
6721		 *	earlier.
6722		 */
6723
6724		if ((entry->vme_start != start) || ((entry->is_sub_map)
6725						    && !entry->needs_copy)) {
6726			vm_map_unlock(dst_map);
6727			return(KERN_INVALID_ADDRESS);
6728		}
6729		assert(entry != vm_map_to_entry(dst_map));
6730
6731		/*
6732		 *	Check protection again
6733		 */
6734
6735		if ( ! (entry->protection & VM_PROT_WRITE)) {
6736			vm_map_unlock(dst_map);
6737			return(KERN_PROTECTION_FAILURE);
6738		}
6739
6740		/*
6741		 *	Adjust to source size first
6742		 */
6743
6744		if (copy_size < size) {
6745			vm_map_clip_end(dst_map, entry, entry->vme_start + copy_size);
6746			size = copy_size;
6747		}
6748
6749		/*
6750		 *	Adjust to destination size
6751		 */
6752
6753		if (size < copy_size) {
6754			vm_map_copy_clip_end(copy, copy_entry,
6755					     copy_entry->vme_start + size);
6756			copy_size = size;
6757		}
6758
6759		assert((entry->vme_end - entry->vme_start) == size);
6760		assert((tmp_entry->vme_end - tmp_entry->vme_start) == size);
6761		assert((copy_entry->vme_end - copy_entry->vme_start) == size);
6762
6763		/*
6764		 *	If the destination contains temporary unshared memory,
6765		 *	we can perform the copy by throwing it away and
6766		 *	installing the source data.
6767		 */
6768
6769		object = entry->object.vm_object;
6770		if ((!entry->is_shared &&
6771		     ((object == VM_OBJECT_NULL) ||
6772		      (object->internal && !object->true_share))) ||
6773		    entry->needs_copy) {
6774			vm_object_t	old_object = entry->object.vm_object;
6775			vm_object_offset_t	old_offset = entry->offset;
6776			vm_object_offset_t	offset;
6777
6778			/*
6779			 * Ensure that the source and destination aren't
6780			 * identical
6781			 */
6782			if (old_object == copy_entry->object.vm_object &&
6783			    old_offset == copy_entry->offset) {
6784				vm_map_copy_entry_unlink(copy, copy_entry);
6785				vm_map_copy_entry_dispose(copy, copy_entry);
6786
6787				if (old_object != VM_OBJECT_NULL)
6788					vm_object_deallocate(old_object);
6789
6790				start = tmp_entry->vme_end;
6791				tmp_entry = tmp_entry->vme_next;
6792				continue;
6793			}
6794
6795#if !CONFIG_EMBEDDED
6796#define __TRADEOFF1_OBJ_SIZE (64 * 1024 * 1024)	/* 64 MB */
6797#define __TRADEOFF1_COPY_SIZE (128 * 1024)	/* 128 KB */
6798			if (copy_entry->object.vm_object != VM_OBJECT_NULL &&
6799			    copy_entry->object.vm_object->vo_size >= __TRADEOFF1_OBJ_SIZE &&
6800			    copy_size <= __TRADEOFF1_COPY_SIZE) {
6801				/*
6802				 * Virtual vs. Physical copy tradeoff #1.
6803				 *
6804				 * Copying only a few pages out of a large
6805				 * object:  do a physical copy instead of
6806				 * a virtual copy, to avoid possibly keeping
6807				 * the entire large object alive because of
6808				 * those few copy-on-write pages.
6809				 */
6810				vm_map_copy_overwrite_aligned_src_large++;
6811				goto slow_copy;
6812			}
6813#endif /* !CONFIG_EMBEDDED */
6814
6815			if (entry->alias >= VM_MEMORY_MALLOC &&
6816			    entry->alias <= VM_MEMORY_MALLOC_LARGE_REUSED) {
6817				vm_object_t new_object, new_shadow;
6818
6819				/*
6820				 * We're about to map something over a mapping
6821				 * established by malloc()...
6822				 */
6823				new_object = copy_entry->object.vm_object;
6824				if (new_object != VM_OBJECT_NULL) {
6825					vm_object_lock_shared(new_object);
6826				}
6827				while (new_object != VM_OBJECT_NULL &&
6828#if !CONFIG_EMBEDDED
6829				       !new_object->true_share &&
6830				       new_object->copy_strategy == MEMORY_OBJECT_COPY_SYMMETRIC &&
6831#endif /* !CONFIG_EMBEDDED */
6832				       new_object->internal) {
6833					new_shadow = new_object->shadow;
6834					if (new_shadow == VM_OBJECT_NULL) {
6835						break;
6836					}
6837					vm_object_lock_shared(new_shadow);
6838					vm_object_unlock(new_object);
6839					new_object = new_shadow;
6840				}
6841				if (new_object != VM_OBJECT_NULL) {
6842					if (!new_object->internal) {
6843						/*
6844						 * The new mapping is backed
6845						 * by an external object.  We
6846						 * don't want malloc'ed memory
6847						 * to be replaced with such a
6848						 * non-anonymous mapping, so
6849						 * let's go off the optimized
6850						 * path...
6851						 */
6852						vm_map_copy_overwrite_aligned_src_not_internal++;
6853						vm_object_unlock(new_object);
6854						goto slow_copy;
6855					}
6856#if !CONFIG_EMBEDDED
6857					if (new_object->true_share ||
6858					    new_object->copy_strategy != MEMORY_OBJECT_COPY_SYMMETRIC) {
6859						/*
6860						 * Same if there's a "true_share"
6861						 * object in the shadow chain, or
6862						 * an object with a non-default
6863						 * (SYMMETRIC) copy strategy.
6864						 */
6865						vm_map_copy_overwrite_aligned_src_not_symmetric++;
6866						vm_object_unlock(new_object);
6867						goto slow_copy;
6868					}
6869#endif /* !CONFIG_EMBEDDED */
6870					vm_object_unlock(new_object);
6871				}
6872				/*
6873				 * The new mapping is still backed by
6874				 * anonymous (internal) memory, so it's
6875				 * OK to substitute it for the original
6876				 * malloc() mapping.
6877				 */
6878			}
6879
6880			if (old_object != VM_OBJECT_NULL) {
6881				if(entry->is_sub_map) {
6882					if(entry->use_pmap) {
6883#ifndef NO_NESTED_PMAP
6884						pmap_unnest(dst_map->pmap,
6885							    (addr64_t)entry->vme_start,
6886							    entry->vme_end - entry->vme_start);
6887#endif	/* NO_NESTED_PMAP */
6888						if(dst_map->mapped_in_other_pmaps) {
6889							/* clean up parent */
6890							/* map/maps */
6891							vm_map_submap_pmap_clean(
6892								dst_map, entry->vme_start,
6893								entry->vme_end,
6894								entry->object.sub_map,
6895								entry->offset);
6896						}
6897					} else {
6898						vm_map_submap_pmap_clean(
6899							dst_map, entry->vme_start,
6900							entry->vme_end,
6901							entry->object.sub_map,
6902							entry->offset);
6903					}
6904				   	vm_map_deallocate(
6905						entry->object.sub_map);
6906			   	} else {
6907					if(dst_map->mapped_in_other_pmaps) {
6908						vm_object_pmap_protect(
6909							entry->object.vm_object,
6910							entry->offset,
6911							entry->vme_end
6912							- entry->vme_start,
6913							PMAP_NULL,
6914							entry->vme_start,
6915							VM_PROT_NONE);
6916					} else {
6917						pmap_remove(dst_map->pmap,
6918							    (addr64_t)(entry->vme_start),
6919							    (addr64_t)(entry->vme_end));
6920					}
6921					vm_object_deallocate(old_object);
6922			   	}
6923			}
6924
6925			entry->is_sub_map = FALSE;
6926			entry->object = copy_entry->object;
6927			object = entry->object.vm_object;
6928			entry->needs_copy = copy_entry->needs_copy;
6929			entry->wired_count = 0;
6930			entry->user_wired_count = 0;
6931			offset = entry->offset = copy_entry->offset;
6932
6933			vm_map_copy_entry_unlink(copy, copy_entry);
6934			vm_map_copy_entry_dispose(copy, copy_entry);
6935
6936			/*
			 * We could try to push pages into the pmap at this point, BUT
			 * this optimization only saved on average 2 us per page if ALL
			 * the pages in the source were currently mapped
			 * and ALL the pages in the dest were touched.  If fewer than
			 * 2/3 of the pages were touched, this optimization actually
			 * cost more cycles.  It also puts a lot of pressure on the
			 * pmap layer with respect to mapping structures.
6943			 */
6944
6945			/*
6946			 *	Set up for the next iteration.  The map
6947			 *	has not been unlocked, so the next
6948			 *	address should be at the end of this
6949			 *	entry, and the next map entry should be
6950			 *	the one following it.
6951			 */
6952
6953			start = tmp_entry->vme_end;
6954			tmp_entry = tmp_entry->vme_next;
6955		} else {
6956			vm_map_version_t	version;
6957			vm_object_t		dst_object;
6958			vm_object_offset_t	dst_offset;
6959			kern_return_t		r;
6960
6961		slow_copy:
6962			if (entry->needs_copy) {
6963				vm_object_shadow(&entry->object.vm_object,
6964						 &entry->offset,
6965						 (entry->vme_end -
6966						  entry->vme_start));
6967				entry->needs_copy = FALSE;
6968			}
6969
6970			dst_object = entry->object.vm_object;
6971			dst_offset = entry->offset;
6972
6973			/*
6974			 *	Take an object reference, and record
6975			 *	the map version information so that the
6976			 *	map can be safely unlocked.
6977			 */
6978
6979			if (dst_object == VM_OBJECT_NULL) {
6980				/*
6981				 * We would usually have just taken the
6982				 * optimized path above if the destination
6983				 * object has not been allocated yet.  But we
6984				 * now disable that optimization if the copy
6985				 * entry's object is not backed by anonymous
6986				 * memory to avoid replacing malloc'ed
6987				 * (i.e. re-usable) anonymous memory with a
6988				 * not-so-anonymous mapping.
6989				 * So we have to handle this case here and
6990				 * allocate a new VM object for this map entry.
6991				 */
6992				dst_object = vm_object_allocate(
6993					entry->vme_end - entry->vme_start);
6994				dst_offset = 0;
6995				entry->object.vm_object = dst_object;
6996				entry->offset = dst_offset;
6997
6998			}
6999
7000			vm_object_reference(dst_object);
7001
7002			/* account for unlock bumping up timestamp */
7003			version.main_timestamp = dst_map->timestamp + 1;
7004
7005			vm_map_unlock(dst_map);
7006
7007			/*
7008			 *	Copy as much as possible in one pass
7009			 */
7010
7011			copy_size = size;
7012			r = vm_fault_copy(
7013				copy_entry->object.vm_object,
7014				copy_entry->offset,
7015				&copy_size,
7016				dst_object,
7017				dst_offset,
7018				dst_map,
7019				&version,
7020				THREAD_UNINT );
7021
7022			/*
7023			 *	Release the object reference
7024			 */
7025
7026			vm_object_deallocate(dst_object);
7027
7028			/*
7029			 *	If a hard error occurred, return it now
7030			 */
7031
7032			if (r != KERN_SUCCESS)
7033				return(r);
7034
7035			if (copy_size != 0) {
7036				/*
7037				 *	Dispose of the copied region
7038				 */
7039
7040				vm_map_copy_clip_end(copy, copy_entry,
7041						     copy_entry->vme_start + copy_size);
7042				vm_map_copy_entry_unlink(copy, copy_entry);
7043				vm_object_deallocate(copy_entry->object.vm_object);
7044				vm_map_copy_entry_dispose(copy, copy_entry);
7045			}
7046
7047			/*
7048			 *	Pick up in the destination map where we left off.
7049			 *
7050			 *	Use the version information to avoid a lookup
7051			 *	in the normal case.
7052			 */
7053
7054			start += copy_size;
7055			vm_map_lock(dst_map);
7056			if (version.main_timestamp == dst_map->timestamp &&
7057			    copy_size != 0) {
7058				/* We can safely use saved tmp_entry value */
7059
7060				vm_map_clip_end(dst_map, tmp_entry, start);
7061				tmp_entry = tmp_entry->vme_next;
7062			} else {
7063				/* Must do lookup of tmp_entry */
7064
7065				if (!vm_map_lookup_entry(dst_map, start, &tmp_entry)) {
7066					vm_map_unlock(dst_map);
7067					return(KERN_INVALID_ADDRESS);
7068				}
7069				vm_map_clip_start(dst_map, tmp_entry, start);
7070			}
7071		}
7072	}/* while */
7073
7074	return(KERN_SUCCESS);
7075}/* vm_map_copy_overwrite_aligned */
7076
7077/*
7078 *	Routine: vm_map_copyin_kernel_buffer [internal use only]
7079 *
7080 *	Description:
7081 *		Copy in data to a kernel buffer from space in the
7082 *		source map. The original space may be optionally
7083 *		deallocated.
7084 *
7085 *		If successful, returns a new copy object.
7086 */
7087static kern_return_t
7088vm_map_copyin_kernel_buffer(
7089	vm_map_t	src_map,
7090	vm_map_offset_t	src_addr,
7091	vm_map_size_t	len,
7092	boolean_t	src_destroy,
7093	vm_map_copy_t	*copy_result)
7094{
7095	kern_return_t kr;
7096	vm_map_copy_t copy;
7097	vm_size_t kalloc_size;
7098
7099	if ((vm_size_t) len != len) {
7100		/* "len" is too big and doesn't fit in a "vm_size_t" */
7101		return KERN_RESOURCE_SHORTAGE;
7102	}
7103	kalloc_size = (vm_size_t) (sizeof(struct vm_map_copy) + len);
7104	assert((vm_map_size_t) kalloc_size == sizeof (struct vm_map_copy) + len);
7105
7106	copy = (vm_map_copy_t) kalloc(kalloc_size);
7107	if (copy == VM_MAP_COPY_NULL) {
7108		return KERN_RESOURCE_SHORTAGE;
7109	}
7110	copy->type = VM_MAP_COPY_KERNEL_BUFFER;
7111	copy->size = len;
7112	copy->offset = 0;
7113	copy->cpy_kdata = (void *) (copy + 1);
7114	copy->cpy_kalloc_size = kalloc_size;
7115
7116	kr = copyinmap(src_map, src_addr, copy->cpy_kdata, (vm_size_t) len);
7117	if (kr != KERN_SUCCESS) {
7118		kfree(copy, kalloc_size);
7119		return kr;
7120	}
7121	if (src_destroy) {
7122		(void) vm_map_remove(src_map, vm_map_trunc_page(src_addr),
7123				     vm_map_round_page(src_addr + len),
7124				     VM_MAP_REMOVE_INTERRUPTIBLE |
7125				     VM_MAP_REMOVE_WAIT_FOR_KWIRE |
7126				     (src_map == kernel_map) ?
7127				     VM_MAP_REMOVE_KUNWIRE : 0);
7128	}
7129	*copy_result = copy;
7130	return KERN_SUCCESS;
7131}
7132
7133/*
7134 *	Routine: vm_map_copyout_kernel_buffer	[internal use only]
7135 *
7136 *	Description:
7137 *		Copy out data from a kernel buffer into space in the
7138 *		destination map. The space may be otpionally dynamically
7139 *		allocated.
7140 *
7141 *		If successful, consumes the copy object.
7142 *		Otherwise, the caller is responsible for it.
7143 */
7144static int vm_map_copyout_kernel_buffer_failures = 0;
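
/*
 *	Cross-map path sketch (illustrative only, mirroring the code
 *	below): when the target map is not the caller's own map, the
 *	thread temporarily adopts the target address space:
 *
 *		vm_map_reference(map);
 *		oldmap = vm_map_switch(map);
 *		if (copyout(copy->cpy_kdata, *addr, (vm_size_t) copy->size))
 *			kr = KERN_INVALID_ADDRESS;
 *		(void) vm_map_switch(oldmap);
 *		vm_map_deallocate(map);
 */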
7145static kern_return_t
7146vm_map_copyout_kernel_buffer(
7147	vm_map_t		map,
7148	vm_map_address_t	*addr,	/* IN/OUT */
7149	vm_map_copy_t		copy,
7150	boolean_t		overwrite)
7151{
7152	kern_return_t kr = KERN_SUCCESS;
7153	thread_t thread = current_thread();
7154
7155	if (!overwrite) {
7156
7157		/*
7158		 * Allocate space in the target map for the data
7159		 */
7160		*addr = 0;
7161		kr = vm_map_enter(map,
7162				  addr,
7163				  vm_map_round_page(copy->size),
7164				  (vm_map_offset_t) 0,
7165				  VM_FLAGS_ANYWHERE,
7166				  VM_OBJECT_NULL,
7167				  (vm_object_offset_t) 0,
7168				  FALSE,
7169				  VM_PROT_DEFAULT,
7170				  VM_PROT_ALL,
7171				  VM_INHERIT_DEFAULT);
7172		if (kr != KERN_SUCCESS)
7173			return kr;
7174	}
7175
7176	/*
7177	 * Copyout the data from the kernel buffer to the target map.
7178	 */
7179	if (thread->map == map) {
7180
7181		/*
7182		 * If the target map is the current map, just do
7183		 * the copy.
7184		 */
7185		assert((vm_size_t) copy->size == copy->size);
7186		if (copyout(copy->cpy_kdata, *addr, (vm_size_t) copy->size)) {
7187			kr = KERN_INVALID_ADDRESS;
7188		}
7189	}
7190	else {
7191		vm_map_t oldmap;
7192
7193		/*
7194		 * If the target map is another map, assume the
7195		 * target's address space identity for the duration
7196		 * of the copy.
7197		 */
7198		vm_map_reference(map);
7199		oldmap = vm_map_switch(map);
7200
7201		assert((vm_size_t) copy->size == copy->size);
7202		if (copyout(copy->cpy_kdata, *addr, (vm_size_t) copy->size)) {
7203			vm_map_copyout_kernel_buffer_failures++;
7204			kr = KERN_INVALID_ADDRESS;
7205		}
7206
7207		(void) vm_map_switch(oldmap);
7208		vm_map_deallocate(map);
7209	}
7210
7211	if (kr != KERN_SUCCESS) {
7212		/* the copy failed, clean up */
7213		if (!overwrite) {
7214			/*
7215			 * Deallocate the space we allocated in the target map.
7216			 */
7217			(void) vm_map_remove(map,
7218					     vm_map_trunc_page(*addr),
7219					     vm_map_round_page(*addr +
7220							       vm_map_round_page(copy->size)),
7221					     VM_MAP_NO_FLAGS);
7222			*addr = 0;
7223		}
7224	} else {
		/* copy was successful, discard the copy structure */
7226		kfree(copy, copy->cpy_kalloc_size);
7227	}
7228
7229	return kr;
7230}
7231
7232/*
7233 *	Macro:		vm_map_copy_insert
7234 *
7235 *	Description:
7236 *		Link a copy chain ("copy") into a map at the
7237 *		specified location (after "where").
7238 *	Side effects:
7239 *		The copy chain is destroyed.
7240 *	Warning:
7241 *		The arguments are evaluated multiple times.
7242 */
7243#define	vm_map_copy_insert(map, where, copy)				\
7244MACRO_BEGIN								\
7245	vm_map_store_copy_insert(map, where, copy);	  \
7246	zfree(vm_map_copy_zone, copy);		\
7247MACRO_END
7248
7249/*
7250 *	Routine:	vm_map_copyout
7251 *
7252 *	Description:
7253 *		Copy out a copy chain ("copy") into newly-allocated
7254 *		space in the destination map.
7255 *
7256 *		If successful, consumes the copy object.
7257 *		Otherwise, the caller is responsible for it.
7258 */
7259kern_return_t
7260vm_map_copyout(
7261	vm_map_t		dst_map,
7262	vm_map_address_t	*dst_addr,	/* OUT */
7263	vm_map_copy_t		copy)
7264{
7265	vm_map_size_t		size;
7266	vm_map_size_t		adjustment;
7267	vm_map_offset_t		start;
7268	vm_object_offset_t	vm_copy_start;
7269	vm_map_entry_t		last;
7270	register
7271	vm_map_entry_t		entry;
7272
7273	/*
7274	 *	Check for null copy object.
7275	 */
7276
7277	if (copy == VM_MAP_COPY_NULL) {
7278		*dst_addr = 0;
7279		return(KERN_SUCCESS);
7280	}
7281
7282	/*
7283	 *	Check for special copy object, created
7284	 *	by vm_map_copyin_object.
7285	 */
7286
7287	if (copy->type == VM_MAP_COPY_OBJECT) {
7288		vm_object_t 		object = copy->cpy_object;
7289		kern_return_t 		kr;
7290		vm_object_offset_t	offset;
7291
7292		offset = vm_object_trunc_page(copy->offset);
7293		size = vm_map_round_page(copy->size +
7294					 (vm_map_size_t)(copy->offset - offset));
7295		*dst_addr = 0;
7296		kr = vm_map_enter(dst_map, dst_addr, size,
7297				  (vm_map_offset_t) 0, VM_FLAGS_ANYWHERE,
7298				  object, offset, FALSE,
7299				  VM_PROT_DEFAULT, VM_PROT_ALL,
7300				  VM_INHERIT_DEFAULT);
7301		if (kr != KERN_SUCCESS)
7302			return(kr);
		/* Account for non-page-aligned copy object */
7304		*dst_addr += (vm_map_offset_t)(copy->offset - offset);
7305		zfree(vm_map_copy_zone, copy);
7306		return(KERN_SUCCESS);
7307	}
7308
7309	/*
7310	 *	Check for special kernel buffer allocated
7311	 *	by new_ipc_kmsg_copyin.
7312	 */
7313
7314	if (copy->type == VM_MAP_COPY_KERNEL_BUFFER) {
7315		return(vm_map_copyout_kernel_buffer(dst_map, dst_addr,
7316						    copy, FALSE));
7317	}
7318
7319	/*
7320	 *	Find space for the data
7321	 */
7322
7323	vm_copy_start = vm_object_trunc_page(copy->offset);
7324	size =	vm_map_round_page((vm_map_size_t)copy->offset + copy->size)
7325		- vm_copy_start;
7326
7327StartAgain: ;
7328
7329	vm_map_lock(dst_map);
7330	if( dst_map->disable_vmentry_reuse == TRUE) {
7331		VM_MAP_HIGHEST_ENTRY(dst_map, entry, start);
7332		last = entry;
7333	} else {
7334		assert(first_free_is_valid(dst_map));
7335		start = ((last = dst_map->first_free) == vm_map_to_entry(dst_map)) ?
7336		vm_map_min(dst_map) : last->vme_end;
7337	}
7338
7339	while (TRUE) {
7340		vm_map_entry_t	next = last->vme_next;
7341		vm_map_offset_t	end = start + size;
7342
7343		if ((end > dst_map->max_offset) || (end < start)) {
7344			if (dst_map->wait_for_space) {
7345				if (size <= (dst_map->max_offset - dst_map->min_offset)) {
7346					assert_wait((event_t) dst_map,
7347						    THREAD_INTERRUPTIBLE);
7348					vm_map_unlock(dst_map);
7349					thread_block(THREAD_CONTINUE_NULL);
7350					goto StartAgain;
7351				}
7352			}
7353			vm_map_unlock(dst_map);
7354			return(KERN_NO_SPACE);
7355		}
7356
7357		if ((next == vm_map_to_entry(dst_map)) ||
7358		    (next->vme_start >= end))
7359			break;
7360
7361		last = next;
7362		start = last->vme_end;
7363	}
7364
7365	/*
7366	 *	Since we're going to just drop the map
7367	 *	entries from the copy into the destination
7368	 *	map, they must come from the same pool.
7369	 */
7370
7371	if (copy->cpy_hdr.entries_pageable != dst_map->hdr.entries_pageable) {
7372		/*
7373		 * Mismatches occur when dealing with the default
7374		 * pager.
7375		 */
7376		zone_t		old_zone;
7377		vm_map_entry_t	next, new;
7378
7379		/*
7380		 * Find the zone that the copies were allocated from
7381		 */
7382
7383		entry = vm_map_copy_first_entry(copy);
7384
7385		/*
7386		 * Reinitialize the copy so that vm_map_copy_entry_link
7387		 * will work.
7388		 */
7389		vm_map_store_copy_reset(copy, entry);
7390		copy->cpy_hdr.entries_pageable = dst_map->hdr.entries_pageable;
7391
7392		/*
7393		 * Copy each entry.
7394		 */
7395		while (entry != vm_map_copy_to_entry(copy)) {
7396			new = vm_map_copy_entry_create(copy, !copy->cpy_hdr.entries_pageable);
7397			vm_map_entry_copy_full(new, entry);
			new->use_pmap = FALSE;	/* clear address space specifics */
7399			vm_map_copy_entry_link(copy,
7400					       vm_map_copy_last_entry(copy),
7401					       new);
7402			next = entry->vme_next;
7403			old_zone = entry->from_reserved_zone ? vm_map_entry_reserved_zone : vm_map_entry_zone;
7404			zfree(old_zone, entry);
7405			entry = next;
7406		}
7407	}
7408
7409	/*
7410	 *	Adjust the addresses in the copy chain, and
7411	 *	reset the region attributes.
7412	 */
7413
7414	adjustment = start - vm_copy_start;
7415	for (entry = vm_map_copy_first_entry(copy);
7416	     entry != vm_map_copy_to_entry(copy);
7417	     entry = entry->vme_next) {
7418		entry->vme_start += adjustment;
7419		entry->vme_end += adjustment;
7420
7421		entry->inheritance = VM_INHERIT_DEFAULT;
7422		entry->protection = VM_PROT_DEFAULT;
7423		entry->max_protection = VM_PROT_ALL;
7424		entry->behavior = VM_BEHAVIOR_DEFAULT;
7425
7426		/*
7427		 * If the entry is now wired,
7428		 * map the pages into the destination map.
7429		 */
7430		if (entry->wired_count != 0) {
7431			register vm_map_offset_t va;
7432			vm_object_offset_t	 offset;
7433			register vm_object_t object;
7434			vm_prot_t prot;
7435			int	type_of_fault;
7436
7437			object = entry->object.vm_object;
7438			offset = entry->offset;
7439			va = entry->vme_start;
7440
7441			pmap_pageable(dst_map->pmap,
7442				      entry->vme_start,
7443				      entry->vme_end,
7444				      TRUE);
7445
7446			while (va < entry->vme_end) {
7447				register vm_page_t	m;
7448
7449				/*
7450				 * Look up the page in the object.
7451				 * Assert that the page will be found in the
7452				 * top object:
7453				 * either
7454				 *	the object was newly created by
7455				 *	vm_object_copy_slowly, and has
7456				 *	copies of all of the pages from
7457				 *	the source object
7458				 * or
7459				 *	the object was moved from the old
7460				 *	map entry; because the old map
7461				 *	entry was wired, all of the pages
7462				 *	were in the top-level object.
7463				 *	(XXX not true if we wire pages for
7464				 *	 reading)
7465				 */
7466				vm_object_lock(object);
7467
7468				m = vm_page_lookup(object, offset);
7469				if (m == VM_PAGE_NULL || !VM_PAGE_WIRED(m) ||
7470				    m->absent)
7471					panic("vm_map_copyout: wiring %p", m);
7472
7473				/*
7474				 * ENCRYPTED SWAP:
7475				 * The page is assumed to be wired here, so it
7476				 * shouldn't be encrypted.  Otherwise, we
7477				 * couldn't enter it in the page table, since
7478				 * we don't want the user to see the encrypted
7479				 * data.
7480				 */
7481				ASSERT_PAGE_DECRYPTED(m);
7482
7483				prot = entry->protection;
7484
7485				if (override_nx(dst_map, entry->alias) && prot)
7486				        prot |= VM_PROT_EXECUTE;
7487
7488				type_of_fault = DBG_CACHE_HIT_FAULT;
7489
7490				vm_fault_enter(m, dst_map->pmap, va, prot, prot,
7491					       VM_PAGE_WIRED(m), FALSE, FALSE, FALSE, NULL,
7492					       &type_of_fault);
7493
7494				vm_object_unlock(object);
7495
7496				offset += PAGE_SIZE_64;
7497				va += PAGE_SIZE;
7498			}
7499		}
7500	}
7501
7502	/*
7503	 *	Correct the page alignment for the result
7504	 */
7505
7506	*dst_addr = start + (copy->offset - vm_copy_start);
7507
7508	/*
7509	 *	Update the hints and the map size
7510	 */
7511
7512	SAVE_HINT_MAP_WRITE(dst_map, vm_map_copy_last_entry(copy));
7513
7514	dst_map->size += size;
7515
7516	/*
7517	 *	Link in the copy
7518	 */
7519
7520	vm_map_copy_insert(dst_map, last, copy);
7521
7522	vm_map_unlock(dst_map);
7523
7524	/*
7525	 * XXX	If wiring_required, call vm_map_pageable
7526	 */
7527
7528	return(KERN_SUCCESS);
7529}
7530
7531/*
7532 *	Routine:	vm_map_copyin
7533 *
7534 *	Description:
7535 *		see vm_map_copyin_common.  Exported via Unsupported.exports.
7536 *
7537 */
7538
7539#undef vm_map_copyin
7540
7541kern_return_t
7542vm_map_copyin(
7543	vm_map_t			src_map,
7544	vm_map_address_t	src_addr,
7545	vm_map_size_t		len,
7546	boolean_t			src_destroy,
7547	vm_map_copy_t		*copy_result)	/* OUT */
7548{
7549	return(vm_map_copyin_common(src_map, src_addr, len, src_destroy,
7550					FALSE, copy_result, FALSE));
7551}
7552
7553/*
7554 *	Routine:	vm_map_copyin_common
7555 *
7556 *	Description:
7557 *		Copy the specified region (src_addr, len) from the
7558 *		source address space (src_map), possibly removing
7559 *		the region from the source address space (src_destroy).
7560 *
7561 *	Returns:
7562 *		A vm_map_copy_t object (copy_result), suitable for
7563 *		insertion into another address space (using vm_map_copyout),
7564 *		copying over another address space region (using
7565 *		vm_map_copy_overwrite).  If the copy is unused, it
7566 *		should be destroyed (using vm_map_copy_discard).
7567 *
7568 *	In/out conditions:
7569 *		The source map should not be locked on entry.
7570 */
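/*
 *	A minimal usage sketch (the caller-side names here are
 *	illustrative, not part of this file):
 *
 *		vm_map_copy_t	copy;
 *
 *		if (vm_map_copyin(src_map, src_addr, len,
 *				  FALSE, &copy) == KERN_SUCCESS) {
 *			... insert it with vm_map_copyout(), overwrite
 *			... with vm_map_copy_overwrite(), or release it
 *			... with vm_map_copy_discard().
 *		}
 */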
7571
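/*
 *	submap_map_t records, for each submap we descend into, the parent
 *	map and the portion of the original request that the submap
 *	covers, so that the copy loop can pop back up to the parent once
 *	that portion has been copied.
 */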
7572typedef struct submap_map {
7573	vm_map_t	parent_map;
7574	vm_map_offset_t	base_start;
7575	vm_map_offset_t	base_end;
7576	vm_map_size_t	base_len;
7577	struct submap_map *next;
7578} submap_map_t;
7579
7580kern_return_t
7581vm_map_copyin_common(
7582	vm_map_t	src_map,
7583	vm_map_address_t src_addr,
7584	vm_map_size_t	len,
7585	boolean_t	src_destroy,
7586	__unused boolean_t	src_volatile,
7587	vm_map_copy_t	*copy_result,	/* OUT */
7588	boolean_t	use_maxprot)
7589{
7590	vm_map_entry_t	tmp_entry;	/* Result of last map lookup --
7591					 * in multi-level lookup, this
7592					 * entry contains the actual
7593					 * vm_object/offset.
7594					 */
7595	register
7596	vm_map_entry_t	new_entry = VM_MAP_ENTRY_NULL;	/* Map entry for copy */
7597
7598	vm_map_offset_t	src_start;	/* Start of current entry --
7599					 * where copy is taking place now
7600					 */
7601	vm_map_offset_t	src_end;	/* End of entire region to be
7602					 * copied */
7603	vm_map_offset_t src_base;
7604	vm_map_t	base_map = src_map;
7605	boolean_t	map_share=FALSE;
7606	submap_map_t	*parent_maps = NULL;
7607
7608	register
7609	vm_map_copy_t	copy;		/* Resulting copy */
7610	vm_map_address_t	copy_addr;
7611
7612	/*
7613	 *	Check for copies of zero bytes.
7614	 */
7615
7616	if (len == 0) {
7617		*copy_result = VM_MAP_COPY_NULL;
7618		return(KERN_SUCCESS);
7619	}
7620
7621	/*
7622	 *	Check that the end address doesn't overflow
7623	 */
7624	src_end = src_addr + len;
7625	if (src_end < src_addr)
7626		return KERN_INVALID_ADDRESS;
7627
7628	/*
7629	 * If the copy is sufficiently small, use a kernel buffer instead
7630	 * of making a virtual copy.  The theory being that the cost of
7631	 * setting up VM (and taking C-O-W faults) dominates the copy costs
7632	 * for small regions.
7633	 */
7634	if ((len < msg_ool_size_small) && !use_maxprot)
7635		return vm_map_copyin_kernel_buffer(src_map, src_addr, len,
7636						   src_destroy, copy_result);
7637
7638	/*
7639	 *	Compute (page aligned) start and end of region
7640	 */
7641	src_start = vm_map_trunc_page(src_addr);
7642	src_end = vm_map_round_page(src_end);
7643
7644	XPR(XPR_VM_MAP, "vm_map_copyin_common map 0x%x addr 0x%x len 0x%x dest %d\n", src_map, src_addr, len, src_destroy, 0);
7645
7646	/*
7647	 *	Allocate a header element for the list.
7648	 *
7649	 *	Use the start and end in the header to
7650	 *	remember the endpoints prior to rounding.
7651	 */
7652
7653	copy = (vm_map_copy_t) zalloc(vm_map_copy_zone);
7654	vm_map_copy_first_entry(copy) =
7655		vm_map_copy_last_entry(copy) = vm_map_copy_to_entry(copy);
7656	copy->type = VM_MAP_COPY_ENTRY_LIST;
7657	copy->cpy_hdr.nentries = 0;
7658	copy->cpy_hdr.entries_pageable = TRUE;
7659
7660	vm_map_store_init( &(copy->cpy_hdr) );
7661
7662	copy->offset = src_addr;
7663	copy->size = len;
7664
7665	new_entry = vm_map_copy_entry_create(copy, !copy->cpy_hdr.entries_pageable);
7666
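	/*
	 *	The RETURN() macro below is the error path: it releases the
	 *	(sub)map lock and reference, the spare copy entry, the
	 *	partially built copy, and the stack of parent submap records
	 *	before returning the given code.
	 */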
7667#define	RETURN(x)						\
7668	MACRO_BEGIN						\
7669	vm_map_unlock(src_map);					\
7670	if(src_map != base_map)					\
7671		vm_map_deallocate(src_map);			\
7672	if (new_entry != VM_MAP_ENTRY_NULL)			\
7673		vm_map_copy_entry_dispose(copy,new_entry);	\
7674	vm_map_copy_discard(copy);				\
7675	{							\
7676		submap_map_t	*_ptr;				\
7677								\
7678		for(_ptr = parent_maps; _ptr != NULL; _ptr = parent_maps) { \
7679			parent_maps=parent_maps->next;		\
7680			if (_ptr->parent_map != base_map)	\
7681				vm_map_deallocate(_ptr->parent_map);	\
7682			kfree(_ptr, sizeof(submap_map_t));	\
7683		}						\
7684	}							\
7685	MACRO_RETURN(x);					\
7686	MACRO_END
7687
7688	/*
7689	 *	Find the beginning of the region.
7690	 */
7691
7692 	vm_map_lock(src_map);
7693
7694	if (!vm_map_lookup_entry(src_map, src_start, &tmp_entry))
7695		RETURN(KERN_INVALID_ADDRESS);
7696	if(!tmp_entry->is_sub_map) {
7697		vm_map_clip_start(src_map, tmp_entry, src_start);
7698	}
7699	/* set for later submap fix-up */
7700	copy_addr = src_start;
7701
7702	/*
7703	 *	Go through entries until we get to the end.
7704	 */
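	/*
	 *	On each iteration: descend through any submaps to the entry
	 *	that actually maps this range, try the cheap copy-on-write
	 *	optimizations, fall back to a real object copy when they do
	 *	not apply, re-verify the map if it had to be unlocked, and
	 *	link the new entry onto the copy list before moving on.
	 */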
7705
7706	while (TRUE) {
7707		register
7708		vm_map_entry_t	src_entry = tmp_entry;	/* Top-level entry */
7709		vm_map_size_t	src_size;		/* Size of source
7710							 * map entry (in both
7711							 * maps)
7712							 */
7713
7714		register
7715		vm_object_t		src_object;	/* Object to copy */
7716		vm_object_offset_t	src_offset;
7717
7718		boolean_t	src_needs_copy;		/* Should source map
7719							 * be made read-only
7720							 * for copy-on-write?
7721							 */
7722
7723		boolean_t	new_entry_needs_copy;	/* Will new entry be COW? */
7724
7725		boolean_t	was_wired;		/* Was source wired? */
7726		vm_map_version_t version;		/* Version before locks
7727							 * dropped to make copy
7728							 */
7729		kern_return_t	result;			/* Return value from
7730							 * copy_strategically.
7731							 */
7732		while(tmp_entry->is_sub_map) {
7733			vm_map_size_t submap_len;
7734			submap_map_t *ptr;
7735
7736			ptr = (submap_map_t *)kalloc(sizeof(submap_map_t));
7737			ptr->next = parent_maps;
7738			parent_maps = ptr;
7739			ptr->parent_map = src_map;
7740			ptr->base_start = src_start;
7741			ptr->base_end = src_end;
7742			submap_len = tmp_entry->vme_end - src_start;
7743			if(submap_len > (src_end-src_start))
7744				submap_len = src_end-src_start;
7745			ptr->base_len = submap_len;
7746
7747			src_start -= tmp_entry->vme_start;
7748			src_start += tmp_entry->offset;
7749			src_end = src_start + submap_len;
7750			src_map = tmp_entry->object.sub_map;
7751			vm_map_lock(src_map);
7752			/* keep an outstanding reference on all maps in */
7753			/* the parent-map chain except the base map */
7754			vm_map_reference(src_map);
7755			vm_map_unlock(ptr->parent_map);
7756			if (!vm_map_lookup_entry(
7757				    src_map, src_start, &tmp_entry))
7758				RETURN(KERN_INVALID_ADDRESS);
7759			map_share = TRUE;
7760			if(!tmp_entry->is_sub_map)
7761				vm_map_clip_start(src_map, tmp_entry, src_start);
7762			src_entry = tmp_entry;
7763		}
7764		/* we are now in the lowest level submap... */
7765
7766		if ((tmp_entry->object.vm_object != VM_OBJECT_NULL) &&
7767		    (tmp_entry->object.vm_object->phys_contiguous)) {
7768			/* This is not supported for now.  In the     */
7769			/* future we will need to detect the          */
7770			/* phys_contig condition and then upgrade     */
7771			/* copy_slowly to do a physical copy from the */
7772			/* device-memory-based object.  We can        */
7773			/* piggy-back off of the was_wired boolean    */
7774			/* to set up the proper handling.             */
7775			RETURN(KERN_PROTECTION_FAILURE);
7776		}
7777		/*
7778		 *	Create a new address map entry to hold the result.
7779		 *	Fill in the fields from the appropriate source entries.
7780		 *	We must unlock the source map to do this if we need
7781		 *	to allocate a map entry.
7782		 */
7783		if (new_entry == VM_MAP_ENTRY_NULL) {
7784			version.main_timestamp = src_map->timestamp;
7785			vm_map_unlock(src_map);
7786
7787			new_entry = vm_map_copy_entry_create(copy, !copy->cpy_hdr.entries_pageable);
7788
7789			vm_map_lock(src_map);
7790			if ((version.main_timestamp + 1) != src_map->timestamp) {
7791				if (!vm_map_lookup_entry(src_map, src_start,
7792							 &tmp_entry)) {
7793					RETURN(KERN_INVALID_ADDRESS);
7794				}
7795				if (!tmp_entry->is_sub_map)
7796					vm_map_clip_start(src_map, tmp_entry, src_start);
7797				continue; /* restart w/ new tmp_entry */
7798			}
7799		}
7800
7801		/*
7802		 *	Verify that the region can be read.
7803		 */
7804		if (((src_entry->protection & VM_PROT_READ) == VM_PROT_NONE &&
7805		     !use_maxprot) ||
7806		    (src_entry->max_protection & VM_PROT_READ) == 0)
7807			RETURN(KERN_PROTECTION_FAILURE);
7808
7809		/*
7810		 *	Clip against the endpoints of the entire region.
7811		 */
7812
7813		vm_map_clip_end(src_map, src_entry, src_end);
7814
7815		src_size = src_entry->vme_end - src_start;
7816		src_object = src_entry->object.vm_object;
7817		src_offset = src_entry->offset;
7818		was_wired = (src_entry->wired_count != 0);
7819
7820		vm_map_entry_copy(new_entry, src_entry);
7821		new_entry->use_pmap = FALSE; /* clr address space specifics */
7822
7823		/*
7824		 *	Attempt non-blocking copy-on-write optimizations.
7825		 */
7826
7827		if (src_destroy &&
7828		    (src_object == VM_OBJECT_NULL ||
7829		     (src_object->internal && !src_object->true_share
7830		      && !map_share))) {
7831			/*
7832			 * If we are destroying the source, and the object
7833			 * is internal, we can move the object reference
7834			 * from the source to the copy.  The copy is
7835			 * copy-on-write only if the source is.
7836			 * We make another reference to the object, because
7837			 * destroying the source entry will deallocate it.
7838			 */
7839			vm_object_reference(src_object);
7840
7841			/*
7842			 * The copy is always unwired: vm_map_entry_copy
7843			 * has already set its wired count to zero.
7844			 */
7845
7846			goto CopySuccessful;
7847		}
7848
7849
7850	RestartCopy:
7851		XPR(XPR_VM_MAP, "vm_map_copyin_common src_obj 0x%x ent 0x%x obj 0x%x was_wired %d\n",
7852		    src_object, new_entry, new_entry->object.vm_object,
7853		    was_wired, 0);
7854		if ((src_object == VM_OBJECT_NULL ||
7855		     (!was_wired && !map_share && !tmp_entry->is_shared)) &&
7856		    vm_object_copy_quickly(
7857			    &new_entry->object.vm_object,
7858			    src_offset,
7859			    src_size,
7860			    &src_needs_copy,
7861			    &new_entry_needs_copy)) {
7862
7863			new_entry->needs_copy = new_entry_needs_copy;
7864
7865			/*
7866			 *	Handle copy-on-write obligations
7867			 */
7868
7869			if (src_needs_copy && !tmp_entry->needs_copy) {
7870			        vm_prot_t prot;
7871
7872				prot = src_entry->protection & ~VM_PROT_WRITE;
7873
7874				if (override_nx(src_map, src_entry->alias) && prot)
7875				        prot |= VM_PROT_EXECUTE;
7876
7877				vm_object_pmap_protect(
7878					src_object,
7879					src_offset,
7880					src_size,
7881			      		(src_entry->is_shared ?
7882					 PMAP_NULL
7883					 : src_map->pmap),
7884					src_entry->vme_start,
7885					prot);
7886
7887				tmp_entry->needs_copy = TRUE;
7888			}
7889
7890			/*
7891			 *	The map has never been unlocked, so it's safe
7892			 *	to move to the next entry rather than doing
7893			 *	another lookup.
7894			 */
7895
7896			goto CopySuccessful;
7897		}
7898
7899		/*
7900		 *	Take an object reference, so that we may
7901		 *	release the map lock(s).
7902		 */
7903
7904		assert(src_object != VM_OBJECT_NULL);
7905		vm_object_reference(src_object);
7906
7907		/*
7908		 *	Record the timestamp for later verification.
7909		 *	Unlock the map.
7910		 */
7911
7912		version.main_timestamp = src_map->timestamp;
7913		vm_map_unlock(src_map);	/* Increments timestamp once! */
7914
7915		/*
7916		 *	Perform the copy
7917		 */
7918
7919		if (was_wired) {
7920		CopySlowly:
7921			vm_object_lock(src_object);
7922			result = vm_object_copy_slowly(
7923				src_object,
7924				src_offset,
7925				src_size,
7926				THREAD_UNINT,
7927				&new_entry->object.vm_object);
7928			new_entry->offset = 0;
7929			new_entry->needs_copy = FALSE;
7930
7931		}
7932		else if (src_object->copy_strategy == MEMORY_OBJECT_COPY_SYMMETRIC &&
7933			 (tmp_entry->is_shared  || map_share)) {
7934		  	vm_object_t new_object;
7935
7936			vm_object_lock_shared(src_object);
7937			new_object = vm_object_copy_delayed(
7938				src_object,
7939				src_offset,
7940				src_size,
7941				TRUE);
7942			if (new_object == VM_OBJECT_NULL)
7943			  	goto CopySlowly;
7944
7945			new_entry->object.vm_object = new_object;
7946			new_entry->needs_copy = TRUE;
7947			result = KERN_SUCCESS;
7948
7949		} else {
7950			result = vm_object_copy_strategically(src_object,
7951							      src_offset,
7952							      src_size,
7953							      &new_entry->object.vm_object,
7954							      &new_entry->offset,
7955							      &new_entry_needs_copy);
7956
7957			new_entry->needs_copy = new_entry_needs_copy;
7958		}
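		/*
		 *	At this point new_entry's object was set by one of
		 *	three paths: a slow physical copy (wired source), a
		 *	delayed copy (shared symmetric-copy object), or
		 *	whatever vm_object_copy_strategically chose.
		 */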
7959
7960		if (result != KERN_SUCCESS &&
7961		    result != KERN_MEMORY_RESTART_COPY) {
7962			vm_map_lock(src_map);
7963			RETURN(result);
7964		}
7965
7966		/*
7967		 *	Throw away the extra reference
7968		 */
7969
7970		vm_object_deallocate(src_object);
7971
7972		/*
7973		 *	Verify that the map has not substantially
7974		 *	changed while the copy was being made.
7975		 */
7976
7977		vm_map_lock(src_map);
7978
7979		if ((version.main_timestamp + 1) == src_map->timestamp)
7980			goto VerificationSuccessful;
7981
7982		/*
7983		 *	Simple version comparison failed.
7984		 *
7985		 *	Retry the lookup and verify that the
7986		 *	same object/offset are still present.
7987		 *
7988		 *	[Note: a memory manager that colludes with
7989		 *	the calling task can detect that we have
7990		 *	cheated.  While the map was unlocked, the
7991		 *	mapping could have been changed and restored.]
7992		 */
7993
7994		if (!vm_map_lookup_entry(src_map, src_start, &tmp_entry)) {
7995			RETURN(KERN_INVALID_ADDRESS);
7996		}
7997
7998		src_entry = tmp_entry;
7999		vm_map_clip_start(src_map, src_entry, src_start);
8000
8001		if ((((src_entry->protection & VM_PROT_READ) == VM_PROT_NONE) &&
8002		     !use_maxprot) ||
8003		    ((src_entry->max_protection & VM_PROT_READ) == 0))
8004			goto VerificationFailed;
8005
8006		if (src_entry->vme_end < new_entry->vme_end)
8007			src_size = (new_entry->vme_end = src_entry->vme_end) - src_start;
8008
8009		if ((src_entry->object.vm_object != src_object) ||
8010		    (src_entry->offset != src_offset) ) {
8011
8012			/*
8013			 *	Verification failed.
8014			 *
8015			 *	Start over with this top-level entry.
8016			 */
8017
8018		VerificationFailed: ;
8019
8020			vm_object_deallocate(new_entry->object.vm_object);
8021			tmp_entry = src_entry;
8022			continue;
8023		}
8024
8025		/*
8026		 *	Verification succeeded.
8027		 */
8028
8029	VerificationSuccessful: ;
8030
8031		if (result == KERN_MEMORY_RESTART_COPY)
8032			goto RestartCopy;
8033
8034		/*
8035		 *	Copy succeeded.
8036		 */
8037
8038	CopySuccessful: ;
8039
8040		/*
8041		 *	Link in the new copy entry.
8042		 */
8043
8044		vm_map_copy_entry_link(copy, vm_map_copy_last_entry(copy),
8045				       new_entry);
8046
8047		/*
8048		 *	Determine whether the entire region
8049		 *	has been copied.
8050		 */
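		/*
		 *	If this (sub)map's portion is exhausted, pop back up
		 *	through the recorded parent maps, restoring each
		 *	parent's range, until more remains to be copied or we
		 *	are back in the base map.
		 */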
8051		src_base = src_start;
8052		src_start = new_entry->vme_end;
8053		new_entry = VM_MAP_ENTRY_NULL;
8054		while ((src_start >= src_end) && (src_end != 0)) {
8055			if (src_map != base_map) {
8056				submap_map_t	*ptr;
8057
8058				ptr = parent_maps;
8059				assert(ptr != NULL);
8060				parent_maps = parent_maps->next;
8061
8062				/* fix up the damage we did in that submap */
8063				vm_map_simplify_range(src_map,
8064						      src_base,
8065						      src_end);
8066
8067				vm_map_unlock(src_map);
8068				vm_map_deallocate(src_map);
8069				vm_map_lock(ptr->parent_map);
8070				src_map = ptr->parent_map;
8071				src_base = ptr->base_start;
8072				src_start = ptr->base_start + ptr->base_len;
8073				src_end = ptr->base_end;
8074				if ((src_end > src_start) &&
8075				    !vm_map_lookup_entry(
8076					    src_map, src_start, &tmp_entry))
8077					RETURN(KERN_INVALID_ADDRESS);
8078				kfree(ptr, sizeof(submap_map_t));
8079				if(parent_maps == NULL)
8080					map_share = FALSE;
8081				src_entry = tmp_entry->vme_prev;
8082			} else
8083				break;
8084		}
8085		if ((src_start >= src_end) && (src_end != 0))
8086			break;
8087
8088		/*
8089		 *	Verify that there are no gaps in the region
8090		 */
8091
8092		tmp_entry = src_entry->vme_next;
8093		if ((tmp_entry->vme_start != src_start) ||
8094		    (tmp_entry == vm_map_to_entry(src_map)))
8095			RETURN(KERN_INVALID_ADDRESS);
8096	}
8097
8098	/*
8099	 * If the source should be destroyed, do it now, since the
8100	 * copy was successful.
8101	 */
8102	if (src_destroy) {
8103		(void) vm_map_delete(src_map,
8104				     vm_map_trunc_page(src_addr),
8105				     src_end,
8106				     (src_map == kernel_map) ?
8107				     VM_MAP_REMOVE_KUNWIRE :
8108				     VM_MAP_NO_FLAGS,
8109				     VM_MAP_NULL);
8110	} else {
8111		/* fix up the damage we did in the base map */
8112		vm_map_simplify_range(src_map,
8113				      vm_map_trunc_page(src_addr),
8114				      vm_map_round_page(src_end));
8115	}
8116
8117	vm_map_unlock(src_map);
8118
8119	/* Fix up the start and end points in the copy.  This is  */
8120	/* necessary when the various entries in the copy object  */
8121	/* were picked up from different sub-maps.                 */
8122
8123	tmp_entry = vm_map_copy_first_entry(copy);
8124	while (tmp_entry != vm_map_copy_to_entry(copy)) {
8125		tmp_entry->vme_end = copy_addr +
8126			(tmp_entry->vme_end - tmp_entry->vme_start);
8127		tmp_entry->vme_start = copy_addr;
8128		assert(tmp_entry->vme_start < tmp_entry->vme_end);
8129		copy_addr += tmp_entry->vme_end - tmp_entry->vme_start;
8130		tmp_entry = (struct vm_map_entry *)tmp_entry->vme_next;
8131	}
8132
8133	*copy_result = copy;
8134	return(KERN_SUCCESS);
8135
8136#undef	RETURN
8137}
8138
8139/*
8140 *	vm_map_copyin_object:
8141 *
8142 *	Create a copy object from an object.
8143 *	Our caller donates an object reference.
8144 */
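/*
 *	The resulting copy is of type VM_MAP_COPY_OBJECT: it carries the
 *	donated object reference directly and has no entry list.
 */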
8145
8146kern_return_t
8147vm_map_copyin_object(
8148	vm_object_t		object,
8149	vm_object_offset_t	offset,	/* offset of region in object */
8150	vm_object_size_t	size,	/* size of region in object */
8151	vm_map_copy_t	*copy_result)	/* OUT */
8152{
8153	vm_map_copy_t	copy;		/* Resulting copy */
8154
8155	/*
8156	 *	We drop the object into a special copy object
8157	 *	that contains the object directly.
8158	 */
8159
8160	copy = (vm_map_copy_t) zalloc(vm_map_copy_zone);
8161	copy->type = VM_MAP_COPY_OBJECT;
8162	copy->cpy_object = object;
8163	copy->offset = offset;
8164	copy->size = size;
8165
8166	*copy_result = copy;
8167	return(KERN_SUCCESS);
8168}
8169
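/*
 *	vm_map_fork_share:
 *
 *	Handle a VM_INHERIT_SHARE entry during vm_map_fork: give the child
 *	map an entry that shares the parent's object (or nested submap),
 *	creating and sharing a shadow object first when a deferred or
 *	potential symmetric copy requires it.
 */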
8170static void
8171vm_map_fork_share(
8172	vm_map_t	old_map,
8173	vm_map_entry_t	old_entry,
8174	vm_map_t	new_map)
8175{
8176	vm_object_t 	object;
8177	vm_map_entry_t 	new_entry;
8178
8179	/*
8180	 *	New sharing code.  New map entry
8181	 *	references original object.  Internal
8182	 *	objects use asynchronous copy algorithm for
8183	 *	future copies.  First make sure we have
8184	 *	the right object.  If we need a shadow,
8185	 *	or someone else already has one, then
8186	 *	make a new shadow and share it.
8187	 */
8188
8189	object = old_entry->object.vm_object;
8190	if (old_entry->is_sub_map) {
8191		assert(old_entry->wired_count == 0);
8192#ifndef NO_NESTED_PMAP
8193		if(old_entry->use_pmap) {
8194			kern_return_t	result;
8195
8196			result = pmap_nest(new_map->pmap,
8197					   (old_entry->object.sub_map)->pmap,
8198					   (addr64_t)old_entry->vme_start,
8199					   (addr64_t)old_entry->vme_start,
8200					   (uint64_t)(old_entry->vme_end - old_entry->vme_start));
8201			if(result)
8202				panic("vm_map_fork_share: pmap_nest failed!");
8203		}
8204#endif	/* NO_NESTED_PMAP */
8205	} else if (object == VM_OBJECT_NULL) {
8206		object = vm_object_allocate((vm_map_size_t)(old_entry->vme_end -
8207							    old_entry->vme_start));
8208		old_entry->offset = 0;
8209		old_entry->object.vm_object = object;
8210		assert(!old_entry->needs_copy);
8211	} else if (object->copy_strategy !=
8212		   MEMORY_OBJECT_COPY_SYMMETRIC) {
8213
8214		/*
8215		 *	We are already using an asymmetric
8216		 *	copy, and therefore we already have
8217		 *	the right object.
8218		 */
8219
8220		assert(! old_entry->needs_copy);
8221	}
8222	else if (old_entry->needs_copy ||	/* case 1 */
8223		 object->shadowed ||		/* case 2 */
8224		 (!object->true_share && 	/* case 3 */
8225		  !old_entry->is_shared &&
8226		  (object->vo_size >
8227		   (vm_map_size_t)(old_entry->vme_end -
8228				   old_entry->vme_start)))) {
8229
8230		/*
8231		 *	We need to create a shadow.
8232		 *	There are three cases here.
8233		 *	In the first case, we need to
8234		 *	complete a deferred symmetrical
8235		 *	copy that we participated in.
8236		 *	In the second and third cases,
8237		 *	we need to create the shadow so
8238		 *	that changes that we make to the
8239		 *	object do not interfere with
8240		 *	any symmetrical copies which
8241		 *	have occurred (case 2) or which
8242		 *	might occur (case 3).
8243		 *
8244		 *	The first case is when we had
8245		 *	deferred shadow object creation
8246		 *	via the entry->needs_copy mechanism.
8247		 *	This mechanism only works when
8248		 *	only one entry points to the source
8249		 *	object, and we are about to create
8250		 *	a second entry pointing to the
8251		 *	same object. The problem is that
8252		 *	there is no way of mapping from
8253		 *	an object to the entries pointing
8254		 *	to it. (Deferred shadow creation
8255		 *	works with one entry because it occurs
8256		 *	at fault time, and we walk from the
8257		 *	entry to the object when handling
8258		 *	the fault.)
8259		 *
8260		 *	The second case is when the object
8261		 *	to be shared has already been copied
8262		 *	with a symmetric copy, but we point
8263		 *	directly to the object without
8264		 *	needs_copy set in our entry. (This
8265		 *	can happen because different ranges
8266		 *	of an object can be pointed to by
8267		 *	different entries. In particular,
8268		 *	a single entry pointing to an object
8269		 *	can be split by a call to vm_inherit,
8270		 *	which, combined with task_create, can
8271		 *	result in the different entries
8272		 *	having different needs_copy values.)
8273		 *	The shadowed flag in the object allows
8274		 *	us to detect this case. The problem
8275		 *	with this case is that if this object
8276		 *	has or will have shadows, then we
8277		 *	must not perform an asymmetric copy
8278		 *	of this object, since such a copy
8279		 *	allows the object to be changed, which
8280		 *	will break the previous symmetrical
8281		 *	copies (which rely upon the object
8282		 *	not changing). In a sense, the shadowed
8283		 *	flag says "don't change this object".
8284		 *	We fix this by creating a shadow
8285		 *	object for this object, and sharing
8286		 *	that. This works because we are free
8287		 *	to change the shadow object (and thus
8288		 *	to use an asymmetric copy strategy);
8289		 *	this is also semantically correct,
8290		 *	since this object is temporary, and
8291		 *	therefore a copy of the object is
8292		 *	as good as the object itself. (This
8293		 *	is not true for permanent objects,
8294		 *	since the pager needs to see changes,
8295		 *	which won't happen if the changes
8296		 *	are made to a copy.)
8297		 *
8298		 *	The third case is when the object
8299		 *	to be shared has parts sticking
8300		 *	outside of the entry we're working
8301		 *	with, and thus may in the future
8302		 *	be subject to a symmetrical copy.
8303		 *	(This is a preemptive version of
8304		 *	case 2.)
8305		 */
8306		vm_object_shadow(&old_entry->object.vm_object,
8307				 &old_entry->offset,
8308				 (vm_map_size_t) (old_entry->vme_end -
8309						  old_entry->vme_start));
8310
8311		/*
8312		 *	If we're making a shadow for other than
8313		 *	copy on write reasons, then we have
8314		 *	to remove write permission.
8315		 */
8316
8317		if (!old_entry->needs_copy &&
8318		    (old_entry->protection & VM_PROT_WRITE)) {
8319		        vm_prot_t prot;
8320
8321			prot = old_entry->protection & ~VM_PROT_WRITE;
8322
8323			if (override_nx(old_map, old_entry->alias) && prot)
8324			        prot |= VM_PROT_EXECUTE;
8325
8326			if (old_map->mapped_in_other_pmaps) {
8327				vm_object_pmap_protect(
8328					old_entry->object.vm_object,
8329					old_entry->offset,
8330					(old_entry->vme_end -
8331					 old_entry->vme_start),
8332					PMAP_NULL,
8333					old_entry->vme_start,
8334					prot);
8335			} else {
8336				pmap_protect(old_map->pmap,
8337					     old_entry->vme_start,
8338					     old_entry->vme_end,
8339					     prot);
8340			}
8341		}
8342
8343		old_entry->needs_copy = FALSE;
8344		object = old_entry->object.vm_object;
8345	}
8346
8347
8348	/*
8349	 *	If object was using a symmetric copy strategy,
8350	 *	change its copy strategy to the default
8351	 *	asymmetric copy strategy, which is copy_delay
8352	 *	in the non-norma case and copy_call in the
8353	 *	norma case. Bump the reference count for the
8354	 *	new entry.
8355	 */
8356
8357	if(old_entry->is_sub_map) {
8358		vm_map_lock(old_entry->object.sub_map);
8359		vm_map_reference(old_entry->object.sub_map);
8360		vm_map_unlock(old_entry->object.sub_map);
8361	} else {
8362		vm_object_lock(object);
8363		vm_object_reference_locked(object);
8364		if (object->copy_strategy == MEMORY_OBJECT_COPY_SYMMETRIC) {
8365			object->copy_strategy = MEMORY_OBJECT_COPY_DELAY;
8366		}
8367		vm_object_unlock(object);
8368	}
8369
8370	/*
8371	 *	Clone the entry, using object ref from above.
8372	 *	Mark both entries as shared.
8373	 */
8374
8375	new_entry = vm_map_entry_create(new_map, FALSE); /* Never the kernel
8376							  * map or descendants */
8377	vm_map_entry_copy(new_entry, old_entry);
8378	old_entry->is_shared = TRUE;
8379	new_entry->is_shared = TRUE;
8380
8381	/*
8382	 *	Insert the entry into the new map -- we
8383	 *	know we're inserting at the end of the new
8384	 *	map.
8385	 */
8386
8387	vm_map_store_entry_link(new_map, vm_map_last_entry(new_map), new_entry);
8388
8389	/*
8390	 *	Update the physical map
8391	 */
8392
8393	if (old_entry->is_sub_map) {
8394		/* Bill Angell pmap support goes here */
8395	} else {
8396		pmap_copy(new_map->pmap, old_map->pmap, new_entry->vme_start,
8397			  old_entry->vme_end - old_entry->vme_start,
8398			  old_entry->vme_start);
8399	}
8400}
8401
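/*
 *	vm_map_fork_copy:
 *
 *	Handle a VM_INHERIT_COPY entry that the inlined quick-copy path
 *	could not take: copy the region into the child map with
 *	vm_map_copyin_maxprot and advance *old_entry_p past it.  Returns
 *	TRUE if the copy was inserted, FALSE if the region was skipped.
 */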
8402static boolean_t
8403vm_map_fork_copy(
8404	vm_map_t	old_map,
8405	vm_map_entry_t	*old_entry_p,
8406	vm_map_t	new_map)
8407{
8408	vm_map_entry_t old_entry = *old_entry_p;
8409	vm_map_size_t entry_size = old_entry->vme_end - old_entry->vme_start;
8410	vm_map_offset_t start = old_entry->vme_start;
8411	vm_map_copy_t copy;
8412	vm_map_entry_t last = vm_map_last_entry(new_map);
8413
8414	vm_map_unlock(old_map);
8415	/*
8416	 *	Use maxprot version of copyin because we
8417	 *	care about whether this memory can ever
8418	 *	be accessed, not just whether it's accessible
8419	 *	right now.
8420	 */
8421	if (vm_map_copyin_maxprot(old_map, start, entry_size, FALSE, &copy)
8422	    != KERN_SUCCESS) {
8423		/*
8424		 *	The map might have changed while it
8425		 *	was unlocked, so check it again.  Skip
8426		 *	any blank space or permanently
8427		 *	unreadable region.
8428		 */
8429		vm_map_lock(old_map);
8430		if (!vm_map_lookup_entry(old_map, start, &last) ||
8431		    (last->max_protection & VM_PROT_READ) == VM_PROT_NONE) {
8432			last = last->vme_next;
8433		}
8434		*old_entry_p = last;
8435
8436		/*
8437		 * XXX	For some error returns, want to
8438		 * XXX	skip to the next element.  Note
8439		 *	that INVALID_ADDRESS and
8440		 *	PROTECTION_FAILURE are handled above.
8441		 */
8442
8443		return FALSE;
8444	}
8445
8446	/*
8447	 *	Insert the copy into the new map
8448	 */
8449
8450	vm_map_copy_insert(new_map, last, copy);
8451
8452	/*
8453	 *	Pick up the traversal at the end of
8454	 *	the copied region.
8455	 */
8456
8457	vm_map_lock(old_map);
8458	start += entry_size;
8459	if (! vm_map_lookup_entry(old_map, start, &last)) {
8460		last = last->vme_next;
8461	} else {
8462		if (last->vme_start == start) {
8463			/*
8464			 * No need to clip here and we don't
8465			 * want to cause any unnecessary
8466			 * unnesting...
8467			 */
8468		} else {
8469			vm_map_clip_start(old_map, last, start);
8470		}
8471	}
8472	*old_entry_p = last;
8473
8474	return TRUE;
8475}
8476
8477/*
8478 *	vm_map_fork:
8479 *
8480 *	Create and return a new map based on the old
8481 *	map, according to the inheritance values on the
8482 *	regions in that map.
8483 *
8484 *	The source map must not be locked.
8485 */
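/*
 *	Each entry is handled according to its inheritance value:
 *	VM_INHERIT_NONE entries are skipped, VM_INHERIT_SHARE entries are
 *	shared via vm_map_fork_share(), and VM_INHERIT_COPY entries are
 *	copied, using the inlined quick-copy path when possible and
 *	vm_map_fork_copy() otherwise.
 */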
8486vm_map_t
8487vm_map_fork(
8488	ledger_t	ledger,
8489	vm_map_t	old_map)
8490{
8491	pmap_t		new_pmap;
8492	vm_map_t	new_map;
8493	vm_map_entry_t	old_entry;
8494	vm_map_size_t	new_size = 0, entry_size;
8495	vm_map_entry_t	new_entry;
8496	boolean_t	src_needs_copy;
8497	boolean_t	new_entry_needs_copy;
8498
8499	new_pmap = pmap_create(ledger, (vm_map_size_t) 0,
8500#if defined(__i386__) || defined(__x86_64__)
8501			       old_map->pmap->pm_task_map != TASK_MAP_32BIT
8502#elif defined(__arm__)
8503                    0
8504#else
8505#error Unknown architecture.
8506#endif
8507			       );
8508#if defined(__i386__)
8509	if (old_map->pmap->pm_task_map == TASK_MAP_64BIT_SHARED)
8510		pmap_set_4GB_pagezero(new_pmap);
8511#endif
8512
8513	vm_map_reference_swap(old_map);
8514	vm_map_lock(old_map);
8515
8516	new_map = vm_map_create(new_pmap,
8517				old_map->min_offset,
8518				old_map->max_offset,
8519				old_map->hdr.entries_pageable);
8520	for (
8521		old_entry = vm_map_first_entry(old_map);
8522		old_entry != vm_map_to_entry(old_map);
8523		) {
8524
8525		entry_size = old_entry->vme_end - old_entry->vme_start;
8526
8527		switch (old_entry->inheritance) {
8528		case VM_INHERIT_NONE:
8529			break;
8530
8531		case VM_INHERIT_SHARE:
8532			vm_map_fork_share(old_map, old_entry, new_map);
8533			new_size += entry_size;
8534			break;
8535
8536		case VM_INHERIT_COPY:
8537
8538			/*
8539			 *	Inline the copy_quickly case;
8540			 *	upon failure, fall back on a call
8541			 *	to vm_map_fork_copy.
8542			 */
8543
8544			if(old_entry->is_sub_map)
8545				break;
8546			if ((old_entry->wired_count != 0) ||
8547			    ((old_entry->object.vm_object != NULL) &&
8548			     (old_entry->object.vm_object->true_share))) {
8549				goto slow_vm_map_fork_copy;
8550			}
8551
8552			new_entry = vm_map_entry_create(new_map, FALSE); /* never the kernel map or descendants */
8553			vm_map_entry_copy(new_entry, old_entry);
8554			/* clear address space specifics */
8555			new_entry->use_pmap = FALSE;
8556
8557			if (! vm_object_copy_quickly(
8558				    &new_entry->object.vm_object,
8559				    old_entry->offset,
8560				    (old_entry->vme_end -
8561				     old_entry->vme_start),
8562				    &src_needs_copy,
8563				    &new_entry_needs_copy)) {
8564				vm_map_entry_dispose(new_map, new_entry);
8565				goto slow_vm_map_fork_copy;
8566			}
8567
8568			/*
8569			 *	Handle copy-on-write obligations
8570			 */
8571
8572			if (src_needs_copy && !old_entry->needs_copy) {
8573			        vm_prot_t prot;
8574
8575				prot = old_entry->protection & ~VM_PROT_WRITE;
8576
8577				if (override_nx(old_map, old_entry->alias) && prot)
8578				        prot |= VM_PROT_EXECUTE;
8579
8580				vm_object_pmap_protect(
8581					old_entry->object.vm_object,
8582					old_entry->offset,
8583					(old_entry->vme_end -
8584					 old_entry->vme_start),
8585					((old_entry->is_shared
8586					  || old_map->mapped_in_other_pmaps)
8587					 ? PMAP_NULL :
8588					 old_map->pmap),
8589					old_entry->vme_start,
8590					prot);
8591
8592				old_entry->needs_copy = TRUE;
8593			}
8594			new_entry->needs_copy = new_entry_needs_copy;
8595
8596			/*
8597			 *	Insert the entry at the end
8598			 *	of the map.
8599			 */
8600
8601			vm_map_store_entry_link(new_map, vm_map_last_entry(new_map),
8602					  new_entry);
8603			new_size += entry_size;
8604			break;
8605
8606		slow_vm_map_fork_copy:
8607			if (vm_map_fork_copy(old_map, &old_entry, new_map)) {
8608				new_size += entry_size;
8609			}
8610			continue;
8611		}
8612		old_entry = old_entry->vme_next;
8613	}
8614
8615	new_map->size = new_size;
8616	vm_map_unlock(old_map);
8617	vm_map_deallocate(old_map);
8618
8619	return(new_map);
8620}
8621
8622/*
8623 * vm_map_exec:
8624 *
8625 * 	Set up the "new_map" with the proper execution environment according
8626 *	to the type of executable (platform, 64bit, chroot environment).
8627 *	Map the comm page and shared region, etc...
8628 */
8629kern_return_t
8630vm_map_exec(
8631	vm_map_t	new_map,
8632	task_t		task,
8633	void		*fsroot,
8634	cpu_type_t	cpu)
8635{
8636	SHARED_REGION_TRACE_DEBUG(
8637		("shared_region: task %p: vm_map_exec(%p,%p,%p,0x%x): ->\n",
8638		 current_task(), new_map, task, fsroot, cpu));
8639	(void) vm_commpage_enter(new_map, task);
8640	(void) vm_shared_region_enter(new_map, task, fsroot, cpu);
8641	SHARED_REGION_TRACE_DEBUG(
8642		("shared_region: task %p: vm_map_exec(%p,%p,%p,0x%x): <-\n",
8643		 current_task(), new_map, task, fsroot, cpu));
8644	return KERN_SUCCESS;
8645}
8646
8647/*
8648 *	vm_map_lookup_locked:
8649 *
8650 *	Finds the VM object, offset, and
8651 *	protection for a given virtual address in the
8652 *	specified map, assuming a page fault of the
8653 *	type specified.
8654 *
8655 *	Returns the (object, offset, protection) for
8656 *	this address, whether it is wired down, and whether
8657 *	this map has the only reference to the data in question.
8658 *	In order to later verify this lookup, a "version"
8659 *	is returned.
8660 *
8661 *	The map MUST be locked by the caller and WILL be
8662 *	locked on exit.  In order to guarantee the
8663 *	existence of the returned object, it is returned
8664 *	locked.
8665 *
8666 *	If a lookup is requested with "write protection"
8667 *	specified, the map may be changed to perform virtual
8668 *	copying operations, although the data referenced will
8669 *	remain the same.
8670 */
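/*
 *	The returned "version" can later be passed to vm_map_verify() to
 *	check that the map has not changed since this lookup (see
 *	vm_map_verify() below).
 */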
8671kern_return_t
8672vm_map_lookup_locked(
8673	vm_map_t		*var_map,	/* IN/OUT */
8674	vm_map_offset_t		vaddr,
8675	vm_prot_t		fault_type,
8676	int			object_lock_type,
8677	vm_map_version_t	*out_version,	/* OUT */
8678	vm_object_t		*object,	/* OUT */
8679	vm_object_offset_t	*offset,	/* OUT */
8680	vm_prot_t		*out_prot,	/* OUT */
8681	boolean_t		*wired,		/* OUT */
8682	vm_object_fault_info_t	fault_info,	/* OUT */
8683	vm_map_t		*real_map)
8684{
8685	vm_map_entry_t			entry;
8686	register vm_map_t		map = *var_map;
8687	vm_map_t			old_map = *var_map;
8688	vm_map_t			cow_sub_map_parent = VM_MAP_NULL;
8689	vm_map_offset_t			cow_parent_vaddr = 0;
8690	vm_map_offset_t			old_start = 0;
8691	vm_map_offset_t			old_end = 0;
8692	register vm_prot_t		prot;
8693	boolean_t			mask_protections;
8694	vm_prot_t			original_fault_type;
8695
8696	/*
8697	 * VM_PROT_IS_MASK means that the caller wants us to use "fault_type"
8698	 * as a mask against the mapping's actual protections, not as an
8699	 * absolute value.
8700	 */
8701	mask_protections = (fault_type & VM_PROT_IS_MASK) ? TRUE : FALSE;
8702	fault_type &= ~VM_PROT_IS_MASK;
8703	original_fault_type = fault_type;
8704
8705	*real_map = map;
8706
8707RetryLookup:
8708	fault_type = original_fault_type;
8709
8710	/*
8711	 *	If the map has an interesting hint, try it before calling
8712	 *	the full-blown lookup routine.
8713	 */
8714	entry = map->hint;
8715
8716	if ((entry == vm_map_to_entry(map)) ||
8717	    (vaddr < entry->vme_start) || (vaddr >= entry->vme_end)) {
8718		vm_map_entry_t	tmp_entry;
8719
8720		/*
8721		 *	Entry was either not a valid hint, or the vaddr
8722		 *	was not contained in the entry, so do a full lookup.
8723		 */
8724		if (!vm_map_lookup_entry(map, vaddr, &tmp_entry)) {
8725			if((cow_sub_map_parent) && (cow_sub_map_parent != map))
8726				vm_map_unlock(cow_sub_map_parent);
8727			if((*real_map != map)
8728			   && (*real_map != cow_sub_map_parent))
8729				vm_map_unlock(*real_map);
8730			return KERN_INVALID_ADDRESS;
8731		}
8732
8733		entry = tmp_entry;
8734	}
8735	if(map == old_map) {
8736		old_start = entry->vme_start;
8737		old_end = entry->vme_end;
8738	}
8739
8740	/*
8741	 *	Handle submaps.  Drop the lock on the upper map; the
8742	 *	submap is returned locked.
8743	 */
8744
8745submap_recurse:
8746	if (entry->is_sub_map) {
8747		vm_map_offset_t		local_vaddr;
8748		vm_map_offset_t		end_delta;
8749		vm_map_offset_t		start_delta;
8750		vm_map_entry_t		submap_entry;
8751		boolean_t		mapped_needs_copy=FALSE;
8752
8753		local_vaddr = vaddr;
8754
8755		if ((entry->use_pmap && !(fault_type & VM_PROT_WRITE))) {
8756			/* if real_map equals map we unlock below */
8757			if ((*real_map != map) &&
8758			    (*real_map != cow_sub_map_parent))
8759				vm_map_unlock(*real_map);
8760			*real_map = entry->object.sub_map;
8761		}
8762
8763		if(entry->needs_copy && (fault_type & VM_PROT_WRITE)) {
8764			if (!mapped_needs_copy) {
8765				if (vm_map_lock_read_to_write(map)) {
8766					vm_map_lock_read(map);
8767					*real_map = map;
8768					goto RetryLookup;
8769				}
8770				vm_map_lock_read(entry->object.sub_map);
8771				*var_map = entry->object.sub_map;
8772				cow_sub_map_parent = map;
8773				/* reset base to map before cow object */
8774				/* this is the map which will accept   */
8775				/* the new cow object */
8776				old_start = entry->vme_start;
8777				old_end = entry->vme_end;
8778				cow_parent_vaddr = vaddr;
8779				mapped_needs_copy = TRUE;
8780			} else {
8781				vm_map_lock_read(entry->object.sub_map);
8782				*var_map = entry->object.sub_map;
8783				if((cow_sub_map_parent != map) &&
8784				   (*real_map != map))
8785					vm_map_unlock(map);
8786			}
8787		} else {
8788			vm_map_lock_read(entry->object.sub_map);
8789			*var_map = entry->object.sub_map;
8790			/* leave the map locked if it is the target */
8791			/* cow sub_map above; otherwise, just       */
8792			/* follow the maps down to the object.      */
8793			/* Here we unlock, knowing we are not       */
8794			/* revisiting this map.  */
8795			if((*real_map != map) && (map != cow_sub_map_parent))
8796				vm_map_unlock_read(map);
8797		}
8798
8799		map = *var_map;
8800
8801		/* calculate the offset in the submap for vaddr */
8802		local_vaddr = (local_vaddr - entry->vme_start) + entry->offset;
8803
8804	RetrySubMap:
8805		if(!vm_map_lookup_entry(map, local_vaddr, &submap_entry)) {
8806			if((cow_sub_map_parent) && (cow_sub_map_parent != map)){
8807				vm_map_unlock(cow_sub_map_parent);
8808			}
8809			if((*real_map != map)
8810			   && (*real_map != cow_sub_map_parent)) {
8811				vm_map_unlock(*real_map);
8812			}
8813			*real_map = map;
8814			return KERN_INVALID_ADDRESS;
8815		}
8816
8817		/* find the attenuated shadow of the underlying object */
8818		/* on our target map */
8819
8820		/* In English: the submap object may extend beyond the    */
8821		/* region mapped by the entry, or may only fill a portion */
8822		/* of it.  For our purposes, we only care if the object   */
8823		/* doesn't fill.  In that case the area which will        */
8824		/* ultimately be clipped in the top map only needs        */
8825		/* to be as big as the portion of the underlying entry    */
8826		/* which is mapped. */
8827		start_delta = submap_entry->vme_start > entry->offset ?
8828			submap_entry->vme_start - entry->offset : 0;
8829
8830		end_delta =
8831			(entry->offset + start_delta + (old_end - old_start)) <=
8832			submap_entry->vme_end ?
8833			0 : (entry->offset +
8834			     (old_end - old_start))
8835			- submap_entry->vme_end;
8836
8837		old_start += start_delta;
8838		old_end -= end_delta;
8839
8840		if(submap_entry->is_sub_map) {
8841			entry = submap_entry;
8842			vaddr = local_vaddr;
8843			goto submap_recurse;
8844		}
8845
8846		if(((fault_type & VM_PROT_WRITE) && cow_sub_map_parent)) {
8847
8848			vm_object_t	sub_object, copy_object;
8849			vm_object_offset_t copy_offset;
8850			vm_map_offset_t	local_start;
8851			vm_map_offset_t	local_end;
8852			boolean_t		copied_slowly = FALSE;
8853
8854			if (vm_map_lock_read_to_write(map)) {
8855				vm_map_lock_read(map);
8856				old_start -= start_delta;
8857				old_end += end_delta;
8858				goto RetrySubMap;
8859			}
8860
8861
8862			sub_object = submap_entry->object.vm_object;
8863			if (sub_object == VM_OBJECT_NULL) {
8864				sub_object =
8865					vm_object_allocate(
8866						(vm_map_size_t)
8867						(submap_entry->vme_end -
8868						 submap_entry->vme_start));
8869				submap_entry->object.vm_object = sub_object;
8870				submap_entry->offset = 0;
8871			}
8872			local_start =  local_vaddr -
8873				(cow_parent_vaddr - old_start);
8874			local_end = local_vaddr +
8875				(old_end - cow_parent_vaddr);
8876			vm_map_clip_start(map, submap_entry, local_start);
8877			vm_map_clip_end(map, submap_entry, local_end);
8878			/* unnesting was done in vm_map_clip_start/end() */
8879			assert(!submap_entry->use_pmap);
8880
8881			/* This is the COW case; let's connect */
8882			/* an entry in our space to the underlying */
8883			/* object in the submap, bypassing the  */
8884			/* submap. */
8885
8886
8887			if(submap_entry->wired_count != 0 ||
8888			   (sub_object->copy_strategy ==
8889			    MEMORY_OBJECT_COPY_NONE)) {
8890				vm_object_lock(sub_object);
8891				vm_object_copy_slowly(sub_object,
8892						      submap_entry->offset,
8893						      (submap_entry->vme_end -
8894						       submap_entry->vme_start),
8895						      FALSE,
8896						      &copy_object);
8897				copied_slowly = TRUE;
8898			} else {
8899
8900				/* set up shadow object */
8901				copy_object = sub_object;
8902				vm_object_reference(copy_object);
8903				sub_object->shadowed = TRUE;
8904				submap_entry->needs_copy = TRUE;
8905
8906				prot = submap_entry->protection & ~VM_PROT_WRITE;
8907
8908				if (override_nx(old_map, submap_entry->alias) && prot)
8909				        prot |= VM_PROT_EXECUTE;
8910
8911				vm_object_pmap_protect(
8912					sub_object,
8913					submap_entry->offset,
8914					submap_entry->vme_end -
8915					submap_entry->vme_start,
8916					(submap_entry->is_shared
8917					 || map->mapped_in_other_pmaps) ?
8918					PMAP_NULL : map->pmap,
8919					submap_entry->vme_start,
8920					prot);
8921			}
8922
8923			/*
8924			 * Adjust the fault offset to the submap entry.
8925			 */
8926			copy_offset = (local_vaddr -
8927				       submap_entry->vme_start +
8928				       submap_entry->offset);
8929
8930			/* This works differently from the */
8931			/* normal submap case.  We go back */
8932			/* to the parent of the cow map and*/
8933			/* clip out the target portion of  */
8934			/* the sub_map, substituting the   */
8935			/* new copy object.                */
8936
8937			vm_map_unlock(map);
8938			local_start = old_start;
8939			local_end = old_end;
8940			map = cow_sub_map_parent;
8941			*var_map = cow_sub_map_parent;
8942			vaddr = cow_parent_vaddr;
8943			cow_sub_map_parent = NULL;
8944
8945			if(!vm_map_lookup_entry(map,
8946						vaddr, &entry)) {
8947				vm_object_deallocate(
8948					copy_object);
8949				vm_map_lock_write_to_read(map);
8950				return KERN_INVALID_ADDRESS;
8951			}
8952
8953			/* clip out the portion of space */
8954			/* mapped by the sub map which   */
8955			/* corresponds to the underlying */
8956			/* object */
8957
8958			/*
8959			 * Clip (and unnest) the smallest nested chunk
8960			 * possible around the faulting address...
8961			 */
8962			local_start = vaddr & ~(pmap_nesting_size_min - 1);
8963			local_end = local_start + pmap_nesting_size_min;
8964			/*
8965			 * ... but don't go beyond the "old_start" to "old_end"
8966			 * range, to avoid spanning over another VM region
8967			 * with a possibly different VM object and/or offset.
8968			 */
8969			if (local_start < old_start) {
8970				local_start = old_start;
8971			}
8972			if (local_end > old_end) {
8973				local_end = old_end;
8974			}
8975			/*
8976			 * Adjust copy_offset to the start of the range.
8977			 */
8978			copy_offset -= (vaddr - local_start);
8979
8980			vm_map_clip_start(map, entry, local_start);
8981			vm_map_clip_end(map, entry, local_end);
8982			/* unnesting was done in vm_map_clip_start/end() */
8983			assert(!entry->use_pmap);
8984
8985			/* substitute copy object for */
8986			/* shared map entry           */
8987			vm_map_deallocate(entry->object.sub_map);
8988			entry->is_sub_map = FALSE;
8989			entry->object.vm_object = copy_object;
8990
8991			/* propagate the submap entry's protections */
8992			entry->protection |= submap_entry->protection;
8993			entry->max_protection |= submap_entry->max_protection;
8994
8995			if(copied_slowly) {
8996				entry->offset = local_start - old_start;
8997				entry->needs_copy = FALSE;
8998				entry->is_shared = FALSE;
8999			} else {
9000				entry->offset = copy_offset;
9001				entry->needs_copy = TRUE;
9002				if(entry->inheritance == VM_INHERIT_SHARE)
9003					entry->inheritance = VM_INHERIT_COPY;
9004				if (map != old_map)
9005					entry->is_shared = TRUE;
9006			}
9007			if(entry->inheritance == VM_INHERIT_SHARE)
9008				entry->inheritance = VM_INHERIT_COPY;
9009
9010			vm_map_lock_write_to_read(map);
9011		} else {
9012			if((cow_sub_map_parent)
9013			   && (cow_sub_map_parent != *real_map)
9014			   && (cow_sub_map_parent != map)) {
9015				vm_map_unlock(cow_sub_map_parent);
9016			}
9017			entry = submap_entry;
9018			vaddr = local_vaddr;
9019		}
9020	}
9021
9022	/*
9023	 *	Check whether this task is allowed to have
9024	 *	this page.
9025	 */
9026
9027	prot = entry->protection;
9028
9029	if (override_nx(old_map, entry->alias) && prot) {
9030	        /*
9031		 * HACK -- if not a stack, then allow execution
9032		 */
9033	        prot |= VM_PROT_EXECUTE;
9034	}
9035
9036	if (mask_protections) {
9037		fault_type &= prot;
9038		if (fault_type == VM_PROT_NONE) {
9039			goto protection_failure;
9040		}
9041	}
9042	if ((fault_type & (prot)) != fault_type) {
9043	protection_failure:
9044		if (*real_map != map) {
9045			vm_map_unlock(*real_map);
9046		}
9047		*real_map = map;
9048
9049		if ((fault_type & VM_PROT_EXECUTE) && prot)
9050		        log_stack_execution_failure((addr64_t)vaddr, prot);
9051
9052		DTRACE_VM2(prot_fault, int, 1, (uint64_t *), NULL);
9053		return KERN_PROTECTION_FAILURE;
9054	}
9055
9056	/*
9057	 *	If this page is not pageable, we have to get
9058	 *	it for all possible accesses.
9059	 */
9060
9061	*wired = (entry->wired_count != 0);
9062	if (*wired)
9063	        fault_type = prot;
9064
9065	/*
9066	 *	If the entry was copy-on-write, we either ...
9067	 */
9068
9069	if (entry->needs_copy) {
9070	    	/*
9071		 *	If we want to write the page, we may as well
9072		 *	handle that now since we've got the map locked.
9073		 *
9074		 *	If we don't need to write the page, we just
9075		 *	demote the permissions allowed.
9076		 */
9077
9078		if ((fault_type & VM_PROT_WRITE) || *wired) {
9079			/*
9080			 *	Make a new object, and place it in the
9081			 *	object chain.  Note that no new references
9082			 *	have appeared -- one just moved from the
9083			 *	map to the new object.
9084			 */
9085
9086			if (vm_map_lock_read_to_write(map)) {
9087				vm_map_lock_read(map);
9088				goto RetryLookup;
9089			}
9090			vm_object_shadow(&entry->object.vm_object,
9091					 &entry->offset,
9092					 (vm_map_size_t) (entry->vme_end -
9093							  entry->vme_start));
9094
9095			entry->object.vm_object->shadowed = TRUE;
9096			entry->needs_copy = FALSE;
9097			vm_map_lock_write_to_read(map);
9098		}
9099		else {
9100			/*
9101			 *	We're attempting to read a copy-on-write
9102			 *	page -- don't allow writes.
9103			 */
9104
9105			prot &= (~VM_PROT_WRITE);
9106		}
9107	}
9108
9109	/*
9110	 *	Create an object if necessary.
9111	 */
9112	if (entry->object.vm_object == VM_OBJECT_NULL) {
9113
9114		if (vm_map_lock_read_to_write(map)) {
9115			vm_map_lock_read(map);
9116			goto RetryLookup;
9117		}
9118
9119		entry->object.vm_object = vm_object_allocate(
9120			(vm_map_size_t)(entry->vme_end - entry->vme_start));
9121		entry->offset = 0;
9122		vm_map_lock_write_to_read(map);
9123	}
9124
9125	/*
9126	 *	Return the object/offset from this entry.  If the entry
9127	 *	was copy-on-write or empty, it has been fixed up.  Also
9128	 *	return the protection.
9129	 */
9130
9131        *offset = (vaddr - entry->vme_start) + entry->offset;
9132        *object = entry->object.vm_object;
9133	*out_prot = prot;
9134
9135	if (fault_info) {
9136		fault_info->interruptible = THREAD_UNINT; /* for now... */
9137		/* ... the caller will change "interruptible" if needed */
9138	        fault_info->cluster_size = 0;
9139		fault_info->user_tag = entry->alias;
9140	        fault_info->behavior = entry->behavior;
9141		fault_info->lo_offset = entry->offset;
9142		fault_info->hi_offset = (entry->vme_end - entry->vme_start) + entry->offset;
9143		fault_info->no_cache  = entry->no_cache;
9144		fault_info->stealth = FALSE;
9145		fault_info->io_sync = FALSE;
9146		fault_info->cs_bypass = (entry->used_for_jit)? TRUE : FALSE;
9147		fault_info->mark_zf_absent = FALSE;
9148		fault_info->batch_pmap_op = FALSE;
9149	}
9150
9151	/*
9152	 *	Lock the object to prevent it from disappearing
9153	 */
9154	if (object_lock_type == OBJECT_LOCK_EXCLUSIVE)
9155	        vm_object_lock(*object);
9156	else
9157	        vm_object_lock_shared(*object);
9158
9159	/*
9160	 *	Save the version number
9161	 */
9162
9163	out_version->main_timestamp = map->timestamp;
9164
9165	return KERN_SUCCESS;
9166}
9167
9168
9169/*
9170 *	vm_map_verify:
9171 *
9172 *	Verifies that the map in question has not changed
9173 *	since the given version.  If successful, the map
9174 *	will not change until vm_map_verify_done() is called.
9175 */
9176boolean_t
9177vm_map_verify(
9178	register vm_map_t		map,
9179	register vm_map_version_t	*version)	/* REF */
9180{
9181	boolean_t	result;
9182
9183	vm_map_lock_read(map);
9184	result = (map->timestamp == version->main_timestamp);
9185
9186	if (!result)
9187		vm_map_unlock_read(map);
9188
9189	return(result);
9190}
9191
9192/*
9193 *	vm_map_verify_done:
9194 *
9195 *	Releases locks acquired by a vm_map_verify.
9196 *
9197 *	This is now a macro in vm/vm_map.h.  It does a
9198 *	vm_map_unlock_read on the map.
9199 */
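/*
 *	A minimal sketch of the intended verify pattern (the surrounding
 *	caller code is illustrative only):
 *
 *		vm_map_version_t	version;
 *
 *		(save version.main_timestamp while the map is locked,
 *		 e.g. as vm_map_lookup_locked() does, then unlock and
 *		 do the slow work)
 *
 *		if (vm_map_verify(map, &version)) {
 *			(map is unchanged and now read-locked)
 *			vm_map_verify_done(map, &version);
 *		} else {
 *			(map changed; redo the lookup)
 *		}
 */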
9200
9201
9202/*
9203 *	TEMPORARYTEMPORARYTEMPORARYTEMPORARYTEMPORARYTEMPORARY
9204 *	Goes away after regular vm_region_recurse function migrates to
9205 *	64 bits
9206 *	vm_region_recurse: A form of vm_region which follows the
9207 *	submaps in a target map
9208 *
9209 */
9210
9211kern_return_t
9212vm_map_region_recurse_64(
9213	vm_map_t		 map,
9214	vm_map_offset_t	*address,		/* IN/OUT */
9215	vm_map_size_t		*size,			/* OUT */
9216	natural_t	 	*nesting_depth,	/* IN/OUT */
9217	vm_region_submap_info_64_t	submap_info,	/* IN/OUT */
9218	mach_msg_type_number_t	*count)	/* IN/OUT */
9219{
9220	vm_region_extended_info_data_t	extended;
9221	vm_map_entry_t			tmp_entry;
9222	vm_map_offset_t			user_address;
9223	unsigned int			user_max_depth;
9224
9225	/*
9226	 * "curr_entry" is the VM map entry preceding or including the
9227	 * address we're looking for.
9228	 * "curr_map" is the map or sub-map containing "curr_entry".
9229	 * "curr_address" is the equivalent of the top map's "user_address"
9230	 * in the current map.
9231	 * "curr_offset" is the cumulated offset of "curr_map" in the
9232	 * target task's address space.
9233	 * "curr_depth" is the depth of "curr_map" in the chain of
9234	 * sub-maps.
9235	 *
9236	 * "curr_max_below" and "curr_max_above" limit the range (around
9237	 * "curr_address") we should take into account in the current (sub)map.
9238	 * They limit the range to what's visible through the map entries
9239	 * we've traversed from the top map to the current map.
9240	 *
9241	 */
9242	vm_map_entry_t			curr_entry;
9243	vm_map_address_t		curr_address;
9244	vm_map_offset_t			curr_offset;
9245	vm_map_t			curr_map;
9246	unsigned int			curr_depth;
9247	vm_map_offset_t			curr_max_below, curr_max_above;
9248	vm_map_offset_t			curr_skip;
9249
9250	/*
9251	 * "next_" is the same as "curr_" but for the VM region immediately
9252	 * after the address we're looking for.  We need to keep track of this
9253	 * too because we want to return info about that region if the
9254	 * address we're looking for is not mapped.
9255	 */
9256	vm_map_entry_t			next_entry;
9257	vm_map_offset_t			next_offset;
9258	vm_map_offset_t			next_address;
9259	vm_map_t			next_map;
9260	unsigned int			next_depth;
9261	vm_map_offset_t			next_max_below, next_max_above;
9262	vm_map_offset_t			next_skip;
9263
9264	boolean_t			look_for_pages;
9265	vm_region_submap_short_info_64_t short_info;
9266
9267	if (map == VM_MAP_NULL) {
9268		/* no address space to work on */
9269		return KERN_INVALID_ARGUMENT;
9270	}
9271
9272	if (*count < VM_REGION_SUBMAP_INFO_COUNT_64) {
9273		if (*count < VM_REGION_SUBMAP_SHORT_INFO_COUNT_64) {
9274			/*
9275			 * "info" structure is not big enough and
9276			 * would overflow
9277			 */
9278			return KERN_INVALID_ARGUMENT;
9279		} else {
9280			look_for_pages = FALSE;
9281			*count = VM_REGION_SUBMAP_SHORT_INFO_COUNT_64;
9282			short_info = (vm_region_submap_short_info_64_t) submap_info;
9283			submap_info = NULL;
9284		}
9285	} else {
9286		look_for_pages = TRUE;
9287		*count = VM_REGION_SUBMAP_INFO_COUNT_64;
9288		short_info = NULL;
9289	}
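	/*
	 *	From here on, exactly one of "submap_info" (full 64-bit
	 *	info) or "short_info" is non-NULL, depending on the size of
	 *	the caller's buffer; "look_for_pages" records which.
	 */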
9290
9291
9292	user_address = *address;
9293	user_max_depth = *nesting_depth;
9294
9295	curr_entry = NULL;
9296	curr_map = map;
9297	curr_address = user_address;
9298	curr_offset = 0;
9299	curr_skip = 0;
9300	curr_depth = 0;
9301	curr_max_above = ((vm_map_offset_t) -1) - curr_address;
9302	curr_max_below = curr_address;
9303
9304	next_entry = NULL;
9305	next_map = NULL;
9306	next_address = 0;
9307	next_offset = 0;
9308	next_skip = 0;
9309	next_depth = 0;
9310	next_max_above = (vm_map_offset_t) -1;
9311	next_max_below = (vm_map_offset_t) -1;
9312
9313	if (not_in_kdp) {
9314		vm_map_lock_read(curr_map);
9315	}
9316
9317	for (;;) {
9318		if (vm_map_lookup_entry(curr_map,
9319					curr_address,
9320					&tmp_entry)) {
9321			/* tmp_entry contains the address we're looking for */
9322			curr_entry = tmp_entry;
9323		} else {
9324			vm_map_offset_t skip;
9325			/*
9326			 * The address is not mapped.  "tmp_entry" is the
9327			 * map entry preceding the address.  We want the next
9328			 * one, if it exists.
9329			 */
9330			curr_entry = tmp_entry->vme_next;
9331
9332			if (curr_entry == vm_map_to_entry(curr_map) ||
9333			    (curr_entry->vme_start >=
9334			     curr_address + curr_max_above)) {
9335				/* no next entry at this level: stop looking */
9336				if (not_in_kdp) {
9337					vm_map_unlock_read(curr_map);
9338				}
9339				curr_entry = NULL;
9340				curr_map = NULL;
9341				curr_offset = 0;
9342				curr_depth = 0;
9343				curr_max_above = 0;
9344				curr_max_below = 0;
9345				break;
9346			}
9347
9348			/* adjust current address and offset */
9349			skip = curr_entry->vme_start - curr_address;
9350			curr_address = curr_entry->vme_start;
9351			curr_skip = skip;
9352			curr_offset += skip;
9353			curr_max_above -= skip;
9354			curr_max_below = 0;
9355		}
9356
9357		/*
9358		 * Is the next entry at this level closer to the address (or
9359		 * deeper in the submap chain) than the one we had
9360		 * so far?
9361		 */
9362		tmp_entry = curr_entry->vme_next;
9363		if (tmp_entry == vm_map_to_entry(curr_map)) {
9364			/* no next entry at this level */
9365		} else if (tmp_entry->vme_start >=
9366			   curr_address + curr_max_above) {
9367			/*
9368			 * tmp_entry is beyond the scope of what we mapped of
9369			 * this submap in the upper level: ignore it.
9370			 */
9371		} else if ((next_entry == NULL) ||
9372			   (tmp_entry->vme_start + curr_offset <=
9373			    next_entry->vme_start + next_offset)) {
9374			/*
9375			 * We didn't have a "next_entry" or this one is
9376			 * closer to the address we're looking for:
9377			 * use this "tmp_entry" as the new "next_entry".
9378			 */
9379			if (next_entry != NULL) {
9380				/* unlock the last "next_map" */
9381				if (next_map != curr_map && not_in_kdp) {
9382					vm_map_unlock_read(next_map);
9383				}
9384			}
9385			next_entry = tmp_entry;
9386			next_map = curr_map;
9387			next_depth = curr_depth;
9388			next_address = next_entry->vme_start;
9389			next_skip = curr_skip;
9390			next_offset = curr_offset;
9391			next_offset += (next_address - curr_address);
9392			next_max_above = MIN(next_max_above, curr_max_above);
9393			next_max_above = MIN(next_max_above,
9394					     next_entry->vme_end - next_address);
9395			next_max_below = MIN(next_max_below, curr_max_below);
9396			next_max_below = MIN(next_max_below,
9397					     next_address - next_entry->vme_start);
9398		}
9399
9400		/*
9401		 * "curr_max_{above,below}" allow us to keep track of the
9402		 * portion of the submap that is actually mapped at this level:
9403		 * the rest of that submap is irrelevant to us, since it's not
9404		 * mapped here.
9405		 * The relevant portion of the submap starts at
9406		 * "curr_entry->offset" and extends for the size of "curr_entry".
9407		 */
9408		curr_max_above = MIN(curr_max_above,
9409				     curr_entry->vme_end - curr_address);
9410		curr_max_below = MIN(curr_max_below,
9411				     curr_address - curr_entry->vme_start);
9412
9413		if (!curr_entry->is_sub_map ||
9414		    curr_depth >= user_max_depth) {
9415			/*
9416			 * We hit a leaf map or we reached the maximum depth
9417			 * we could, so stop looking.  Keep the current map
9418			 * locked.
9419			 */
9420			break;
9421		}
9422
9423		/*
9424		 * Get down to the next submap level.
9425		 */
9426
9427		/*
9428		 * Lock the next level and unlock the current level,
9429		 * unless we need to keep it locked to access the "next_entry"
9430		 * later.
9431		 */
9432		if (not_in_kdp) {
9433			vm_map_lock_read(curr_entry->object.sub_map);
9434		}
9435		if (curr_map == next_map) {
9436			/* keep "next_map" locked in case we need it */
9437		} else {
9438			/* release this map */
9439			if (not_in_kdp)
9440				vm_map_unlock_read(curr_map);
9441		}
9442
9443		/*
9444		 * Adjust the offset.  "curr_entry" maps the submap
9445		 * at relative address "curr_entry->vme_start" in the
9446		 * curr_map but skips the first "curr_entry->offset"
9447		 * bytes of the submap.
9448		 * "curr_offset" always represents the offset of a virtual
9449		 * address in the curr_map relative to the absolute address
9450		 * space (i.e. the top-level VM map).
9451		 */
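		/*
		 * Worked example (illustrative, for a top-level entry): if
		 * "curr_entry" maps [0x1000, 0x3000) with an offset of 0x5000
		 * into its submap, then "curr_offset" grows by
		 * 0x5000 - 0x1000 = 0x4000, so a user address of 0x1200 is
		 * translated to submap address 0x1200 + 0x4000 = 0x5200 below.
		 */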
9452		curr_offset +=
9453			(curr_entry->offset - curr_entry->vme_start);
9454		curr_address = user_address + curr_offset;
9455		/* switch to the submap */
9456		curr_map = curr_entry->object.sub_map;
9457		curr_depth++;
9458		curr_entry = NULL;
9459	}
9460
9461	if (curr_entry == NULL) {
9462		/* no VM region contains the address... */
9463		if (next_entry == NULL) {
9464			/* ... and no VM region follows it either */
9465			return KERN_INVALID_ADDRESS;
9466		}
9467		/* ... gather info about the next VM region */
9468		curr_entry = next_entry;
9469		curr_map = next_map;	/* still locked ... */
9470		curr_address = next_address;
9471		curr_skip = next_skip;
9472		curr_offset = next_offset;
9473		curr_depth = next_depth;
9474		curr_max_above = next_max_above;
9475		curr_max_below = next_max_below;
9476		if (curr_map == map) {
9477			user_address = curr_address;
9478		}
9479	} else {
9480		/* we won't need "next_entry" after all */
9481		if (next_entry != NULL) {
9482			/* release "next_map" */
9483			if (next_map != curr_map && not_in_kdp) {
9484				vm_map_unlock_read(next_map);
9485			}
9486		}
9487	}
9488	next_entry = NULL;
9489	next_map = NULL;
9490	next_offset = 0;
9491	next_skip = 0;
9492	next_depth = 0;
9493	next_max_below = -1;
9494	next_max_above = -1;
9495
9496	*nesting_depth = curr_depth;
9497	*size = curr_max_above + curr_max_below;
9498	*address = user_address + curr_skip - curr_max_below;
9499
// LP64todo: all the current tools are 32-bit, so this obviously never worked
// for 64-bit.  It should probably be a real 32-bit ID rather than a pointer.
// Current users only check for equality.
9503#define INFO_MAKE_OBJECT_ID(p)	((uint32_t)(uintptr_t)p)
9504
9505	if (look_for_pages) {
9506		submap_info->user_tag = curr_entry->alias;
9507		submap_info->offset = curr_entry->offset;
9508		submap_info->protection = curr_entry->protection;
9509		submap_info->inheritance = curr_entry->inheritance;
9510		submap_info->max_protection = curr_entry->max_protection;
9511		submap_info->behavior = curr_entry->behavior;
9512		submap_info->user_wired_count = curr_entry->user_wired_count;
9513		submap_info->is_submap = curr_entry->is_sub_map;
9514		submap_info->object_id = INFO_MAKE_OBJECT_ID(curr_entry->object.vm_object);
9515	} else {
9516		short_info->user_tag = curr_entry->alias;
9517		short_info->offset = curr_entry->offset;
9518		short_info->protection = curr_entry->protection;
9519		short_info->inheritance = curr_entry->inheritance;
9520		short_info->max_protection = curr_entry->max_protection;
9521		short_info->behavior = curr_entry->behavior;
9522		short_info->user_wired_count = curr_entry->user_wired_count;
9523		short_info->is_submap = curr_entry->is_sub_map;
9524		short_info->object_id = INFO_MAKE_OBJECT_ID(curr_entry->object.vm_object);
9525	}
9526
9527	extended.pages_resident = 0;
9528	extended.pages_swapped_out = 0;
9529	extended.pages_shared_now_private = 0;
9530	extended.pages_dirtied = 0;
9531	extended.external_pager = 0;
9532	extended.shadow_depth = 0;
9533
9534	if (not_in_kdp) {
9535		if (!curr_entry->is_sub_map) {
9536			vm_map_offset_t range_start, range_end;
9537			range_start = MAX((curr_address - curr_max_below),
9538					  curr_entry->vme_start);
9539			range_end = MIN((curr_address + curr_max_above),
9540					curr_entry->vme_end);
9541			vm_map_region_walk(curr_map,
9542					   range_start,
9543					   curr_entry,
9544					   (curr_entry->offset +
9545					    (range_start -
9546					     curr_entry->vme_start)),
9547					   range_end - range_start,
9548					   &extended,
9549					   look_for_pages);
9550			if (extended.external_pager &&
9551			    extended.ref_count == 2 &&
9552			    extended.share_mode == SM_SHARED) {
9553				extended.share_mode = SM_PRIVATE;
9554			}
9555		} else {
9556			if (curr_entry->use_pmap) {
9557				extended.share_mode = SM_TRUESHARED;
9558			} else {
9559				extended.share_mode = SM_PRIVATE;
9560			}
9561			extended.ref_count =
9562				curr_entry->object.sub_map->ref_count;
9563		}
9564	}
9565
9566	if (look_for_pages) {
9567		submap_info->pages_resident = extended.pages_resident;
9568		submap_info->pages_swapped_out = extended.pages_swapped_out;
9569		submap_info->pages_shared_now_private =
9570			extended.pages_shared_now_private;
9571		submap_info->pages_dirtied = extended.pages_dirtied;
9572		submap_info->external_pager = extended.external_pager;
9573		submap_info->shadow_depth = extended.shadow_depth;
9574		submap_info->share_mode = extended.share_mode;
9575		submap_info->ref_count = extended.ref_count;
9576	} else {
9577		short_info->external_pager = extended.external_pager;
9578		short_info->shadow_depth = extended.shadow_depth;
9579		short_info->share_mode = extended.share_mode;
9580		short_info->ref_count = extended.ref_count;
9581	}
9582
9583	if (not_in_kdp) {
9584		vm_map_unlock_read(curr_map);
9585	}
9586
9587	return KERN_SUCCESS;
9588}
9589
9590/*
9591 *	vm_region:
9592 *
9593 *	User call to obtain information about a region in
 *	a task's address map.  Several info flavors are
 *	supported (see the "flavor" switch below).
9596 *
9597 *	XXX The reserved and behavior fields cannot be filled
9598 *	    in until the vm merge from the IK is completed, and
9599 *	    vm_reserve is implemented.
9600 */
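
/*
 * Usage sketch (user space, illustrative only): the information produced
 * here is normally obtained through the mach_vm_region() MIG call, e.g.:
 *
 *	mach_vm_address_t	addr = 0;
 *	mach_vm_size_t		size = 0;
 *	vm_region_basic_info_data_64_t info;
 *	mach_msg_type_number_t	count = VM_REGION_BASIC_INFO_COUNT_64;
 *	mach_port_t		object_name = MACH_PORT_NULL;
 *
 *	kern_return_t kr = mach_vm_region(mach_task_self(), &addr, &size,
 *					  VM_REGION_BASIC_INFO_64,
 *					  (vm_region_info_t)&info,
 *					  &count, &object_name);
 *
 * On success, "addr" and "size" describe the first region at or above the
 * requested address and "info" carries the fields filled in below.
 */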
9601
9602kern_return_t
9603vm_map_region(
9604	vm_map_t		 map,
9605	vm_map_offset_t	*address,		/* IN/OUT */
9606	vm_map_size_t		*size,			/* OUT */
9607	vm_region_flavor_t	 flavor,		/* IN */
9608	vm_region_info_t	 info,			/* OUT */
9609	mach_msg_type_number_t	*count,	/* IN/OUT */
9610	mach_port_t		*object_name)		/* OUT */
9611{
9612	vm_map_entry_t		tmp_entry;
9613	vm_map_entry_t		entry;
9614	vm_map_offset_t		start;
9615
9616	if (map == VM_MAP_NULL)
9617		return(KERN_INVALID_ARGUMENT);
9618
9619	switch (flavor) {
9620
9621	case VM_REGION_BASIC_INFO:
9622		/* legacy for old 32-bit objects info */
9623	{
9624		vm_region_basic_info_t	basic;
9625
9626		if (*count < VM_REGION_BASIC_INFO_COUNT)
9627			return(KERN_INVALID_ARGUMENT);
9628
9629		basic = (vm_region_basic_info_t) info;
9630		*count = VM_REGION_BASIC_INFO_COUNT;
9631
9632		vm_map_lock_read(map);
9633
9634		start = *address;
9635		if (!vm_map_lookup_entry(map, start, &tmp_entry)) {
9636			if ((entry = tmp_entry->vme_next) == vm_map_to_entry(map)) {
9637				vm_map_unlock_read(map);
9638				return(KERN_INVALID_ADDRESS);
9639			}
9640		} else {
9641			entry = tmp_entry;
9642		}
9643
9644		start = entry->vme_start;
9645
9646		basic->offset = (uint32_t)entry->offset;
9647		basic->protection = entry->protection;
9648		basic->inheritance = entry->inheritance;
9649		basic->max_protection = entry->max_protection;
9650		basic->behavior = entry->behavior;
9651		basic->user_wired_count = entry->user_wired_count;
9652		basic->reserved = entry->is_sub_map;
9653		*address = start;
9654		*size = (entry->vme_end - start);
9655
9656		if (object_name) *object_name = IP_NULL;
9657		if (entry->is_sub_map) {
9658			basic->shared = FALSE;
9659		} else {
9660			basic->shared = entry->is_shared;
9661		}
9662
9663		vm_map_unlock_read(map);
9664		return(KERN_SUCCESS);
9665	}
9666
9667	case VM_REGION_BASIC_INFO_64:
9668	{
9669		vm_region_basic_info_64_t	basic;
9670
9671		if (*count < VM_REGION_BASIC_INFO_COUNT_64)
9672			return(KERN_INVALID_ARGUMENT);
9673
9674		basic = (vm_region_basic_info_64_t) info;
9675		*count = VM_REGION_BASIC_INFO_COUNT_64;
9676
9677		vm_map_lock_read(map);
9678
9679		start = *address;
9680		if (!vm_map_lookup_entry(map, start, &tmp_entry)) {
9681			if ((entry = tmp_entry->vme_next) == vm_map_to_entry(map)) {
9682				vm_map_unlock_read(map);
9683				return(KERN_INVALID_ADDRESS);
9684			}
9685		} else {
9686			entry = tmp_entry;
9687		}
9688
9689		start = entry->vme_start;
9690
9691		basic->offset = entry->offset;
9692		basic->protection = entry->protection;
9693		basic->inheritance = entry->inheritance;
9694		basic->max_protection = entry->max_protection;
9695		basic->behavior = entry->behavior;
9696		basic->user_wired_count = entry->user_wired_count;
9697		basic->reserved = entry->is_sub_map;
9698		*address = start;
9699		*size = (entry->vme_end - start);
9700
9701		if (object_name) *object_name = IP_NULL;
9702		if (entry->is_sub_map) {
9703			basic->shared = FALSE;
9704		} else {
9705			basic->shared = entry->is_shared;
9706		}
9707
9708		vm_map_unlock_read(map);
9709		return(KERN_SUCCESS);
9710	}
9711	case VM_REGION_EXTENDED_INFO:
9712	{
9713		vm_region_extended_info_t	extended;
9714
9715		if (*count < VM_REGION_EXTENDED_INFO_COUNT)
9716			return(KERN_INVALID_ARGUMENT);
9717
9718		extended = (vm_region_extended_info_t) info;
9719		*count = VM_REGION_EXTENDED_INFO_COUNT;
9720
9721		vm_map_lock_read(map);
9722
9723		start = *address;
9724		if (!vm_map_lookup_entry(map, start, &tmp_entry)) {
9725			if ((entry = tmp_entry->vme_next) == vm_map_to_entry(map)) {
9726				vm_map_unlock_read(map);
9727				return(KERN_INVALID_ADDRESS);
9728			}
9729		} else {
9730			entry = tmp_entry;
9731		}
9732		start = entry->vme_start;
9733
9734		extended->protection = entry->protection;
9735		extended->user_tag = entry->alias;
9736		extended->pages_resident = 0;
9737		extended->pages_swapped_out = 0;
9738		extended->pages_shared_now_private = 0;
9739		extended->pages_dirtied = 0;
9740		extended->external_pager = 0;
9741		extended->shadow_depth = 0;
9742
9743		vm_map_region_walk(map, start, entry, entry->offset, entry->vme_end - start, extended, TRUE);
9744
9745		if (extended->external_pager && extended->ref_count == 2 && extended->share_mode == SM_SHARED)
9746			extended->share_mode = SM_PRIVATE;
9747
9748		if (object_name)
9749			*object_name = IP_NULL;
9750		*address = start;
9751		*size = (entry->vme_end - start);
9752
9753		vm_map_unlock_read(map);
9754		return(KERN_SUCCESS);
9755	}
9756	case VM_REGION_TOP_INFO:
9757	{
9758		vm_region_top_info_t	top;
9759
9760		if (*count < VM_REGION_TOP_INFO_COUNT)
9761			return(KERN_INVALID_ARGUMENT);
9762
9763		top = (vm_region_top_info_t) info;
9764		*count = VM_REGION_TOP_INFO_COUNT;
9765
9766		vm_map_lock_read(map);
9767
9768		start = *address;
9769		if (!vm_map_lookup_entry(map, start, &tmp_entry)) {
9770			if ((entry = tmp_entry->vme_next) == vm_map_to_entry(map)) {
9771				vm_map_unlock_read(map);
9772				return(KERN_INVALID_ADDRESS);
9773			}
9774		} else {
			entry = tmp_entry;
		}
9778		start = entry->vme_start;
9779
9780		top->private_pages_resident = 0;
9781		top->shared_pages_resident = 0;
9782
9783		vm_map_region_top_walk(entry, top);
9784
9785		if (object_name)
9786			*object_name = IP_NULL;
9787		*address = start;
9788		*size = (entry->vme_end - start);
9789
9790		vm_map_unlock_read(map);
9791		return(KERN_SUCCESS);
9792	}
9793	default:
9794		return(KERN_INVALID_ARGUMENT);
9795	}
9796}
9797
9798#define OBJ_RESIDENT_COUNT(obj, entry_size)				\
9799	MIN((entry_size),						\
9800	    ((obj)->all_reusable ?					\
9801	     (obj)->wired_page_count :					\
9802	     (obj)->resident_page_count - (obj)->reusable_page_count))
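
/*
 * Worked example (illustrative): for an object that is not "all_reusable",
 * with resident_page_count == 100, reusable_page_count == 30 and an
 * entry_size of 50 pages, this yields MIN(50, 100 - 30) == 50; with an
 * entry_size of 200 pages it would yield MIN(200, 70) == 70.
 */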
9803
9804void
9805vm_map_region_top_walk(
9806        vm_map_entry_t		   entry,
9807	vm_region_top_info_t       top)
9808{
9809
9810	if (entry->object.vm_object == 0 || entry->is_sub_map) {
9811		top->share_mode = SM_EMPTY;
9812		top->ref_count = 0;
9813		top->obj_id = 0;
9814		return;
9815	}
9816
9817	{
9818	        struct	vm_object *obj, *tmp_obj;
9819		int		ref_count;
9820		uint32_t	entry_size;
9821
9822		entry_size = (uint32_t) ((entry->vme_end - entry->vme_start) / PAGE_SIZE_64);
9823
9824		obj = entry->object.vm_object;
9825
9826		vm_object_lock(obj);
9827
9828		if ((ref_count = obj->ref_count) > 1 && obj->paging_in_progress)
9829			ref_count--;
9830
9831		assert(obj->reusable_page_count <= obj->resident_page_count);
9832		if (obj->shadow) {
9833			if (ref_count == 1)
9834				top->private_pages_resident =
9835					OBJ_RESIDENT_COUNT(obj, entry_size);
9836			else
9837				top->shared_pages_resident =
9838					OBJ_RESIDENT_COUNT(obj, entry_size);
9839			top->ref_count  = ref_count;
9840			top->share_mode = SM_COW;
9841
9842			while ((tmp_obj = obj->shadow)) {
9843				vm_object_lock(tmp_obj);
9844				vm_object_unlock(obj);
9845				obj = tmp_obj;
9846
9847				if ((ref_count = obj->ref_count) > 1 && obj->paging_in_progress)
9848					ref_count--;
9849
9850				assert(obj->reusable_page_count <= obj->resident_page_count);
9851				top->shared_pages_resident +=
9852					OBJ_RESIDENT_COUNT(obj, entry_size);
9853				top->ref_count += ref_count - 1;
9854			}
9855		} else {
9856			if (entry->superpage_size) {
9857				top->share_mode = SM_LARGE_PAGE;
9858				top->shared_pages_resident = 0;
9859				top->private_pages_resident = entry_size;
9860			} else if (entry->needs_copy) {
9861				top->share_mode = SM_COW;
9862				top->shared_pages_resident =
9863					OBJ_RESIDENT_COUNT(obj, entry_size);
9864			} else {
9865				if (ref_count == 1 ||
9866				    (ref_count == 2 && !(obj->pager_trusted) && !(obj->internal))) {
9867					top->share_mode = SM_PRIVATE;
9868					top->private_pages_resident =
9869						OBJ_RESIDENT_COUNT(obj,
9870								   entry_size);
9871				} else {
9872					top->share_mode = SM_SHARED;
9873					top->shared_pages_resident =
9874						OBJ_RESIDENT_COUNT(obj,
9875								  entry_size);
9876				}
9877			}
9878			top->ref_count = ref_count;
9879		}
9880		/* XXX K64: obj_id will be truncated */
9881		top->obj_id = (unsigned int) (uintptr_t)obj;
9882
9883		vm_object_unlock(obj);
9884	}
9885}
9886
9887void
9888vm_map_region_walk(
9889	vm_map_t		   	map,
9890	vm_map_offset_t			va,
9891	vm_map_entry_t			entry,
9892	vm_object_offset_t		offset,
9893	vm_object_size_t		range,
9894	vm_region_extended_info_t	extended,
9895	boolean_t			look_for_pages)
9896{
9897        register struct vm_object *obj, *tmp_obj;
9898	register vm_map_offset_t       last_offset;
9899	register int               i;
9900	register int               ref_count;
9901	struct vm_object	*shadow_object;
9902	int			shadow_depth;
9903
9904	if ((entry->object.vm_object == 0) ||
9905	    (entry->is_sub_map) ||
9906	    (entry->object.vm_object->phys_contiguous &&
9907	     !entry->superpage_size)) {
9908		extended->share_mode = SM_EMPTY;
9909		extended->ref_count = 0;
9910		return;
9911	}
9912
	if (entry->superpage_size) {
		extended->shadow_depth = 0;
		extended->share_mode = SM_LARGE_PAGE;
		extended->ref_count = 1;
		extended->external_pager = 0;
		extended->pages_resident = (unsigned int)(range >> PAGE_SHIFT);
		return;
	}
9922
9923	{
9924		obj = entry->object.vm_object;
9925
9926		vm_object_lock(obj);
9927
9928		if ((ref_count = obj->ref_count) > 1 && obj->paging_in_progress)
9929			ref_count--;
9930
9931		if (look_for_pages) {
9932			for (last_offset = offset + range;
9933			     offset < last_offset;
9934			     offset += PAGE_SIZE_64, va += PAGE_SIZE)
9935				vm_map_region_look_for_page(map, va, obj,
9936							    offset, ref_count,
9937							    0, extended);
9938		} else {
9939			shadow_object = obj->shadow;
9940			shadow_depth = 0;
9941
9942			if ( !(obj->pager_trusted) && !(obj->internal))
9943				extended->external_pager = 1;
9944
9945			if (shadow_object != VM_OBJECT_NULL) {
9946				vm_object_lock(shadow_object);
9947				for (;
9948				     shadow_object != VM_OBJECT_NULL;
9949				     shadow_depth++) {
9950					vm_object_t	next_shadow;
9951
9952					if ( !(shadow_object->pager_trusted) &&
9953					     !(shadow_object->internal))
9954						extended->external_pager = 1;
9955
9956					next_shadow = shadow_object->shadow;
9957					if (next_shadow) {
9958						vm_object_lock(next_shadow);
9959					}
9960					vm_object_unlock(shadow_object);
9961					shadow_object = next_shadow;
9962				}
9963			}
9964			extended->shadow_depth = shadow_depth;
9965		}
9966
9967		if (extended->shadow_depth || entry->needs_copy)
9968			extended->share_mode = SM_COW;
9969		else {
9970			if (ref_count == 1)
9971				extended->share_mode = SM_PRIVATE;
9972			else {
9973				if (obj->true_share)
9974					extended->share_mode = SM_TRUESHARED;
9975				else
9976					extended->share_mode = SM_SHARED;
9977			}
9978		}
9979		extended->ref_count = ref_count - extended->shadow_depth;
9980
9981		for (i = 0; i < extended->shadow_depth; i++) {
9982			if ((tmp_obj = obj->shadow) == 0)
9983				break;
9984			vm_object_lock(tmp_obj);
9985			vm_object_unlock(obj);
9986
9987			if ((ref_count = tmp_obj->ref_count) > 1 && tmp_obj->paging_in_progress)
9988				ref_count--;
9989
9990			extended->ref_count += ref_count;
9991			obj = tmp_obj;
9992		}
9993		vm_object_unlock(obj);
9994
9995		if (extended->share_mode == SM_SHARED) {
9996			register vm_map_entry_t	     cur;
9997			register vm_map_entry_t	     last;
9998			int      my_refs;
9999
10000			obj = entry->object.vm_object;
10001			last = vm_map_to_entry(map);
10002			my_refs = 0;
10003
10004			if ((ref_count = obj->ref_count) > 1 && obj->paging_in_progress)
10005				ref_count--;
10006			for (cur = vm_map_first_entry(map); cur != last; cur = cur->vme_next)
10007				my_refs += vm_map_region_count_obj_refs(cur, obj);
10008
10009			if (my_refs == ref_count)
10010				extended->share_mode = SM_PRIVATE_ALIASED;
10011			else if (my_refs > 1)
10012				extended->share_mode = SM_SHARED_ALIASED;
10013		}
10014	}
10015}
10016
10017
10018/* object is locked on entry and locked on return */
10019
10020
10021static void
10022vm_map_region_look_for_page(
10023	__unused vm_map_t		map,
10024	__unused vm_map_offset_t	va,
10025	vm_object_t			object,
10026	vm_object_offset_t		offset,
10027	int				max_refcnt,
10028	int				depth,
10029	vm_region_extended_info_t	extended)
10030{
10031        register vm_page_t	p;
10032        register vm_object_t	shadow;
10033	register int            ref_count;
10034	vm_object_t		caller_object;
10035#if	MACH_PAGEMAP
10036	kern_return_t		kr;
10037#endif
10038	shadow = object->shadow;
10039	caller_object = object;
10040
10041
10042	while (TRUE) {
10043
10044		if ( !(object->pager_trusted) && !(object->internal))
10045			extended->external_pager = 1;
10046
10047		if ((p = vm_page_lookup(object, offset)) != VM_PAGE_NULL) {
10048	        	if (shadow && (max_refcnt == 1))
10049		    		extended->pages_shared_now_private++;
10050
10051			if (!p->fictitious &&
10052			    (p->dirty || pmap_is_modified(p->phys_page)))
10053		    		extended->pages_dirtied++;
10054
10055	        	extended->pages_resident++;
10056
10057			if(object != caller_object)
10058				vm_object_unlock(object);
10059
10060			return;
10061		}
10062#if	MACH_PAGEMAP
10063		if (object->existence_map) {
10064	    		if (vm_external_state_get(object->existence_map, offset) == VM_EXTERNAL_STATE_EXISTS) {
10065
10066	        		extended->pages_swapped_out++;
10067
10068				if(object != caller_object)
10069					vm_object_unlock(object);
10070
10071				return;
10072	    		}
10073		} else if (object->internal &&
10074			   object->alive &&
10075			   !object->terminating &&
10076			   object->pager_ready) {
10077
10078			memory_object_t pager;
10079
10080			vm_object_paging_begin(object);
10081			pager = object->pager;
10082			vm_object_unlock(object);
10083
10084			kr = memory_object_data_request(
10085				pager,
10086				offset + object->paging_offset,
10087				0, /* just poke the pager */
10088				VM_PROT_READ,
10089				NULL);
10090
10091			vm_object_lock(object);
10092			vm_object_paging_end(object);
10093
10094			if (kr == KERN_SUCCESS) {
10095				/* the pager has that page */
10096				extended->pages_swapped_out++;
10097				if (object != caller_object)
10098					vm_object_unlock(object);
10099				return;
10100			}
10101		}
10102#endif /* MACH_PAGEMAP */
10103
10104		if (shadow) {
10105			vm_object_lock(shadow);
10106
10107			if ((ref_count = shadow->ref_count) > 1 && shadow->paging_in_progress)
10108			        ref_count--;
10109
10110	    		if (++depth > extended->shadow_depth)
10111	        		extended->shadow_depth = depth;
10112
10113	    		if (ref_count > max_refcnt)
10114	        		max_refcnt = ref_count;
10115
10116			if(object != caller_object)
10117				vm_object_unlock(object);
10118
10119			offset = offset + object->vo_shadow_offset;
10120			object = shadow;
10121			shadow = object->shadow;
10122			continue;
10123		}
10124		if(object != caller_object)
10125			vm_object_unlock(object);
10126		break;
10127	}
10128}
10129
10130static int
10131vm_map_region_count_obj_refs(
10132        vm_map_entry_t    entry,
10133	vm_object_t       object)
10134{
10135        register int ref_count;
10136	register vm_object_t chk_obj;
10137	register vm_object_t tmp_obj;
10138
10139	if (entry->object.vm_object == 0)
10140		return(0);
10141
10142        if (entry->is_sub_map)
10143		return(0);
10144	else {
10145		ref_count = 0;
10146
10147		chk_obj = entry->object.vm_object;
10148		vm_object_lock(chk_obj);
10149
10150		while (chk_obj) {
10151			if (chk_obj == object)
10152				ref_count++;
10153			tmp_obj = chk_obj->shadow;
10154			if (tmp_obj)
10155				vm_object_lock(tmp_obj);
10156			vm_object_unlock(chk_obj);
10157
10158			chk_obj = tmp_obj;
10159		}
10160	}
10161	return(ref_count);
10162}
10163
10164
10165/*
10166 *	Routine:	vm_map_simplify
10167 *
10168 *	Description:
10169 *		Attempt to simplify the map representation in
10170 *		the vicinity of the given starting address.
10171 *	Note:
10172 *		This routine is intended primarily to keep the
10173 *		kernel maps more compact -- they generally don't
10174 *		benefit from the "expand a map entry" technology
10175 *		at allocation time because the adjacent entry
10176 *		is often wired down.
10177 */
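/*
 * Illustrative sketch: two adjacent entries
 *
 *	[0x1000, 0x2000) -> object O, offset 0x0
 *	[0x2000, 0x3000) -> object O, offset 0x1000
 *
 * with identical protections, inheritance, wiring, aliases, etc. are
 * collapsed by vm_map_simplify_entry() into the single entry
 *
 *	[0x1000, 0x3000) -> object O, offset 0x0
 *
 * which keeps the entry list (and hence lookups) smaller.
 */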
10178void
10179vm_map_simplify_entry(
10180	vm_map_t	map,
10181	vm_map_entry_t	this_entry)
10182{
10183	vm_map_entry_t	prev_entry;
10184
10185	counter(c_vm_map_simplify_entry_called++);
10186
10187	prev_entry = this_entry->vme_prev;
10188
10189	if ((this_entry != vm_map_to_entry(map)) &&
10190	    (prev_entry != vm_map_to_entry(map)) &&
10191
10192	    (prev_entry->vme_end == this_entry->vme_start) &&
10193
10194	    (prev_entry->is_sub_map == this_entry->is_sub_map) &&
10195
10196	    (prev_entry->object.vm_object == this_entry->object.vm_object) &&
10197	    ((prev_entry->offset + (prev_entry->vme_end -
10198				    prev_entry->vme_start))
10199	     == this_entry->offset) &&
10200
10201	    (prev_entry->inheritance == this_entry->inheritance) &&
10202	    (prev_entry->protection == this_entry->protection) &&
10203	    (prev_entry->max_protection == this_entry->max_protection) &&
10204	    (prev_entry->behavior == this_entry->behavior) &&
10205	    (prev_entry->alias == this_entry->alias) &&
10206	    (prev_entry->zero_wired_pages == this_entry->zero_wired_pages) &&
10207	    (prev_entry->no_cache == this_entry->no_cache) &&
10208	    (prev_entry->wired_count == this_entry->wired_count) &&
10209	    (prev_entry->user_wired_count == this_entry->user_wired_count) &&
10210
10211	    (prev_entry->needs_copy == this_entry->needs_copy) &&
10212	    (prev_entry->permanent == this_entry->permanent) &&
10213
10214	    (prev_entry->use_pmap == FALSE) &&
10215	    (this_entry->use_pmap == FALSE) &&
10216	    (prev_entry->in_transition == FALSE) &&
10217	    (this_entry->in_transition == FALSE) &&
10218	    (prev_entry->needs_wakeup == FALSE) &&
10219	    (this_entry->needs_wakeup == FALSE) &&
10220	    (prev_entry->is_shared == FALSE) &&
10221	    (this_entry->is_shared == FALSE)
10222		) {
10223		vm_map_store_entry_unlink(map, prev_entry);
10224		assert(prev_entry->vme_start < this_entry->vme_end);
10225		this_entry->vme_start = prev_entry->vme_start;
10226		this_entry->offset = prev_entry->offset;
10227		if (prev_entry->is_sub_map) {
10228			vm_map_deallocate(prev_entry->object.sub_map);
10229		} else {
10230			vm_object_deallocate(prev_entry->object.vm_object);
10231		}
10232		vm_map_entry_dispose(map, prev_entry);
10233		SAVE_HINT_MAP_WRITE(map, this_entry);
10234		counter(c_vm_map_simplified++);
10235	}
10236}
10237
10238void
10239vm_map_simplify(
10240	vm_map_t	map,
10241	vm_map_offset_t	start)
10242{
10243	vm_map_entry_t	this_entry;
10244
10245	vm_map_lock(map);
10246	if (vm_map_lookup_entry(map, start, &this_entry)) {
10247		vm_map_simplify_entry(map, this_entry);
10248		vm_map_simplify_entry(map, this_entry->vme_next);
10249	}
10250	counter(c_vm_map_simplify_called++);
10251	vm_map_unlock(map);
10252}
10253
10254static void
10255vm_map_simplify_range(
10256	vm_map_t	map,
10257	vm_map_offset_t	start,
10258	vm_map_offset_t	end)
10259{
10260	vm_map_entry_t	entry;
10261
10262	/*
10263	 * The map should be locked (for "write") by the caller.
10264	 */
10265
10266	if (start >= end) {
10267		/* invalid address range */
10268		return;
10269	}
10270
10271	start = vm_map_trunc_page(start);
10272	end = vm_map_round_page(end);
10273
10274	if (!vm_map_lookup_entry(map, start, &entry)) {
10275		/* "start" is not mapped and "entry" ends before "start" */
10276		if (entry == vm_map_to_entry(map)) {
10277			/* start with first entry in the map */
10278			entry = vm_map_first_entry(map);
10279		} else {
10280			/* start with next entry */
10281			entry = entry->vme_next;
10282		}
10283	}
10284
10285	while (entry != vm_map_to_entry(map) &&
10286	       entry->vme_start <= end) {
10287		/* try and coalesce "entry" with its previous entry */
10288		vm_map_simplify_entry(map, entry);
10289		entry = entry->vme_next;
10290	}
10291}
10292
10293
10294/*
10295 *	Routine:	vm_map_machine_attribute
10296 *	Purpose:
10297 *		Provide machine-specific attributes to mappings,
 *		such as cacheability, etc., for machines that provide
10299 *		them.  NUMA architectures and machines with big/strange
10300 *		caches will use this.
10301 *	Note:
10302 *		Responsibilities for locking and checking are handled here,
10303 *		everything else in the pmap module. If any non-volatile
10304 *		information must be kept, the pmap module should handle
10305 *		it itself. [This assumes that attributes do not
10306 *		need to be inherited, which seems ok to me]
10307 */
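/*
 * Usage sketch (user space, illustrative only): a cache flush for a range
 * would typically be requested through the corresponding MIG interface,
 * along the lines of:
 *
 *	vm_machine_attribute_val_t value = MATTR_VAL_CACHE_FLUSH;
 *	kern_return_t kr = vm_machine_attribute(mach_task_self(),
 *						addr, size,
 *						MATTR_CACHE, &value);
 */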
10308kern_return_t
10309vm_map_machine_attribute(
10310	vm_map_t			map,
10311	vm_map_offset_t		start,
10312	vm_map_offset_t		end,
10313	vm_machine_attribute_t	attribute,
10314	vm_machine_attribute_val_t* value)		/* IN/OUT */
10315{
10316	kern_return_t	ret;
10317	vm_map_size_t sync_size;
10318	vm_map_entry_t entry;
10319
10320	if (start < vm_map_min(map) || end > vm_map_max(map))
10321		return KERN_INVALID_ADDRESS;
10322
10323	/* Figure how much memory we need to flush (in page increments) */
10324	sync_size = end - start;
10325
10326	vm_map_lock(map);
10327
	if (attribute != MATTR_CACHE) {
		/*
		 * If we don't have to find physical addresses, we don't
		 * have to do an explicit traversal here.
		 */
10331		ret = pmap_attribute(map->pmap, start, end-start,
10332				     attribute, value);
10333		vm_map_unlock(map);
10334		return ret;
10335	}
10336
	ret = KERN_SUCCESS;	/* Assume it all worked */
10338
10339	while(sync_size) {
10340		if (vm_map_lookup_entry(map, start, &entry)) {
10341			vm_map_size_t	sub_size;
10342			if((entry->vme_end - start) > sync_size) {
10343				sub_size = sync_size;
10344				sync_size = 0;
10345			} else {
10346				sub_size = entry->vme_end - start;
10347				sync_size -= sub_size;
10348			}
10349			if(entry->is_sub_map) {
10350				vm_map_offset_t sub_start;
10351				vm_map_offset_t sub_end;
10352
10353				sub_start = (start - entry->vme_start)
10354					+ entry->offset;
10355				sub_end = sub_start + sub_size;
10356				vm_map_machine_attribute(
10357					entry->object.sub_map,
10358					sub_start,
10359					sub_end,
10360					attribute, value);
10361			} else {
10362				if(entry->object.vm_object) {
10363					vm_page_t		m;
10364					vm_object_t		object;
10365					vm_object_t		base_object;
10366					vm_object_t		last_object;
10367					vm_object_offset_t	offset;
10368					vm_object_offset_t	base_offset;
10369					vm_map_size_t		range;
10370					range = sub_size;
10371					offset = (start - entry->vme_start)
10372						+ entry->offset;
10373					base_offset = offset;
10374					object = entry->object.vm_object;
10375					base_object = object;
10376					last_object = NULL;
10377
10378					vm_object_lock(object);
10379
10380					while (range) {
10381						m = vm_page_lookup(
10382							object, offset);
10383
10384						if (m && !m->fictitious) {
10385						        ret =
10386								pmap_attribute_cache_sync(
10387									m->phys_page,
10388									PAGE_SIZE,
10389									attribute, value);
10390
10391						} else if (object->shadow) {
10392						        offset = offset + object->vo_shadow_offset;
10393							last_object = object;
10394							object = object->shadow;
10395							vm_object_lock(last_object->shadow);
10396							vm_object_unlock(last_object);
10397							continue;
10398						}
10399						range -= PAGE_SIZE;
10400
10401						if (base_object != object) {
10402						        vm_object_unlock(object);
10403							vm_object_lock(base_object);
10404							object = base_object;
10405						}
10406						/* Bump to the next page */
10407						base_offset += PAGE_SIZE;
10408						offset = base_offset;
10409					}
10410					vm_object_unlock(object);
10411				}
10412			}
10413			start += sub_size;
10414		} else {
10415			vm_map_unlock(map);
10416			return KERN_FAILURE;
10417		}
10418
10419	}
10420
10421	vm_map_unlock(map);
10422
10423	return ret;
10424}
10425
10426/*
10427 *	vm_map_behavior_set:
10428 *
10429 *	Sets the paging reference behavior of the specified address
10430 *	range in the target map.  Paging reference behavior affects
10431 *	how pagein operations resulting from faults on the map will be
10432 *	clustered.
10433 */
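/*
 * Usage sketch (illustrative only): this routine is typically reached from
 * the BSD madvise() path, e.g.
 *
 *	madvise(buf, len, MADV_SEQUENTIAL);
 *
 * which arrives here as VM_BEHAVIOR_SEQUENTIAL for the given range.
 */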
10434kern_return_t
10435vm_map_behavior_set(
10436	vm_map_t	map,
10437	vm_map_offset_t	start,
10438	vm_map_offset_t	end,
10439	vm_behavior_t	new_behavior)
10440{
10441	register vm_map_entry_t	entry;
10442	vm_map_entry_t	temp_entry;
10443
10444	XPR(XPR_VM_MAP,
10445	    "vm_map_behavior_set, 0x%X start 0x%X end 0x%X behavior %d",
10446	    map, start, end, new_behavior, 0);
10447
10448	if (start > end ||
10449	    start < vm_map_min(map) ||
10450	    end > vm_map_max(map)) {
10451		return KERN_NO_SPACE;
10452	}
10453
10454	switch (new_behavior) {
10455
	/*
	 * This first block of behaviors all set persistent state on the
	 * specified memory range.  All we have to do here is record the
	 * desired behavior in the vm_map_entry_t's.
	 */
10461
10462	case VM_BEHAVIOR_DEFAULT:
10463	case VM_BEHAVIOR_RANDOM:
10464	case VM_BEHAVIOR_SEQUENTIAL:
10465	case VM_BEHAVIOR_RSEQNTL:
10466	case VM_BEHAVIOR_ZERO_WIRED_PAGES:
10467		vm_map_lock(map);
10468
10469		/*
10470		 *	The entire address range must be valid for the map.
10471		 * 	Note that vm_map_range_check() does a
10472		 *	vm_map_lookup_entry() internally and returns the
10473		 *	entry containing the start of the address range if
10474		 *	the entire range is valid.
10475		 */
10476		if (vm_map_range_check(map, start, end, &temp_entry)) {
10477			entry = temp_entry;
10478			vm_map_clip_start(map, entry, start);
10479		}
10480		else {
10481			vm_map_unlock(map);
10482			return(KERN_INVALID_ADDRESS);
10483		}
10484
10485		while ((entry != vm_map_to_entry(map)) && (entry->vme_start < end)) {
10486			vm_map_clip_end(map, entry, end);
10487			assert(!entry->use_pmap);
10488
10489			if( new_behavior == VM_BEHAVIOR_ZERO_WIRED_PAGES ) {
10490				entry->zero_wired_pages = TRUE;
10491			} else {
10492				entry->behavior = new_behavior;
10493			}
10494			entry = entry->vme_next;
10495		}
10496
10497		vm_map_unlock(map);
10498		break;
10499
10500	/*
10501	 * The rest of these are different from the above in that they cause
10502	 * an immediate action to take place as opposed to setting a behavior that
10503	 * affects future actions.
10504	 */
10505
10506	case VM_BEHAVIOR_WILLNEED:
10507		return vm_map_willneed(map, start, end);
10508
10509	case VM_BEHAVIOR_DONTNEED:
10510		return vm_map_msync(map, start, end - start, VM_SYNC_DEACTIVATE | VM_SYNC_CONTIGUOUS);
10511
10512	case VM_BEHAVIOR_FREE:
10513		return vm_map_msync(map, start, end - start, VM_SYNC_KILLPAGES | VM_SYNC_CONTIGUOUS);
10514
10515	case VM_BEHAVIOR_REUSABLE:
10516		return vm_map_reusable_pages(map, start, end);
10517
10518	case VM_BEHAVIOR_REUSE:
10519		return vm_map_reuse_pages(map, start, end);
10520
10521	case VM_BEHAVIOR_CAN_REUSE:
10522		return vm_map_can_reuse(map, start, end);
10523
10524	default:
10525		return(KERN_INVALID_ARGUMENT);
10526	}
10527
10528	return(KERN_SUCCESS);
10529}
10530
10531
10532/*
10533 * Internals for madvise(MADV_WILLNEED) system call.
10534 *
 * The present implementation does a read-ahead if the mapping corresponds
 * to a mapped regular file.  For an anonymous mapping, we do nothing and
 * simply ignore the "advice" (which we are always free to do).
10538 */
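
/*
 * Usage sketch (illustrative only): a user-space caller reaches this path
 * with something like
 *
 *	madvise(file_mapping, len, MADV_WILLNEED);
 *
 * and, for a mapped regular file, the pages are requested asynchronously
 * from the pager below.
 */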
10539
10540
10541static kern_return_t
10542vm_map_willneed(
10543	vm_map_t	map,
10544	vm_map_offset_t	start,
10545	vm_map_offset_t	end
10546)
10547{
10548	vm_map_entry_t 			entry;
10549	vm_object_t			object;
10550	memory_object_t			pager;
10551	struct vm_object_fault_info	fault_info;
10552	kern_return_t			kr;
10553	vm_object_size_t		len;
10554	vm_object_offset_t		offset;
10555
10556	/*
	 * Fill in static values in fault_info.  Several fields are ignored by the
	 * code we call, but we fill them in anyway, since leaving them
	 * uninitialized could cause problems for future backwards compatibility.
10560	 */
10561
10562	fault_info.interruptible = THREAD_UNINT;		/* ignored value */
10563	fault_info.behavior      = VM_BEHAVIOR_SEQUENTIAL;
10564	fault_info.no_cache      = FALSE;			/* ignored value */
10565	fault_info.stealth	 = TRUE;
10566	fault_info.io_sync = FALSE;
10567	fault_info.cs_bypass = FALSE;
10568	fault_info.mark_zf_absent = FALSE;
10569	fault_info.batch_pmap_op = FALSE;
10570
10571	/*
10572	 * The MADV_WILLNEED operation doesn't require any changes to the
10573	 * vm_map_entry_t's, so the read lock is sufficient.
10574	 */
10575
10576	vm_map_lock_read(map);
10577
10578	/*
10579	 * The madvise semantics require that the address range be fully
10580	 * allocated with no holes.  Otherwise, we're required to return
10581	 * an error.
10582	 */
10583
10584	if (! vm_map_range_check(map, start, end, &entry)) {
10585		vm_map_unlock_read(map);
10586		return KERN_INVALID_ADDRESS;
10587	}
10588
10589	/*
10590	 * Examine each vm_map_entry_t in the range.
10591	 */
10592	for (; entry != vm_map_to_entry(map) && start < end; ) {
10593
10594		/*
10595		 * The first time through, the start address could be anywhere
10596		 * within the vm_map_entry we found.  So adjust the offset to
10597		 * correspond.  After that, the offset will always be zero to
10598		 * correspond to the beginning of the current vm_map_entry.
10599		 */
10600		offset = (start - entry->vme_start) + entry->offset;
10601
10602		/*
10603		 * Set the length so we don't go beyond the end of the
10604		 * map_entry or beyond the end of the range we were given.
		 * This range could also span multiple map entries, all of which
10606		 * map different files, so make sure we only do the right amount
10607		 * of I/O for each object.  Note that it's possible for there
10608		 * to be multiple map entries all referring to the same object
10609		 * but with different page permissions, but it's not worth
10610		 * trying to optimize that case.
10611		 */
10612		len = MIN(entry->vme_end - start, end - start);
10613
10614		if ((vm_size_t) len != len) {
10615			/* 32-bit overflow */
10616			len = (vm_size_t) (0 - PAGE_SIZE);
10617		}
10618		fault_info.cluster_size = (vm_size_t) len;
10619		fault_info.lo_offset    = offset;
10620		fault_info.hi_offset    = offset + len;
10621		fault_info.user_tag     = entry->alias;
10622
10623		/*
10624		 * If there's no read permission to this mapping, then just
10625		 * skip it.
10626		 */
10627		if ((entry->protection & VM_PROT_READ) == 0) {
10628			entry = entry->vme_next;
10629			start = entry->vme_start;
10630			continue;
10631		}
10632
10633		/*
10634		 * Find the file object backing this map entry.  If there is
10635		 * none, then we simply ignore the "will need" advice for this
10636		 * entry and go on to the next one.
10637		 */
10638		if ((object = find_vnode_object(entry)) == VM_OBJECT_NULL) {
10639			entry = entry->vme_next;
10640			start = entry->vme_start;
10641			continue;
10642		}
10643
10644		/*
10645		 * The data_request() could take a long time, so let's
10646		 * release the map lock to avoid blocking other threads.
10647		 */
10648		vm_map_unlock_read(map);
10649
10650		vm_object_paging_begin(object);
10651		pager = object->pager;
10652		vm_object_unlock(object);
10653
10654		/*
10655		 * Get the data from the object asynchronously.
10656		 *
10657		 * Note that memory_object_data_request() places limits on the
10658		 * amount of I/O it will do.  Regardless of the len we
10659		 * specified, it won't do more than MAX_UPL_TRANSFER and it
10660		 * silently truncates the len to that size.  This isn't
10661		 * necessarily bad since madvise shouldn't really be used to
10662		 * page in unlimited amounts of data.  Other Unix variants
10663		 * limit the willneed case as well.  If this turns out to be an
10664		 * issue for developers, then we can always adjust the policy
10665		 * here and still be backwards compatible since this is all
10666		 * just "advice".
10667		 */
10668		kr = memory_object_data_request(
10669			pager,
10670			offset + object->paging_offset,
10671			0,	/* ignored */
10672			VM_PROT_READ,
10673			(memory_object_fault_info_t)&fault_info);
10674
10675		vm_object_lock(object);
10676		vm_object_paging_end(object);
10677		vm_object_unlock(object);
10678
10679		/*
10680		 * If we couldn't do the I/O for some reason, just give up on
10681		 * the madvise.  We still return success to the user since
10682		 * madvise isn't supposed to fail when the advice can't be
10683		 * taken.
10684		 */
10685		if (kr != KERN_SUCCESS) {
10686			return KERN_SUCCESS;
10687		}
10688
10689		start += len;
10690		if (start >= end) {
10691			/* done */
10692			return KERN_SUCCESS;
10693		}
10694
10695		/* look up next entry */
10696		vm_map_lock_read(map);
10697		if (! vm_map_lookup_entry(map, start, &entry)) {
10698			/*
10699			 * There's a new hole in the address range.
10700			 */
10701			vm_map_unlock_read(map);
10702			return KERN_INVALID_ADDRESS;
10703		}
10704	}
10705
10706	vm_map_unlock_read(map);
10707	return KERN_SUCCESS;
10708}
10709
10710static boolean_t
10711vm_map_entry_is_reusable(
10712	vm_map_entry_t entry)
10713{
10714	vm_object_t object;
10715
10716	if (entry->is_shared ||
10717	    entry->is_sub_map ||
10718	    entry->in_transition ||
10719	    entry->protection != VM_PROT_DEFAULT ||
10720	    entry->max_protection != VM_PROT_ALL ||
10721	    entry->inheritance != VM_INHERIT_DEFAULT ||
10722	    entry->no_cache ||
10723	    entry->permanent ||
10724	    entry->superpage_size != 0 ||
10725	    entry->zero_wired_pages ||
10726	    entry->wired_count != 0 ||
10727	    entry->user_wired_count != 0) {
10728		return FALSE;
10729	}
10730
10731	object = entry->object.vm_object;
10732	if (object == VM_OBJECT_NULL) {
10733		return TRUE;
10734	}
10735	if (
10736#if 0
10737		/*
10738		 * Let's proceed even if the VM object is potentially
10739		 * shared.
10740		 * We check for this later when processing the actual
10741		 * VM pages, so the contents will be safe if shared.
10742		 *
10743		 * But we can still mark this memory region as "reusable" to
10744		 * acknowledge that the caller did let us know that the memory
10745		 * could be re-used and should not be penalized for holding
10746		 * on to it.  This allows its "resident size" to not include
10747		 * the reusable range.
10748		 */
10749	    object->ref_count == 1 &&
10750#endif
10751	    object->wired_page_count == 0 &&
10752	    object->copy == VM_OBJECT_NULL &&
10753	    object->shadow == VM_OBJECT_NULL &&
10754	    object->copy_strategy == MEMORY_OBJECT_COPY_SYMMETRIC &&
10755	    object->internal &&
10756	    !object->true_share &&
10757	    object->wimg_bits == VM_WIMG_USE_DEFAULT &&
10758	    !object->code_signed) {
10759		return TRUE;
10760	}
	return FALSE;
}
10765
10766static kern_return_t
10767vm_map_reuse_pages(
10768	vm_map_t	map,
10769	vm_map_offset_t	start,
10770	vm_map_offset_t	end)
10771{
10772	vm_map_entry_t 			entry;
10773	vm_object_t			object;
10774	vm_object_offset_t		start_offset, end_offset;
10775
10776	/*
10777	 * The MADV_REUSE operation doesn't require any changes to the
10778	 * vm_map_entry_t's, so the read lock is sufficient.
10779	 */
10780
10781	vm_map_lock_read(map);
10782
10783	/*
10784	 * The madvise semantics require that the address range be fully
10785	 * allocated with no holes.  Otherwise, we're required to return
10786	 * an error.
10787	 */
10788
10789	if (!vm_map_range_check(map, start, end, &entry)) {
10790		vm_map_unlock_read(map);
10791		vm_page_stats_reusable.reuse_pages_failure++;
10792		return KERN_INVALID_ADDRESS;
10793	}
10794
10795	/*
10796	 * Examine each vm_map_entry_t in the range.
10797	 */
10798	for (; entry != vm_map_to_entry(map) && entry->vme_start < end;
10799	     entry = entry->vme_next) {
10800		/*
10801		 * Sanity check on the VM map entry.
10802		 */
10803		if (! vm_map_entry_is_reusable(entry)) {
10804			vm_map_unlock_read(map);
10805			vm_page_stats_reusable.reuse_pages_failure++;
10806			return KERN_INVALID_ADDRESS;
10807		}
10808
10809		/*
10810		 * The first time through, the start address could be anywhere
10811		 * within the vm_map_entry we found.  So adjust the offset to
10812		 * correspond.
10813		 */
10814		if (entry->vme_start < start) {
10815			start_offset = start - entry->vme_start;
10816		} else {
10817			start_offset = 0;
10818		}
10819		end_offset = MIN(end, entry->vme_end) - entry->vme_start;
10820		start_offset += entry->offset;
10821		end_offset += entry->offset;
10822
10823		object = entry->object.vm_object;
10824		if (object != VM_OBJECT_NULL) {
10825			vm_object_lock(object);
10826			vm_object_reuse_pages(object, start_offset, end_offset,
10827					      TRUE);
10828			vm_object_unlock(object);
10829		}
10830
10831		if (entry->alias == VM_MEMORY_MALLOC_LARGE_REUSABLE) {
10832			/*
10833			 * XXX
10834			 * We do not hold the VM map exclusively here.
10835			 * The "alias" field is not that critical, so it's
10836			 * safe to update it here, as long as it is the only
10837			 * one that can be modified while holding the VM map
10838			 * "shared".
10839			 */
10840			entry->alias = VM_MEMORY_MALLOC_LARGE_REUSED;
10841		}
10842	}
10843
10844	vm_map_unlock_read(map);
10845	vm_page_stats_reusable.reuse_pages_success++;
10846	return KERN_SUCCESS;
10847}
10848
10849
10850static kern_return_t
10851vm_map_reusable_pages(
10852	vm_map_t	map,
10853	vm_map_offset_t	start,
10854	vm_map_offset_t	end)
10855{
10856	vm_map_entry_t 			entry;
10857	vm_object_t			object;
10858	vm_object_offset_t		start_offset, end_offset;
10859
10860	/*
10861	 * The MADV_REUSABLE operation doesn't require any changes to the
10862	 * vm_map_entry_t's, so the read lock is sufficient.
10863	 */
10864
10865	vm_map_lock_read(map);
10866
10867	/*
10868	 * The madvise semantics require that the address range be fully
10869	 * allocated with no holes.  Otherwise, we're required to return
10870	 * an error.
10871	 */
10872
10873	if (!vm_map_range_check(map, start, end, &entry)) {
10874		vm_map_unlock_read(map);
10875		vm_page_stats_reusable.reusable_pages_failure++;
10876		return KERN_INVALID_ADDRESS;
10877	}
10878
10879	/*
10880	 * Examine each vm_map_entry_t in the range.
10881	 */
10882	for (; entry != vm_map_to_entry(map) && entry->vme_start < end;
10883	     entry = entry->vme_next) {
10884		int kill_pages = 0;
10885
10886		/*
10887		 * Sanity check on the VM map entry.
10888		 */
10889		if (! vm_map_entry_is_reusable(entry)) {
10890			vm_map_unlock_read(map);
10891			vm_page_stats_reusable.reusable_pages_failure++;
10892			return KERN_INVALID_ADDRESS;
10893		}
10894
10895		/*
10896		 * The first time through, the start address could be anywhere
10897		 * within the vm_map_entry we found.  So adjust the offset to
10898		 * correspond.
10899		 */
10900		if (entry->vme_start < start) {
10901			start_offset = start - entry->vme_start;
10902		} else {
10903			start_offset = 0;
10904		}
10905		end_offset = MIN(end, entry->vme_end) - entry->vme_start;
10906		start_offset += entry->offset;
10907		end_offset += entry->offset;
10908
10909		object = entry->object.vm_object;
10910		if (object == VM_OBJECT_NULL)
10911			continue;
10912
10913
10914		vm_object_lock(object);
10915		if (object->ref_count == 1 && !object->shadow)
10916			kill_pages = 1;
10917		else
10918			kill_pages = -1;
10919		if (kill_pages != -1) {
10920			vm_object_deactivate_pages(object,
10921						   start_offset,
10922						   end_offset - start_offset,
10923						   kill_pages,
10924						   TRUE /*reusable_pages*/);
10925		} else {
10926			vm_page_stats_reusable.reusable_pages_shared++;
10927		}
10928		vm_object_unlock(object);
10929
10930		if (entry->alias == VM_MEMORY_MALLOC_LARGE ||
10931		    entry->alias == VM_MEMORY_MALLOC_LARGE_REUSED) {
10932			/*
10933			 * XXX
10934			 * We do not hold the VM map exclusively here.
10935			 * The "alias" field is not that critical, so it's
10936			 * safe to update it here, as long as it is the only
10937			 * one that can be modified while holding the VM map
10938			 * "shared".
10939			 */
10940			entry->alias = VM_MEMORY_MALLOC_LARGE_REUSABLE;
10941		}
10942	}
10943
10944	vm_map_unlock_read(map);
10945	vm_page_stats_reusable.reusable_pages_success++;
10946	return KERN_SUCCESS;
10947}
10948
10949
10950static kern_return_t
10951vm_map_can_reuse(
10952	vm_map_t	map,
10953	vm_map_offset_t	start,
10954	vm_map_offset_t	end)
10955{
10956	vm_map_entry_t 			entry;
10957
10958	/*
	 * The MADV_CAN_REUSE operation doesn't require any changes to the
10960	 * vm_map_entry_t's, so the read lock is sufficient.
10961	 */
10962
10963	vm_map_lock_read(map);
10964
10965	/*
10966	 * The madvise semantics require that the address range be fully
10967	 * allocated with no holes.  Otherwise, we're required to return
10968	 * an error.
10969	 */
10970
10971	if (!vm_map_range_check(map, start, end, &entry)) {
10972		vm_map_unlock_read(map);
10973		vm_page_stats_reusable.can_reuse_failure++;
10974		return KERN_INVALID_ADDRESS;
10975	}
10976
10977	/*
10978	 * Examine each vm_map_entry_t in the range.
10979	 */
10980	for (; entry != vm_map_to_entry(map) && entry->vme_start < end;
10981	     entry = entry->vme_next) {
10982		/*
10983		 * Sanity check on the VM map entry.
10984		 */
10985		if (! vm_map_entry_is_reusable(entry)) {
10986			vm_map_unlock_read(map);
10987			vm_page_stats_reusable.can_reuse_failure++;
10988			return KERN_INVALID_ADDRESS;
10989		}
10990	}
10991
10992	vm_map_unlock_read(map);
10993	vm_page_stats_reusable.can_reuse_success++;
10994	return KERN_SUCCESS;
10995}
10996
10997
10998/*
10999 *	Routine:	vm_map_entry_insert
11000 *
 *	Description:	This routine inserts a new vm_map_entry in a locked map.
11002 */
11003vm_map_entry_t
11004vm_map_entry_insert(
11005	vm_map_t		map,
11006	vm_map_entry_t		insp_entry,
11007	vm_map_offset_t		start,
11008	vm_map_offset_t		end,
11009	vm_object_t		object,
11010	vm_object_offset_t	offset,
11011	boolean_t		needs_copy,
11012	boolean_t		is_shared,
11013	boolean_t		in_transition,
11014	vm_prot_t		cur_protection,
11015	vm_prot_t		max_protection,
11016	vm_behavior_t		behavior,
11017	vm_inherit_t		inheritance,
11018	unsigned		wired_count,
11019	boolean_t		no_cache,
11020	boolean_t		permanent,
11021	unsigned int		superpage_size)
11022{
11023	vm_map_entry_t	new_entry;
11024
11025	assert(insp_entry != (vm_map_entry_t)0);
11026
11027	new_entry = vm_map_entry_create(map, !map->hdr.entries_pageable);
11028
11029	new_entry->vme_start = start;
11030	new_entry->vme_end = end;
11031	assert(page_aligned(new_entry->vme_start));
11032	assert(page_aligned(new_entry->vme_end));
11033	assert(new_entry->vme_start < new_entry->vme_end);
11034
11035	new_entry->object.vm_object = object;
11036	new_entry->offset = offset;
11037	new_entry->is_shared = is_shared;
11038	new_entry->is_sub_map = FALSE;
11039	new_entry->needs_copy = needs_copy;
11040	new_entry->in_transition = in_transition;
11041	new_entry->needs_wakeup = FALSE;
11042	new_entry->inheritance = inheritance;
11043	new_entry->protection = cur_protection;
11044	new_entry->max_protection = max_protection;
11045	new_entry->behavior = behavior;
11046	new_entry->wired_count = wired_count;
11047	new_entry->user_wired_count = 0;
11048	new_entry->use_pmap = FALSE;
11049	new_entry->alias = 0;
11050	new_entry->zero_wired_pages = FALSE;
11051	new_entry->no_cache = no_cache;
11052	new_entry->permanent = permanent;
11053	new_entry->superpage_size = superpage_size;
11054	new_entry->used_for_jit = FALSE;
11055
11056	/*
11057	 *	Insert the new entry into the list.
11058	 */
11059
11060	vm_map_store_entry_link(map, insp_entry, new_entry);
11061	map->size += end - start;
11062
11063	/*
11064	 *	Update the free space hint and the lookup hint.
11065	 */
11066
11067	SAVE_HINT_MAP_WRITE(map, new_entry);
11068	return new_entry;
11069}
11070
11071/*
11072 *	Routine:	vm_map_remap_extract
11073 *
 *	Description:	This routine returns a vm_map_entry list from a map.
11075 */
11076static kern_return_t
11077vm_map_remap_extract(
11078	vm_map_t		map,
11079	vm_map_offset_t		addr,
11080	vm_map_size_t		size,
11081	boolean_t		copy,
11082	struct vm_map_header	*map_header,
11083	vm_prot_t		*cur_protection,
11084	vm_prot_t		*max_protection,
11085	/* What, no behavior? */
11086	vm_inherit_t		inheritance,
11087	boolean_t		pageable)
11088{
11089	kern_return_t		result;
11090	vm_map_size_t		mapped_size;
11091	vm_map_size_t		tmp_size;
11092	vm_map_entry_t		src_entry;     /* result of last map lookup */
11093	vm_map_entry_t		new_entry;
11094	vm_object_offset_t	offset;
11095	vm_map_offset_t		map_address;
11096	vm_map_offset_t		src_start;     /* start of entry to map */
11097	vm_map_offset_t		src_end;       /* end of region to be mapped */
11098	vm_object_t		object;
11099	vm_map_version_t	version;
11100	boolean_t		src_needs_copy;
11101	boolean_t		new_entry_needs_copy;
11102
11103	assert(map != VM_MAP_NULL);
11104	assert(size != 0 && size == vm_map_round_page(size));
11105	assert(inheritance == VM_INHERIT_NONE ||
11106	       inheritance == VM_INHERIT_COPY ||
11107	       inheritance == VM_INHERIT_SHARE);
11108
11109	/*
11110	 *	Compute start and end of region.
11111	 */
11112	src_start = vm_map_trunc_page(addr);
11113	src_end = vm_map_round_page(src_start + size);
11114
11115	/*
11116	 *	Initialize map_header.
11117	 */
11118	map_header->links.next = (struct vm_map_entry *)&map_header->links;
11119	map_header->links.prev = (struct vm_map_entry *)&map_header->links;
11120	map_header->nentries = 0;
11121	map_header->entries_pageable = pageable;
11122
11123	vm_map_store_init( map_header );
11124
11125	*cur_protection = VM_PROT_ALL;
11126	*max_protection = VM_PROT_ALL;
11127
11128	map_address = 0;
11129	mapped_size = 0;
11130	result = KERN_SUCCESS;
11131
11132	/*
11133	 *	The specified source virtual space might correspond to
	 *	multiple map entries, so we need to loop over them.
11135	 */
11136	vm_map_lock(map);
11137	while (mapped_size != size) {
11138		vm_map_size_t	entry_size;
11139
11140		/*
11141		 *	Find the beginning of the region.
11142		 */
11143		if (! vm_map_lookup_entry(map, src_start, &src_entry)) {
11144			result = KERN_INVALID_ADDRESS;
11145			break;
11146		}
11147
11148		if (src_start < src_entry->vme_start ||
11149		    (mapped_size && src_start != src_entry->vme_start)) {
11150			result = KERN_INVALID_ADDRESS;
11151			break;
11152		}
11153
11154		tmp_size = size - mapped_size;
11155		if (src_end > src_entry->vme_end)
11156			tmp_size -= (src_end - src_entry->vme_end);
11157
11158		entry_size = (vm_map_size_t)(src_entry->vme_end -
11159					     src_entry->vme_start);
11160
11161		if(src_entry->is_sub_map) {
11162			vm_map_reference(src_entry->object.sub_map);
11163			object = VM_OBJECT_NULL;
11164		} else {
11165			object = src_entry->object.vm_object;
11166
11167			if (object == VM_OBJECT_NULL) {
11168				object = vm_object_allocate(entry_size);
11169				src_entry->offset = 0;
11170				src_entry->object.vm_object = object;
11171			} else if (object->copy_strategy !=
11172				   MEMORY_OBJECT_COPY_SYMMETRIC) {
11173				/*
11174				 *	We are already using an asymmetric
11175				 *	copy, and therefore we already have
11176				 *	the right object.
11177				 */
11178				assert(!src_entry->needs_copy);
11179			} else if (src_entry->needs_copy || object->shadowed ||
11180				   (object->internal && !object->true_share &&
11181				    !src_entry->is_shared &&
11182				    object->vo_size > entry_size)) {
11183
11184				vm_object_shadow(&src_entry->object.vm_object,
11185						 &src_entry->offset,
11186						 entry_size);
11187
11188				if (!src_entry->needs_copy &&
11189				    (src_entry->protection & VM_PROT_WRITE)) {
11190				        vm_prot_t prot;
11191
11192				        prot = src_entry->protection & ~VM_PROT_WRITE;
11193
11194					if (override_nx(map, src_entry->alias) && prot)
11195					        prot |= VM_PROT_EXECUTE;
11196
11197					if(map->mapped_in_other_pmaps) {
11198						vm_object_pmap_protect(
11199							src_entry->object.vm_object,
11200							src_entry->offset,
11201							entry_size,
11202							PMAP_NULL,
11203							src_entry->vme_start,
11204							prot);
11205					} else {
11206						pmap_protect(vm_map_pmap(map),
11207							     src_entry->vme_start,
11208							     src_entry->vme_end,
11209							     prot);
11210					}
11211				}
11212
11213				object = src_entry->object.vm_object;
11214				src_entry->needs_copy = FALSE;
11215			}
11216
11217
11218			vm_object_lock(object);
11219			vm_object_reference_locked(object); /* object ref. for new entry */
11220			if (object->copy_strategy ==
11221			    MEMORY_OBJECT_COPY_SYMMETRIC) {
11222				object->copy_strategy =
11223					MEMORY_OBJECT_COPY_DELAY;
11224			}
11225			vm_object_unlock(object);
11226		}
11227
11228		offset = src_entry->offset + (src_start - src_entry->vme_start);
11229
11230		new_entry = _vm_map_entry_create(map_header, !map_header->entries_pageable);
11231		vm_map_entry_copy(new_entry, src_entry);
11232		new_entry->use_pmap = FALSE; /* clr address space specifics */
11233
11234		new_entry->vme_start = map_address;
11235		new_entry->vme_end = map_address + tmp_size;
11236		assert(new_entry->vme_start < new_entry->vme_end);
11237		new_entry->inheritance = inheritance;
11238		new_entry->offset = offset;
11239
11240		/*
11241		 * The new region has to be copied now if required.
11242		 */
11243	RestartCopy:
11244		if (!copy) {
11245			/*
11246			 * Cannot allow an entry describing a JIT
11247			 * region to be shared across address spaces.
11248			 */
11249			if (src_entry->used_for_jit == TRUE) {
11250				result = KERN_INVALID_ARGUMENT;
11251				break;
11252			}
11253			src_entry->is_shared = TRUE;
11254			new_entry->is_shared = TRUE;
11255			if (!(new_entry->is_sub_map))
11256				new_entry->needs_copy = FALSE;
11257
11258		} else if (src_entry->is_sub_map) {
11259			/* make this a COW sub_map if not already */
11260			new_entry->needs_copy = TRUE;
11261			object = VM_OBJECT_NULL;
11262		} else if (src_entry->wired_count == 0 &&
11263			   vm_object_copy_quickly(&new_entry->object.vm_object,
11264						  new_entry->offset,
11265						  (new_entry->vme_end -
11266						   new_entry->vme_start),
11267						  &src_needs_copy,
11268						  &new_entry_needs_copy)) {
11269
11270			new_entry->needs_copy = new_entry_needs_copy;
11271			new_entry->is_shared = FALSE;
11272
11273			/*
11274			 * Handle copy_on_write semantics.
11275			 */
11276			if (src_needs_copy && !src_entry->needs_copy) {
11277			        vm_prot_t prot;
11278
11279				prot = src_entry->protection & ~VM_PROT_WRITE;
11280
11281				if (override_nx(map, src_entry->alias) && prot)
11282				        prot |= VM_PROT_EXECUTE;
11283
11284				vm_object_pmap_protect(object,
11285						       offset,
11286						       entry_size,
11287						       ((src_entry->is_shared
11288							 || map->mapped_in_other_pmaps) ?
11289							PMAP_NULL : map->pmap),
11290						       src_entry->vme_start,
11291						       prot);
11292
11293				src_entry->needs_copy = TRUE;
11294			}
11295			/*
11296			 * Throw away the old object reference of the new entry.
11297			 */
11298			vm_object_deallocate(object);
11299
11300		} else {
11301			new_entry->is_shared = FALSE;
11302
11303			/*
11304			 * The map can be safely unlocked since we
11305			 * already hold a reference on the object.
11306			 *
11307			 * Record the timestamp of the map for later
11308			 * verification, and unlock the map.
11309			 */
11310			version.main_timestamp = map->timestamp;
11311			vm_map_unlock(map); 	/* Increments timestamp once! */
11312
11313			/*
11314			 * Perform the copy.
11315			 */
11316			if (src_entry->wired_count > 0) {
11317				vm_object_lock(object);
11318				result = vm_object_copy_slowly(
11319					object,
11320					offset,
11321					entry_size,
11322					THREAD_UNINT,
11323					&new_entry->object.vm_object);
11324
11325				new_entry->offset = 0;
11326				new_entry->needs_copy = FALSE;
11327			} else {
11328				result = vm_object_copy_strategically(
11329					object,
11330					offset,
11331					entry_size,
11332					&new_entry->object.vm_object,
11333					&new_entry->offset,
11334					&new_entry_needs_copy);
11335
11336				new_entry->needs_copy = new_entry_needs_copy;
11337			}
11338
11339			/*
11340			 * Throw away the old object reference of the new entry.
11341			 */
11342			vm_object_deallocate(object);
11343
11344			if (result != KERN_SUCCESS &&
11345			    result != KERN_MEMORY_RESTART_COPY) {
11346				_vm_map_entry_dispose(map_header, new_entry);
11347				break;
11348			}
11349
11350			/*
11351			 * Verify that the map has not substantially
11352			 * changed while the copy was being made.
11353			 */
11354
11355			vm_map_lock(map);
11356			if (version.main_timestamp + 1 != map->timestamp) {
11357				/*
11358				 * Simple version comparison failed.
11359				 *
11360				 * Retry the lookup and verify that the
11361				 * same object/offset are still present.
11362				 */
11363				vm_object_deallocate(new_entry->
11364						     object.vm_object);
11365				_vm_map_entry_dispose(map_header, new_entry);
11366				if (result == KERN_MEMORY_RESTART_COPY)
11367					result = KERN_SUCCESS;
11368				continue;
11369			}
11370
11371			if (result == KERN_MEMORY_RESTART_COPY) {
11372				vm_object_reference(object);
11373				goto RestartCopy;
11374			}
11375		}
11376
11377		_vm_map_store_entry_link(map_header,
11378				   map_header->links.prev, new_entry);
11379
		/* Protections for submap mappings are irrelevant here */
11381		if( !src_entry->is_sub_map ) {
11382			*cur_protection &= src_entry->protection;
11383			*max_protection &= src_entry->max_protection;
11384		}
11385		map_address += tmp_size;
11386		mapped_size += tmp_size;
11387		src_start += tmp_size;
11388
11389	} /* end while */
11390
11391	vm_map_unlock(map);
11392	if (result != KERN_SUCCESS) {
11393		/*
11394		 * Free all allocated elements.
11395		 */
11396		for (src_entry = map_header->links.next;
11397		     src_entry != (struct vm_map_entry *)&map_header->links;
11398		     src_entry = new_entry) {
11399			new_entry = src_entry->vme_next;
11400			_vm_map_store_entry_unlink(map_header, src_entry);
11401			vm_object_deallocate(src_entry->object.vm_object);
11402			_vm_map_entry_dispose(map_header, src_entry);
11403		}
11404	}
11405	return result;
11406}
11407
/*
 *	Routine:	vm_remap
 *
 *			Map a portion of a task's address space.
 *			The mapped region must not overlap more than
 *			one VM memory object.  Protections and
 *			inheritance attributes remain the same as in
 *			the original task and are out parameters.
 *			Source and target task can be identical.
 *			Other attributes are identical to those of vm_map().
 */
11419kern_return_t
11420vm_map_remap(
11421	vm_map_t		target_map,
11422	vm_map_address_t	*address,
11423	vm_map_size_t		size,
11424	vm_map_offset_t		mask,
11425	int			flags,
11426	vm_map_t		src_map,
11427	vm_map_offset_t		memory_address,
11428	boolean_t		copy,
11429	vm_prot_t		*cur_protection,
11430	vm_prot_t		*max_protection,
11431	vm_inherit_t		inheritance)
11432{
11433	kern_return_t		result;
11434	vm_map_entry_t		entry;
11435	vm_map_entry_t		insp_entry = VM_MAP_ENTRY_NULL;
11436	vm_map_entry_t		new_entry;
11437	struct vm_map_header	map_header;
11438
11439	if (target_map == VM_MAP_NULL)
11440		return KERN_INVALID_ARGUMENT;
11441
11442	switch (inheritance) {
11443	case VM_INHERIT_NONE:
11444	case VM_INHERIT_COPY:
11445	case VM_INHERIT_SHARE:
11446		if (size != 0 && src_map != VM_MAP_NULL)
11447			break;
11448		/*FALL THRU*/
11449	default:
11450		return KERN_INVALID_ARGUMENT;
11451	}
11452
11453	size = vm_map_round_page(size);
11454
11455	result = vm_map_remap_extract(src_map, memory_address,
11456				      size, copy, &map_header,
11457				      cur_protection,
11458				      max_protection,
11459				      inheritance,
11460				      target_map->hdr.
11461				      entries_pageable);
11462
11463	if (result != KERN_SUCCESS) {
11464		return result;
11465	}
11466
11467	/*
11468	 * Allocate/check a range of free virtual address
11469	 * space for the target
11470	 */
11471	*address = vm_map_trunc_page(*address);
11472	vm_map_lock(target_map);
11473	result = vm_map_remap_range_allocate(target_map, address, size,
11474					     mask, flags, &insp_entry);
11475
11476	for (entry = map_header.links.next;
11477	     entry != (struct vm_map_entry *)&map_header.links;
11478	     entry = new_entry) {
11479		new_entry = entry->vme_next;
11480		_vm_map_store_entry_unlink(&map_header, entry);
11481		if (result == KERN_SUCCESS) {
11482			entry->vme_start += *address;
11483			entry->vme_end += *address;
11484			vm_map_store_entry_link(target_map, insp_entry, entry);
11485			insp_entry = entry;
11486		} else {
11487			if (!entry->is_sub_map) {
11488				vm_object_deallocate(entry->object.vm_object);
11489			} else {
11490				vm_map_deallocate(entry->object.sub_map);
11491			}
11492			_vm_map_entry_dispose(&map_header, entry);
11493		}
11494	}
11495
	/*
	 * "insp_entry" is only valid if the remap succeeded, so consult it
	 * for the highest-entry hint only on success.
	 */
	if (result == KERN_SUCCESS && target_map->disable_vmentry_reuse == TRUE) {
		if (target_map->highest_entry_end < insp_entry->vme_end) {
			target_map->highest_entry_end = insp_entry->vme_end;
		}
	}
11501
11502	if (result == KERN_SUCCESS) {
11503		target_map->size += size;
11504		SAVE_HINT_MAP_WRITE(target_map, insp_entry);
11505	}
11506	vm_map_unlock(target_map);
11507
11508	if (result == KERN_SUCCESS && target_map->wiring_required)
11509		result = vm_map_wire(target_map, *address,
11510				     *address + size, *cur_protection, TRUE);
11511	return result;
11512}
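
/*
 * A minimal usage sketch for vm_map_remap(), assuming the caller already
 * holds references on both "src_map" and "target_map" (the variable names
 * below are illustrative only):
 *
 *	vm_map_address_t	target_addr = 0;
 *	vm_prot_t		cur_prot, max_prot;
 *	kern_return_t		kr;
 *
 *	kr = vm_map_remap(target_map,
 *			  &target_addr,
 *			  size,
 *			  (vm_map_offset_t) 0,
 *			  VM_FLAGS_ANYWHERE,
 *			  src_map,
 *			  memory_address,
 *			  FALSE,		// share rather than copy
 *			  &cur_prot,
 *			  &max_prot,
 *			  VM_INHERIT_SHARE);
 *
 * On success, "target_addr" holds the start of the new mapping, and
 * "cur_prot"/"max_prot" report the protections inherited from the source
 * entries (intersected across the range by vm_map_remap_extract() above).
 */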
11513
/*
 *	Routine:	vm_map_remap_range_allocate
 *
 *	Description:
 *		Allocate a range in the specified virtual address map.
 *		Returns the address and the map entry just before the
 *		allocated range.
 *
 *	Map must be locked.
 */
11524
11525static kern_return_t
11526vm_map_remap_range_allocate(
11527	vm_map_t		map,
11528	vm_map_address_t	*address,	/* IN/OUT */
11529	vm_map_size_t		size,
11530	vm_map_offset_t		mask,
11531	int			flags,
11532	vm_map_entry_t		*map_entry)	/* OUT */
11533{
11534	vm_map_entry_t	entry;
11535	vm_map_offset_t	start;
11536	vm_map_offset_t	end;
11537	kern_return_t	kr;
11538
11539StartAgain: ;
11540
11541	start = *address;
11542
11543	if (flags & VM_FLAGS_ANYWHERE)
11544	{
11545		/*
11546		 *	Calculate the first possible address.
11547		 */
11548
11549		if (start < map->min_offset)
11550			start = map->min_offset;
11551		if (start > map->max_offset)
11552			return(KERN_NO_SPACE);
11553
11554		/*
11555		 *	Look for the first possible address;
11556		 *	if there's already something at this
11557		 *	address, we have to start after it.
11558		 */
11559
11560		if( map->disable_vmentry_reuse == TRUE) {
11561			VM_MAP_HIGHEST_ENTRY(map, entry, start);
11562		} else {
11563			assert(first_free_is_valid(map));
11564			if (start == map->min_offset) {
11565				if ((entry = map->first_free) != vm_map_to_entry(map))
11566					start = entry->vme_end;
11567			} else {
11568				vm_map_entry_t	tmp_entry;
11569				if (vm_map_lookup_entry(map, start, &tmp_entry))
11570					start = tmp_entry->vme_end;
11571				entry = tmp_entry;
11572			}
11573		}
11574
11575		/*
11576		 *	In any case, the "entry" always precedes
11577		 *	the proposed new region throughout the
11578		 *	loop:
11579		 */
11580
11581		while (TRUE) {
11582			register vm_map_entry_t	next;
11583
11584			/*
11585			 *	Find the end of the proposed new region.
11586			 *	Be sure we didn't go beyond the end, or
11587			 *	wrap around the address.
11588			 */
11589
11590			end = ((start + mask) & ~mask);
11591			if (end < start)
11592				return(KERN_NO_SPACE);
11593			start = end;
11594			end += size;
11595
11596			if ((end > map->max_offset) || (end < start)) {
11597				if (map->wait_for_space) {
11598					if (size <= (map->max_offset -
11599						     map->min_offset)) {
11600						assert_wait((event_t) map, THREAD_INTERRUPTIBLE);
11601						vm_map_unlock(map);
11602						thread_block(THREAD_CONTINUE_NULL);
11603						vm_map_lock(map);
11604						goto StartAgain;
11605					}
11606				}
11607
11608				return(KERN_NO_SPACE);
11609			}
11610
11611			/*
11612			 *	If there are no more entries, we must win.
11613			 */
11614
11615			next = entry->vme_next;
11616			if (next == vm_map_to_entry(map))
11617				break;
11618
11619			/*
11620			 *	If there is another entry, it must be
11621			 *	after the end of the potential new region.
11622			 */
11623
11624			if (next->vme_start >= end)
11625				break;
11626
11627			/*
11628			 *	Didn't fit -- move to the next entry.
11629			 */
11630
11631			entry = next;
11632			start = entry->vme_end;
11633		}
11634		*address = start;
11635	} else {
11636		vm_map_entry_t		temp_entry;
11637
11638		/*
11639		 *	Verify that:
11640		 *		the address doesn't itself violate
11641		 *		the mask requirement.
11642		 */
11643
11644		if ((start & mask) != 0)
11645			return(KERN_NO_SPACE);
11646
11647
11648		/*
11649		 *	...	the address is within bounds
11650		 */
11651
11652		end = start + size;
11653
11654		if ((start < map->min_offset) ||
11655		    (end > map->max_offset) ||
11656		    (start >= end)) {
11657			return(KERN_INVALID_ADDRESS);
11658		}
11659
11660		/*
11661		 * If we're asked to overwrite whatever was mapped in that
11662		 * range, first deallocate that range.
11663		 */
11664		if (flags & VM_FLAGS_OVERWRITE) {
11665			vm_map_t zap_map;
11666
11667			/*
11668			 * We use a "zap_map" to avoid having to unlock
11669			 * the "map" in vm_map_delete(), which would compromise
11670			 * the atomicity of the "deallocate" and then "remap"
11671			 * combination.
11672			 */
11673			zap_map = vm_map_create(PMAP_NULL,
11674						start,
11675						end,
11676						map->hdr.entries_pageable);
11677			if (zap_map == VM_MAP_NULL) {
11678				return KERN_RESOURCE_SHORTAGE;
11679			}
11680
11681			kr = vm_map_delete(map, start, end,
11682					   VM_MAP_REMOVE_SAVE_ENTRIES,
11683					   zap_map);
11684			if (kr == KERN_SUCCESS) {
11685				vm_map_destroy(zap_map,
11686					       VM_MAP_REMOVE_NO_PMAP_CLEANUP);
11687				zap_map = VM_MAP_NULL;
11688			}
11689		}
11690
11691		/*
11692		 *	...	the starting address isn't allocated
11693		 */
11694
11695		if (vm_map_lookup_entry(map, start, &temp_entry))
11696			return(KERN_NO_SPACE);
11697
11698		entry = temp_entry;
11699
11700		/*
11701		 *	...	the next region doesn't overlap the
11702		 *		end point.
11703		 */
11704
11705		if ((entry->vme_next != vm_map_to_entry(map)) &&
11706		    (entry->vme_next->vme_start < end))
11707			return(KERN_NO_SPACE);
11708	}
11709	*map_entry = entry;
11710	return(KERN_SUCCESS);
11711}
11712
11713/*
11714 *	vm_map_switch:
11715 *
11716 *	Set the address map for the current thread to the specified map
11717 */
11718
11719vm_map_t
11720vm_map_switch(
11721	vm_map_t	map)
11722{
11723	int		mycpu;
11724	thread_t	thread = current_thread();
11725	vm_map_t	oldmap = thread->map;
11726
11727	mp_disable_preemption();
11728	mycpu = cpu_number();
11729
11730	/*
11731	 *	Deactivate the current map and activate the requested map
11732	 */
11733	PMAP_SWITCH_USER(thread, map, mycpu);
11734
11735	mp_enable_preemption();
11736	return(oldmap);
11737}
11738
11739
/*
 *	Routine:	vm_map_write_user
 *
 *	Description:
 *		Copy out data from kernel space into space in the
 *		destination map.  The space must already exist in the
 *		destination map.
 *		NOTE:  This routine should only be called by threads
 *		which can block on a page fault, i.e. kernel-mode user
 *		threads.
 *
 */
11752kern_return_t
11753vm_map_write_user(
11754	vm_map_t		map,
11755	void			*src_p,
11756	vm_map_address_t	dst_addr,
11757	vm_size_t		size)
11758{
11759	kern_return_t	kr = KERN_SUCCESS;
11760
11761	if(current_map() == map) {
11762		if (copyout(src_p, dst_addr, size)) {
11763			kr = KERN_INVALID_ADDRESS;
11764		}
11765	} else {
11766		vm_map_t	oldmap;
11767
11768		/* take on the identity of the target map while doing */
11769		/* the transfer */
11770
11771		vm_map_reference(map);
11772		oldmap = vm_map_switch(map);
11773		if (copyout(src_p, dst_addr, size)) {
11774			kr = KERN_INVALID_ADDRESS;
11775		}
11776		vm_map_switch(oldmap);
11777		vm_map_deallocate(map);
11778	}
11779	return kr;
11780}
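
/*
 * A minimal usage sketch for vm_map_write_user(), assuming "buf" is a
 * kernel buffer and "user_addr" is an existing, writable mapping in the
 * target task's map ("target_task" and "user_addr" are illustrative):
 *
 *	char		buf[64];
 *	kern_return_t	kr;
 *
 *	kr = vm_map_write_user(target_task->map, buf,
 *			       (vm_map_address_t) user_addr,
 *			       sizeof (buf));
 *	if (kr != KERN_SUCCESS) {
 *		// KERN_INVALID_ADDRESS: the copyout faulted
 *	}
 *
 * When the target map is not the current map, the routine temporarily
 * switches to it via vm_map_switch(), so the caller must be able to
 * block on the resulting page faults.
 */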
11781
/*
 *	Routine:	vm_map_read_user
 *
 *	Description:
 *		Copy in data from a user space source map into the
 *		kernel map.  The space must already exist in the
 *		kernel map.
 *		NOTE:  This routine should only be called by threads
 *		which can block on a page fault, i.e. kernel-mode user
 *		threads.
 *
 */
11794kern_return_t
11795vm_map_read_user(
11796	vm_map_t		map,
11797	vm_map_address_t	src_addr,
11798	void			*dst_p,
11799	vm_size_t		size)
11800{
11801	kern_return_t	kr = KERN_SUCCESS;
11802
11803	if(current_map() == map) {
11804		if (copyin(src_addr, dst_p, size)) {
11805			kr = KERN_INVALID_ADDRESS;
11806		}
11807	} else {
11808		vm_map_t	oldmap;
11809
11810		/* take on the identity of the target map while doing */
11811		/* the transfer */
11812
11813		vm_map_reference(map);
11814		oldmap = vm_map_switch(map);
11815		if (copyin(src_addr, dst_p, size)) {
11816			kr = KERN_INVALID_ADDRESS;
11817		}
11818		vm_map_switch(oldmap);
11819		vm_map_deallocate(map);
11820	}
11821	return kr;
11822}
11823
11824
11825/*
11826 *	vm_map_check_protection:
11827 *
11828 *	Assert that the target map allows the specified
11829 *	privilege on the entire address region given.
11830 *	The entire region must be allocated.
11831 */
11832boolean_t
11833vm_map_check_protection(vm_map_t map, vm_map_offset_t start,
11834			vm_map_offset_t end, vm_prot_t protection)
11835{
11836	vm_map_entry_t entry;
11837	vm_map_entry_t tmp_entry;
11838
11839	vm_map_lock(map);
11840
11841	if (start < vm_map_min(map) || end > vm_map_max(map) || start > end)
11842	{
11843		vm_map_unlock(map);
11844		return (FALSE);
11845	}
11846
11847	if (!vm_map_lookup_entry(map, start, &tmp_entry)) {
11848		vm_map_unlock(map);
11849		return(FALSE);
11850	}
11851
11852	entry = tmp_entry;
11853
11854	while (start < end) {
11855		if (entry == vm_map_to_entry(map)) {
11856			vm_map_unlock(map);
11857			return(FALSE);
11858		}
11859
11860		/*
11861		 *	No holes allowed!
11862		 */
11863
11864		if (start < entry->vme_start) {
11865			vm_map_unlock(map);
11866			return(FALSE);
11867		}
11868
11869		/*
11870		 * Check protection associated with entry.
11871		 */
11872
11873		if ((entry->protection & protection) != protection) {
11874			vm_map_unlock(map);
11875			return(FALSE);
11876		}
11877
11878		/* go to next entry */
11879
11880		start = entry->vme_end;
11881		entry = entry->vme_next;
11882	}
11883	vm_map_unlock(map);
11884	return(TRUE);
11885}
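
/*
 * A minimal usage sketch for vm_map_check_protection(); the range must be
 * fully allocated for the check to pass ("addr" and "len" are illustrative):
 *
 *	if (!vm_map_check_protection(map,
 *				     vm_map_trunc_page(addr),
 *				     vm_map_round_page(addr + len),
 *				     VM_PROT_READ | VM_PROT_WRITE)) {
 *		return KERN_PROTECTION_FAILURE;
 *	}
 */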
11886
11887kern_return_t
11888vm_map_purgable_control(
11889	vm_map_t		map,
11890	vm_map_offset_t		address,
11891	vm_purgable_t		control,
11892	int			*state)
11893{
11894	vm_map_entry_t		entry;
11895	vm_object_t		object;
11896	kern_return_t		kr;
11897
11898	/*
11899	 * Vet all the input parameters and current type and state of the
	 * underlying object.  Return with an error if anything is amiss.
11901	 */
11902	if (map == VM_MAP_NULL)
11903		return(KERN_INVALID_ARGUMENT);
11904
11905	if (control != VM_PURGABLE_SET_STATE &&
11906	    control != VM_PURGABLE_GET_STATE &&
11907	    control != VM_PURGABLE_PURGE_ALL)
11908		return(KERN_INVALID_ARGUMENT);
11909
11910	if (control == VM_PURGABLE_PURGE_ALL) {
11911		vm_purgeable_object_purge_all();
11912		return KERN_SUCCESS;
11913	}
11914
11915	if (control == VM_PURGABLE_SET_STATE &&
11916	    (((*state & ~(VM_PURGABLE_ALL_MASKS)) != 0) ||
11917	     ((*state & VM_PURGABLE_STATE_MASK) > VM_PURGABLE_STATE_MASK)))
11918		return(KERN_INVALID_ARGUMENT);
11919
11920	vm_map_lock_read(map);
11921
11922	if (!vm_map_lookup_entry(map, address, &entry) || entry->is_sub_map) {
11923
11924		/*
11925		 * Must pass a valid non-submap address.
11926		 */
11927		vm_map_unlock_read(map);
11928		return(KERN_INVALID_ADDRESS);
11929	}
11930
11931	if ((entry->protection & VM_PROT_WRITE) == 0) {
11932		/*
11933		 * Can't apply purgable controls to something you can't write.
11934		 */
11935		vm_map_unlock_read(map);
11936		return(KERN_PROTECTION_FAILURE);
11937	}
11938
11939	object = entry->object.vm_object;
11940	if (object == VM_OBJECT_NULL) {
11941		/*
11942		 * Object must already be present or it can't be purgable.
11943		 */
11944		vm_map_unlock_read(map);
11945		return KERN_INVALID_ARGUMENT;
11946	}
11947
11948	vm_object_lock(object);
11949
11950	if (entry->offset != 0 ||
11951	    entry->vme_end - entry->vme_start != object->vo_size) {
11952		/*
11953		 * Can only apply purgable controls to the whole (existing)
11954		 * object at once.
11955		 */
11956		vm_map_unlock_read(map);
11957		vm_object_unlock(object);
11958		return KERN_INVALID_ARGUMENT;
11959	}
11960
11961	vm_map_unlock_read(map);
11962
11963	kr = vm_object_purgable_control(object, control, state);
11964
11965	vm_object_unlock(object);
11966
11967	return kr;
11968}
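
/*
 * A minimal usage sketch for vm_map_purgable_control(), marking the object
 * backing "address" volatile and later querying its state ("address" must
 * map a whole purgeable object at offset 0, as checked above):
 *
 *	int		state;
 *	kern_return_t	kr;
 *
 *	state = VM_PURGABLE_VOLATILE;
 *	kr = vm_map_purgable_control(map, address,
 *				     VM_PURGABLE_SET_STATE, &state);
 *
 *	kr = vm_map_purgable_control(map, address,
 *				     VM_PURGABLE_GET_STATE, &state);
 *	// "state" now reports VM_PURGABLE_VOLATILE, or VM_PURGABLE_EMPTY if
 *	// the object has been purged in the meantime.
 */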
11969
11970kern_return_t
11971vm_map_page_query_internal(
11972	vm_map_t	target_map,
11973	vm_map_offset_t	offset,
11974	int		*disposition,
11975	int		*ref_count)
11976{
11977	kern_return_t			kr;
11978	vm_page_info_basic_data_t	info;
11979	mach_msg_type_number_t		count;
11980
11981	count = VM_PAGE_INFO_BASIC_COUNT;
11982	kr = vm_map_page_info(target_map,
11983			      offset,
11984			      VM_PAGE_INFO_BASIC,
11985			      (vm_page_info_t) &info,
11986			      &count);
11987	if (kr == KERN_SUCCESS) {
11988		*disposition = info.disposition;
11989		*ref_count = info.ref_count;
11990	} else {
11991		*disposition = 0;
11992		*ref_count = 0;
11993	}
11994
11995	return kr;
11996}
11997
11998kern_return_t
11999vm_map_page_info(
12000	vm_map_t		map,
12001	vm_map_offset_t		offset,
12002	vm_page_info_flavor_t	flavor,
12003	vm_page_info_t		info,
12004	mach_msg_type_number_t	*count)
12005{
12006	vm_map_entry_t		map_entry;
12007	vm_object_t		object;
12008	vm_page_t		m;
12009	kern_return_t		kr;
12010	kern_return_t		retval = KERN_SUCCESS;
12011	boolean_t		top_object;
12012	int			disposition;
12013	int 			ref_count;
12014	vm_object_id_t		object_id;
12015	vm_page_info_basic_t	basic_info;
12016	int			depth;
12017	vm_map_offset_t		offset_in_page;
12018
12019	switch (flavor) {
12020	case VM_PAGE_INFO_BASIC:
12021		if (*count != VM_PAGE_INFO_BASIC_COUNT) {
12022			/*
12023			 * The "vm_page_info_basic_data" structure was not
12024			 * properly padded, so allow the size to be off by
12025			 * one to maintain backwards binary compatibility...
12026			 */
12027			if (*count != VM_PAGE_INFO_BASIC_COUNT - 1)
12028				return KERN_INVALID_ARGUMENT;
12029		}
12030		break;
12031	default:
12032		return KERN_INVALID_ARGUMENT;
12033	}
12034
12035	disposition = 0;
12036	ref_count = 0;
12037	object_id = 0;
12038	top_object = TRUE;
12039	depth = 0;
12040
12041	retval = KERN_SUCCESS;
12042	offset_in_page = offset & PAGE_MASK;
12043	offset = vm_map_trunc_page(offset);
12044
12045	vm_map_lock_read(map);
12046
12047	/*
12048	 * First, find the map entry covering "offset", going down
12049	 * submaps if necessary.
12050	 */
12051	for (;;) {
12052		if (!vm_map_lookup_entry(map, offset, &map_entry)) {
12053			vm_map_unlock_read(map);
12054			return KERN_INVALID_ADDRESS;
12055		}
12056		/* compute offset from this map entry's start */
12057		offset -= map_entry->vme_start;
12058		/* compute offset into this map entry's object (or submap) */
12059		offset += map_entry->offset;
12060
12061		if (map_entry->is_sub_map) {
12062			vm_map_t sub_map;
12063
12064			sub_map = map_entry->object.sub_map;
12065			vm_map_lock_read(sub_map);
12066			vm_map_unlock_read(map);
12067
12068			map = sub_map;
12069
12070			ref_count = MAX(ref_count, map->ref_count);
12071			continue;
12072		}
12073		break;
12074	}
12075
12076	object = map_entry->object.vm_object;
12077	if (object == VM_OBJECT_NULL) {
12078		/* no object -> no page */
12079		vm_map_unlock_read(map);
12080		goto done;
12081	}
12082
12083	vm_object_lock(object);
12084	vm_map_unlock_read(map);
12085
12086	/*
12087	 * Go down the VM object shadow chain until we find the page
12088	 * we're looking for.
12089	 */
12090	for (;;) {
12091		ref_count = MAX(ref_count, object->ref_count);
12092
12093		m = vm_page_lookup(object, offset);
12094
12095		if (m != VM_PAGE_NULL) {
12096			disposition |= VM_PAGE_QUERY_PAGE_PRESENT;
12097			break;
12098		} else {
12099#if MACH_PAGEMAP
12100			if (object->existence_map) {
12101				if (vm_external_state_get(object->existence_map,
12102							  offset) ==
12103				    VM_EXTERNAL_STATE_EXISTS) {
12104					/*
12105					 * this page has been paged out
12106					 */
12107				        disposition |= VM_PAGE_QUERY_PAGE_PAGED_OUT;
12108					break;
12109				}
12110			} else
12111#endif
12112			{
12113				if (object->internal &&
12114				    object->alive &&
12115				    !object->terminating &&
12116				    object->pager_ready) {
12117
12118					memory_object_t pager;
12119
12120					vm_object_paging_begin(object);
12121					pager = object->pager;
12122					vm_object_unlock(object);
12123
12124					/*
12125					 * Ask the default pager if
12126					 * it has this page.
12127					 */
12128					kr = memory_object_data_request(
12129						pager,
12130						offset + object->paging_offset,
12131						0, /* just poke the pager */
12132						VM_PROT_READ,
12133						NULL);
12134
12135					vm_object_lock(object);
12136					vm_object_paging_end(object);
12137
12138					if (kr == KERN_SUCCESS) {
12139						/* the default pager has it */
12140						disposition |= VM_PAGE_QUERY_PAGE_PAGED_OUT;
12141						break;
12142					}
12143				}
12144			}
12145
12146			if (object->shadow != VM_OBJECT_NULL) {
12147			        vm_object_t shadow;
12148
12149				offset += object->vo_shadow_offset;
12150				shadow = object->shadow;
12151
12152				vm_object_lock(shadow);
12153				vm_object_unlock(object);
12154
12155				object = shadow;
12156				top_object = FALSE;
12157				depth++;
12158			} else {
12159//			        if (!object->internal)
12160//				        break;
12161//				retval = KERN_FAILURE;
12162//				goto done_with_object;
12163				break;
12164			}
12165		}
12166	}
	/*
	 * The ref_count is not strictly accurate: it measures the number
	 * of entities holding a reference on the object.  They may not be
	 * mapping the object or may not be mapping the section holding the
	 * target page, but it is still a ballpark number and, though an
	 * overcount, it picks up the copy-on-write cases.
	 *
	 * We could also get a picture of page sharing from pmap_attributes,
	 * but that would undercount, as only faulted-in mappings would
	 * show up.
	 */
12176
12177	if (top_object == TRUE && object->shadow)
12178		disposition |= VM_PAGE_QUERY_PAGE_COPIED;
12179
12180	if (! object->internal)
12181		disposition |= VM_PAGE_QUERY_PAGE_EXTERNAL;
12182
12183	if (m == VM_PAGE_NULL)
12184	        goto done_with_object;
12185
12186	if (m->fictitious) {
12187		disposition |= VM_PAGE_QUERY_PAGE_FICTITIOUS;
12188		goto done_with_object;
12189	}
12190	if (m->dirty || pmap_is_modified(m->phys_page))
12191		disposition |= VM_PAGE_QUERY_PAGE_DIRTY;
12192
12193	if (m->reference || pmap_is_referenced(m->phys_page))
12194		disposition |= VM_PAGE_QUERY_PAGE_REF;
12195
12196	if (m->speculative)
12197		disposition |= VM_PAGE_QUERY_PAGE_SPECULATIVE;
12198
12199	if (m->cs_validated)
12200		disposition |= VM_PAGE_QUERY_PAGE_CS_VALIDATED;
12201	if (m->cs_tainted)
12202		disposition |= VM_PAGE_QUERY_PAGE_CS_TAINTED;
12203
12204done_with_object:
12205	vm_object_unlock(object);
12206done:
12207
12208	switch (flavor) {
12209	case VM_PAGE_INFO_BASIC:
12210		basic_info = (vm_page_info_basic_t) info;
12211		basic_info->disposition = disposition;
12212		basic_info->ref_count = ref_count;
12213		basic_info->object_id = (vm_object_id_t) (uintptr_t) object;
12214		basic_info->offset =
12215			(memory_object_offset_t) offset + offset_in_page;
12216		basic_info->depth = depth;
12217		break;
12218	}
12219
12220	return retval;
12221}
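
/*
 * A minimal usage sketch for vm_map_page_info() with the basic flavor
 * (this is the same pattern used by vm_map_page_query_internal() above):
 *
 *	vm_page_info_basic_data_t	info;
 *	mach_msg_type_number_t		count;
 *	kern_return_t			kr;
 *
 *	count = VM_PAGE_INFO_BASIC_COUNT;
 *	kr = vm_map_page_info(map, offset, VM_PAGE_INFO_BASIC,
 *			      (vm_page_info_t) &info, &count);
 *	if (kr == KERN_SUCCESS &&
 *	    (info.disposition & VM_PAGE_QUERY_PAGE_PRESENT)) {
 *		// the page is resident somewhere in the shadow chain;
 *		// "info.depth" tells how far down it was found.
 *	}
 */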
12222
12223/*
12224 *	vm_map_msync
12225 *
 *	Synchronizes the memory range specified with its backing store
 *	image, by either flushing or cleaning the contents to the
 *	appropriate memory manager and engaging in a memory object
 *	synchronize dialog with that manager.  The client doesn't return
 *	until the manager issues an m_o_s_completed message.  MIG magically
 *	converts the user task parameter to the task's address map.
12232 *
12233 *	interpretation of sync_flags
12234 *	VM_SYNC_INVALIDATE	- discard pages, only return precious
12235 *				  pages to manager.
12236 *
12237 *	VM_SYNC_INVALIDATE & (VM_SYNC_SYNCHRONOUS | VM_SYNC_ASYNCHRONOUS)
12238 *				- discard pages, write dirty or precious
12239 *				  pages back to memory manager.
12240 *
12241 *	VM_SYNC_SYNCHRONOUS | VM_SYNC_ASYNCHRONOUS
12242 *				- write dirty or precious pages back to
12243 *				  the memory manager.
12244 *
12245 *	VM_SYNC_CONTIGUOUS	- does everything normally, but if there
12246 *				  is a hole in the region, and we would
12247 *				  have returned KERN_SUCCESS, return
12248 *				  KERN_INVALID_ADDRESS instead.
12249 *
 *	NOTE
 *	The memory object attributes have not yet been implemented; this
 *	function will have to deal with the invalidate attribute.
12253 *
12254 *	RETURNS
12255 *	KERN_INVALID_TASK		Bad task parameter
12256 *	KERN_INVALID_ARGUMENT		both sync and async were specified.
12257 *	KERN_SUCCESS			The usual.
12258 *	KERN_INVALID_ADDRESS		There was a hole in the region.
12259 */
12260
12261kern_return_t
12262vm_map_msync(
12263	vm_map_t		map,
12264	vm_map_address_t	address,
12265	vm_map_size_t		size,
12266	vm_sync_t		sync_flags)
12267{
12268	msync_req_t		msr;
12269	msync_req_t		new_msr;
12270	queue_chain_t		req_q;	/* queue of requests for this msync */
12271	vm_map_entry_t		entry;
12272	vm_map_size_t		amount_left;
12273	vm_object_offset_t	offset;
12274	boolean_t		do_sync_req;
12275	boolean_t		had_hole = FALSE;
12276	memory_object_t		pager;
12277
12278	if ((sync_flags & VM_SYNC_ASYNCHRONOUS) &&
12279	    (sync_flags & VM_SYNC_SYNCHRONOUS))
12280		return(KERN_INVALID_ARGUMENT);
12281
12282	/*
12283	 * align address and size on page boundaries
12284	 */
12285	size = vm_map_round_page(address + size) - vm_map_trunc_page(address);
12286	address = vm_map_trunc_page(address);
12287
12288        if (map == VM_MAP_NULL)
12289                return(KERN_INVALID_TASK);
12290
12291	if (size == 0)
12292		return(KERN_SUCCESS);
12293
12294	queue_init(&req_q);
12295	amount_left = size;
12296
12297	while (amount_left > 0) {
12298		vm_object_size_t	flush_size;
12299		vm_object_t		object;
12300
12301		vm_map_lock(map);
12302		if (!vm_map_lookup_entry(map,
12303					 vm_map_trunc_page(address), &entry)) {
12304
12305			vm_map_size_t	skip;
12306
12307			/*
12308			 * hole in the address map.
12309			 */
12310			had_hole = TRUE;
12311
12312			/*
12313			 * Check for empty map.
12314			 */
12315			if (entry == vm_map_to_entry(map) &&
12316			    entry->vme_next == entry) {
12317				vm_map_unlock(map);
12318				break;
12319			}
12320			/*
12321			 * Check that we don't wrap and that
12322			 * we have at least one real map entry.
12323			 */
12324			if ((map->hdr.nentries == 0) ||
12325			    (entry->vme_next->vme_start < address)) {
12326				vm_map_unlock(map);
12327				break;
12328			}
12329			/*
12330			 * Move up to the next entry if needed
12331			 */
12332			skip = (entry->vme_next->vme_start - address);
12333			if (skip >= amount_left)
12334				amount_left = 0;
12335			else
12336				amount_left -= skip;
12337			address = entry->vme_next->vme_start;
12338			vm_map_unlock(map);
12339			continue;
12340		}
12341
12342		offset = address - entry->vme_start;
12343
12344		/*
12345		 * do we have more to flush than is contained in this
12346		 * entry ?
12347		 */
12348		if (amount_left + entry->vme_start + offset > entry->vme_end) {
12349			flush_size = entry->vme_end -
12350				(entry->vme_start + offset);
12351		} else {
12352			flush_size = amount_left;
12353		}
12354		amount_left -= flush_size;
12355		address += flush_size;
12356
12357		if (entry->is_sub_map == TRUE) {
12358			vm_map_t	local_map;
12359			vm_map_offset_t	local_offset;
12360
12361			local_map = entry->object.sub_map;
12362			local_offset = entry->offset;
12363			vm_map_unlock(map);
12364			if (vm_map_msync(
12365				    local_map,
12366				    local_offset,
12367				    flush_size,
12368				    sync_flags) == KERN_INVALID_ADDRESS) {
12369				had_hole = TRUE;
12370			}
12371			continue;
12372		}
12373		object = entry->object.vm_object;
12374
12375		/*
12376		 * We can't sync this object if the object has not been
12377		 * created yet
12378		 */
12379		if (object == VM_OBJECT_NULL) {
12380			vm_map_unlock(map);
12381			continue;
12382		}
12383		offset += entry->offset;
12384
12385                vm_object_lock(object);
12386
12387		if (sync_flags & (VM_SYNC_KILLPAGES | VM_SYNC_DEACTIVATE)) {
12388		        int kill_pages = 0;
12389			boolean_t reusable_pages = FALSE;
12390
12391			if (sync_flags & VM_SYNC_KILLPAGES) {
12392			        if (object->ref_count == 1 && !object->shadow)
12393				        kill_pages = 1;
12394				else
12395				        kill_pages = -1;
12396			}
12397			if (kill_pages != -1)
12398			        vm_object_deactivate_pages(object, offset,
12399							   (vm_object_size_t)flush_size, kill_pages, reusable_pages);
12400			vm_object_unlock(object);
12401			vm_map_unlock(map);
12402			continue;
12403		}
12404		/*
12405		 * We can't sync this object if there isn't a pager.
12406		 * Don't bother to sync internal objects, since there can't
12407		 * be any "permanent" storage for these objects anyway.
12408		 */
12409		if ((object->pager == MEMORY_OBJECT_NULL) ||
12410		    (object->internal) || (object->private)) {
12411			vm_object_unlock(object);
12412			vm_map_unlock(map);
12413			continue;
12414		}
12415		/*
12416		 * keep reference on the object until syncing is done
12417		 */
12418		vm_object_reference_locked(object);
12419		vm_object_unlock(object);
12420
12421		vm_map_unlock(map);
12422
12423		do_sync_req = vm_object_sync(object,
12424					     offset,
12425					     flush_size,
12426					     sync_flags & VM_SYNC_INVALIDATE,
12427					     ((sync_flags & VM_SYNC_SYNCHRONOUS) ||
12428					      (sync_flags & VM_SYNC_ASYNCHRONOUS)),
12429					     sync_flags & VM_SYNC_SYNCHRONOUS);
		/*
		 * only send an m_o_s if we returned pages or if the entry
		 * is writable (i.e. dirty pages may have already been sent back)
		 */
12434		if (!do_sync_req) {
12435			if ((sync_flags & VM_SYNC_INVALIDATE) && object->resident_page_count == 0) {
12436				/*
12437				 * clear out the clustering and read-ahead hints
12438				 */
12439				vm_object_lock(object);
12440
12441				object->pages_created = 0;
12442				object->pages_used = 0;
12443				object->sequential = 0;
12444				object->last_alloc = 0;
12445
12446				vm_object_unlock(object);
12447			}
12448			vm_object_deallocate(object);
12449			continue;
12450		}
12451		msync_req_alloc(new_msr);
12452
12453                vm_object_lock(object);
12454		offset += object->paging_offset;
12455
12456		new_msr->offset = offset;
12457		new_msr->length = flush_size;
12458		new_msr->object = object;
12459		new_msr->flag = VM_MSYNC_SYNCHRONIZING;
12460	re_iterate:
12461
12462		/*
12463		 * We can't sync this object if there isn't a pager.  The
12464		 * pager can disappear anytime we're not holding the object
12465		 * lock.  So this has to be checked anytime we goto re_iterate.
12466		 */
12467
12468		pager = object->pager;
12469
12470		if (pager == MEMORY_OBJECT_NULL) {
12471			vm_object_unlock(object);
12472			vm_object_deallocate(object);
12473			continue;
12474		}
12475
12476		queue_iterate(&object->msr_q, msr, msync_req_t, msr_q) {
12477			/*
12478			 * need to check for overlapping entry, if found, wait
12479			 * on overlapping msr to be done, then reiterate
12480			 */
12481			msr_lock(msr);
12482			if (msr->flag == VM_MSYNC_SYNCHRONIZING &&
12483			    ((offset >= msr->offset &&
12484			      offset < (msr->offset + msr->length)) ||
12485			     (msr->offset >= offset &&
12486			      msr->offset < (offset + flush_size))))
12487			{
12488				assert_wait((event_t) msr,THREAD_INTERRUPTIBLE);
12489				msr_unlock(msr);
12490				vm_object_unlock(object);
12491				thread_block(THREAD_CONTINUE_NULL);
12492				vm_object_lock(object);
12493				goto re_iterate;
12494			}
12495			msr_unlock(msr);
12496		}/* queue_iterate */
12497
12498		queue_enter(&object->msr_q, new_msr, msync_req_t, msr_q);
12499
12500		vm_object_paging_begin(object);
12501		vm_object_unlock(object);
12502
12503		queue_enter(&req_q, new_msr, msync_req_t, req_q);
12504
12505		(void) memory_object_synchronize(
12506			pager,
12507			offset,
12508			flush_size,
12509			sync_flags & ~VM_SYNC_CONTIGUOUS);
12510
12511		vm_object_lock(object);
12512		vm_object_paging_end(object);
12513		vm_object_unlock(object);
12514	}/* while */
12515
12516	/*
	 * wait for memory_object_synchronize_completed messages from pager(s)
12518	 */
12519
12520	while (!queue_empty(&req_q)) {
12521		msr = (msync_req_t)queue_first(&req_q);
12522		msr_lock(msr);
12523		while(msr->flag != VM_MSYNC_DONE) {
12524			assert_wait((event_t) msr, THREAD_INTERRUPTIBLE);
12525			msr_unlock(msr);
12526			thread_block(THREAD_CONTINUE_NULL);
12527			msr_lock(msr);
12528		}/* while */
12529		queue_remove(&req_q, msr, msync_req_t, req_q);
12530		msr_unlock(msr);
12531		vm_object_deallocate(msr->object);
12532		msync_req_free(msr);
12533	}/* queue_iterate */
12534
12535	/* for proper msync() behaviour */
12536	if (had_hole == TRUE && (sync_flags & VM_SYNC_CONTIGUOUS))
12537		return(KERN_INVALID_ADDRESS);
12538
12539	return(KERN_SUCCESS);
12540}/* vm_msync */
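
/*
 * A minimal usage sketch for vm_map_msync(), mirroring a synchronous
 * msync(MS_SYNC | MS_INVALIDATE) over a range ("addr" and "len" are
 * illustrative; the routine page-aligns them itself):
 *
 *	kern_return_t	kr;
 *
 *	kr = vm_map_msync(map,
 *			  (vm_map_address_t) addr,
 *			  (vm_map_size_t) len,
 *			  VM_SYNC_SYNCHRONOUS | VM_SYNC_INVALIDATE |
 *			  VM_SYNC_CONTIGUOUS);
 *	if (kr == KERN_INVALID_ADDRESS) {
 *		// VM_SYNC_CONTIGUOUS was set and the range had a hole
 *	}
 */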
12541
/*
 *	Routine:	convert_port_entry_to_map
 *	Purpose:
 *		Convert from a port specifying an entry or a task
 *		to a map.  Doesn't consume the port ref; produces a map ref,
 *		which may be null.  Unlike convert_port_to_map, the
 *		port may be either task-backed or named-entry-backed.
 *	Conditions:
 *		Nothing locked.
 */
12552
12553
12554vm_map_t
12555convert_port_entry_to_map(
12556	ipc_port_t	port)
12557{
12558	vm_map_t map;
12559	vm_named_entry_t	named_entry;
12560	uint32_t	try_failed_count = 0;
12561
12562	if(IP_VALID(port) && (ip_kotype(port) == IKOT_NAMED_ENTRY)) {
12563		while(TRUE) {
12564			ip_lock(port);
12565			if(ip_active(port) && (ip_kotype(port)
12566					       == IKOT_NAMED_ENTRY)) {
12567				named_entry =
12568					(vm_named_entry_t)port->ip_kobject;
12569				if (!(lck_mtx_try_lock(&(named_entry)->Lock))) {
12570                       			ip_unlock(port);
12571
12572					try_failed_count++;
12573                       			mutex_pause(try_failed_count);
12574                       			continue;
12575                		}
12576				named_entry->ref_count++;
12577				lck_mtx_unlock(&(named_entry)->Lock);
12578				ip_unlock(port);
12579				if ((named_entry->is_sub_map) &&
12580				    (named_entry->protection
12581				     & VM_PROT_WRITE)) {
12582					map = named_entry->backing.map;
12583				} else {
12584					mach_destroy_memory_entry(port);
12585					return VM_MAP_NULL;
12586				}
12587				vm_map_reference_swap(map);
12588				mach_destroy_memory_entry(port);
12589				break;
12590			}
12591			else
12592				return VM_MAP_NULL;
12593		}
12594	}
12595	else
12596		map = convert_port_to_map(port);
12597
12598	return map;
12599}
12600
/*
 *	Routine:	convert_port_entry_to_object
 *	Purpose:
 *		Convert from a port specifying a named entry to an
 *		object.  Doesn't consume the port ref; produces an object
 *		ref, which may be null.
 *	Conditions:
 *		Nothing locked.
 */
12610
12611
12612vm_object_t
12613convert_port_entry_to_object(
12614	ipc_port_t	port)
12615{
12616	vm_object_t object;
12617	vm_named_entry_t	named_entry;
12618	uint32_t	try_failed_count = 0;
12619
12620	if(IP_VALID(port) && (ip_kotype(port) == IKOT_NAMED_ENTRY)) {
12621		while(TRUE) {
12622			ip_lock(port);
12623			if(ip_active(port) && (ip_kotype(port)
12624					       == IKOT_NAMED_ENTRY)) {
12625				named_entry =
12626					(vm_named_entry_t)port->ip_kobject;
12627				if (!(lck_mtx_try_lock(&(named_entry)->Lock))) {
12628                       			ip_unlock(port);
12629
12630					try_failed_count++;
12631                       			mutex_pause(try_failed_count);
12632                       			continue;
12633                		}
12634				named_entry->ref_count++;
12635				lck_mtx_unlock(&(named_entry)->Lock);
12636				ip_unlock(port);
12637				if ((!named_entry->is_sub_map) &&
12638				    (!named_entry->is_pager) &&
12639				    (named_entry->protection
12640				     & VM_PROT_WRITE)) {
12641					object = named_entry->backing.object;
12642				} else {
12643					mach_destroy_memory_entry(port);
12644					return (vm_object_t)NULL;
12645				}
12646				vm_object_reference(named_entry->backing.object);
12647				mach_destroy_memory_entry(port);
12648				break;
12649			}
12650			else
12651				return (vm_object_t)NULL;
12652		}
12653	} else {
12654		return (vm_object_t)NULL;
12655	}
12656
12657	return object;
12658}
12659
12660/*
12661 * Export routines to other components for the things we access locally through
12662 * macros.
12663 */
12664#undef current_map
12665vm_map_t
12666current_map(void)
12667{
12668	return (current_map_fast());
12669}
12670
12671/*
12672 *	vm_map_reference:
12673 *
12674 *	Most code internal to the osfmk will go through a
12675 *	macro defining this.  This is always here for the
12676 *	use of other kernel components.
12677 */
12678#undef vm_map_reference
12679void
12680vm_map_reference(
12681	register vm_map_t	map)
12682{
12683	if (map == VM_MAP_NULL)
12684		return;
12685
12686	lck_mtx_lock(&map->s_lock);
12687#if	TASK_SWAPPER
12688	assert(map->res_count > 0);
12689	assert(map->ref_count >= map->res_count);
12690	map->res_count++;
12691#endif
12692	map->ref_count++;
12693	lck_mtx_unlock(&map->s_lock);
12694}
12695
12696/*
12697 *	vm_map_deallocate:
12698 *
12699 *	Removes a reference from the specified map,
12700 *	destroying it if no references remain.
12701 *	The map should not be locked.
12702 */
12703void
12704vm_map_deallocate(
12705	register vm_map_t	map)
12706{
12707	unsigned int		ref;
12708
12709	if (map == VM_MAP_NULL)
12710		return;
12711
12712	lck_mtx_lock(&map->s_lock);
12713	ref = --map->ref_count;
12714	if (ref > 0) {
12715		vm_map_res_deallocate(map);
12716		lck_mtx_unlock(&map->s_lock);
12717		return;
12718	}
12719	assert(map->ref_count == 0);
12720	lck_mtx_unlock(&map->s_lock);
12721
12722#if	TASK_SWAPPER
12723	/*
12724	 * The map residence count isn't decremented here because
12725	 * the vm_map_delete below will traverse the entire map,
12726	 * deleting entries, and the residence counts on objects
12727	 * and sharing maps will go away then.
12728	 */
12729#endif
12730
12731	vm_map_destroy(map, VM_MAP_NO_FLAGS);
12732}
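
/*
 * A minimal sketch of the reference/deallocate pairing these two routines
 * provide (the same pattern vm_map_write_user() and vm_map_read_user()
 * use around vm_map_switch() above):
 *
 *	vm_map_reference(map);		// keep "map" alive across the work
 *	... use the map ...
 *	vm_map_deallocate(map);		// may destroy the map if this was
 *					// the last reference
 */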
12733
12734
12735void
12736vm_map_disable_NX(vm_map_t map)
12737{
12738        if (map == NULL)
12739	        return;
12740        if (map->pmap == NULL)
12741	        return;
12742
12743        pmap_disable_NX(map->pmap);
12744}
12745
12746void
12747vm_map_disallow_data_exec(vm_map_t map)
12748{
12749    if (map == NULL)
12750        return;
12751
12752    map->map_disallow_data_exec = TRUE;
12753}
12754
12755/* XXX Consider making these constants (VM_MAX_ADDRESS and MACH_VM_MAX_ADDRESS)
12756 * more descriptive.
12757 */
12758void
12759vm_map_set_32bit(vm_map_t map)
12760{
12761	map->max_offset = (vm_map_offset_t)VM_MAX_ADDRESS;
12762}
12763
12764
12765void
12766vm_map_set_64bit(vm_map_t map)
12767{
12768	map->max_offset = (vm_map_offset_t)MACH_VM_MAX_ADDRESS;
12769}
12770
12771vm_map_offset_t
12772vm_compute_max_offset(unsigned is64)
12773{
12774	return (is64 ? (vm_map_offset_t)MACH_VM_MAX_ADDRESS : (vm_map_offset_t)VM_MAX_ADDRESS);
12775}
12776
12777boolean_t
12778vm_map_is_64bit(
12779		vm_map_t map)
12780{
12781	return map->max_offset > ((vm_map_offset_t)VM_MAX_ADDRESS);
12782}
12783
12784boolean_t
12785vm_map_has_hard_pagezero(
12786		vm_map_t 	map,
12787		vm_map_offset_t	pagezero_size)
12788{
12789	/*
12790	 * XXX FBDP
12791	 * We should lock the VM map (for read) here but we can get away
12792	 * with it for now because there can't really be any race condition:
12793	 * the VM map's min_offset is changed only when the VM map is created
12794	 * and when the zero page is established (when the binary gets loaded),
12795	 * and this routine gets called only when the task terminates and the
12796	 * VM map is being torn down, and when a new map is created via
12797	 * load_machfile()/execve().
12798	 */
12799	return (map->min_offset >= pagezero_size);
12800}
12801
12802void
12803vm_map_set_4GB_pagezero(vm_map_t map)
12804{
12805#if defined(__i386__)
12806	pmap_set_4GB_pagezero(map->pmap);
12807#else
12808#pragma unused(map)
12809#endif
12810
12811}
12812
12813void
12814vm_map_clear_4GB_pagezero(vm_map_t map)
12815{
12816#if defined(__i386__)
12817	pmap_clear_4GB_pagezero(map->pmap);
12818#else
12819#pragma unused(map)
12820#endif
12821}
12822
12823/*
 * Raise a VM map's maximum offset.
12825 */
12826kern_return_t
12827vm_map_raise_max_offset(
12828	vm_map_t	map,
12829	vm_map_offset_t	new_max_offset)
12830{
12831	kern_return_t	ret;
12832
12833	vm_map_lock(map);
12834	ret = KERN_INVALID_ADDRESS;
12835
12836	if (new_max_offset >= map->max_offset) {
12837		if (!vm_map_is_64bit(map)) {
12838			if (new_max_offset <= (vm_map_offset_t)VM_MAX_ADDRESS) {
12839				map->max_offset = new_max_offset;
12840				ret = KERN_SUCCESS;
12841			}
12842		} else {
12843			if (new_max_offset <= (vm_map_offset_t)MACH_VM_MAX_ADDRESS) {
12844				map->max_offset = new_max_offset;
12845				ret = KERN_SUCCESS;
12846			}
12847		}
12848	}
12849
12850	vm_map_unlock(map);
12851	return ret;
12852}
12853
12854
12855/*
12856 * Raise a VM map's minimum offset.
12857 * To strictly enforce "page zero" reservation.
12858 */
12859kern_return_t
12860vm_map_raise_min_offset(
12861	vm_map_t	map,
12862	vm_map_offset_t	new_min_offset)
12863{
12864	vm_map_entry_t	first_entry;
12865
12866	new_min_offset = vm_map_round_page(new_min_offset);
12867
12868	vm_map_lock(map);
12869
12870	if (new_min_offset < map->min_offset) {
12871		/*
12872		 * Can't move min_offset backwards, as that would expose
12873		 * a part of the address space that was previously, and for
12874		 * possibly good reasons, inaccessible.
12875		 */
12876		vm_map_unlock(map);
12877		return KERN_INVALID_ADDRESS;
12878	}
12879
12880	first_entry = vm_map_first_entry(map);
12881	if (first_entry != vm_map_to_entry(map) &&
12882	    first_entry->vme_start < new_min_offset) {
12883		/*
12884		 * Some memory was already allocated below the new
		 * minimum offset.  It's too late to change it now...
12886		 */
12887		vm_map_unlock(map);
12888		return KERN_NO_SPACE;
12889	}
12890
12891	map->min_offset = new_min_offset;
12892
12893	vm_map_unlock(map);
12894
12895	return KERN_SUCCESS;
12896}
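
/*
 * A minimal usage sketch for vm_map_raise_min_offset(), reserving a "page
 * zero" region before anything is mapped there ("pagezero_size" is
 * illustrative; the routine rounds it to a page boundary):
 *
 *	kern_return_t	kr;
 *
 *	kr = vm_map_raise_min_offset(map, (vm_map_offset_t) pagezero_size);
 *	if (kr == KERN_NO_SPACE) {
 *		// something was already mapped below the requested minimum
 *	}
 *
 *	// later, vm_map_has_hard_pagezero(map, pagezero_size) reports
 *	// whether the reservation is in place.
 */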
12897
/*
 * Set the limit on the maximum amount of user wired memory allowed for this map.
 * This is basically a copy of the MEMLOCK rlimit value maintained by the BSD side
 * of the kernel.  The limits are checked on the Mach VM side, so we keep a copy
 * here to avoid reaching over to the BSD data structures.
 */
12904
12905void
12906vm_map_set_user_wire_limit(vm_map_t 	map,
12907			   vm_size_t	limit)
12908{
12909	map->user_wire_limit = limit;
12910}
12911
12912
12913void vm_map_switch_protect(vm_map_t	map,
12914			   boolean_t	val)
12915{
12916	vm_map_lock(map);
12917	map->switch_protect=val;
12918	vm_map_unlock(map);
12919}
12920
12921/* Add (generate) code signature for memory range */
12922#if CONFIG_DYNAMIC_CODE_SIGNING
12923kern_return_t vm_map_sign(vm_map_t map,
12924		 vm_map_offset_t start,
12925		 vm_map_offset_t end)
12926{
12927	vm_map_entry_t entry;
12928	vm_page_t m;
12929	vm_object_t object;
12930
12931	/*
12932	 * Vet all the input parameters and current type and state of the
	 * underlying object.  Return with an error if anything is amiss.
12934	 */
12935	if (map == VM_MAP_NULL)
12936		return(KERN_INVALID_ARGUMENT);
12937
12938	vm_map_lock_read(map);
12939
12940	if (!vm_map_lookup_entry(map, start, &entry) || entry->is_sub_map) {
12941		/*
12942		 * Must pass a valid non-submap address.
12943		 */
12944		vm_map_unlock_read(map);
12945		return(KERN_INVALID_ADDRESS);
12946	}
12947
12948	if((entry->vme_start > start) || (entry->vme_end < end)) {
12949		/*
12950		 * Map entry doesn't cover the requested range. Not handling
12951		 * this situation currently.
12952		 */
12953		vm_map_unlock_read(map);
12954		return(KERN_INVALID_ARGUMENT);
12955	}
12956
12957	object = entry->object.vm_object;
12958	if (object == VM_OBJECT_NULL) {
12959		/*
12960		 * Object must already be present or we can't sign.
12961		 */
12962		vm_map_unlock_read(map);
12963		return KERN_INVALID_ARGUMENT;
12964	}
12965
12966	vm_object_lock(object);
12967	vm_map_unlock_read(map);
12968
12969	while(start < end) {
12970		uint32_t refmod;
12971
12972		m = vm_page_lookup(object, start - entry->vme_start + entry->offset );
12973		if (m==VM_PAGE_NULL) {
			/* should we try to fault a page in here?  We could
			 * probably demand that it exists and is locked for
			 * this request. */
12976			vm_object_unlock(object);
12977			return KERN_FAILURE;
12978		}
12979		/* deal with special page status */
12980		if (m->busy ||
12981		    (m->unusual && (m->error || m->restart || m->private || m->absent))) {
12982			vm_object_unlock(object);
12983			return KERN_FAILURE;
12984		}
12985
12986		/* Page is OK... now "validate" it */
12987		/* This is the place where we'll call out to create a code
12988		 * directory, later */
12989		m->cs_validated = TRUE;
12990
12991		/* The page is now "clean" for codesigning purposes. That means
12992		 * we don't consider it as modified (wpmapped) anymore. But
12993		 * we'll disconnect the page so we note any future modification
12994		 * attempts. */
12995		m->wpmapped = FALSE;
12996		refmod = pmap_disconnect(m->phys_page);
12997
12998		/* Pull the dirty status from the pmap, since we cleared the
12999		 * wpmapped bit */
13000		if ((refmod & VM_MEM_MODIFIED) && !m->dirty) {
13001			SET_PAGE_DIRTY(m, FALSE);
13002		}
13003
13004		/* On to the next page */
13005		start += PAGE_SIZE;
13006	}
13007	vm_object_unlock(object);
13008
13009	return KERN_SUCCESS;
13010}
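
/*
 * A minimal usage sketch, assuming CONFIG_DYNAMIC_CODE_SIGNING and a
 * range that is covered by a single map entry and already resident
 * ("start" and "end" are illustrative):
 *
 *	kern_return_t	kr;
 *
 *	kr = vm_map_sign(map,
 *			 vm_map_trunc_page(start),
 *			 vm_map_round_page(end));
 *	// KERN_FAILURE means a page in the range was absent or busy
 */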
13011#endif
13012
13013#if CONFIG_FREEZE
13014
13015kern_return_t vm_map_freeze_walk(
13016             	vm_map_t map,
13017             	unsigned int *purgeable_count,
13018             	unsigned int *wired_count,
13019             	unsigned int *clean_count,
13020             	unsigned int *dirty_count,
13021             	unsigned int  dirty_budget,
13022             	boolean_t *has_shared)
13023{
13024	vm_map_entry_t entry;
13025
13026	vm_map_lock_read(map);
13027
13028	*purgeable_count = *wired_count = *clean_count = *dirty_count = 0;
13029	*has_shared = FALSE;
13030
13031	for (entry = vm_map_first_entry(map);
13032	     entry != vm_map_to_entry(map);
13033	     entry = entry->vme_next) {
13034		unsigned int purgeable, clean, dirty, wired;
13035		boolean_t shared;
13036
13037		if ((entry->object.vm_object == 0) ||
13038		    (entry->is_sub_map) ||
13039		    (entry->object.vm_object->phys_contiguous)) {
13040			continue;
13041		}
13042
13043		default_freezer_pack(&purgeable, &wired, &clean, &dirty, dirty_budget, &shared, entry->object.vm_object, NULL);
13044
13045		*purgeable_count += purgeable;
13046		*wired_count += wired;
13047		*clean_count += clean;
13048		*dirty_count += dirty;
13049
13050		if (shared) {
13051			*has_shared = TRUE;
13052		}
13053
13054		/* Adjust pageout budget and finish up if reached */
13055		if (dirty_budget) {
13056			dirty_budget -= dirty;
13057			if (dirty_budget == 0) {
13058				break;
13059			}
13060		}
13061	}
13062
13063	vm_map_unlock_read(map);
13064
13065	return KERN_SUCCESS;
13066}
13067
13068kern_return_t vm_map_freeze(
13069             	vm_map_t map,
13070             	unsigned int *purgeable_count,
13071             	unsigned int *wired_count,
13072             	unsigned int *clean_count,
13073             	unsigned int *dirty_count,
13074             	unsigned int dirty_budget,
13075             	boolean_t *has_shared)
13076{
13077	vm_map_entry_t entry2 = VM_MAP_ENTRY_NULL;
13078	kern_return_t kr = KERN_SUCCESS;
13079
13080	*purgeable_count = *wired_count = *clean_count = *dirty_count = 0;
13081	*has_shared = FALSE;
13082
13083	/*
13084	 * We need the exclusive lock here so that we can
13085	 * block any page faults or lookups while we are
13086	 * in the middle of freezing this vm map.
13087	 */
13088	vm_map_lock(map);
13089
13090	if (map->default_freezer_handle == NULL) {
13091		map->default_freezer_handle = default_freezer_handle_allocate();
13092	}
13093
13094	if ((kr = default_freezer_handle_init(map->default_freezer_handle)) != KERN_SUCCESS) {
		/*
		 * This can happen if the default_freezer_handle passed in
		 * is NULL, or if a table has already been allocated and
		 * associated with this handle, i.e. the map is already frozen.
		 */
13100		goto done;
13101	}
13102
13103	for (entry2 = vm_map_first_entry(map);
13104	     entry2 != vm_map_to_entry(map);
13105	     entry2 = entry2->vme_next) {
13106
13107		vm_object_t	src_object = entry2->object.vm_object;
13108
13109		/* If eligible, scan the entry, moving eligible pages over to our parent object */
13110		if (entry2->object.vm_object && !entry2->is_sub_map && !entry2->object.vm_object->phys_contiguous) {
13111			unsigned int purgeable, clean, dirty, wired;
13112			boolean_t shared;
13113
13114			default_freezer_pack(&purgeable, &wired, &clean, &dirty, dirty_budget, &shared,
13115							src_object, map->default_freezer_handle);
13116
13117			*purgeable_count += purgeable;
13118			*wired_count += wired;
13119			*clean_count += clean;
13120			*dirty_count += dirty;
13121
13122			/* Adjust pageout budget and finish up if reached */
13123			if (dirty_budget) {
13124				dirty_budget -= dirty;
13125				if (dirty_budget == 0) {
13126					break;
13127				}
13128			}
13129
13130			if (shared) {
13131				*has_shared = TRUE;
13132			}
13133		}
13134	}
13135
13136	/* Finally, throw out the pages to swap */
13137	default_freezer_pageout(map->default_freezer_handle);
13138
13139done:
13140	vm_map_unlock(map);
13141
13142	return kr;
13143}
13144
13145kern_return_t
13146vm_map_thaw(
13147	vm_map_t map)
13148{
13149	kern_return_t kr = KERN_SUCCESS;
13150
13151	vm_map_lock(map);
13152
13153	if (map->default_freezer_handle == NULL) {
13154		/*
13155		 * This map is not in a frozen state.
13156		 */
13157		kr = KERN_FAILURE;
13158		goto out;
13159	}
13160
13161	default_freezer_unpack(map->default_freezer_handle);
13162out:
13163	vm_map_unlock(map);
13164
13165	return kr;
13166}
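
/*
 * A minimal usage sketch of the freeze/thaw pair, assuming CONFIG_FREEZE
 * and a caller that has already suspended the task owning "map"
 * ("dirty_budget" is a caller-chosen pageout budget):
 *
 *	unsigned int	purgeable, wired, clean, dirty;
 *	boolean_t	shared;
 *	kern_return_t	kr;
 *
 *	kr = vm_map_freeze(map, &purgeable, &wired, &clean, &dirty,
 *			   dirty_budget, &shared);
 *
 *	// ... later, before the task runs again ...
 *	kr = vm_map_thaw(map);
 *	// KERN_FAILURE here means the map was never frozen
 */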
13167#endif
13168
13169#if !CONFIG_EMBEDDED
13170/*
13171 * vm_map_entry_should_cow_for_true_share:
13172 *
13173 * Determines if the map entry should be clipped and setup for copy-on-write
13174 * to avoid applying "true_share" to a large VM object when only a subset is
13175 * targeted.
13176 *
13177 * For now, we target only the map entries created for the Objective C
13178 * Garbage Collector, which initially have the following properties:
13179 *	- alias == VM_MEMORY_MALLOC
13180 * 	- wired_count == 0
13181 * 	- !needs_copy
13182 * and a VM object with:
13183 * 	- internal
13184 * 	- copy_strategy == MEMORY_OBJECT_COPY_SYMMETRIC
13185 * 	- !true_share
13186 * 	- vo_size == ANON_CHUNK_SIZE
13187 */
13188boolean_t
13189vm_map_entry_should_cow_for_true_share(
13190	vm_map_entry_t	entry)
13191{
13192	vm_object_t	object;
13193
13194	if (entry->is_sub_map) {
13195		/* entry does not point at a VM object */
13196		return FALSE;
13197	}
13198
13199	if (entry->needs_copy) {
13200		/* already set for copy_on_write: done! */
13201		return FALSE;
13202	}
13203
13204	if (entry->alias != VM_MEMORY_MALLOC) {
		/* not tagged as an Objective-C Garbage Collector entry */
13206		return FALSE;
13207	}
13208
13209	if (entry->wired_count) {
13210		/* wired: can't change the map entry... */
13211		return FALSE;
13212	}
13213
13214	object = entry->object.vm_object;
13215
13216	if (object == VM_OBJECT_NULL) {
13217		/* no object yet... */
13218		return FALSE;
13219	}
13220
13221	if (!object->internal) {
13222		/* not an internal object */
13223		return FALSE;
13224	}
13225
13226	if (object->copy_strategy != MEMORY_OBJECT_COPY_SYMMETRIC) {
13227		/* not the default copy strategy */
13228		return FALSE;
13229	}
13230
13231	if (object->true_share) {
13232		/* already true_share: too late to avoid it */
13233		return FALSE;
13234	}
13235
13236	if (object->vo_size != ANON_CHUNK_SIZE) {
13237		/* not an object created for the ObjC Garbage Collector */
13238		return FALSE;
13239	}
13240
13241	/*
13242	 * All the criteria match: we have a large object being targeted for "true_share".
13243	 * To limit the adverse side-effects linked with "true_share", tell the caller to
13244	 * try and avoid setting up the entire object for "true_share" by clipping the
13245	 * targeted range and setting it up for copy-on-write.
13246	 */
13247	return TRUE;
13248}
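
/*
 * A minimal usage sketch, assuming the caller holds the map lock for
 * writing and would otherwise set "true_share" on the whole object
 * backing [start, end) -- "map", "entry", "start" and "end" are the
 * caller's, shown here for illustration only:
 *
 *	if (vm_map_entry_should_cow_for_true_share(entry)) {
 *		// clip the entry to the targeted range so that any
 *		// copy-on-write setup only affects [start, end) rather
 *		// than the whole ANON_CHUNK_SIZE object
 *		vm_map_clip_start(map, entry, vm_map_trunc_page(start));
 *		vm_map_clip_end(map, entry, vm_map_round_page(end));
 *	}
 */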
13249#endif /* !CONFIG_EMBEDDED */
13250