1/*
2 * Copyright (c) 2000-2012 Apple Inc. All rights reserved.
3 *
4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5 *
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. The rights granted to you under the License
10 * may not be used to create, or enable the creation or redistribution of,
11 * unlawful or unlicensed copies of an Apple operating system, or to
12 * circumvent, violate, or enable the circumvention or violation of, any
13 * terms of an Apple operating system software license agreement.
14 *
15 * Please obtain a copy of the License at
16 * http://www.opensource.apple.com/apsl/ and read it before using this file.
17 *
18 * The Original Code and all software distributed under the License are
19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23 * Please see the License for the specific language governing rights and
24 * limitations under the License.
25 *
26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27 */
28/*
29 * @OSF_COPYRIGHT@
30 */
31/*
32 * Mach Operating System
33 * Copyright (c) 1991,1990,1989,1988,1987 Carnegie Mellon University
34 * All Rights Reserved.
35 *
36 * Permission to use, copy, modify and distribute this software and its
37 * documentation is hereby granted, provided that both the copyright
38 * notice and this permission notice appear in all copies of the
39 * software, derivative works or modified versions, and any portions
40 * thereof, and that both notices appear in supporting documentation.
41 *
42 * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
43 * CONDITION.  CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND FOR
44 * ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
45 *
46 * Carnegie Mellon requests users of this software to return to
47 *
48 *  Software Distribution Coordinator  or  Software.Distribution@CS.CMU.EDU
49 *  School of Computer Science
50 *  Carnegie Mellon University
51 *  Pittsburgh PA 15213-3890
52 *
53 * any improvements or extensions that they make and grant Carnegie Mellon
54 * the rights to redistribute these changes.
55 */
56/*
57 */
58/*
59 *	File:	vm/vm_map.c
60 *	Author:	Avadis Tevanian, Jr., Michael Wayne Young
61 *	Date:	1985
62 *
63 *	Virtual memory mapping module.
64 */
65
66#include <task_swapper.h>
67#include <mach_assert.h>
68
69#include <vm/vm_options.h>
70
71#include <libkern/OSAtomic.h>
72
73#include <mach/kern_return.h>
74#include <mach/port.h>
75#include <mach/vm_attributes.h>
76#include <mach/vm_param.h>
77#include <mach/vm_behavior.h>
78#include <mach/vm_statistics.h>
79#include <mach/memory_object.h>
80#include <mach/mach_vm.h>
81#include <machine/cpu_capabilities.h>
82#include <mach/sdt.h>
83
84#include <kern/assert.h>
85#include <kern/counters.h>
86#include <kern/kalloc.h>
87#include <kern/zalloc.h>
88
89#include <vm/cpm.h>
90#include <vm/vm_compressor_pager.h>
91#include <vm/vm_init.h>
92#include <vm/vm_fault.h>
93#include <vm/vm_map.h>
94#include <vm/vm_object.h>
95#include <vm/vm_page.h>
96#include <vm/vm_pageout.h>
97#include <vm/vm_kern.h>
98#include <ipc/ipc_port.h>
99#include <kern/sched_prim.h>
100#include <kern/misc_protos.h>
101#include <kern/xpr.h>
102
103#include <mach/vm_map_server.h>
104#include <mach/mach_host_server.h>
105#include <vm/vm_protos.h>
106#include <vm/vm_purgeable_internal.h>
107
109#include <vm/vm_shared_region.h>
110#include <vm/vm_map_store.h>
111
112extern u_int32_t random(void);	/* from <libkern/libkern.h> */
113/* Internal prototypes
114 */
115
116static void vm_map_simplify_range(
117	vm_map_t	map,
118	vm_map_offset_t	start,
119	vm_map_offset_t	end);	/* forward */
120
121static boolean_t	vm_map_range_check(
122	vm_map_t	map,
123	vm_map_offset_t	start,
124	vm_map_offset_t	end,
125	vm_map_entry_t	*entry);
126
127static vm_map_entry_t	_vm_map_entry_create(
128	struct vm_map_header	*map_header, boolean_t map_locked);
129
130static void		_vm_map_entry_dispose(
131	struct vm_map_header	*map_header,
132	vm_map_entry_t		entry);
133
134static void		vm_map_pmap_enter(
135	vm_map_t		map,
136	vm_map_offset_t 	addr,
137	vm_map_offset_t		end_addr,
138	vm_object_t 		object,
139	vm_object_offset_t	offset,
140	vm_prot_t		protection);
141
142static void		_vm_map_clip_end(
143	struct vm_map_header	*map_header,
144	vm_map_entry_t		entry,
145	vm_map_offset_t		end);
146
147static void		_vm_map_clip_start(
148	struct vm_map_header	*map_header,
149	vm_map_entry_t		entry,
150	vm_map_offset_t		start);
151
152static void		vm_map_entry_delete(
153	vm_map_t	map,
154	vm_map_entry_t	entry);
155
156static kern_return_t	vm_map_delete(
157	vm_map_t	map,
158	vm_map_offset_t	start,
159	vm_map_offset_t	end,
160	int		flags,
161	vm_map_t	zap_map);
162
163static kern_return_t	vm_map_copy_overwrite_unaligned(
164	vm_map_t	dst_map,
165	vm_map_entry_t	entry,
166	vm_map_copy_t	copy,
167	vm_map_address_t start,
168	boolean_t	discard_on_success);
169
170static kern_return_t	vm_map_copy_overwrite_aligned(
171	vm_map_t	dst_map,
172	vm_map_entry_t	tmp_entry,
173	vm_map_copy_t	copy,
174	vm_map_offset_t start,
175	pmap_t		pmap);
176
177static kern_return_t	vm_map_copyin_kernel_buffer(
178	vm_map_t	src_map,
179	vm_map_address_t src_addr,
180	vm_map_size_t	len,
181	boolean_t	src_destroy,
182	vm_map_copy_t	*copy_result);  /* OUT */
183
184static kern_return_t	vm_map_copyout_kernel_buffer(
185	vm_map_t	map,
186	vm_map_address_t *addr,	/* IN/OUT */
187	vm_map_copy_t	copy,
188	boolean_t	overwrite,
189	boolean_t	consume_on_success);
190
191static void		vm_map_fork_share(
192	vm_map_t	old_map,
193	vm_map_entry_t	old_entry,
194	vm_map_t	new_map);
195
196static boolean_t	vm_map_fork_copy(
197	vm_map_t	old_map,
198	vm_map_entry_t	*old_entry_p,
199	vm_map_t	new_map);
200
201void		vm_map_region_top_walk(
202	vm_map_entry_t		   entry,
203	vm_region_top_info_t       top);
204
205void		vm_map_region_walk(
206	vm_map_t		   map,
207	vm_map_offset_t		   va,
208	vm_map_entry_t		   entry,
209	vm_object_offset_t	   offset,
210	vm_object_size_t	   range,
211	vm_region_extended_info_t  extended,
212	boolean_t		   look_for_pages,
213	mach_msg_type_number_t count);
214
215static kern_return_t	vm_map_wire_nested(
216	vm_map_t		   map,
217	vm_map_offset_t		   start,
218	vm_map_offset_t		   end,
219	vm_prot_t		   access_type,
220	boolean_t		   user_wire,
221	pmap_t			   map_pmap,
222	vm_map_offset_t		   pmap_addr,
223	ppnum_t			   *physpage_p);
224
225static kern_return_t	vm_map_unwire_nested(
226	vm_map_t		   map,
227	vm_map_offset_t		   start,
228	vm_map_offset_t		   end,
229	boolean_t		   user_wire,
230	pmap_t			   map_pmap,
231	vm_map_offset_t		   pmap_addr);
232
233static kern_return_t	vm_map_overwrite_submap_recurse(
234	vm_map_t		   dst_map,
235	vm_map_offset_t		   dst_addr,
236	vm_map_size_t		   dst_size);
237
238static kern_return_t	vm_map_copy_overwrite_nested(
239	vm_map_t		   dst_map,
240	vm_map_offset_t		   dst_addr,
241	vm_map_copy_t		   copy,
242	boolean_t		   interruptible,
243	pmap_t			   pmap,
244	boolean_t		   discard_on_success);
245
246static kern_return_t	vm_map_remap_extract(
247	vm_map_t		map,
248	vm_map_offset_t		addr,
249	vm_map_size_t		size,
250	boolean_t		copy,
251	struct vm_map_header 	*map_header,
252	vm_prot_t		*cur_protection,
253	vm_prot_t		*max_protection,
254	vm_inherit_t		inheritance,
255	boolean_t		pageable);
256
257static kern_return_t	vm_map_remap_range_allocate(
258	vm_map_t		map,
259	vm_map_address_t	*address,
260	vm_map_size_t		size,
261	vm_map_offset_t		mask,
262	int			flags,
263	vm_map_entry_t		*map_entry);
264
265static void		vm_map_region_look_for_page(
266	vm_map_t		   map,
267	vm_map_offset_t            va,
268	vm_object_t		   object,
269	vm_object_offset_t	   offset,
270	int                        max_refcnt,
271	int                        depth,
272	vm_region_extended_info_t  extended,
273	mach_msg_type_number_t count);
274
275static int		vm_map_region_count_obj_refs(
276	vm_map_entry_t    	   entry,
277	vm_object_t       	   object);
278
279
280static kern_return_t	vm_map_willneed(
281	vm_map_t	map,
282	vm_map_offset_t	start,
283	vm_map_offset_t	end);
284
285static kern_return_t	vm_map_reuse_pages(
286	vm_map_t	map,
287	vm_map_offset_t	start,
288	vm_map_offset_t	end);
289
290static kern_return_t	vm_map_reusable_pages(
291	vm_map_t	map,
292	vm_map_offset_t	start,
293	vm_map_offset_t	end);
294
295static kern_return_t	vm_map_can_reuse(
296	vm_map_t	map,
297	vm_map_offset_t	start,
298	vm_map_offset_t	end);
299
300
301/*
302 * Macros to copy a vm_map_entry. We must be careful to correctly
303 * manage the wired page count. vm_map_entry_copy() creates a new
304 * map entry referring to the same memory - the wired count in the new
305 * must be set to zero. vm_map_entry_copy_full() creates a new
306 * entry that is identical to the old entry.  This preserves the
307 * wire count; it's used for map splitting and zone changing in
308 * vm_map_copyout.
309 */
310
311#define vm_map_entry_copy(NEW,OLD)	\
312MACRO_BEGIN				\
313boolean_t _vmec_reserved = (NEW)->from_reserved_zone;	\
314	*(NEW) = *(OLD);                \
315	(NEW)->is_shared = FALSE;	\
316	(NEW)->needs_wakeup = FALSE;    \
317	(NEW)->in_transition = FALSE;   \
318	(NEW)->wired_count = 0;         \
319	(NEW)->user_wired_count = 0;    \
320	(NEW)->permanent = FALSE;	\
321	(NEW)->used_for_jit = FALSE;	\
322	(NEW)->from_reserved_zone = _vmec_reserved;	\
323	(NEW)->iokit_acct = FALSE;	\
324MACRO_END
325
326#define vm_map_entry_copy_full(NEW,OLD)			\
327MACRO_BEGIN						\
328boolean_t _vmecf_reserved = (NEW)->from_reserved_zone;	\
329(*(NEW) = *(OLD));					\
330(NEW)->from_reserved_zone = _vmecf_reserved;			\
331MACRO_END
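/*
 * A minimal usage sketch of the two macros above (illustrative only,
 * not taken from any particular caller): use vm_map_entry_copy() when
 * the duplicate describes a new mapping and must start with a zero
 * wired count; use vm_map_entry_copy_full() only when the entry is
 * merely moved between headers and must keep its wired counts, as in
 * vm_map_copyout().
 *
 *	vm_map_entry_copy(new_entry, old_entry);
 *	assert(new_entry->wired_count == 0);
 *
 *	vm_map_entry_copy_full(moved_entry, old_entry);
 *	assert(moved_entry->wired_count == old_entry->wired_count);
 */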
332
333/*
334 *	Decide if we want to allow processes to execute from their data or stack areas.
335 *	override_nx() returns true if we do.  Data/stack execution can be enabled independently
336 *	for 32 and 64 bit processes.  Set the VM_ABI_32 or VM_ABI_64 flags in allow_data_exec
337 *	or allow_stack_exec to enable data execution for that type of data area for that particular
338 *	ABI (or both by or'ing the flags together).  These are initialized in the architecture
339 *	specific pmap files since the default behavior varies according to architecture.  The
340 *	main reason it varies is because of the need to provide binary compatibility with old
341 *	applications that were written before these restrictions came into being.  In the old
342 *	days, an app could execute anything it could read, but this has slowly been tightened
343 *	up over time.  The default behavior is:
344 *
345 *	32-bit PPC apps		may execute from both stack and data areas
346 *	32-bit Intel apps	may execute from data areas but not stack
347 *	64-bit PPC/Intel apps	may not execute from either data or stack
348 *
349 *	An application on any architecture may override these defaults by explicitly
350 *	adding PROT_EXEC permission to the page in question with the mprotect(2)
351 *	system call.  This code here just determines what happens when an app tries to
352 * 	execute from a page that lacks execute permission.
353 *
354 *	Note that allow_data_exec or allow_stack_exec may also be modified by sysctl to change the
355 *	default behavior for both 32 and 64 bit apps on a system-wide basis. Furthermore,
356 *	a Mach-O header flag bit (MH_NO_HEAP_EXECUTION) can be used to forcibly disallow
357 *	execution from data areas for a particular binary even if the arch normally permits it. As
358 *	a final wrinkle, a posix_spawn attribute flag can be used to negate this opt-in header bit
359 *	to support some complicated use cases, notably browsers with out-of-process plugins that
360 *	are not all NX-safe.
361 */
362
363extern int allow_data_exec, allow_stack_exec;
364
365int
366override_nx(vm_map_t map, uint32_t user_tag) /* map unused on arm */
367{
368	int current_abi;
369
370	/*
371	 * Determine if the app is running in 32 or 64 bit mode.
372	 */
373
374	if (vm_map_is_64bit(map))
375		current_abi = VM_ABI_64;
376	else
377		current_abi = VM_ABI_32;
378
379	/*
380	 * Determine if we should allow the execution based on whether it's a
381	 * stack or data area and the current architecture.
382	 */
383
384	if (user_tag == VM_MEMORY_STACK)
385		return allow_stack_exec & current_abi;
386
387	return (allow_data_exec & current_abi) && (map->map_disallow_data_exec == FALSE);
388}
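/*
 * A minimal sketch of how the policy variables combine with the ABI
 * bits described above (illustrative values only; the real defaults
 * are established in the architecture-specific pmap code):
 *
 *	allow_data_exec  = VM_ABI_32;	32-bit data areas may execute
 *	allow_stack_exec = 0;		no stack execution for any ABI
 *
 * With those values, override_nx() returns non-zero for a 32-bit
 * process faulting on a data page (unless the map has
 * map_disallow_data_exec set) and zero for a 64-bit process or for
 * any fault on a stack page.
 */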
389
390
391/*
392 *	Virtual memory maps provide for the mapping, protection,
393 *	and sharing of virtual memory objects.  In addition,
394 *	this module provides for an efficient virtual copy of
395 *	memory from one map to another.
396 *
397 *	Synchronization is required prior to most operations.
398 *
399 *	Maps consist of an ordered doubly-linked list of simple
400 *	entries; a single hint is used to speed up lookups.
401 *
402 *	Sharing maps have been deleted from this version of Mach.
403 *	All shared objects are now mapped directly into the respective
404 *	maps.  This requires a change in the copy on write strategy;
405 *	the asymmetric (delayed) strategy is used for shared temporary
406 *	objects instead of the symmetric (shadow) strategy.  All maps
407 *	are now "top level" maps (either task map, kernel map or submap
408 *	of the kernel map).
409 *
410 *	Since portions of maps are specified by start/end addresses,
411 *	which may not align with existing map entries, all
412 *	routines merely "clip" entries to these start/end values.
413 *	[That is, an entry is split into two, bordering at a
414 *	start or end value.]  Note that these clippings may not
415 *	always be necessary (as the two resulting entries are then
416 *	not changed); however, the clipping is done for convenience.
417 *	No attempt is currently made to "glue back together" two
418 *	abutting entries.
419 *
420 *	The symmetric (shadow) copy strategy implements virtual copy
421 *	by copying VM object references from one map to
422 *	another, and then marking both regions as copy-on-write.
423 *	It is important to note that only one writeable reference
424 *	to a VM object region exists in any map when this strategy
425 *	is used -- this means that shadow object creation can be
426 *	delayed until a write operation occurs.  The asymmetric (delayed)
427 *	strategy allows multiple maps to have writeable references to
428 *	the same region of a vm object, and hence cannot delay creating
429 *	its copy objects.  See vm_object_copy_quickly() in vm_object.c.
430 *	Copying of permanent objects is completely different; see
431 *	vm_object_copy_strategically() in vm_object.c.
432 */
433
434static zone_t	vm_map_zone;		/* zone for vm_map structures */
435static zone_t	vm_map_entry_zone;	/* zone for vm_map_entry structures */
436static zone_t	vm_map_entry_reserved_zone;	/* zone with reserve for non-blocking
437					 * allocations */
438static zone_t	vm_map_copy_zone;	/* zone for vm_map_copy structures */
439
440
441/*
442 *	Placeholder object for submap operations.  This object is dropped
443 *	into the range by a call to vm_map_find, and removed when
444 *	vm_map_submap creates the submap.
445 */
446
447vm_object_t	vm_submap_object;
448
449static void		*map_data;
450static vm_size_t	map_data_size;
451static void		*kentry_data;
452static vm_size_t	kentry_data_size;
453
454#define         NO_COALESCE_LIMIT  ((1024 * 128) - 1)
455
456/* Skip acquiring locks if we're in the midst of a kernel core dump */
457unsigned int not_in_kdp = 1;
458
459unsigned int vm_map_set_cache_attr_count = 0;
460
461kern_return_t
462vm_map_set_cache_attr(
463	vm_map_t	map,
464	vm_map_offset_t	va)
465{
466	vm_map_entry_t	map_entry;
467	vm_object_t	object;
468	kern_return_t	kr = KERN_SUCCESS;
469
470	vm_map_lock_read(map);
471
472	if (!vm_map_lookup_entry(map, va, &map_entry) ||
473	    map_entry->is_sub_map) {
474		/*
475		 * that memory is not properly mapped
476		 */
477		kr = KERN_INVALID_ARGUMENT;
478		goto done;
479	}
480	object = map_entry->object.vm_object;
481
482	if (object == VM_OBJECT_NULL) {
483		/*
484		 * there should be a VM object here at this point
485		 */
486		kr = KERN_INVALID_ARGUMENT;
487		goto done;
488	}
489	vm_object_lock(object);
490	object->set_cache_attr = TRUE;
491	vm_object_unlock(object);
492
493	vm_map_set_cache_attr_count++;
494done:
495	vm_map_unlock_read(map);
496
497	return kr;
498}
499
500
501#if CONFIG_CODE_DECRYPTION
502/*
503 * vm_map_apple_protected:
504 * This remaps the requested part of the object with an object backed by
505 * the decrypting pager.
506 * crypt_info contains entry points and session data for the crypt module.
507 * The crypt_info block will be copied by vm_map_apple_protected. The data structures
508 * referenced in crypt_info must remain valid until crypt_info->crypt_end() is called.
509 */
510kern_return_t
511vm_map_apple_protected(
512	vm_map_t	map,
513	vm_map_offset_t	start,
514	vm_map_offset_t	end,
515	struct pager_crypt_info *crypt_info)
516{
517	boolean_t	map_locked;
518	kern_return_t	kr;
519	vm_map_entry_t	map_entry;
520	memory_object_t	protected_mem_obj;
521	vm_object_t	protected_object;
522	vm_map_offset_t	map_addr;
523
524	vm_map_lock_read(map);
525	map_locked = TRUE;
526
527	/* lookup the protected VM object */
528	if (!vm_map_lookup_entry(map,
529				 start,
530				 &map_entry) ||
531	    map_entry->vme_end < end ||
532	    map_entry->is_sub_map ||
533	    !(map_entry->protection & VM_PROT_EXECUTE)) {
534		/* that memory is not properly mapped */
535		kr = KERN_INVALID_ARGUMENT;
536		goto done;
537	}
538	protected_object = map_entry->object.vm_object;
539	if (protected_object == VM_OBJECT_NULL) {
540		/* there should be a VM object here at this point */
541		kr = KERN_INVALID_ARGUMENT;
542		goto done;
543	}
544
545	/* make sure protected object stays alive while map is unlocked */
546	vm_object_reference(protected_object);
547
548	vm_map_unlock_read(map);
549	map_locked = FALSE;
550
551	/*
552	 * Lookup (and create if necessary) the protected memory object
553	 * matching that VM object.
554	 * If successful, this also grabs a reference on the memory object,
555	 * to guarantee that it doesn't go away before we get a chance to map
556	 * it.
557	 */
558	protected_mem_obj = apple_protect_pager_setup(protected_object, crypt_info);
559
560	/* release extra ref on protected object */
561	vm_object_deallocate(protected_object);
562
563	if (protected_mem_obj == NULL) {
564		kr = KERN_FAILURE;
565		goto done;
566	}
567
568	/* map this memory object in place of the current one */
569	map_addr = start;
570	kr = vm_map_enter_mem_object(map,
571				     &map_addr,
572				     end - start,
573				     (mach_vm_offset_t) 0,
574				     VM_FLAGS_FIXED | VM_FLAGS_OVERWRITE,
575				     (ipc_port_t) protected_mem_obj,
576				     (map_entry->offset +
577				      (start - map_entry->vme_start)),
578				     TRUE,
579				     map_entry->protection,
580				     map_entry->max_protection,
581				     map_entry->inheritance);
582	assert(map_addr == start);
583	/*
584	 * Release the reference obtained by apple_protect_pager_setup().
585	 * The mapping (if it succeeded) is now holding a reference on the
586	 * memory object.
587	 */
588	memory_object_deallocate(protected_mem_obj);
589
590done:
591	if (map_locked) {
592		vm_map_unlock_read(map);
593	}
594	return kr;
595}
596#endif	/* CONFIG_CODE_DECRYPTION */
597
598
599lck_grp_t		vm_map_lck_grp;
600lck_grp_attr_t	vm_map_lck_grp_attr;
601lck_attr_t		vm_map_lck_attr;
602lck_attr_t		vm_map_lck_rw_attr;
603
604
605/*
606 *	vm_map_init:
607 *
608 *	Initialize the vm_map module.  Must be called before
609 *	any other vm_map routines.
610 *
611 *	Map and entry structures are allocated from zones -- we must
612 *	initialize those zones.
613 *
614 *	There are three zones of interest:
615 *
616 *	vm_map_zone:		used to allocate maps.
617 *	vm_map_entry_zone:	used to allocate map entries.
618 *	vm_map_entry_reserved_zone:	fallback zone for kernel map entries
619 *
620 *	The kernel allocates map entries from a special zone that is initially
621 *	"crammed" with memory.  It would be difficult (perhaps impossible) for
622 *	the kernel to allocate more memory to an entry zone when it became
623 *	empty since the very act of allocating memory implies the creation
624 *	of a new entry.
625 */
626void
627vm_map_init(
628	void)
629{
630	vm_size_t entry_zone_alloc_size;
631	const char *mez_name = "VM map entries";
632
633	vm_map_zone = zinit((vm_map_size_t) sizeof(struct _vm_map), 40*1024,
634			    PAGE_SIZE, "maps");
635	zone_change(vm_map_zone, Z_NOENCRYPT, TRUE);
636#if	defined(__LP64__)
637	entry_zone_alloc_size = PAGE_SIZE * 5;
638#else
639	entry_zone_alloc_size = PAGE_SIZE * 6;
640#endif
641	vm_map_entry_zone = zinit((vm_map_size_t) sizeof(struct vm_map_entry),
642				  1024*1024, entry_zone_alloc_size,
643				  mez_name);
644	zone_change(vm_map_entry_zone, Z_NOENCRYPT, TRUE);
645	zone_change(vm_map_entry_zone, Z_NOCALLOUT, TRUE);
646	zone_change(vm_map_entry_zone, Z_GZALLOC_EXEMPT, TRUE);
647
648	vm_map_entry_reserved_zone = zinit((vm_map_size_t) sizeof(struct vm_map_entry),
649				   kentry_data_size * 64, kentry_data_size,
650				   "Reserved VM map entries");
651	zone_change(vm_map_entry_reserved_zone, Z_NOENCRYPT, TRUE);
652
653	vm_map_copy_zone = zinit((vm_map_size_t) sizeof(struct vm_map_copy),
654				 16*1024, PAGE_SIZE, "VM map copies");
655	zone_change(vm_map_copy_zone, Z_NOENCRYPT, TRUE);
656
657	/*
658	 *	Cram the map and kentry zones with initial data.
659	 *	Set reserved_zone non-collectible to aid zone_gc().
660	 */
661	zone_change(vm_map_zone, Z_COLLECT, FALSE);
662
663	zone_change(vm_map_entry_reserved_zone, Z_COLLECT, FALSE);
664	zone_change(vm_map_entry_reserved_zone, Z_EXPAND, FALSE);
665	zone_change(vm_map_entry_reserved_zone, Z_FOREIGN, TRUE);
666	zone_change(vm_map_entry_reserved_zone, Z_NOCALLOUT, TRUE);
667	zone_change(vm_map_entry_reserved_zone, Z_CALLERACCT, FALSE); /* don't charge caller */
668	zone_change(vm_map_copy_zone, Z_CALLERACCT, FALSE); /* don't charge caller */
669	zone_change(vm_map_entry_reserved_zone, Z_GZALLOC_EXEMPT, TRUE);
670
671	zcram(vm_map_zone, (vm_offset_t)map_data, map_data_size);
672	zcram(vm_map_entry_reserved_zone, (vm_offset_t)kentry_data, kentry_data_size);
673
674	lck_grp_attr_setdefault(&vm_map_lck_grp_attr);
675	lck_grp_init(&vm_map_lck_grp, "vm_map", &vm_map_lck_grp_attr);
676	lck_attr_setdefault(&vm_map_lck_attr);
677
678	lck_attr_setdefault(&vm_map_lck_rw_attr);
679	lck_attr_cleardebug(&vm_map_lck_rw_attr);
680
681#if CONFIG_FREEZE
682	default_freezer_init();
683#endif /* CONFIG_FREEZE */
684}
685
686void
687vm_map_steal_memory(
688	void)
689{
690	uint32_t kentry_initial_pages;
691
692	map_data_size = round_page(10 * sizeof(struct _vm_map));
693	map_data = pmap_steal_memory(map_data_size);
694
695	/*
696	 * kentry_initial_pages corresponds to the number of kernel map entries
697	 * required during bootstrap until the asynchronous replenishment
698	 * scheme is activated and/or entries are available from the general
699	 * map entry pool.
700	 */
701#if	defined(__LP64__)
702	kentry_initial_pages = 10;
703#else
704	kentry_initial_pages = 6;
705#endif
706
707#if CONFIG_GZALLOC
708	/* If using the guard allocator, reserve more memory for the kernel
709	 * reserved map entry pool.
710	*/
711	if (gzalloc_enabled())
712		kentry_initial_pages *= 1024;
713#endif
714
715	kentry_data_size = kentry_initial_pages * PAGE_SIZE;
716	kentry_data = pmap_steal_memory(kentry_data_size);
717}
718
719void vm_kernel_reserved_entry_init(void) {
720	zone_prio_refill_configure(vm_map_entry_reserved_zone, (6*PAGE_SIZE)/sizeof(struct vm_map_entry));
721}
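/*
 * Boot-time ordering sketch (illustrative, inferred from the routines
 * above rather than from their actual call sites): vm_map_steal_memory()
 * must run first so that map_data and kentry_data exist for the
 * zcram() calls in vm_map_init(), and vm_kernel_reserved_entry_init()
 * later arms asynchronous replenishment of the reserved entry zone.
 *
 *	vm_map_steal_memory();			very early bootstrap
 *	...
 *	vm_map_init();				before any other vm_map call
 *	...
 *	vm_kernel_reserved_entry_init();	once zone refill is possible
 */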
722
723/*
724 *	vm_map_create:
725 *
726 *	Creates and returns a new empty VM map with
727 *	the given physical map structure, and having
728 *	the given lower and upper address bounds.
729 */
730vm_map_t
731vm_map_create(
732	pmap_t			pmap,
733	vm_map_offset_t	min,
734	vm_map_offset_t	max,
735	boolean_t		pageable)
736{
737	static int		color_seed = 0;
738	register vm_map_t	result;
739
740	result = (vm_map_t) zalloc(vm_map_zone);
741	if (result == VM_MAP_NULL)
742		panic("vm_map_create");
743
744	vm_map_first_entry(result) = vm_map_to_entry(result);
745	vm_map_last_entry(result)  = vm_map_to_entry(result);
746	result->hdr.nentries = 0;
747	result->hdr.entries_pageable = pageable;
748
749	vm_map_store_init( &(result->hdr) );
750
751	result->hdr.page_shift = PAGE_SHIFT;
752
753	result->size = 0;
754	result->user_wire_limit = MACH_VM_MAX_ADDRESS;	/* default limit is unlimited */
755	result->user_wire_size  = 0;
756	result->ref_count = 1;
757#if	TASK_SWAPPER
758	result->res_count = 1;
759	result->sw_state = MAP_SW_IN;
760#endif	/* TASK_SWAPPER */
761	result->pmap = pmap;
762	result->min_offset = min;
763	result->max_offset = max;
764	result->wiring_required = FALSE;
765	result->no_zero_fill = FALSE;
766	result->mapped_in_other_pmaps = FALSE;
767	result->wait_for_space = FALSE;
768	result->switch_protect = FALSE;
769	result->disable_vmentry_reuse = FALSE;
770	result->map_disallow_data_exec = FALSE;
771	result->highest_entry_end = 0;
772	result->first_free = vm_map_to_entry(result);
773	result->hint = vm_map_to_entry(result);
774	result->color_rr = (color_seed++) & vm_color_mask;
775 	result->jit_entry_exists = FALSE;
776#if CONFIG_FREEZE
777	result->default_freezer_handle = NULL;
778#endif
779	vm_map_lock_init(result);
780	lck_mtx_init_ext(&result->s_lock, &result->s_lock_ext, &vm_map_lck_grp, &vm_map_lck_attr);
781
782	return(result);
783}
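/*
 * Typical usage sketch (illustrative only): create an empty, pageable
 * map with no pmap attached, covering a given range, as is done for
 * the temporary "zap" maps in vm_map_enter() below; such a map is
 * later torn down with vm_map_destroy().
 *
 *	vm_map_t	zap_map;
 *
 *	zap_map = vm_map_create(PMAP_NULL,
 *				start,
 *				start + size,
 *				map->hdr.entries_pageable);
 *	vm_map_set_page_shift(zap_map, VM_MAP_PAGE_SHIFT(map));
 */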
784
785/*
786 *	vm_map_entry_create:	[ internal use only ]
787 *
788 *	Allocates a VM map entry for insertion in the
789 *	given map (or map copy).  No fields are filled.
790 */
791#define	vm_map_entry_create(map, map_locked)	_vm_map_entry_create(&(map)->hdr, map_locked)
792
793#define	vm_map_copy_entry_create(copy, map_locked)					\
794	_vm_map_entry_create(&(copy)->cpy_hdr, map_locked)
795unsigned reserved_zalloc_count, nonreserved_zalloc_count;
796
797static vm_map_entry_t
798_vm_map_entry_create(
799	struct vm_map_header	*map_header, boolean_t __unused map_locked)
800{
801	zone_t	zone;
802	vm_map_entry_t	entry;
803
804	zone = vm_map_entry_zone;
805
806	assert(map_header->entries_pageable ? !map_locked : TRUE);
807
808	if (map_header->entries_pageable) {
809		entry = (vm_map_entry_t) zalloc(zone);
810	}
811	else {
812		entry = (vm_map_entry_t) zalloc_canblock(zone, FALSE);
813
814		if (entry == VM_MAP_ENTRY_NULL) {
815			zone = vm_map_entry_reserved_zone;
816			entry = (vm_map_entry_t) zalloc(zone);
817			OSAddAtomic(1, &reserved_zalloc_count);
818		} else
819			OSAddAtomic(1, &nonreserved_zalloc_count);
820	}
821
822	if (entry == VM_MAP_ENTRY_NULL)
823		panic("vm_map_entry_create");
824	entry->from_reserved_zone = (zone == vm_map_entry_reserved_zone);
825
826	vm_map_store_update( (vm_map_t) NULL, entry, VM_MAP_ENTRY_CREATE);
827#if	MAP_ENTRY_CREATION_DEBUG
828	entry->vme_creation_maphdr = map_header;
829	fastbacktrace(&entry->vme_creation_bt[0],
830		      (sizeof(entry->vme_creation_bt)/sizeof(uintptr_t)));
831#endif
832	return(entry);
833}
834
835/*
836 *	vm_map_entry_dispose:	[ internal use only ]
837 *
838 *	Inverse of vm_map_entry_create.
839 *
840 * 	The write map lock is held, so there is no need to
841 *	do anything special to ensure the correctness
842 * 	of the stores.
843 */
844#define	vm_map_entry_dispose(map, entry)			\
845	_vm_map_entry_dispose(&(map)->hdr, (entry))
846
847#define	vm_map_copy_entry_dispose(copy, entry) \
848	_vm_map_entry_dispose(&(copy)->cpy_hdr, (entry))
849
850static void
851_vm_map_entry_dispose(
852	register struct vm_map_header	*map_header,
853	register vm_map_entry_t		entry)
854{
855	register zone_t		zone;
856
857	if (map_header->entries_pageable || !(entry->from_reserved_zone))
858		zone = vm_map_entry_zone;
859	else
860		zone = vm_map_entry_reserved_zone;
861
862	if (!map_header->entries_pageable) {
863		if (zone == vm_map_entry_zone)
864			OSAddAtomic(-1, &nonreserved_zalloc_count);
865		else
866			OSAddAtomic(-1, &reserved_zalloc_count);
867	}
868
869	zfree(zone, entry);
870}
871
872#if MACH_ASSERT
873static boolean_t first_free_check = FALSE;
874boolean_t
875first_free_is_valid(
876	vm_map_t	map)
877{
878	if (!first_free_check)
879		return TRUE;
880
881	return( first_free_is_valid_store( map ));
882}
883#endif /* MACH_ASSERT */
884
885
886#define vm_map_copy_entry_link(copy, after_where, entry)		\
887	_vm_map_store_entry_link(&(copy)->cpy_hdr, after_where, (entry))
888
889#define vm_map_copy_entry_unlink(copy, entry)				\
890	_vm_map_store_entry_unlink(&(copy)->cpy_hdr, (entry))
891
892#if	MACH_ASSERT && TASK_SWAPPER
893/*
894 *	vm_map_res_reference:
895 *
896 *	Adds another valid residence count to the given map.
897 *
898 *	Map is locked so this function can be called from
899 *	vm_map_swapin.
900 *
901 */
902void vm_map_res_reference(register vm_map_t map)
903{
904	/* assert map is locked */
905	assert(map->res_count >= 0);
906	assert(map->ref_count >= map->res_count);
907	if (map->res_count == 0) {
908		lck_mtx_unlock(&map->s_lock);
909		vm_map_lock(map);
910		vm_map_swapin(map);
911		lck_mtx_lock(&map->s_lock);
912		++map->res_count;
913		vm_map_unlock(map);
914	} else
915		++map->res_count;
916}
917
918/*
919 *	vm_map_reference_swap:
920 *
921 *	Adds valid reference and residence counts to the given map.
922 *
923 *	The map may not be in memory (i.e. zero residence count).
924 *
925 */
926void vm_map_reference_swap(register vm_map_t map)
927{
928	assert(map != VM_MAP_NULL);
929	lck_mtx_lock(&map->s_lock);
930	assert(map->res_count >= 0);
931	assert(map->ref_count >= map->res_count);
932	map->ref_count++;
933	vm_map_res_reference(map);
934	lck_mtx_unlock(&map->s_lock);
935}
936
937/*
938 *	vm_map_res_deallocate:
939 *
940 *	Decrement residence count on a map; possibly causing swapout.
941 *
942 *	The map must be in memory (i.e. non-zero residence count).
943 *
944 *	The map is locked, so this function is callable from vm_map_deallocate.
945 *
946 */
947void vm_map_res_deallocate(register vm_map_t map)
948{
949	assert(map->res_count > 0);
950	if (--map->res_count == 0) {
951		lck_mtx_unlock(&map->s_lock);
952		vm_map_lock(map);
953		vm_map_swapout(map);
954		vm_map_unlock(map);
955		lck_mtx_lock(&map->s_lock);
956	}
957	assert(map->ref_count >= map->res_count);
958}
959#endif	/* MACH_ASSERT && TASK_SWAPPER */
960
961/*
962 *	vm_map_destroy:
963 *
964 *	Actually destroy a map.
965 */
966void
967vm_map_destroy(
968	vm_map_t	map,
969	int		flags)
970{
971	vm_map_lock(map);
972
973	/* clean up regular map entries */
974	(void) vm_map_delete(map, map->min_offset, map->max_offset,
975			     flags, VM_MAP_NULL);
976	/* clean up leftover special mappings (commpage, etc...) */
977	(void) vm_map_delete(map, 0x0, 0xFFFFFFFFFFFFF000ULL,
978			     flags, VM_MAP_NULL);
979
980#if CONFIG_FREEZE
981	if (map->default_freezer_handle) {
982		default_freezer_handle_deallocate(map->default_freezer_handle);
983		map->default_freezer_handle = NULL;
984	}
985#endif
986	vm_map_unlock(map);
987
988	assert(map->hdr.nentries == 0);
989
990	if(map->pmap)
991		pmap_destroy(map->pmap);
992
993	zfree(vm_map_zone, map);
994}
995
996#if	TASK_SWAPPER
997/*
998 * vm_map_swapin/vm_map_swapout
999 *
1000 * Swap a map in and out, either referencing or releasing its resources.
1001 * These functions are internal use only; however, they must be exported
1002 * because they may be called from macros, which are exported.
1003 *
1004 * In the case of swapout, there could be races on the residence count,
1005 * so if the residence count is up, we return, assuming that a
1006 * vm_map_deallocate() call in the near future will bring us back.
1007 *
1008 * Locking:
1009 *	-- We use the map write lock for synchronization among races.
1010 *	-- The map write lock, and not the simple s_lock, protects the
1011 *	   swap state of the map.
1012 *	-- If a map entry is a share map, then we hold both locks, in
1013 *	   hierarchical order.
1014 *
1015 * Synchronization Notes:
1016 *	1) If a vm_map_swapin() call happens while a swapout is in progress, it
1017 *	will block on the map lock and proceed when swapout is through.
1018 *	2) A vm_map_reference() call at this time is illegal, and will
1019 *	cause a panic.  vm_map_reference() is only allowed on resident
1020 *	maps, since it refuses to block.
1021 *	3) A vm_map_swapin() call during a swapin will block, and
1022 *	proceed when the first swapin is done, turning into a nop.
1023 *	This is the reason the res_count is not incremented until
1024 *	after the swapin is complete.
1025 *	4) There is a timing hole after the checks of the res_count, before
1026 *	the map lock is taken, during which a swapin may get the lock
1027 *	before a swapout about to happen.  If this happens, the swapin
1028 *	will detect the state and increment the reference count, causing
1029 *	the swapout to be a nop, thereby delaying it until a later
1030 *	vm_map_deallocate.  If the swapout gets the lock first, then
1031 *	the swapin will simply block until the swapout is done, and
1032 *	then proceed.
1033 *
1034 * Because vm_map_swapin() is potentially an expensive operation, it
1035 * should be used with caution.
1036 *
1037 * Invariants:
1038 *	1) A map with a residence count of zero is either swapped, or
1039 *	   being swapped.
1040 *	2) A map with a non-zero residence count is either resident,
1041 *	   or being swapped in.
1042 */
1043
1044int vm_map_swap_enable = 1;
1045
1046void vm_map_swapin (vm_map_t map)
1047{
1048	register vm_map_entry_t entry;
1049
1050	if (!vm_map_swap_enable)	/* debug */
1051		return;
1052
1053	/*
1054	 * Map is locked
1055	 * First deal with various races.
1056	 */
1057	if (map->sw_state == MAP_SW_IN)
1058		/*
1059		 * we raced with swapout and won.  Returning will increment
1060		 * the res_count, turning the swapout into a nop.
1061		 */
1062		return;
1063
1064	/*
1065	 * The residence count must be zero.  If we raced with another
1066	 * swapin, the state would have been IN; if we raced with a
1067	 * swapout (after another competing swapin), we must have lost
1068	 * the race to get here (see above comment), in which case
1069	 * res_count is still 0.
1070	 */
1071	assert(map->res_count == 0);
1072
1073	/*
1074	 * There are no intermediate states of a map going out or
1075	 * coming in, since the map is locked during the transition.
1076	 */
1077	assert(map->sw_state == MAP_SW_OUT);
1078
1079	/*
1080	 * We now operate upon each map entry.  If the entry is a sub-
1081	 * or share-map, we call vm_map_res_reference upon it.
1082	 * If the entry is an object, we call vm_object_res_reference
1083	 * (this may iterate through the shadow chain).
1084	 * Note that we hold the map locked the entire time,
1085	 * even if we get back here via a recursive call in
1086	 * vm_map_res_reference.
1087	 */
1088	entry = vm_map_first_entry(map);
1089
1090	while (entry != vm_map_to_entry(map)) {
1091		if (entry->object.vm_object != VM_OBJECT_NULL) {
1092			if (entry->is_sub_map) {
1093				vm_map_t lmap = entry->object.sub_map;
1094				lck_mtx_lock(&lmap->s_lock);
1095				vm_map_res_reference(lmap);
1096				lck_mtx_unlock(&lmap->s_lock);
1097			} else {
1098				vm_object_t object = entry->object.vm_object;
1099				vm_object_lock(object);
1100				/*
1101				 * This call may iterate through the
1102				 * shadow chain.
1103				 */
1104				vm_object_res_reference(object);
1105				vm_object_unlock(object);
1106			}
1107		}
1108		entry = entry->vme_next;
1109	}
1110	assert(map->sw_state == MAP_SW_OUT);
1111	map->sw_state = MAP_SW_IN;
1112}
1113
1114void vm_map_swapout(vm_map_t map)
1115{
1116	register vm_map_entry_t entry;
1117
1118	/*
1119	 * Map is locked
1120	 * First deal with various races.
1121	 * If we raced with a swapin and lost, the residence count
1122	 * will have been incremented to 1, and we simply return.
1123	 */
1124	lck_mtx_lock(&map->s_lock);
1125	if (map->res_count != 0) {
1126		lck_mtx_unlock(&map->s_lock);
1127		return;
1128	}
1129	lck_mtx_unlock(&map->s_lock);
1130
1131	/*
1132	 * There are no intermediate states of a map going out or
1133	 * coming in, since the map is locked during the transition.
1134	 */
1135	assert(map->sw_state == MAP_SW_IN);
1136
1137	if (!vm_map_swap_enable)
1138		return;
1139
1140	/*
1141	 * We now operate upon each map entry.  If the entry is a sub-
1142	 * or share-map, we call vm_map_res_deallocate upon it.
1143	 * If the entry is an object, we call vm_object_res_deallocate
1144	 * (this may iterate through the shadow chain).
1145	 * Note that we hold the map locked the entire time,
1146	 * even if we get back here via a recursive call in
1147	 * vm_map_res_deallocate.
1148	 */
1149	entry = vm_map_first_entry(map);
1150
1151	while (entry != vm_map_to_entry(map)) {
1152		if (entry->object.vm_object != VM_OBJECT_NULL) {
1153			if (entry->is_sub_map) {
1154				vm_map_t lmap = entry->object.sub_map;
1155				lck_mtx_lock(&lmap->s_lock);
1156				vm_map_res_deallocate(lmap);
1157				lck_mtx_unlock(&lmap->s_lock);
1158			} else {
1159				vm_object_t object = entry->object.vm_object;
1160				vm_object_lock(object);
1161				/*
1162				 * This call may take a long time,
1163				 * since it could actively push
1164				 * out pages (if we implement it
1165				 * that way).
1166				 */
1167				vm_object_res_deallocate(object);
1168				vm_object_unlock(object);
1169			}
1170		}
1171		entry = entry->vme_next;
1172	}
1173	assert(map->sw_state == MAP_SW_IN);
1174	map->sw_state = MAP_SW_OUT;
1175}
1176
1177#endif	/* TASK_SWAPPER */
1178
1179/*
1180 *	vm_map_lookup_entry:	[ internal use only ]
1181 *
1182 *	Calls into the vm map store layer to find the map
1183 *	entry containing (or immediately preceding) the
1184 *	specified address in the given map; the entry is returned
1185 *	in the "entry" parameter.  The boolean
1186 *	result indicates whether the address is
1187 *	actually contained in the map.
1188 */
1189boolean_t
1190vm_map_lookup_entry(
1191	register vm_map_t		map,
1192	register vm_map_offset_t	address,
1193	vm_map_entry_t		*entry)		/* OUT */
1194{
1195	return ( vm_map_store_lookup_entry( map, address, entry ));
1196}
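/*
 * Typical caller pattern (illustrative only); the map must be held at
 * least for read across the lookup and any use of the returned entry:
 *
 *	vm_map_entry_t	entry;
 *
 *	vm_map_lock_read(map);
 *	if (vm_map_lookup_entry(map, addr, &entry)) {
 *		"addr" falls within "entry"
 *	} else {
 *		"entry" is the entry preceding "addr", or
 *		vm_map_to_entry(map) if "addr" precedes all entries
 *	}
 *	vm_map_unlock_read(map);
 */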
1197
1198/*
1199 *	Routine:	vm_map_find_space
1200 *	Purpose:
1201 *		Allocate a range in the specified virtual address map,
1202 *		returning the entry allocated for that range.
1203 *		Used by kmem_alloc, etc.
1204 *
1205 *		The map must NOT be locked. It will be returned locked
1206 *		on KERN_SUCCESS, unlocked on failure.
1207 *
1208 *		If an entry is allocated, the object/offset fields
1209 *		are initialized to zero.
1210 */
1211kern_return_t
1212vm_map_find_space(
1213	register vm_map_t	map,
1214	vm_map_offset_t		*address,	/* OUT */
1215	vm_map_size_t		size,
1216	vm_map_offset_t		mask,
1217	int			flags,
1218	vm_map_entry_t		*o_entry)	/* OUT */
1219{
1220	register vm_map_entry_t	entry, new_entry;
1221	register vm_map_offset_t	start;
1222	register vm_map_offset_t	end;
1223
1224	if (size == 0) {
1225		*address = 0;
1226		return KERN_INVALID_ARGUMENT;
1227	}
1228
1229	if (flags & VM_FLAGS_GUARD_AFTER) {
1230		/* account for the back guard page in the size */
1231		size += VM_MAP_PAGE_SIZE(map);
1232	}
1233
1234	new_entry = vm_map_entry_create(map, FALSE);
1235
1236	/*
1237	 *	Look for the first possible address; if there's already
1238	 *	something at this address, we have to start after it.
1239	 */
1240
1241	vm_map_lock(map);
1242
1243	if( map->disable_vmentry_reuse == TRUE) {
1244		VM_MAP_HIGHEST_ENTRY(map, entry, start);
1245	} else {
1246		assert(first_free_is_valid(map));
1247		if ((entry = map->first_free) == vm_map_to_entry(map))
1248			start = map->min_offset;
1249		else
1250			start = entry->vme_end;
1251	}
1252
1253	/*
1254	 *	In any case, the "entry" always precedes
1255	 *	the proposed new region throughout the loop:
1256	 */
1257
1258	while (TRUE) {
1259		register vm_map_entry_t	next;
1260
1261		/*
1262		 *	Find the end of the proposed new region.
1263		 *	Be sure we didn't go beyond the end, or
1264		 *	wrap around the address.
1265		 */
1266
1267		if (flags & VM_FLAGS_GUARD_BEFORE) {
1268			/* reserve space for the front guard page */
1269			start += VM_MAP_PAGE_SIZE(map);
1270		}
1271		end = ((start + mask) & ~mask);
1272
1273		if (end < start) {
1274			vm_map_entry_dispose(map, new_entry);
1275			vm_map_unlock(map);
1276			return(KERN_NO_SPACE);
1277		}
1278		start = end;
1279		end += size;
1280
1281		if ((end > map->max_offset) || (end < start)) {
1282			vm_map_entry_dispose(map, new_entry);
1283			vm_map_unlock(map);
1284			return(KERN_NO_SPACE);
1285		}
1286
1287		/*
1288		 *	If there are no more entries, we must win.
1289		 */
1290
1291		next = entry->vme_next;
1292		if (next == vm_map_to_entry(map))
1293			break;
1294
1295		/*
1296		 *	If there is another entry, it must be
1297		 *	after the end of the potential new region.
1298		 */
1299
1300		if (next->vme_start >= end)
1301			break;
1302
1303		/*
1304		 *	Didn't fit -- move to the next entry.
1305		 */
1306
1307		entry = next;
1308		start = entry->vme_end;
1309	}
1310
1311	/*
1312	 *	At this point,
1313	 *		"start" and "end" should define the endpoints of the
1314	 *			available new range, and
1315	 *		"entry" should refer to the region before the new
1316	 *			range, and
1317	 *
1318	 *		the map should be locked.
1319	 */
1320
1321	if (flags & VM_FLAGS_GUARD_BEFORE) {
1322		/* go back for the front guard page */
1323		start -= VM_MAP_PAGE_SIZE(map);
1324	}
1325	*address = start;
1326
1327	assert(start < end);
1328	new_entry->vme_start = start;
1329	new_entry->vme_end = end;
1330	assert(page_aligned(new_entry->vme_start));
1331	assert(page_aligned(new_entry->vme_end));
1332	assert(VM_MAP_PAGE_ALIGNED(new_entry->vme_start,
1333				   VM_MAP_PAGE_MASK(map)));
1334	assert(VM_MAP_PAGE_ALIGNED(new_entry->vme_end,
1335				   VM_MAP_PAGE_MASK(map)));
1336
1337	new_entry->is_shared = FALSE;
1338	new_entry->is_sub_map = FALSE;
1339	new_entry->use_pmap = TRUE;
1340	new_entry->object.vm_object = VM_OBJECT_NULL;
1341	new_entry->offset = (vm_object_offset_t) 0;
1342
1343	new_entry->needs_copy = FALSE;
1344
1345	new_entry->inheritance = VM_INHERIT_DEFAULT;
1346	new_entry->protection = VM_PROT_DEFAULT;
1347	new_entry->max_protection = VM_PROT_ALL;
1348	new_entry->behavior = VM_BEHAVIOR_DEFAULT;
1349	new_entry->wired_count = 0;
1350	new_entry->user_wired_count = 0;
1351
1352	new_entry->in_transition = FALSE;
1353	new_entry->needs_wakeup = FALSE;
1354	new_entry->no_cache = FALSE;
1355	new_entry->permanent = FALSE;
1356	new_entry->superpage_size = FALSE;
1357	if (VM_MAP_PAGE_SHIFT(map) != PAGE_SHIFT) {
1358		new_entry->map_aligned = TRUE;
1359	} else {
1360		new_entry->map_aligned = FALSE;
1361	}
1362
1363	new_entry->used_for_jit = 0;
1364
1365	new_entry->alias = 0;
1366	new_entry->zero_wired_pages = FALSE;
1367	new_entry->iokit_acct = FALSE;
1368
1369	VM_GET_FLAGS_ALIAS(flags, new_entry->alias);
1370
1371	/*
1372	 *	Insert the new entry into the list
1373	 */
1374
1375	vm_map_store_entry_link(map, entry, new_entry);
1376
1377	map->size += size;
1378
1379	/*
1380	 *	Update the lookup hint
1381	 */
1382	SAVE_HINT_MAP_WRITE(map, new_entry);
1383
1384	*o_entry = new_entry;
1385	return(KERN_SUCCESS);
1386}
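/*
 * Sketch of the kmem_alloc()-style calling sequence (illustrative
 * only): on KERN_SUCCESS the map is returned locked and "entry"
 * describes the reserved range with a null object, so the caller
 * installs its own object/offset and then unlocks the map.
 *
 *	vm_map_offset_t	addr;
 *	vm_map_entry_t	entry;
 *
 *	kr = vm_map_find_space(kernel_map, &addr, size, 0, 0, &entry);
 *	if (kr == KERN_SUCCESS) {
 *		entry->object.vm_object = object;
 *		entry->offset = (vm_object_offset_t) 0;
 *		vm_map_unlock(kernel_map);
 *	}
 */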
1387
1388int vm_map_pmap_enter_print = FALSE;
1389int vm_map_pmap_enter_enable = FALSE;
1390
1391/*
1392 *	Routine:	vm_map_pmap_enter [internal only]
1393 *
1394 *	Description:
1395 *		Force pages from the specified object to be entered into
1396 *		the pmap at the specified address if they are present.
1397 *		The scan ends as soon as a page is not found in the object.
1398 *
1399 *	Returns:
1400 *		Nothing.
1401 *
1402 *	In/out conditions:
1403 *		The source map should not be locked on entry.
1404 */
1405__unused static void
1406vm_map_pmap_enter(
1407	vm_map_t		map,
1408	register vm_map_offset_t 	addr,
1409	register vm_map_offset_t	end_addr,
1410	register vm_object_t 	object,
1411	vm_object_offset_t	offset,
1412	vm_prot_t		protection)
1413{
1414	int			type_of_fault;
1415	kern_return_t		kr;
1416
1417	if(map->pmap == 0)
1418		return;
1419
1420	while (addr < end_addr) {
1421		register vm_page_t	m;
1422
1423
1424		/*
1425   		 * TODO:
1426		 * From vm_map_enter(), we come into this function without the map
1427		 * lock held or the object lock held.
1428		 * We haven't taken a reference on the object either.
1429		 * We should do a proper lookup on the map to make sure
1430		 * that things are sane before we go locking objects that
1431		 * could have been deallocated from under us.
1432		 */
1433
1434		vm_object_lock(object);
1435
1436		m = vm_page_lookup(object, offset);
1437		/*
1438		 * ENCRYPTED SWAP:
1439		 * The user should never see encrypted data, so do not
1440		 * enter an encrypted page in the page table.
1441		 */
1442		if (m == VM_PAGE_NULL || m->busy || m->encrypted ||
1443		    m->fictitious ||
1444		    (m->unusual && ( m->error || m->restart || m->absent))) {
1445			vm_object_unlock(object);
1446			return;
1447		}
1448
1449		if (vm_map_pmap_enter_print) {
1450			printf("vm_map_pmap_enter:");
1451			printf("map: %p, addr: %llx, object: %p, offset: %llx\n",
1452			       map, (unsigned long long)addr, object, (unsigned long long)offset);
1453		}
1454		type_of_fault = DBG_CACHE_HIT_FAULT;
1455		kr = vm_fault_enter(m, map->pmap, addr, protection, protection,
1456				    VM_PAGE_WIRED(m), FALSE, FALSE, FALSE,
1457				    0, /* XXX need user tag / alias? */
1458				    0, /* alternate accounting? */
1459				    NULL,
1460				    &type_of_fault);
1461
1462		vm_object_unlock(object);
1463
1464		offset += PAGE_SIZE_64;
1465		addr += PAGE_SIZE;
1466	}
1467}
1468
1469boolean_t vm_map_pmap_is_empty(
1470	vm_map_t	map,
1471	vm_map_offset_t	start,
1472	vm_map_offset_t end);
1473boolean_t vm_map_pmap_is_empty(
1474	vm_map_t	map,
1475	vm_map_offset_t	start,
1476	vm_map_offset_t	end)
1477{
1478#ifdef MACHINE_PMAP_IS_EMPTY
1479	return pmap_is_empty(map->pmap, start, end);
1480#else 	/* MACHINE_PMAP_IS_EMPTY */
1481	vm_map_offset_t	offset;
1482	ppnum_t		phys_page;
1483
1484	if (map->pmap == NULL) {
1485		return TRUE;
1486	}
1487
1488	for (offset = start;
1489	     offset < end;
1490	     offset += PAGE_SIZE) {
1491		phys_page = pmap_find_phys(map->pmap, offset);
1492		if (phys_page) {
1493			kprintf("vm_map_pmap_is_empty(%p,0x%llx,0x%llx): "
1494				"page %d at 0x%llx\n",
1495				map, (long long)start, (long long)end,
1496				phys_page, (long long)offset);
1497			return FALSE;
1498		}
1499	}
1500	return TRUE;
1501#endif	/* MACHINE_PMAP_IS_EMPTY */
1502}
1503
1504#define MAX_TRIES_TO_GET_RANDOM_ADDRESS	1000
1505kern_return_t
1506vm_map_random_address_for_size(
1507	vm_map_t	map,
1508	vm_map_offset_t	*address,
1509	vm_map_size_t	size)
1510{
1511	kern_return_t	kr = KERN_SUCCESS;
1512	int		tries = 0;
1513	vm_map_offset_t	random_addr = 0;
1514	vm_map_offset_t hole_end;
1515
1516	vm_map_entry_t	next_entry = VM_MAP_ENTRY_NULL;
1517	vm_map_entry_t	prev_entry = VM_MAP_ENTRY_NULL;
1518	vm_map_size_t	vm_hole_size = 0;
1519	vm_map_size_t	addr_space_size;
1520
1521	addr_space_size = vm_map_max(map) - vm_map_min(map);
1522
1523	assert(page_aligned(size));
1524
1525	while (tries < MAX_TRIES_TO_GET_RANDOM_ADDRESS) {
1526		random_addr = ((vm_map_offset_t)random()) << PAGE_SHIFT;
1527		random_addr = vm_map_trunc_page(
1528			vm_map_min(map) +(random_addr % addr_space_size),
1529			VM_MAP_PAGE_MASK(map));
1530
1531		if (vm_map_lookup_entry(map, random_addr, &prev_entry) == FALSE) {
1532			if (prev_entry == vm_map_to_entry(map)) {
1533				next_entry = vm_map_first_entry(map);
1534			} else {
1535				next_entry = prev_entry->vme_next;
1536			}
1537			if (next_entry == vm_map_to_entry(map)) {
1538				hole_end = vm_map_max(map);
1539			} else {
1540				hole_end = next_entry->vme_start;
1541			}
1542			vm_hole_size = hole_end - random_addr;
1543			if (vm_hole_size >= size) {
1544				*address = random_addr;
1545				break;
1546			}
1547		}
1548		tries++;
1549	}
1550
1551	if (tries == MAX_TRIES_TO_GET_RANDOM_ADDRESS) {
1552		kr = KERN_NO_SPACE;
1553	}
1554	return kr;
1555}
1556
1557/*
1558 *	Routine:	vm_map_enter
1559 *
1560 *	Description:
1561 *		Allocate a range in the specified virtual address map.
1562 *		The resulting range will refer to memory defined by
1563 *		the given memory object and offset into that object.
1564 *
1565 *		Arguments are as defined in the vm_map call.
1566 */
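/*
 * Calling sketch (illustrative only): let the kernel choose the
 * address and back the range lazily with a null object, much as
 * mach_vm_allocate() does for anonymous memory:
 *
 *	vm_map_offset_t	addr = 0;
 *
 *	kr = vm_map_enter(map, &addr, size, (vm_map_offset_t) 0,
 *			  VM_FLAGS_ANYWHERE,
 *			  VM_OBJECT_NULL, (vm_object_offset_t) 0, FALSE,
 *			  VM_PROT_DEFAULT, VM_PROT_ALL,
 *			  VM_INHERIT_DEFAULT);
 */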
1567int _map_enter_debug = 0;
1568static unsigned int vm_map_enter_restore_successes = 0;
1569static unsigned int vm_map_enter_restore_failures = 0;
1570kern_return_t
1571vm_map_enter(
1572	vm_map_t		map,
1573	vm_map_offset_t		*address,	/* IN/OUT */
1574	vm_map_size_t		size,
1575	vm_map_offset_t		mask,
1576	int			flags,
1577	vm_object_t		object,
1578	vm_object_offset_t	offset,
1579	boolean_t		needs_copy,
1580	vm_prot_t		cur_protection,
1581	vm_prot_t		max_protection,
1582	vm_inherit_t		inheritance)
1583{
1584	vm_map_entry_t		entry, new_entry;
1585	vm_map_offset_t		start, tmp_start, tmp_offset;
1586	vm_map_offset_t		end, tmp_end;
1587	vm_map_offset_t		tmp2_start, tmp2_end;
1588	vm_map_offset_t		step;
1589	kern_return_t		result = KERN_SUCCESS;
1590	vm_map_t		zap_old_map = VM_MAP_NULL;
1591	vm_map_t		zap_new_map = VM_MAP_NULL;
1592	boolean_t		map_locked = FALSE;
1593	boolean_t		pmap_empty = TRUE;
1594	boolean_t		new_mapping_established = FALSE;
1595	boolean_t		keep_map_locked = ((flags & VM_FLAGS_KEEP_MAP_LOCKED) != 0);
1596	boolean_t		anywhere = ((flags & VM_FLAGS_ANYWHERE) != 0);
1597	boolean_t		purgable = ((flags & VM_FLAGS_PURGABLE) != 0);
1598	boolean_t		overwrite = ((flags & VM_FLAGS_OVERWRITE) != 0);
1599	boolean_t		no_cache = ((flags & VM_FLAGS_NO_CACHE) != 0);
1600	boolean_t		is_submap = ((flags & VM_FLAGS_SUBMAP) != 0);
1601	boolean_t		permanent = ((flags & VM_FLAGS_PERMANENT) != 0);
1602	boolean_t		entry_for_jit = ((flags & VM_FLAGS_MAP_JIT) != 0);
1603	boolean_t		iokit_acct = ((flags & VM_FLAGS_IOKIT_ACCT) != 0);
1604	unsigned int		superpage_size = ((flags & VM_FLAGS_SUPERPAGE_MASK) >> VM_FLAGS_SUPERPAGE_SHIFT);
1605	char			alias;
1606	vm_map_offset_t		effective_min_offset, effective_max_offset;
1607	kern_return_t		kr;
1608	boolean_t		clear_map_aligned = FALSE;
1609
1610	if (superpage_size) {
1611		switch (superpage_size) {
1612			/*
1613			 * Note that the current implementation only supports
1614			 * a single size for superpages, SUPERPAGE_SIZE, per
1615			 * architecture. As soon as more sizes are to be
1616			 * supported, SUPERPAGE_SIZE has to be replaced
1617			 * with a lookup of the size depending on superpage_size.
1618			 */
1619#ifdef __x86_64__
1620			case SUPERPAGE_SIZE_ANY:
1621				/* handle it like 2 MB and round up to page size */
1622				size = (size + 2*1024*1024 - 1) & ~(2*1024*1024 - 1);
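				/* SUPERPAGE_SIZE_ANY falls through to the 2 MB case */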
1623			case SUPERPAGE_SIZE_2MB:
1624				break;
1625#endif
1626			default:
1627				return KERN_INVALID_ARGUMENT;
1628		}
1629		mask = SUPERPAGE_SIZE-1;
1630		if (size & (SUPERPAGE_SIZE-1))
1631			return KERN_INVALID_ARGUMENT;
1632		inheritance = VM_INHERIT_NONE;	/* fork() children won't inherit superpages */
1633	}
1634
1635
1636
1637	if (is_submap) {
1638		if (purgable) {
1639			/* submaps can not be purgeable */
1640			return KERN_INVALID_ARGUMENT;
1641		}
1642		if (object == VM_OBJECT_NULL) {
1643			/* submaps can not be created lazily */
1644			return KERN_INVALID_ARGUMENT;
1645		}
1646	}
1647	if (flags & VM_FLAGS_ALREADY) {
1648		/*
1649		 * VM_FLAGS_ALREADY says that it's OK if the same mapping
1650		 * is already present.  For it to be meaningful, the requested
1651		 * mapping has to be at a fixed address (!VM_FLAGS_ANYWHERE) and
1652		 * we shouldn't try and remove what was mapped there first
1653		 * (!VM_FLAGS_OVERWRITE).
1654		 */
1655		if ((flags & VM_FLAGS_ANYWHERE) ||
1656		    (flags & VM_FLAGS_OVERWRITE)) {
1657			return KERN_INVALID_ARGUMENT;
1658		}
1659	}
1660
1661	effective_min_offset = map->min_offset;
1662
1663	if (flags & VM_FLAGS_BEYOND_MAX) {
1664		/*
1665		 * Allow an insertion beyond the map's max offset.
1666		 */
1667		if (vm_map_is_64bit(map))
1668			effective_max_offset = 0xFFFFFFFFFFFFF000ULL;
1669		else
1670			effective_max_offset = 0x00000000FFFFF000ULL;
1671	} else {
1672		effective_max_offset = map->max_offset;
1673	}
1674
1675	if (size == 0 ||
1676	    (offset & PAGE_MASK_64) != 0) {
1677		*address = 0;
1678		return KERN_INVALID_ARGUMENT;
1679	}
1680
1681	VM_GET_FLAGS_ALIAS(flags, alias);
1682
1683#define	RETURN(value)	{ result = value; goto BailOut; }
1684
1685	assert(page_aligned(*address));
1686	assert(page_aligned(size));
1687
1688	if (!VM_MAP_PAGE_ALIGNED(size, VM_MAP_PAGE_MASK(map))) {
1689		/*
1690		 * In most cases, the caller rounds the size up to the
1691		 * map's page size.
1692		 * If we get a size that is explicitly not map-aligned here,
1693		 * we'll have to respect the caller's wish and mark the
1694		 * mapping as "not map-aligned" to avoid tripping the
1695		 * map alignment checks later.
1696		 */
1697		clear_map_aligned = TRUE;
1698	}
1699	if (!anywhere &&
1700	    !VM_MAP_PAGE_ALIGNED(*address, VM_MAP_PAGE_MASK(map))) {
1701		/*
1702		 * We've been asked to map at a fixed address and that
1703		 * address is not aligned to the map's specific alignment.
1704		 * The caller should know what it's doing (i.e. most likely
1705		 * mapping some fragmented copy map, transferring memory from
1706		 * a VM map with a different alignment), so clear map_aligned
1707		 * for this new VM map entry and proceed.
1708		 */
1709		clear_map_aligned = TRUE;
1710	}
1711
1712	/*
1713	 * Only zero-fill objects are allowed to be purgable.
1714	 * LP64todo - limit purgable objects to 32-bits for now
1715	 */
1716	if (purgable &&
1717	    (offset != 0 ||
1718	     (object != VM_OBJECT_NULL &&
1719	      (object->vo_size != size ||
1720	       object->purgable == VM_PURGABLE_DENY))
1721	     || size > ANON_MAX_SIZE)) /* LP64todo: remove when dp capable */
1722		return KERN_INVALID_ARGUMENT;
1723
1724	if (!anywhere && overwrite) {
1725		/*
1726		 * Create a temporary VM map to hold the old mappings in the
1727		 * affected area while we create the new one.
1728		 * This avoids releasing the VM map lock in
1729		 * vm_map_entry_delete() and allows atomicity
1730		 * when we want to replace some mappings with a new one.
1731		 * It also allows us to restore the old VM mappings if the
1732		 * new mapping fails.
1733		 */
1734		zap_old_map = vm_map_create(PMAP_NULL,
1735					    *address,
1736					    *address + size,
1737					    map->hdr.entries_pageable);
1738		vm_map_set_page_shift(zap_old_map, VM_MAP_PAGE_SHIFT(map));
1739	}
1740
1741StartAgain: ;
1742
1743	start = *address;
1744
1745	if (anywhere) {
1746		vm_map_lock(map);
1747		map_locked = TRUE;
1748
1749		if (entry_for_jit) {
1750			if (map->jit_entry_exists) {
1751				result = KERN_INVALID_ARGUMENT;
1752				goto BailOut;
1753			}
1754			/*
1755			 * Get a random start address.
1756			 */
1757			result = vm_map_random_address_for_size(map, address, size);
1758			if (result != KERN_SUCCESS) {
1759				goto BailOut;
1760			}
1761			start = *address;
1762		}
1763
1764
1765		/*
1766		 *	Calculate the first possible address.
1767		 */
1768
1769		if (start < effective_min_offset)
1770			start = effective_min_offset;
1771		if (start > effective_max_offset)
1772			RETURN(KERN_NO_SPACE);
1773
1774		/*
1775		 *	Look for the first possible address;
1776		 *	if there's already something at this
1777		 *	address, we have to start after it.
1778		 */
1779
1780		if( map->disable_vmentry_reuse == TRUE) {
1781			VM_MAP_HIGHEST_ENTRY(map, entry, start);
1782		} else {
1783			assert(first_free_is_valid(map));
1784
1785			entry = map->first_free;
1786
1787			if (entry == vm_map_to_entry(map)) {
1788				entry = NULL;
1789			} else {
1790			       if (entry->vme_next == vm_map_to_entry(map)){
1791				       /*
1792					* Hole at the end of the map.
1793					*/
1794					entry = NULL;
1795			       } else {
1796					if (start < (entry->vme_next)->vme_start ) {
1797						start = entry->vme_end;
1798						start = vm_map_round_page(start,
1799									  VM_MAP_PAGE_MASK(map));
1800					} else {
1801						/*
1802						 * Need to do a lookup.
1803						 */
1804						entry = NULL;
1805					}
1806			       }
1807			}
1808
1809			if (entry == NULL) {
1810				vm_map_entry_t	tmp_entry;
1811				if (vm_map_lookup_entry(map, start, &tmp_entry)) {
1812					assert(!entry_for_jit);
1813					start = tmp_entry->vme_end;
1814					start = vm_map_round_page(start,
1815								  VM_MAP_PAGE_MASK(map));
1816				}
1817				entry = tmp_entry;
1818			}
1819		}
1820
1821		/*
1822		 *	In any case, the "entry" always precedes
1823		 *	the proposed new region throughout the
1824		 *	loop:
1825		 */
1826
1827		while (TRUE) {
1828			register vm_map_entry_t	next;
1829
1830			/*
1831			 *	Find the end of the proposed new region.
1832			 *	Be sure we didn't go beyond the end, or
1833			 *	wrap around the address.
1834			 */
1835
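			/*
			 * Note: "end" temporarily holds the candidate start
			 * address, aligned first to the caller's mask and
			 * then to the map's page size.
			 */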
1836			end = ((start + mask) & ~mask);
1837			end = vm_map_round_page(end,
1838						VM_MAP_PAGE_MASK(map));
1839			if (end < start)
1840				RETURN(KERN_NO_SPACE);
1841			start = end;
1842			assert(VM_MAP_PAGE_ALIGNED(start,
1843						   VM_MAP_PAGE_MASK(map)));
1844			end += size;
1845
1846			if ((end > effective_max_offset) || (end < start)) {
1847				if (map->wait_for_space) {
1848					assert(!keep_map_locked);
1849					if (size <= (effective_max_offset -
1850						     effective_min_offset)) {
1851						assert_wait((event_t)map,
1852							    THREAD_ABORTSAFE);
1853						vm_map_unlock(map);
1854						map_locked = FALSE;
1855						thread_block(THREAD_CONTINUE_NULL);
1856						goto StartAgain;
1857					}
1858				}
1859				RETURN(KERN_NO_SPACE);
1860			}
1861
1862			/*
1863			 *	If there are no more entries, we must win.
1864			 */
1865
1866			next = entry->vme_next;
1867			if (next == vm_map_to_entry(map))
1868				break;
1869
1870			/*
1871			 *	If there is another entry, it must be
1872			 *	after the end of the potential new region.
1873			 */
1874
1875			if (next->vme_start >= end)
1876				break;
1877
1878			/*
1879			 *	Didn't fit -- move to the next entry.
1880			 */
1881
1882			entry = next;
1883			start = entry->vme_end;
1884			start = vm_map_round_page(start,
1885						  VM_MAP_PAGE_MASK(map));
1886		}
1887		*address = start;
1888		assert(VM_MAP_PAGE_ALIGNED(*address,
1889					   VM_MAP_PAGE_MASK(map)));
1890	} else {
1891		/*
1892		 *	Verify that:
1893		 *		the address doesn't itself violate
1894		 *		the mask requirement.
1895		 */
1896
1897		vm_map_lock(map);
1898		map_locked = TRUE;
1899		if ((start & mask) != 0)
1900			RETURN(KERN_NO_SPACE);
1901
1902		/*
1903		 *	...	the address is within bounds
1904		 */
1905
1906		end = start + size;
1907
1908		if ((start < effective_min_offset) ||
1909		    (end > effective_max_offset) ||
1910		    (start >= end)) {
1911			RETURN(KERN_INVALID_ADDRESS);
1912		}
1913
1914		if (overwrite && zap_old_map != VM_MAP_NULL) {
1915			/*
1916			 * Fixed mapping and "overwrite" flag: attempt to
1917			 * remove all existing mappings in the specified
1918			 * address range, saving them in our "zap_old_map".
1919			 */
1920			(void) vm_map_delete(map, start, end,
1921					     (VM_MAP_REMOVE_SAVE_ENTRIES |
1922					      VM_MAP_REMOVE_NO_MAP_ALIGN),
1923					     zap_old_map);
1924		}
1925
1926		/*
1927		 *	...	the starting address isn't allocated
1928		 */
1929
1930		if (vm_map_lookup_entry(map, start, &entry)) {
1931			if (! (flags & VM_FLAGS_ALREADY)) {
1932				RETURN(KERN_NO_SPACE);
1933			}
1934			/*
1935			 * Check if what's already there is what we want.
1936			 */
1937			tmp_start = start;
1938			tmp_offset = offset;
			if (entry->vme_start < start) {
				tmp_start -= start - entry->vme_start;
				tmp_offset -= start - entry->vme_start;
			}
1944			for (; entry->vme_start < end;
1945			     entry = entry->vme_next) {
1946				/*
1947				 * Check if the mapping's attributes
1948				 * match the existing map entry.
1949				 */
1950				if (entry == vm_map_to_entry(map) ||
1951				    entry->vme_start != tmp_start ||
1952				    entry->is_sub_map != is_submap ||
1953				    entry->offset != tmp_offset ||
1954				    entry->needs_copy != needs_copy ||
1955				    entry->protection != cur_protection ||
1956				    entry->max_protection != max_protection ||
1957				    entry->inheritance != inheritance ||
1958				    entry->iokit_acct != iokit_acct ||
1959				    entry->alias != alias) {
1960					/* not the same mapping ! */
1961					RETURN(KERN_NO_SPACE);
1962				}
1963				/*
1964				 * Check if the same object is being mapped.
1965				 */
1966				if (is_submap) {
1967					if (entry->object.sub_map !=
1968					    (vm_map_t) object) {
1969						/* not the same submap */
1970						RETURN(KERN_NO_SPACE);
1971					}
1972				} else {
1973					if (entry->object.vm_object != object) {
1974						/* not the same VM object... */
1975						vm_object_t obj2;
1976
1977						obj2 = entry->object.vm_object;
1978						if ((obj2 == VM_OBJECT_NULL ||
1979						     obj2->internal) &&
1980						    (object == VM_OBJECT_NULL ||
1981						     object->internal)) {
1982							/*
1983							 * ... but both are
1984							 * anonymous memory,
1985							 * so equivalent.
1986							 */
1987						} else {
1988							RETURN(KERN_NO_SPACE);
1989						}
1990					}
1991				}
1992
1993				tmp_offset += entry->vme_end - entry->vme_start;
1994				tmp_start += entry->vme_end - entry->vme_start;
1995				if (entry->vme_end >= end) {
1996					/* reached the end of our mapping */
1997					break;
1998				}
1999			}
2000			/* it all matches:  let's use what's already there ! */
2001			RETURN(KERN_MEMORY_PRESENT);
2002		}
2003
2004		/*
2005		 *	...	the next region doesn't overlap the
2006		 *		end point.
2007		 */
2008
2009		if ((entry->vme_next != vm_map_to_entry(map)) &&
2010		    (entry->vme_next->vme_start < end))
2011			RETURN(KERN_NO_SPACE);
2012	}
2013
2014	/*
2015	 *	At this point,
2016	 *		"start" and "end" should define the endpoints of the
2017	 *			available new range, and
2018	 *		"entry" should refer to the region before the new
2019	 *			range, and
2020	 *
2021	 *		the map should be locked.
2022	 */
2023
2024	/*
2025	 *	See whether we can avoid creating a new entry (and object) by
2026	 *	extending one of our neighbors.  [So far, we only attempt to
2027	 *	extend from below.]  Note that we can never extend/join
	 *	purgeable objects because they need to remain distinct
2029	 *	entities in order to implement their "volatile object"
2030	 *	semantics.
2031	 */
2032
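	/*
	 * Purgeable and JIT mappings are never coalesced with a neighboring
	 * entry; if no object was provided, allocate a dedicated one here.
	 */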
2033	if (purgable || entry_for_jit) {
2034		if (object == VM_OBJECT_NULL) {
2035			object = vm_object_allocate(size);
2036			object->copy_strategy = MEMORY_OBJECT_COPY_NONE;
2037			object->true_share = TRUE;
2038			if (purgable) {
2039				task_t owner;
2040				object->purgable = VM_PURGABLE_NONVOLATILE;
2041				if (map->pmap == kernel_pmap) {
2042					/*
2043					 * Purgeable mappings made in a kernel
2044					 * map are "owned" by the kernel itself
2045					 * rather than the current user task
2046					 * because they're likely to be used by
2047					 * more than this user task (see
2048					 * execargs_purgeable_allocate(), for
2049					 * example).
2050					 */
2051					owner = kernel_task;
2052				} else {
2053					owner = current_task();
2054				}
2055				assert(object->vo_purgeable_owner == NULL);
2056				assert(object->resident_page_count == 0);
2057				assert(object->wired_page_count == 0);
2058				vm_object_lock(object);
2059				vm_purgeable_nonvolatile_enqueue(object, owner);
2060				vm_object_unlock(object);
2061			}
2062			offset = (vm_object_offset_t)0;
2063		}
2064	} else if ((is_submap == FALSE) &&
2065		   (object == VM_OBJECT_NULL) &&
2066		   (entry != vm_map_to_entry(map)) &&
2067		   (entry->vme_end == start) &&
2068		   (!entry->is_shared) &&
2069		   (!entry->is_sub_map) &&
2070		   (!entry->in_transition) &&
2071		   (!entry->needs_wakeup) &&
2072		   (entry->behavior == VM_BEHAVIOR_DEFAULT) &&
2073		   (entry->protection == cur_protection) &&
2074		   (entry->max_protection == max_protection) &&
2075		   (entry->inheritance == inheritance) &&
2076		   ((alias == VM_MEMORY_REALLOC) || (entry->alias == alias)) &&
2077		   (entry->no_cache == no_cache) &&
2078		   (entry->permanent == permanent) &&
2079		   (!entry->superpage_size && !superpage_size) &&
2080		   /*
2081		    * No coalescing if not map-aligned, to avoid propagating
2082		    * that condition any further than needed:
2083		    */
2084		   (!entry->map_aligned || !clear_map_aligned) &&
2085		   (!entry->zero_wired_pages) &&
2086		   (!entry->used_for_jit && !entry_for_jit) &&
2087		   (entry->iokit_acct == iokit_acct) &&
2088
2089		   ((entry->vme_end - entry->vme_start) + size <=
2090		    (alias == VM_MEMORY_REALLOC ?
2091		     ANON_CHUNK_SIZE :
2092		     NO_COALESCE_LIMIT)) &&
2093
2094		   (entry->wired_count == 0)) { /* implies user_wired_count == 0 */
2095		if (vm_object_coalesce(entry->object.vm_object,
2096				       VM_OBJECT_NULL,
2097				       entry->offset,
2098				       (vm_object_offset_t) 0,
2099				       (vm_map_size_t)(entry->vme_end - entry->vme_start),
2100				       (vm_map_size_t)(end - entry->vme_end))) {
2101
2102			/*
2103			 *	Coalesced the two objects - can extend
2104			 *	the previous map entry to include the
2105			 *	new range.
2106			 */
2107			map->size += (end - entry->vme_end);
2108			assert(entry->vme_start < end);
2109			assert(VM_MAP_PAGE_ALIGNED(end,
2110						   VM_MAP_PAGE_MASK(map)));
2111			entry->vme_end = end;
2112			vm_map_store_update_first_free(map, map->first_free);
2113			new_mapping_established = TRUE;
2114			RETURN(KERN_SUCCESS);
2115		}
2116	}
2117
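	/*
	 * For superpage mappings, create one map entry (and one object) per
	 * superpage; otherwise a single iteration covers the whole range,
	 * possibly chunked further by the inner loop below.
	 */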
2118	step = superpage_size ? SUPERPAGE_SIZE : (end - start);
2119	new_entry = NULL;
2120
2121	for (tmp2_start = start; tmp2_start<end; tmp2_start += step) {
2122		tmp2_end = tmp2_start + step;
2123		/*
2124		 *	Create a new entry
2125		 *	LP64todo - for now, we can only allocate 4GB internal objects
2126		 *	because the default pager can't page bigger ones.  Remove this
2127		 *	when it can.
2128		 *
2129		 * XXX FBDP
2130		 * The reserved "page zero" in each process's address space can
2131		 * be arbitrarily large.  Splitting it into separate 4GB objects and
2132		 * therefore different VM map entries serves no purpose and just
2133		 * slows down operations on the VM map, so let's not split the
2134		 * allocation into 4GB chunks if the max protection is NONE.  That
2135		 * memory should never be accessible, so it will never get to the
2136		 * default pager.
2137		 */
2138		tmp_start = tmp2_start;
2139		if (object == VM_OBJECT_NULL &&
2140		    size > (vm_map_size_t)ANON_CHUNK_SIZE &&
2141		    max_protection != VM_PROT_NONE &&
2142		    superpage_size == 0)
2143			tmp_end = tmp_start + (vm_map_size_t)ANON_CHUNK_SIZE;
2144		else
2145			tmp_end = tmp2_end;
2146		do {
2147			new_entry = vm_map_entry_insert(map, entry, tmp_start, tmp_end,
2148							object,	offset, needs_copy,
2149							FALSE, FALSE,
2150							cur_protection, max_protection,
2151							VM_BEHAVIOR_DEFAULT,
2152							(entry_for_jit)? VM_INHERIT_NONE: inheritance,
2153							0, no_cache,
2154							permanent,
2155							superpage_size,
2156							clear_map_aligned,
2157							is_submap);
2158			new_entry->alias = alias;
2159			if (entry_for_jit){
2160				if (!(map->jit_entry_exists)){
2161					new_entry->used_for_jit = TRUE;
2162					map->jit_entry_exists = TRUE;
2163				}
2164			}
2165
2166			assert(!new_entry->iokit_acct);
2167			if (!is_submap &&
2168			    object != VM_OBJECT_NULL &&
2169			    object->purgable != VM_PURGABLE_DENY) {
2170				assert(new_entry->use_pmap);
2171				assert(!new_entry->iokit_acct);
2172				/*
2173				 * Turn off pmap accounting since
2174				 * purgeable objects have their
2175				 * own ledgers.
2176				 */
2177				new_entry->use_pmap = FALSE;
2178			} else if (!is_submap &&
2179				   iokit_acct) {
2180				/* alternate accounting */
2181				assert(!new_entry->iokit_acct);
2182				assert(new_entry->use_pmap);
2183				new_entry->iokit_acct = TRUE;
2184				new_entry->use_pmap = FALSE;
2185				vm_map_iokit_mapped_region(
2186					map,
2187					(new_entry->vme_end -
2188					 new_entry->vme_start));
2189			} else if (!is_submap) {
2190				assert(!new_entry->iokit_acct);
2191				assert(new_entry->use_pmap);
2192			}
2193
2194			if (is_submap) {
2195				vm_map_t	submap;
2196				boolean_t	submap_is_64bit;
2197				boolean_t	use_pmap;
2198
2199				assert(new_entry->is_sub_map);
2200				assert(!new_entry->use_pmap);
2201				assert(!new_entry->iokit_acct);
2202				submap = (vm_map_t) object;
2203				submap_is_64bit = vm_map_is_64bit(submap);
2204				use_pmap = (alias == VM_MEMORY_SHARED_PMAP);
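				/*
				 * Only mappings tagged VM_MEMORY_SHARED_PMAP
				 * attempt to nest the submap's pmap into this
				 * map's pmap.
				 */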
2205#ifndef NO_NESTED_PMAP
2206				if (use_pmap && submap->pmap == NULL) {
2207					ledger_t ledger = map->pmap->ledger;
2208					/* we need a sub pmap to nest... */
2209					submap->pmap = pmap_create(ledger, 0,
2210					    submap_is_64bit);
2211					if (submap->pmap == NULL) {
2212						/* let's proceed without nesting... */
2213					}
2214				}
2215				if (use_pmap && submap->pmap != NULL) {
2216					kr = pmap_nest(map->pmap,
2217						       submap->pmap,
2218						       tmp_start,
2219						       tmp_start,
2220						       tmp_end - tmp_start);
2221					if (kr != KERN_SUCCESS) {
2222						printf("vm_map_enter: "
2223						       "pmap_nest(0x%llx,0x%llx) "
2224						       "error 0x%x\n",
2225						       (long long)tmp_start,
2226						       (long long)tmp_end,
2227						       kr);
2228					} else {
2229						/* we're now nested ! */
2230						new_entry->use_pmap = TRUE;
2231						pmap_empty = FALSE;
2232					}
2233				}
2234#endif /* NO_NESTED_PMAP */
2235			}
2236			entry = new_entry;
2237
2238			if (superpage_size) {
2239				vm_page_t pages, m;
2240				vm_object_t sp_object;
2241
2242				entry->offset = 0;
2243
2244				/* allocate one superpage */
2245				kr = cpm_allocate(SUPERPAGE_SIZE, &pages, 0, SUPERPAGE_NBASEPAGES-1, TRUE, 0);
2246				if (kr != KERN_SUCCESS) {
2247					new_mapping_established = TRUE; /* will cause deallocation of whole range */
2248					RETURN(kr);
2249				}
2250
2251				/* create one vm_object per superpage */
2252				sp_object = vm_object_allocate((vm_map_size_t)(entry->vme_end - entry->vme_start));
2253				sp_object->phys_contiguous = TRUE;
2254				sp_object->vo_shadow_offset = (vm_object_offset_t)pages->phys_page*PAGE_SIZE;
2255				entry->object.vm_object = sp_object;
2256				assert(entry->use_pmap);
2257
2258				/* enter the base pages into the object */
2259				vm_object_lock(sp_object);
2260				for (offset = 0; offset < SUPERPAGE_SIZE; offset += PAGE_SIZE) {
2261					m = pages;
2262					pmap_zero_page(m->phys_page);
2263					pages = NEXT_PAGE(m);
2264					*(NEXT_PAGE_PTR(m)) = VM_PAGE_NULL;
2265					vm_page_insert(m, sp_object, offset);
2266				}
2267				vm_object_unlock(sp_object);
2268			}
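			/*
			 * Advance to the next ANON_CHUNK_SIZE chunk, if any.
			 * Note that "tmp_start = tmp_end" below is an
			 * assignment evaluated as part of the loop condition.
			 */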
2269		} while (tmp_end != tmp2_end &&
2270			 (tmp_start = tmp_end) &&
2271			 (tmp_end = (tmp2_end - tmp_end > (vm_map_size_t)ANON_CHUNK_SIZE) ?
2272			  tmp_end + (vm_map_size_t)ANON_CHUNK_SIZE : tmp2_end));
2273	}
2274
2275	new_mapping_established = TRUE;
2276
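	/*
	 * Common exit path for both the success and error cases above;
	 * the map is expected to still be locked at this point.
	 */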
2277BailOut:
2278	assert(map_locked == TRUE);
2279
2280	if (result == KERN_SUCCESS) {
2281		vm_prot_t pager_prot;
2282		memory_object_t pager;
2283
2284#if DEBUG
2285		if (pmap_empty &&
2286		    !(flags & VM_FLAGS_NO_PMAP_CHECK)) {
2287			assert(vm_map_pmap_is_empty(map,
2288						    *address,
2289						    *address+size));
2290		}
2291#endif /* DEBUG */
2292
2293		/*
2294		 * For "named" VM objects, let the pager know that the
2295		 * memory object is being mapped.  Some pagers need to keep
2296		 * track of this, to know when they can reclaim the memory
2297		 * object, for example.
2298		 * VM calls memory_object_map() for each mapping (specifying
2299		 * the protection of each mapping) and calls
2300		 * memory_object_last_unmap() when all the mappings are gone.
2301		 */
2302		pager_prot = max_protection;
2303		if (needs_copy) {
2304			/*
2305			 * Copy-On-Write mapping: won't modify
2306			 * the memory object.
2307			 */
2308			pager_prot &= ~VM_PROT_WRITE;
2309		}
2310		if (!is_submap &&
2311		    object != VM_OBJECT_NULL &&
2312		    object->named &&
2313		    object->pager != MEMORY_OBJECT_NULL) {
2314			vm_object_lock(object);
2315			pager = object->pager;
2316			if (object->named &&
2317			    pager != MEMORY_OBJECT_NULL) {
2318				assert(object->pager_ready);
2319				vm_object_mapping_wait(object, THREAD_UNINT);
2320				vm_object_mapping_begin(object);
2321				vm_object_unlock(object);
2322
2323				kr = memory_object_map(pager, pager_prot);
2324				assert(kr == KERN_SUCCESS);
2325
2326				vm_object_lock(object);
2327				vm_object_mapping_end(object);
2328			}
2329			vm_object_unlock(object);
2330		}
2331	}
2332
2333	assert(map_locked == TRUE);
2334
2335	if (!keep_map_locked) {
2336		vm_map_unlock(map);
2337		map_locked = FALSE;
2338	}
2339
2340	/*
2341	 * We can't hold the map lock if we enter this block.
2342	 */
2343
2344	if (result == KERN_SUCCESS) {
2345
		/*
		 * Wire down the new entry if the user requested that all new
		 * map entries be wired, or if this is a superpage mapping.
		 */
		if ((map->wiring_required) || (superpage_size)) {
2350			assert(!keep_map_locked);
2351			pmap_empty = FALSE; /* pmap won't be empty */
2352			kr = vm_map_wire(map, start, end,
2353					     new_entry->protection, TRUE);
2354			result = kr;
2355		}
2356
2357	}
2358
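	/*
	 * On failure, tear down any mappings we just established and, if we
	 * displaced pre-existing mappings into "zap_old_map", try to restore
	 * them.
	 */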
2359	if (result != KERN_SUCCESS) {
2360		if (new_mapping_established) {
2361			/*
2362			 * We have to get rid of the new mappings since we
2363			 * won't make them available to the user.
			 * Try to do that atomically, to minimize the risk
			 * that someone else creates new mappings in that range.
2366			 */
2367			zap_new_map = vm_map_create(PMAP_NULL,
2368						    *address,
2369						    *address + size,
2370						    map->hdr.entries_pageable);
2371			vm_map_set_page_shift(zap_new_map,
2372					      VM_MAP_PAGE_SHIFT(map));
2373			if (!map_locked) {
2374				vm_map_lock(map);
2375				map_locked = TRUE;
2376			}
2377			(void) vm_map_delete(map, *address, *address+size,
2378					     (VM_MAP_REMOVE_SAVE_ENTRIES |
2379					      VM_MAP_REMOVE_NO_MAP_ALIGN),
2380					     zap_new_map);
2381		}
2382		if (zap_old_map != VM_MAP_NULL &&
2383		    zap_old_map->hdr.nentries != 0) {
2384			vm_map_entry_t	entry1, entry2;
2385
2386			/*
2387			 * The new mapping failed.  Attempt to restore
2388			 * the old mappings, saved in the "zap_old_map".
2389			 */
2390			if (!map_locked) {
2391				vm_map_lock(map);
2392				map_locked = TRUE;
2393			}
2394
2395			/* first check if the coast is still clear */
2396			start = vm_map_first_entry(zap_old_map)->vme_start;
2397			end = vm_map_last_entry(zap_old_map)->vme_end;
2398			if (vm_map_lookup_entry(map, start, &entry1) ||
2399			    vm_map_lookup_entry(map, end, &entry2) ||
2400			    entry1 != entry2) {
2401				/*
2402				 * Part of that range has already been
2403				 * re-mapped:  we can't restore the old
2404				 * mappings...
2405				 */
2406				vm_map_enter_restore_failures++;
2407			} else {
2408				/*
2409				 * Transfer the saved map entries from
2410				 * "zap_old_map" to the original "map",
2411				 * inserting them all after "entry1".
2412				 */
2413				for (entry2 = vm_map_first_entry(zap_old_map);
2414				     entry2 != vm_map_to_entry(zap_old_map);
2415				     entry2 = vm_map_first_entry(zap_old_map)) {
2416					vm_map_size_t entry_size;
2417
2418					entry_size = (entry2->vme_end -
2419						      entry2->vme_start);
2420					vm_map_store_entry_unlink(zap_old_map,
2421							    entry2);
2422					zap_old_map->size -= entry_size;
2423					vm_map_store_entry_link(map, entry1, entry2);
2424					map->size += entry_size;
2425					entry1 = entry2;
2426				}
2427				if (map->wiring_required) {
2428					/*
2429					 * XXX TODO: we should rewire the
2430					 * old pages here...
2431					 */
2432				}
2433				vm_map_enter_restore_successes++;
2434			}
2435		}
2436	}
2437
2438	/*
2439	 * The caller is responsible for releasing the lock if it requested to
2440	 * keep the map locked.
2441	 */
2442	if (map_locked && !keep_map_locked) {
2443		vm_map_unlock(map);
2444	}
2445
2446	/*
2447	 * Get rid of the "zap_maps" and all the map entries that
2448	 * they may still contain.
2449	 */
2450	if (zap_old_map != VM_MAP_NULL) {
2451		vm_map_destroy(zap_old_map, VM_MAP_REMOVE_NO_PMAP_CLEANUP);
2452		zap_old_map = VM_MAP_NULL;
2453	}
2454	if (zap_new_map != VM_MAP_NULL) {
2455		vm_map_destroy(zap_new_map, VM_MAP_REMOVE_NO_PMAP_CLEANUP);
2456		zap_new_map = VM_MAP_NULL;
2457	}
2458
2459	return result;
2460
2461#undef	RETURN
2462}
2463
2464/*
2465 * Counters for the prefault optimization.
2466 */
2467int64_t vm_prefault_nb_pages = 0;
2468int64_t vm_prefault_nb_bailout = 0;
2469
2470static kern_return_t
2471vm_map_enter_mem_object_helper(
2472	vm_map_t		target_map,
2473	vm_map_offset_t		*address,
2474	vm_map_size_t		initial_size,
2475	vm_map_offset_t		mask,
2476	int			flags,
2477	ipc_port_t		port,
2478	vm_object_offset_t	offset,
2479	boolean_t		copy,
2480	vm_prot_t		cur_protection,
2481	vm_prot_t		max_protection,
2482	vm_inherit_t		inheritance,
2483	upl_page_list_ptr_t	page_list,
2484	unsigned int		page_list_count)
2485{
2486	vm_map_address_t	map_addr;
2487	vm_map_size_t		map_size;
2488	vm_object_t		object;
2489	vm_object_size_t	size;
2490	kern_return_t		result;
2491	boolean_t		mask_cur_protection, mask_max_protection;
2492	boolean_t		try_prefault = (page_list_count != 0);
2493	vm_map_offset_t		offset_in_mapping;
2494
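	/*
	 * VM_PROT_IS_MASK in "cur_protection" or "max_protection" asks us to
	 * intersect that protection with the named entry's protection below,
	 * rather than use the requested value as-is.
	 */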
2495	mask_cur_protection = cur_protection & VM_PROT_IS_MASK;
2496	mask_max_protection = max_protection & VM_PROT_IS_MASK;
2497	cur_protection &= ~VM_PROT_IS_MASK;
2498	max_protection &= ~VM_PROT_IS_MASK;
2499
2500	/*
2501	 * Check arguments for validity
2502	 */
2503	if ((target_map == VM_MAP_NULL) ||
2504	    (cur_protection & ~VM_PROT_ALL) ||
2505	    (max_protection & ~VM_PROT_ALL) ||
2506	    (inheritance > VM_INHERIT_LAST_VALID) ||
2507	    (try_prefault && (copy || !page_list)) ||
2508	    initial_size == 0)
2509		return KERN_INVALID_ARGUMENT;
2510
2511	map_addr = vm_map_trunc_page(*address,
2512				     VM_MAP_PAGE_MASK(target_map));
2513	map_size = vm_map_round_page(initial_size,
2514				     VM_MAP_PAGE_MASK(target_map));
2515	size = vm_object_round_page(initial_size);
2516
2517	/*
2518	 * Find the vm object (if any) corresponding to this port.
2519	 */
2520	if (!IP_VALID(port)) {
2521		object = VM_OBJECT_NULL;
2522		offset = 0;
2523		copy = FALSE;
2524	} else if (ip_kotype(port) == IKOT_NAMED_ENTRY) {
2525		vm_named_entry_t	named_entry;
2526
2527		named_entry = (vm_named_entry_t) port->ip_kobject;
2528
2529		if ((flags & VM_FLAGS_RETURN_DATA_ADDR) != 0) {
2530			offset += named_entry->data_offset;
2531		}
2532
		/* a few checks to make sure the user is obeying the rules */
2534		if (size == 0) {
2535			if (offset >= named_entry->size)
2536				return KERN_INVALID_RIGHT;
2537			size = named_entry->size - offset;
2538		}
2539		if (mask_max_protection) {
2540			max_protection &= named_entry->protection;
2541		}
2542		if (mask_cur_protection) {
2543			cur_protection &= named_entry->protection;
2544		}
2545		if ((named_entry->protection & max_protection) !=
2546		    max_protection)
2547			return KERN_INVALID_RIGHT;
2548		if ((named_entry->protection & cur_protection) !=
2549		    cur_protection)
2550			return KERN_INVALID_RIGHT;
2551		if (offset + size < offset) {
2552			/* overflow */
2553			return KERN_INVALID_ARGUMENT;
2554		}
2555		if (named_entry->size < (offset + size))
2556			return KERN_INVALID_ARGUMENT;
2557
2558		if (named_entry->is_copy) {
2559			/* for a vm_map_copy, we can only map it whole */
2560			if ((size != named_entry->size) &&
2561			    (vm_map_round_page(size,
2562					       VM_MAP_PAGE_MASK(target_map)) ==
2563			     named_entry->size)) {
2564				/* XXX FBDP use the rounded size... */
2565				size = vm_map_round_page(
2566					size,
2567					VM_MAP_PAGE_MASK(target_map));
2568			}
2569
2570			if (!(flags & VM_FLAGS_ANYWHERE) &&
2571			    (offset != 0 ||
2572			     size != named_entry->size)) {
2573				/*
2574				 * XXX for a mapping at a "fixed" address,
2575				 * we can't trim after mapping the whole
2576				 * memory entry, so reject a request for a
2577				 * partial mapping.
2578				 */
2579				return KERN_INVALID_ARGUMENT;
2580			}
2581		}
2582
		/*
		 * The caller's "offset" parameter is defined as the offset
		 * from the beginning of the named entry; convert it into an
		 * offset within the backing object.
		 */
		offset = offset + named_entry->offset;
2586
2587		if (! VM_MAP_PAGE_ALIGNED(size,
2588					  VM_MAP_PAGE_MASK(target_map))) {
2589			/*
2590			 * Let's not map more than requested;
2591			 * vm_map_enter() will handle this "not map-aligned"
2592			 * case.
2593			 */
2594			map_size = size;
2595		}
2596
2597		named_entry_lock(named_entry);
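	/*
	 * A named entry can be backed by a submap, a memory pager, a
	 * vm_map_copy or a VM object; handle each case separately.
	 */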
2598		if (named_entry->is_sub_map) {
2599			vm_map_t		submap;
2600
2601			if ((flags & VM_FLAGS_RETURN_DATA_ADDR) != 0) {
2602				panic("VM_FLAGS_RETURN_DATA_ADDR not expected for submap.");
2603			}
2604
2605			submap = named_entry->backing.map;
2606			vm_map_lock(submap);
2607			vm_map_reference(submap);
2608			vm_map_unlock(submap);
2609			named_entry_unlock(named_entry);
2610
2611			result = vm_map_enter(target_map,
2612					      &map_addr,
2613					      map_size,
2614					      mask,
2615					      flags | VM_FLAGS_SUBMAP,
2616					      (vm_object_t) submap,
2617					      offset,
2618					      copy,
2619					      cur_protection,
2620					      max_protection,
2621					      inheritance);
2622			if (result != KERN_SUCCESS) {
2623				vm_map_deallocate(submap);
2624			} else {
2625				/*
2626				 * No need to lock "submap" just to check its
2627				 * "mapped" flag: that flag is never reset
2628				 * once it's been set and if we race, we'll
2629				 * just end up setting it twice, which is OK.
2630				 */
2631				if (submap->mapped_in_other_pmaps == FALSE &&
2632				    vm_map_pmap(submap) != PMAP_NULL &&
2633				    vm_map_pmap(submap) !=
2634				    vm_map_pmap(target_map)) {
2635					/*
2636					 * This submap is being mapped in a map
2637					 * that uses a different pmap.
2638					 * Set its "mapped_in_other_pmaps" flag
2639					 * to indicate that we now need to
2640					 * remove mappings from all pmaps rather
2641					 * than just the submap's pmap.
2642					 */
2643					vm_map_lock(submap);
2644					submap->mapped_in_other_pmaps = TRUE;
2645					vm_map_unlock(submap);
2646				}
2647				*address = map_addr;
2648			}
2649			return result;
2650
2651		} else if (named_entry->is_pager) {
2652			unsigned int	access;
2653			vm_prot_t	protections;
2654			unsigned int	wimg_mode;
2655
2656			protections = named_entry->protection & VM_PROT_ALL;
2657			access = GET_MAP_MEM(named_entry->protection);
2658
2659			if ((flags & VM_FLAGS_RETURN_DATA_ADDR) != 0) {
				panic("VM_FLAGS_RETURN_DATA_ADDR not expected for a named entry backed by a pager.");
2661			}
2662
2663			object = vm_object_enter(named_entry->backing.pager,
2664						 named_entry->size,
2665						 named_entry->internal,
2666						 FALSE,
2667						 FALSE);
2668			if (object == VM_OBJECT_NULL) {
2669				named_entry_unlock(named_entry);
2670				return KERN_INVALID_OBJECT;
2671			}
2672
2673			/* JMM - drop reference on pager here */
2674
2675			/* create an extra ref for the named entry */
2676			vm_object_lock(object);
2677			vm_object_reference_locked(object);
2678			named_entry->backing.object = object;
2679			named_entry->is_pager = FALSE;
2680			named_entry_unlock(named_entry);
2681
2682			wimg_mode = object->wimg_bits;
2683
2684			if (access == MAP_MEM_IO) {
2685				wimg_mode = VM_WIMG_IO;
2686			} else if (access == MAP_MEM_COPYBACK) {
2687				wimg_mode = VM_WIMG_USE_DEFAULT;
2688			} else if (access == MAP_MEM_INNERWBACK) {
2689				wimg_mode = VM_WIMG_INNERWBACK;
2690			} else if (access == MAP_MEM_WTHRU) {
2691				wimg_mode = VM_WIMG_WTHRU;
2692			} else if (access == MAP_MEM_WCOMB) {
2693				wimg_mode = VM_WIMG_WCOMB;
2694			}
2695
2696			/* wait for object (if any) to be ready */
2697			if (!named_entry->internal) {
2698				while (!object->pager_ready) {
2699					vm_object_wait(
2700						object,
2701						VM_OBJECT_EVENT_PAGER_READY,
2702						THREAD_UNINT);
2703					vm_object_lock(object);
2704				}
2705			}
2706
2707			if (object->wimg_bits != wimg_mode)
2708				vm_object_change_wimg_mode(object, wimg_mode);
2709
2710#if VM_OBJECT_TRACKING_OP_TRUESHARE
2711			if (!object->true_share &&
2712			    vm_object_tracking_inited) {
2713				void *bt[VM_OBJECT_TRACKING_BTDEPTH];
2714				int num = 0;
2715
2716				num = OSBacktrace(bt,
2717						  VM_OBJECT_TRACKING_BTDEPTH);
2718				btlog_add_entry(vm_object_tracking_btlog,
2719						object,
2720						VM_OBJECT_TRACKING_OP_TRUESHARE,
2721						bt,
2722						num);
2723			}
2724#endif /* VM_OBJECT_TRACKING_OP_TRUESHARE */
2725
2726			object->true_share = TRUE;
2727
2728			if (object->copy_strategy == MEMORY_OBJECT_COPY_SYMMETRIC)
2729				object->copy_strategy = MEMORY_OBJECT_COPY_DELAY;
2730			vm_object_unlock(object);
2731
2732		} else if (named_entry->is_copy) {
2733			kern_return_t	kr;
2734			vm_map_copy_t	copy_map;
2735			vm_map_entry_t	copy_entry;
2736			vm_map_offset_t	copy_addr;
2737
2738			if (flags & ~(VM_FLAGS_FIXED |
2739				      VM_FLAGS_ANYWHERE |
2740				      VM_FLAGS_OVERWRITE |
2741				      VM_FLAGS_RETURN_DATA_ADDR)) {
2742				named_entry_unlock(named_entry);
2743				return KERN_INVALID_ARGUMENT;
2744			}
2745
2746			if ((flags & VM_FLAGS_RETURN_DATA_ADDR) != 0) {
2747				offset_in_mapping = offset - vm_object_trunc_page(offset);
2748				offset = vm_object_trunc_page(offset);
2749				map_size = vm_object_round_page(offset + offset_in_mapping + initial_size) - offset;
2750			}
2751
2752			copy_map = named_entry->backing.copy;
2753			assert(copy_map->type == VM_MAP_COPY_ENTRY_LIST);
2754			if (copy_map->type != VM_MAP_COPY_ENTRY_LIST) {
2755				/* unsupported type; should not happen */
2756				printf("vm_map_enter_mem_object: "
2757				       "memory_entry->backing.copy "
2758				       "unsupported type 0x%x\n",
2759				       copy_map->type);
2760				named_entry_unlock(named_entry);
2761				return KERN_INVALID_ARGUMENT;
2762			}
2763
2764			/* reserve a contiguous range */
2765			kr = vm_map_enter(target_map,
2766					  &map_addr,
2767					  /* map whole mem entry, trim later: */
2768					  named_entry->size,
2769					  mask,
2770					  flags & (VM_FLAGS_ANYWHERE |
2771						   VM_FLAGS_OVERWRITE |
2772						   VM_FLAGS_RETURN_DATA_ADDR),
2773					  VM_OBJECT_NULL,
2774					  0,
2775					  FALSE, /* copy */
2776					  cur_protection,
2777					  max_protection,
2778					  inheritance);
2779			if (kr != KERN_SUCCESS) {
2780				named_entry_unlock(named_entry);
2781				return kr;
2782			}
2783
2784			copy_addr = map_addr;
2785
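			/*
			 * Over-map each entry of the copy map, one at a
			 * time, on top of the range we just reserved.
			 */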
2786			for (copy_entry = vm_map_copy_first_entry(copy_map);
2787			     copy_entry != vm_map_copy_to_entry(copy_map);
2788			     copy_entry = copy_entry->vme_next) {
2789				int			remap_flags = 0;
2790				vm_map_t		copy_submap;
2791				vm_object_t		copy_object;
2792				vm_map_size_t		copy_size;
2793				vm_object_offset_t	copy_offset;
2794
2795				copy_offset = copy_entry->offset;
2796				copy_size = (copy_entry->vme_end -
2797					     copy_entry->vme_start);
2798
2799				/* sanity check */
2800				if ((copy_addr + copy_size) >
2801				    (map_addr +
2802				     named_entry->size /* XXX full size */ )) {
2803					/* over-mapping too much !? */
2804					kr = KERN_INVALID_ARGUMENT;
2805					/* abort */
2806					break;
2807				}
2808
2809				/* take a reference on the object */
2810				if (copy_entry->is_sub_map) {
2811					remap_flags |= VM_FLAGS_SUBMAP;
2812					copy_submap =
2813						copy_entry->object.sub_map;
2814					vm_map_lock(copy_submap);
2815					vm_map_reference(copy_submap);
2816					vm_map_unlock(copy_submap);
2817					copy_object = (vm_object_t) copy_submap;
2818				} else {
2819					copy_object =
2820						copy_entry->object.vm_object;
2821					vm_object_reference(copy_object);
2822				}
2823
2824				/* over-map the object into destination */
2825				remap_flags |= flags;
2826				remap_flags |= VM_FLAGS_FIXED;
2827				remap_flags |= VM_FLAGS_OVERWRITE;
2828				remap_flags &= ~VM_FLAGS_ANYWHERE;
2829				kr = vm_map_enter(target_map,
2830						  &copy_addr,
2831						  copy_size,
2832						  (vm_map_offset_t) 0,
2833						  remap_flags,
2834						  copy_object,
2835						  copy_offset,
2836						  copy,
2837						  cur_protection,
2838						  max_protection,
2839						  inheritance);
2840				if (kr != KERN_SUCCESS) {
2841					if (copy_entry->is_sub_map) {
2842						vm_map_deallocate(copy_submap);
2843					} else {
2844						vm_object_deallocate(copy_object);
2845					}
2846					/* abort */
2847					break;
2848				}
2849
2850				/* next mapping */
2851				copy_addr += copy_size;
2852			}
2853
2854			if (kr == KERN_SUCCESS) {
2855				if ((flags & VM_FLAGS_RETURN_DATA_ADDR) != 0) {
2856					*address = map_addr + offset_in_mapping;
2857				} else {
2858					*address = map_addr;
2859				}
2860
2861				if (offset) {
2862					/*
2863					 * Trim in front, from 0 to "offset".
2864					 */
2865					vm_map_remove(target_map,
2866						      map_addr,
2867						      map_addr + offset,
2868						      0);
2869					*address += offset;
2870				}
2871				if (offset + map_size < named_entry->size) {
2872					/*
2873					 * Trim in back, from
2874					 * "offset + map_size" to
2875					 * "named_entry->size".
2876					 */
2877					vm_map_remove(target_map,
2878						      (map_addr +
2879						       offset + map_size),
2880						      (map_addr +
2881						       named_entry->size),
2882						      0);
2883				}
2884			}
2885			named_entry_unlock(named_entry);
2886
2887			if (kr != KERN_SUCCESS) {
2888				if (! (flags & VM_FLAGS_OVERWRITE)) {
2889					/* deallocate the contiguous range */
2890					(void) vm_deallocate(target_map,
2891							     map_addr,
2892							     map_size);
2893				}
2894			}
2895
2896			return kr;
2897
		} else {
			/*
			 * This is the case where we are going to map an
			 * already mapped object.  If the object is not
			 * ready, it is internal.  An external object cannot
			 * be mapped until it is ready, so we can avoid the
			 * ready check in this case.
			 */
2905			if ((flags & VM_FLAGS_RETURN_DATA_ADDR) != 0) {
2906				offset_in_mapping = offset - vm_object_trunc_page(offset);
2907				offset = vm_object_trunc_page(offset);
2908				map_size = vm_object_round_page(offset + offset_in_mapping + initial_size) - offset;
2909			}
2910
2911			object = named_entry->backing.object;
2912			assert(object != VM_OBJECT_NULL);
2913			named_entry_unlock(named_entry);
2914			vm_object_reference(object);
2915		}
2916	} else if (ip_kotype(port) == IKOT_MEMORY_OBJECT) {
2917		/*
2918		 * JMM - This is temporary until we unify named entries
2919		 * and raw memory objects.
2920		 *
2921		 * Detected fake ip_kotype for a memory object.  In
2922		 * this case, the port isn't really a port at all, but
2923		 * instead is just a raw memory object.
2924		 */
2925		if ((flags & VM_FLAGS_RETURN_DATA_ADDR) != 0) {
2926			panic("VM_FLAGS_RETURN_DATA_ADDR not expected for raw memory object.");
2927		}
2928
2929		object = vm_object_enter((memory_object_t)port,
2930					 size, FALSE, FALSE, FALSE);
2931		if (object == VM_OBJECT_NULL)
2932			return KERN_INVALID_OBJECT;
2933
2934		/* wait for object (if any) to be ready */
2935		if (object != VM_OBJECT_NULL) {
2936			if (object == kernel_object) {
2937				printf("Warning: Attempt to map kernel object"
2938					" by a non-private kernel entity\n");
2939				return KERN_INVALID_OBJECT;
2940			}
2941			if (!object->pager_ready) {
2942				vm_object_lock(object);
2943
2944				while (!object->pager_ready) {
2945					vm_object_wait(object,
2946						       VM_OBJECT_EVENT_PAGER_READY,
2947						       THREAD_UNINT);
2948					vm_object_lock(object);
2949				}
2950				vm_object_unlock(object);
2951			}
2952		}
2953	} else {
2954		return KERN_INVALID_OBJECT;
2955	}
2956
2957	if (object != VM_OBJECT_NULL &&
2958	    object->named &&
2959	    object->pager != MEMORY_OBJECT_NULL &&
2960	    object->copy_strategy != MEMORY_OBJECT_COPY_NONE) {
2961		memory_object_t pager;
2962		vm_prot_t	pager_prot;
2963		kern_return_t	kr;
2964
2965		/*
2966		 * For "named" VM objects, let the pager know that the
2967		 * memory object is being mapped.  Some pagers need to keep
2968		 * track of this, to know when they can reclaim the memory
2969		 * object, for example.
2970		 * VM calls memory_object_map() for each mapping (specifying
2971		 * the protection of each mapping) and calls
2972		 * memory_object_last_unmap() when all the mappings are gone.
2973		 */
2974		pager_prot = max_protection;
2975		if (copy) {
2976			/*
2977			 * Copy-On-Write mapping: won't modify the
2978			 * memory object.
2979			 */
2980			pager_prot &= ~VM_PROT_WRITE;
2981		}
2982		vm_object_lock(object);
2983		pager = object->pager;
2984		if (object->named &&
2985		    pager != MEMORY_OBJECT_NULL &&
2986		    object->copy_strategy != MEMORY_OBJECT_COPY_NONE) {
2987			assert(object->pager_ready);
2988			vm_object_mapping_wait(object, THREAD_UNINT);
2989			vm_object_mapping_begin(object);
2990			vm_object_unlock(object);
2991
2992			kr = memory_object_map(pager, pager_prot);
2993			assert(kr == KERN_SUCCESS);
2994
2995			vm_object_lock(object);
2996			vm_object_mapping_end(object);
2997		}
2998		vm_object_unlock(object);
2999	}
3000
3001	/*
3002	 *	Perform the copy if requested
3003	 */
3004
3005	if (copy) {
3006		vm_object_t		new_object;
3007		vm_object_offset_t	new_offset;
3008
3009		result = vm_object_copy_strategically(object, offset, size,
3010						      &new_object, &new_offset,
3011						      &copy);
3012
3013
3014		if (result == KERN_MEMORY_RESTART_COPY) {
3015			boolean_t success;
3016			boolean_t src_needs_copy;
3017
3018			/*
3019			 * XXX
3020			 * We currently ignore src_needs_copy.
3021			 * This really is the issue of how to make
3022			 * MEMORY_OBJECT_COPY_SYMMETRIC safe for
3023			 * non-kernel users to use. Solution forthcoming.
3024			 * In the meantime, since we don't allow non-kernel
3025			 * memory managers to specify symmetric copy,
3026			 * we won't run into problems here.
3027			 */
3028			new_object = object;
3029			new_offset = offset;
3030			success = vm_object_copy_quickly(&new_object,
3031							 new_offset, size,
3032							 &src_needs_copy,
3033							 &copy);
3034			assert(success);
3035			result = KERN_SUCCESS;
3036		}
3037		/*
3038		 *	Throw away the reference to the
3039		 *	original object, as it won't be mapped.
3040		 */
3041
3042		vm_object_deallocate(object);
3043
3044		if (result != KERN_SUCCESS)
3045			return result;
3046
3047		object = new_object;
3048		offset = new_offset;
3049	}
3050
3051	/*
3052	 * If users want to try to prefault pages, the mapping and prefault
3053	 * needs to be atomic.
3054	 */
3055	if (try_prefault)
3056		flags |= VM_FLAGS_KEEP_MAP_LOCKED;
3057	result = vm_map_enter(target_map,
3058			      &map_addr, map_size,
3059			      (vm_map_offset_t)mask,
3060			      flags,
3061			      object, offset,
3062			      copy,
3063			      cur_protection, max_protection, inheritance);
3064	if (result != KERN_SUCCESS)
3065		vm_object_deallocate(object);
3066
3067	/*
3068	 * Try to prefault, and do not forget to release the vm map lock.
3069	 */
3070	if (result == KERN_SUCCESS && try_prefault) {
3071		mach_vm_address_t va = map_addr;
3072		kern_return_t kr = KERN_SUCCESS;
3073		unsigned int i = 0;
3074
3075		for (i = 0; i < page_list_count; ++i) {
3076			if (UPL_VALID_PAGE(page_list, i)) {
3077				/*
				 * If this call fails, stop trying to
				 * optimize: subsequent calls are likely to
				 * fail too.
				 *
				 * We do not report an error for such a
				 * failure, though; prefaulting is only an
				 * optimization, not something critical.
3085				 */
3086				kr = pmap_enter_options(target_map->pmap,
3087				                        va, UPL_PHYS_PAGE(page_list, i),
3088				                        cur_protection, VM_PROT_NONE,
3089				                        0, TRUE, PMAP_OPTIONS_NOWAIT, NULL);
3090				if (kr != KERN_SUCCESS) {
3091					OSIncrementAtomic64(&vm_prefault_nb_bailout);
3092					goto BailOut;
3093				}
3094				OSIncrementAtomic64(&vm_prefault_nb_pages);
3095			}
3096
3097			/* Next virtual address */
3098			va += PAGE_SIZE;
3099		}
3100BailOut:
3101		vm_map_unlock(target_map);
3102	}
3103
3104	if ((flags & VM_FLAGS_RETURN_DATA_ADDR) != 0) {
3105		*address = map_addr + offset_in_mapping;
3106	} else {
3107		*address = map_addr;
3108	}
3109	return result;
3110}
3111
3112kern_return_t
3113vm_map_enter_mem_object(
3114	vm_map_t		target_map,
3115	vm_map_offset_t		*address,
3116	vm_map_size_t		initial_size,
3117	vm_map_offset_t		mask,
3118	int			flags,
3119	ipc_port_t		port,
3120	vm_object_offset_t	offset,
3121	boolean_t		copy,
3122	vm_prot_t		cur_protection,
3123	vm_prot_t		max_protection,
3124	vm_inherit_t		inheritance)
3125{
3126	return vm_map_enter_mem_object_helper(target_map, address, initial_size, mask, flags,
3127	                                      port, offset, copy, cur_protection, max_protection,
3128	                                      inheritance, NULL, 0);
3129}
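
/*
 * Example (hypothetical caller, for illustration only): map a named memory
 * entry anywhere in "target_map" with default protection.  The variable
 * names below ("target_map", "size", "mem_entry_port") are assumptions,
 * not part of this file.
 *
 *	vm_map_offset_t	addr = 0;
 *	kern_return_t	kr;
 *
 *	kr = vm_map_enter_mem_object(target_map, &addr, size, 0,
 *				     VM_FLAGS_ANYWHERE, mem_entry_port, 0,
 *				     FALSE, VM_PROT_DEFAULT, VM_PROT_ALL,
 *				     VM_INHERIT_DEFAULT);
 *	if (kr != KERN_SUCCESS)
 *		return kr;
 */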
3130
3131kern_return_t
3132vm_map_enter_mem_object_prefault(
3133	vm_map_t		target_map,
3134	vm_map_offset_t		*address,
3135	vm_map_size_t		initial_size,
3136	vm_map_offset_t		mask,
3137	int			flags,
3138	ipc_port_t		port,
3139	vm_object_offset_t	offset,
3140	vm_prot_t		cur_protection,
3141	vm_prot_t		max_protection,
3142	upl_page_list_ptr_t	page_list,
3143	unsigned int		page_list_count)
3144{
3145	return vm_map_enter_mem_object_helper(target_map, address, initial_size, mask, flags,
3146	                                      port, offset, FALSE, cur_protection, max_protection,
3147	                                      VM_INHERIT_DEFAULT, page_list, page_list_count);
3148}
3149
3150
3151kern_return_t
3152vm_map_enter_mem_object_control(
3153	vm_map_t		target_map,
3154	vm_map_offset_t		*address,
3155	vm_map_size_t		initial_size,
3156	vm_map_offset_t		mask,
3157	int			flags,
3158	memory_object_control_t	control,
3159	vm_object_offset_t	offset,
3160	boolean_t		copy,
3161	vm_prot_t		cur_protection,
3162	vm_prot_t		max_protection,
3163	vm_inherit_t		inheritance)
3164{
3165	vm_map_address_t	map_addr;
3166	vm_map_size_t		map_size;
3167	vm_object_t		object;
3168	vm_object_size_t	size;
3169	kern_return_t		result;
3170	memory_object_t		pager;
3171	vm_prot_t		pager_prot;
3172	kern_return_t		kr;
3173
3174	/*
3175	 * Check arguments for validity
3176	 */
3177	if ((target_map == VM_MAP_NULL) ||
3178	    (cur_protection & ~VM_PROT_ALL) ||
3179	    (max_protection & ~VM_PROT_ALL) ||
3180	    (inheritance > VM_INHERIT_LAST_VALID) ||
3181	    initial_size == 0)
3182		return KERN_INVALID_ARGUMENT;
3183
3184	map_addr = vm_map_trunc_page(*address,
3185				     VM_MAP_PAGE_MASK(target_map));
3186	map_size = vm_map_round_page(initial_size,
3187				     VM_MAP_PAGE_MASK(target_map));
3188	size = vm_object_round_page(initial_size);
3189
3190	object = memory_object_control_to_vm_object(control);
3191
3192	if (object == VM_OBJECT_NULL)
3193		return KERN_INVALID_OBJECT;
3194
3195	if (object == kernel_object) {
3196		printf("Warning: Attempt to map kernel object"
3197		       " by a non-private kernel entity\n");
3198		return KERN_INVALID_OBJECT;
3199	}
3200
3201	vm_object_lock(object);
3202	object->ref_count++;
3203	vm_object_res_reference(object);
3204
3205	/*
3206	 * For "named" VM objects, let the pager know that the
3207	 * memory object is being mapped.  Some pagers need to keep
3208	 * track of this, to know when they can reclaim the memory
3209	 * object, for example.
3210	 * VM calls memory_object_map() for each mapping (specifying
3211	 * the protection of each mapping) and calls
3212	 * memory_object_last_unmap() when all the mappings are gone.
3213	 */
3214	pager_prot = max_protection;
3215	if (copy) {
3216		pager_prot &= ~VM_PROT_WRITE;
3217	}
3218	pager = object->pager;
3219	if (object->named &&
3220	    pager != MEMORY_OBJECT_NULL &&
3221	    object->copy_strategy != MEMORY_OBJECT_COPY_NONE) {
3222		assert(object->pager_ready);
3223		vm_object_mapping_wait(object, THREAD_UNINT);
3224		vm_object_mapping_begin(object);
3225		vm_object_unlock(object);
3226
3227		kr = memory_object_map(pager, pager_prot);
3228		assert(kr == KERN_SUCCESS);
3229
3230		vm_object_lock(object);
3231		vm_object_mapping_end(object);
3232	}
3233	vm_object_unlock(object);
3234
3235	/*
3236	 *	Perform the copy if requested
3237	 */
3238
3239	if (copy) {
3240		vm_object_t		new_object;
3241		vm_object_offset_t	new_offset;
3242
3243		result = vm_object_copy_strategically(object, offset, size,
3244						      &new_object, &new_offset,
3245						      &copy);
3246
3247
3248		if (result == KERN_MEMORY_RESTART_COPY) {
3249			boolean_t success;
3250			boolean_t src_needs_copy;
3251
3252			/*
3253			 * XXX
3254			 * We currently ignore src_needs_copy.
3255			 * This really is the issue of how to make
3256			 * MEMORY_OBJECT_COPY_SYMMETRIC safe for
3257			 * non-kernel users to use. Solution forthcoming.
3258			 * In the meantime, since we don't allow non-kernel
3259			 * memory managers to specify symmetric copy,
3260			 * we won't run into problems here.
3261			 */
3262			new_object = object;
3263			new_offset = offset;
3264			success = vm_object_copy_quickly(&new_object,
3265							 new_offset, size,
3266							 &src_needs_copy,
3267							 &copy);
3268			assert(success);
3269			result = KERN_SUCCESS;
3270		}
3271		/*
3272		 *	Throw away the reference to the
3273		 *	original object, as it won't be mapped.
3274		 */
3275
3276		vm_object_deallocate(object);
3277
3278		if (result != KERN_SUCCESS)
3279			return result;
3280
3281		object = new_object;
3282		offset = new_offset;
3283	}
3284
3285	result = vm_map_enter(target_map,
3286			      &map_addr, map_size,
3287			      (vm_map_offset_t)mask,
3288			      flags,
3289			      object, offset,
3290			      copy,
3291			      cur_protection, max_protection, inheritance);
3292	if (result != KERN_SUCCESS)
3293		vm_object_deallocate(object);
3294	*address = map_addr;
3295
3296	return result;
3297}
3298
3299
3300#if	VM_CPM
3301
3302#ifdef MACH_ASSERT
3303extern pmap_paddr_t	avail_start, avail_end;
3304#endif
3305
3306/*
3307 *	Allocate memory in the specified map, with the caveat that
3308 *	the memory is physically contiguous.  This call may fail
3309 *	if the system can't find sufficient contiguous memory.
3310 *	This call may cause or lead to heart-stopping amounts of
3311 *	paging activity.
3312 *
3313 *	Memory obtained from this call should be freed in the
3314 *	normal way, viz., via vm_deallocate.
3315 */
3316kern_return_t
3317vm_map_enter_cpm(
3318	vm_map_t		map,
3319	vm_map_offset_t	*addr,
3320	vm_map_size_t		size,
3321	int			flags)
3322{
3323	vm_object_t		cpm_obj;
3324	pmap_t			pmap;
3325	vm_page_t		m, pages;
3326	kern_return_t		kr;
3327	vm_map_offset_t		va, start, end, offset;
3328#if	MACH_ASSERT
3329	vm_map_offset_t		prev_addr = 0;
3330#endif	/* MACH_ASSERT */
3331
3332	boolean_t		anywhere = ((VM_FLAGS_ANYWHERE & flags) != 0);
3333
3334	if (size == 0) {
3335		*addr = 0;
3336		return KERN_SUCCESS;
3337	}
3338	if (anywhere)
3339		*addr = vm_map_min(map);
3340	else
3341		*addr = vm_map_trunc_page(*addr,
3342					  VM_MAP_PAGE_MASK(map));
3343	size = vm_map_round_page(size,
3344				 VM_MAP_PAGE_MASK(map));
3345
3346	/*
3347	 * LP64todo - cpm_allocate should probably allow
3348	 * allocations of >4GB, but not with the current
3349	 * algorithm, so just cast down the size for now.
3350	 */
3351	if (size > VM_MAX_ADDRESS)
3352		return KERN_RESOURCE_SHORTAGE;
3353	if ((kr = cpm_allocate(CAST_DOWN(vm_size_t, size),
3354			       &pages, 0, 0, TRUE, flags)) != KERN_SUCCESS)
3355		return kr;
3356
3357	cpm_obj = vm_object_allocate((vm_object_size_t)size);
3358	assert(cpm_obj != VM_OBJECT_NULL);
3359	assert(cpm_obj->internal);
3360	assert(cpm_obj->vo_size == (vm_object_size_t)size);
3361	assert(cpm_obj->can_persist == FALSE);
3362	assert(cpm_obj->pager_created == FALSE);
3363	assert(cpm_obj->pageout == FALSE);
3364	assert(cpm_obj->shadow == VM_OBJECT_NULL);
3365
3366	/*
3367	 *	Insert pages into object.
3368	 */
3369
3370	vm_object_lock(cpm_obj);
3371	for (offset = 0; offset < size; offset += PAGE_SIZE) {
3372		m = pages;
3373		pages = NEXT_PAGE(m);
3374		*(NEXT_PAGE_PTR(m)) = VM_PAGE_NULL;
3375
3376		assert(!m->gobbled);
3377		assert(!m->wanted);
3378		assert(!m->pageout);
3379		assert(!m->tabled);
3380		assert(VM_PAGE_WIRED(m));
3381		/*
3382		 * ENCRYPTED SWAP:
3383		 * "m" is not supposed to be pageable, so it
3384		 * should not be encrypted.  It wouldn't be safe
3385		 * to enter it in a new VM object while encrypted.
3386		 */
3387		ASSERT_PAGE_DECRYPTED(m);
3388		assert(m->busy);
3389		assert(m->phys_page>=(avail_start>>PAGE_SHIFT) && m->phys_page<=(avail_end>>PAGE_SHIFT));
3390
3391		m->busy = FALSE;
3392		vm_page_insert(m, cpm_obj, offset);
3393	}
3394	assert(cpm_obj->resident_page_count == size / PAGE_SIZE);
3395	vm_object_unlock(cpm_obj);
3396
3397	/*
3398	 *	Hang onto a reference on the object in case a
3399	 *	multi-threaded application for some reason decides
3400	 *	to deallocate the portion of the address space into
3401	 *	which we will insert this object.
3402	 *
3403	 *	Unfortunately, we must insert the object now before
3404	 *	we can talk to the pmap module about which addresses
3405	 *	must be wired down.  Hence, the race with a multi-
3406	 *	threaded app.
3407	 */
3408	vm_object_reference(cpm_obj);
3409
3410	/*
3411	 *	Insert object into map.
3412	 */
3413
3414	kr = vm_map_enter(
3415		map,
3416		addr,
3417		size,
3418		(vm_map_offset_t)0,
3419		flags,
3420		cpm_obj,
3421		(vm_object_offset_t)0,
3422		FALSE,
3423		VM_PROT_ALL,
3424		VM_PROT_ALL,
3425		VM_INHERIT_DEFAULT);
3426
3427	if (kr != KERN_SUCCESS) {
3428		/*
3429		 *	A CPM object doesn't have can_persist set,
3430		 *	so all we have to do is deallocate it to
3431		 *	free up these pages.
3432		 */
3433		assert(cpm_obj->pager_created == FALSE);
3434		assert(cpm_obj->can_persist == FALSE);
3435		assert(cpm_obj->pageout == FALSE);
3436		assert(cpm_obj->shadow == VM_OBJECT_NULL);
3437		vm_object_deallocate(cpm_obj); /* kill acquired ref */
3438		vm_object_deallocate(cpm_obj); /* kill creation ref */
3439	}
3440
3441	/*
3442	 *	Inform the physical mapping system that the
3443	 *	range of addresses may not fault, so that
3444	 *	page tables and such can be locked down as well.
3445	 */
3446	start = *addr;
3447	end = start + size;
3448	pmap = vm_map_pmap(map);
3449	pmap_pageable(pmap, start, end, FALSE);
3450
3451	/*
3452	 *	Enter each page into the pmap, to avoid faults.
3453	 *	Note that this loop could be coded more efficiently,
3454	 *	if the need arose, rather than looking up each page
3455	 *	again.
3456	 */
3457	for (offset = 0, va = start; offset < size;
3458	     va += PAGE_SIZE, offset += PAGE_SIZE) {
3459	        int type_of_fault;
3460
3461		vm_object_lock(cpm_obj);
3462		m = vm_page_lookup(cpm_obj, (vm_object_offset_t)offset);
3463		assert(m != VM_PAGE_NULL);
3464
3465		vm_page_zero_fill(m);
3466
3467		type_of_fault = DBG_ZERO_FILL_FAULT;
3468
3469		vm_fault_enter(m, pmap, va, VM_PROT_ALL, VM_PROT_WRITE,
3470			       VM_PAGE_WIRED(m), FALSE, FALSE, FALSE, 0, NULL,
3471			       &type_of_fault);
3472
3473		vm_object_unlock(cpm_obj);
3474	}
3475
3476#if	MACH_ASSERT
3477	/*
3478	 *	Verify ordering in address space.
3479	 */
3480	for (offset = 0; offset < size; offset += PAGE_SIZE) {
3481		vm_object_lock(cpm_obj);
3482		m = vm_page_lookup(cpm_obj, (vm_object_offset_t)offset);
3483		vm_object_unlock(cpm_obj);
3484		if (m == VM_PAGE_NULL)
3485			panic("vm_allocate_cpm:  obj %p off 0x%llx no page",
3486			      cpm_obj, (uint64_t)offset);
3487		assert(m->tabled);
3488		assert(!m->busy);
3489		assert(!m->wanted);
3490		assert(!m->fictitious);
3491		assert(!m->private);
3492		assert(!m->absent);
3493		assert(!m->error);
3494		assert(!m->cleaning);
3495		assert(!m->laundry);
3496		assert(!m->precious);
3497		assert(!m->clustered);
3498		if (offset != 0) {
3499			if (m->phys_page != prev_addr + 1) {
3500				printf("start 0x%llx end 0x%llx va 0x%llx\n",
3501				       (uint64_t)start, (uint64_t)end, (uint64_t)va);
3502				printf("obj %p off 0x%llx\n", cpm_obj, (uint64_t)offset);
3503				printf("m %p prev_address 0x%llx\n", m, (uint64_t)prev_addr);
3504				panic("vm_allocate_cpm:  pages not contig!");
3505			}
3506		}
3507		prev_addr = m->phys_page;
3508	}
3509#endif	/* MACH_ASSERT */
3510
3511	vm_object_deallocate(cpm_obj); /* kill extra ref */
3512
3513	return kr;
3514}
3515
3516
3517#else	/* VM_CPM */
3518
3519/*
3520 *	Interface is defined in all cases, but unless the kernel
3521 *	is built explicitly for this option, the interface does
3522 *	nothing.
3523 */
3524
3525kern_return_t
3526vm_map_enter_cpm(
3527	__unused vm_map_t	map,
3528	__unused vm_map_offset_t	*addr,
3529	__unused vm_map_size_t	size,
3530	__unused int		flags)
3531{
3532	return KERN_FAILURE;
3533}
3534#endif /* VM_CPM */
3535
3536/* Not used without nested pmaps */
3537#ifndef NO_NESTED_PMAP
3538/*
3539 * Clip and unnest a portion of a nested submap mapping.
3540 */
3541
3542
3543static void
3544vm_map_clip_unnest(
3545	vm_map_t	map,
3546	vm_map_entry_t	entry,
3547	vm_map_offset_t	start_unnest,
3548	vm_map_offset_t	end_unnest)
3549{
3550	vm_map_offset_t old_start_unnest = start_unnest;
3551	vm_map_offset_t old_end_unnest = end_unnest;
3552
3553	assert(entry->is_sub_map);
3554	assert(entry->object.sub_map != NULL);
3555	assert(entry->use_pmap);
3556
3557	/*
3558	 * Query the platform for the optimal unnest range.
3559	 * DRK: There's some duplication of effort here, since
3560	 * callers may have adjusted the range to some extent. This
3561	 * routine was introduced to support 1GiB subtree nesting
3562	 * for x86 platforms, which can also nest on 2MiB boundaries
3563	 * depending on size/alignment.
3564	 */
3565	if (pmap_adjust_unnest_parameters(map->pmap, &start_unnest, &end_unnest)) {
3566		log_unnest_badness(map, old_start_unnest, old_end_unnest);
3567	}
3568
3569	if (entry->vme_start > start_unnest ||
3570	    entry->vme_end < end_unnest) {
3571		panic("vm_map_clip_unnest(0x%llx,0x%llx): "
3572		      "bad nested entry: start=0x%llx end=0x%llx\n",
3573		      (long long)start_unnest, (long long)end_unnest,
3574		      (long long)entry->vme_start, (long long)entry->vme_end);
3575	}
3576
3577	if (start_unnest > entry->vme_start) {
3578		_vm_map_clip_start(&map->hdr,
3579				   entry,
3580				   start_unnest);
3581		vm_map_store_update_first_free(map, map->first_free);
3582	}
3583	if (entry->vme_end > end_unnest) {
3584		_vm_map_clip_end(&map->hdr,
3585				 entry,
3586				 end_unnest);
3587		vm_map_store_update_first_free(map, map->first_free);
3588	}
3589
3590	pmap_unnest(map->pmap,
3591		    entry->vme_start,
3592		    entry->vme_end - entry->vme_start);
3593	if ((map->mapped_in_other_pmaps) && (map->ref_count)) {
3594		/* clean up parent map/maps */
3595		vm_map_submap_pmap_clean(
3596			map, entry->vme_start,
3597			entry->vme_end,
3598			entry->object.sub_map,
3599			entry->offset);
3600	}
3601	entry->use_pmap = FALSE;
3602	if (entry->alias == VM_MEMORY_SHARED_PMAP) {
3603		entry->alias = VM_MEMORY_UNSHARED_PMAP;
3604	}
3605}
3606#endif	/* NO_NESTED_PMAP */
3607
3608/*
3609 *	vm_map_clip_start:	[ internal use only ]
3610 *
3611 *	Asserts that the given entry begins at or after
3612 *	the specified address; if necessary,
3613 *	it splits the entry into two.
3614 */
3615void
3616vm_map_clip_start(
3617	vm_map_t	map,
3618	vm_map_entry_t	entry,
3619	vm_map_offset_t	startaddr)
3620{
3621#ifndef NO_NESTED_PMAP
3622	if (entry->is_sub_map &&
3623	    entry->use_pmap &&
3624	    startaddr >= entry->vme_start) {
3625		vm_map_offset_t	start_unnest, end_unnest;
3626
3627		/*
3628		 * Make sure "startaddr" is no longer in a nested range
3629		 * before we clip.  Unnest only the minimum range the platform
3630		 * can handle.
3631		 * vm_map_clip_unnest may perform additional adjustments to
3632		 * the unnest range.
3633		 */
3634		start_unnest = startaddr & ~(pmap_nesting_size_min - 1);
3635		end_unnest = start_unnest + pmap_nesting_size_min;
3636		vm_map_clip_unnest(map, entry, start_unnest, end_unnest);
3637	}
3638#endif /* NO_NESTED_PMAP */
3639	if (startaddr > entry->vme_start) {
3640		if (entry->object.vm_object &&
3641		    !entry->is_sub_map &&
3642		    entry->object.vm_object->phys_contiguous) {
3643			pmap_remove(map->pmap,
3644				    (addr64_t)(entry->vme_start),
3645				    (addr64_t)(entry->vme_end));
3646		}
3647		_vm_map_clip_start(&map->hdr, entry, startaddr);
3648		vm_map_store_update_first_free(map, map->first_free);
3649	}
3650}
3651
3652
3653#define vm_map_copy_clip_start(copy, entry, startaddr) \
3654	MACRO_BEGIN \
3655	if ((startaddr) > (entry)->vme_start) \
3656		_vm_map_clip_start(&(copy)->cpy_hdr,(entry),(startaddr)); \
3657	MACRO_END
3658
3659/*
3660 *	This routine is called only when it is known that
3661 *	the entry must be split.
3662 */
3663static void
3664_vm_map_clip_start(
3665	register struct vm_map_header	*map_header,
3666	register vm_map_entry_t		entry,
3667	register vm_map_offset_t		start)
3668{
3669	register vm_map_entry_t	new_entry;
3670
3671	/*
3672	 *	Split off the front portion --
3673	 *	note that we must insert the new
3674	 *	entry BEFORE this one, so that
3675	 *	this entry has the specified starting
3676	 *	address.
3677	 */
3678
3679	if (entry->map_aligned) {
3680		assert(VM_MAP_PAGE_ALIGNED(start,
3681					   VM_MAP_HDR_PAGE_MASK(map_header)));
3682	}
3683
3684	new_entry = _vm_map_entry_create(map_header, !map_header->entries_pageable);
3685	vm_map_entry_copy_full(new_entry, entry);
3686
3687	new_entry->vme_end = start;
3688	assert(new_entry->vme_start < new_entry->vme_end);
3689	entry->offset += (start - entry->vme_start);
3690	assert(start < entry->vme_end);
3691	entry->vme_start = start;
3692
3693	_vm_map_store_entry_link(map_header, entry->vme_prev, new_entry);
3694
3695	if (entry->is_sub_map)
3696		vm_map_reference(new_entry->object.sub_map);
3697	else
3698		vm_object_reference(new_entry->object.vm_object);
3699}
3700
3701
3702/*
3703 *	vm_map_clip_end:	[ internal use only ]
3704 *
3705 *	Asserts that the given entry ends at or before
3706 *	the specified address; if necessary,
3707 *	it splits the entry into two.
3708 */
3709void
3710vm_map_clip_end(
3711	vm_map_t	map,
3712	vm_map_entry_t	entry,
3713	vm_map_offset_t	endaddr)
3714{
3715	if (endaddr > entry->vme_end) {
3716		/*
3717		 * Within the scope of this clipping, limit "endaddr" to
3718		 * the end of this map entry...
3719		 */
3720		endaddr = entry->vme_end;
3721	}
3722#ifndef NO_NESTED_PMAP
3723	if (entry->is_sub_map && entry->use_pmap) {
3724		vm_map_offset_t	start_unnest, end_unnest;
3725
3726		/*
3727		 * Make sure the range between the start of this entry and
3728		 * the new "endaddr" is no longer nested before we clip.
3729		 * Unnest only the minimum range the platform can handle.
3730		 * vm_map_clip_unnest may perform additional adjustments to
3731		 * the unnest range.
3732		 */
3733		start_unnest = entry->vme_start;
3734		end_unnest =
3735			(endaddr + pmap_nesting_size_min - 1) &
3736			~(pmap_nesting_size_min - 1);
3737		vm_map_clip_unnest(map, entry, start_unnest, end_unnest);
3738	}
3739#endif /* NO_NESTED_PMAP */
3740	if (endaddr < entry->vme_end) {
3741		if (entry->object.vm_object &&
3742		    !entry->is_sub_map &&
3743		    entry->object.vm_object->phys_contiguous) {
3744			pmap_remove(map->pmap,
3745				    (addr64_t)(entry->vme_start),
3746				    (addr64_t)(entry->vme_end));
3747		}
3748		_vm_map_clip_end(&map->hdr, entry, endaddr);
3749		vm_map_store_update_first_free(map, map->first_free);
3750	}
3751}
3752
3753
3754#define vm_map_copy_clip_end(copy, entry, endaddr) \
3755	MACRO_BEGIN \
3756	if ((endaddr) < (entry)->vme_end) \
3757		_vm_map_clip_end(&(copy)->cpy_hdr,(entry),(endaddr)); \
3758	MACRO_END
3759
3760/*
3761 *	This routine is called only when it is known that
3762 *	the entry must be split.
3763 */
3764static void
3765_vm_map_clip_end(
3766	register struct vm_map_header	*map_header,
3767	register vm_map_entry_t		entry,
3768	register vm_map_offset_t	end)
3769{
3770	register vm_map_entry_t	new_entry;
3771
3772	/*
3773	 *	Create a new entry and insert it
3774	 *	AFTER the specified entry
3775	 */
3776
3777	if (entry->map_aligned) {
3778		assert(VM_MAP_PAGE_ALIGNED(end,
3779					   VM_MAP_HDR_PAGE_MASK(map_header)));
3780	}
3781
3782	new_entry = _vm_map_entry_create(map_header, !map_header->entries_pageable);
3783	vm_map_entry_copy_full(new_entry, entry);
3784
3785	assert(entry->vme_start < end);
3786	new_entry->vme_start = entry->vme_end = end;
3787	new_entry->offset += (end - entry->vme_start);
3788	assert(new_entry->vme_start < new_entry->vme_end);
3789
3790	_vm_map_store_entry_link(map_header, entry, new_entry);
3791
3792	if (entry->is_sub_map)
3793		vm_map_reference(new_entry->object.sub_map);
3794	else
3795		vm_object_reference(new_entry->object.vm_object);
3796}
3797
3798
3799/*
3800 *	VM_MAP_RANGE_CHECK:	[ internal use only ]
3801 *
3802 *	Clamps the starting and ending region addresses
3803 *	so that they fall within the valid range of the map.
3804 */
3805#define	VM_MAP_RANGE_CHECK(map, start, end)	\
3806	MACRO_BEGIN				\
3807	if (start < vm_map_min(map))		\
3808		start = vm_map_min(map);	\
3809	if (end > vm_map_max(map))		\
3810		end = vm_map_max(map);		\
3811	if (start > end)			\
3812		start = end;			\
3813	MACRO_END
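
/*
 *	Illustrative sketch (not part of the build): clamping a
 *	caller-supplied range to the map's bounds before the entries are
 *	walked.  "requested_start" and "requested_end" are hypothetical:
 *
 *		vm_map_offset_t	start = requested_start;
 *		vm_map_offset_t	end = requested_end;
 *
 *		VM_MAP_RANGE_CHECK(map, start, end);
 *		(the range is now clamped to the map's bounds; an inverted
 *		 or fully out-of-range request becomes empty)
 */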
3814
3815/*
3816 *	vm_map_range_check:	[ internal use only ]
3817 *
3818 *	Check that the region defined by the specified start and
3819 *	end addresses is wholly contained within a single map
3820 *	entry or a set of adjacent map entries of the specified map,
3821 *	i.e. the specified region contains no unmapped space.
3822 *	If any or all of the region is unmapped, FALSE is returned.
3823 *	Otherwise, TRUE is returned and, if the output argument 'entry'
3824 *	is not NULL, it points to the map entry containing the start
3825 *	of the region.
3826 *
3827 *	The map is locked for reading on entry and is left locked.
3828 */
3829static boolean_t
3830vm_map_range_check(
3831	register vm_map_t	map,
3832	register vm_map_offset_t	start,
3833	register vm_map_offset_t	end,
3834	vm_map_entry_t		*entry)
3835{
3836	vm_map_entry_t		cur;
3837	register vm_map_offset_t	prev;
3838
3839	/*
3840	 * 	Basic sanity checks first
3841	 */
3842	if (start < vm_map_min(map) || end > vm_map_max(map) || start > end)
3843		return (FALSE);
3844
3845	/*
3846	 * 	Check first if the region starts within a valid
3847	 *	mapping for the map.
3848	 */
3849	if (!vm_map_lookup_entry(map, start, &cur))
3850		return (FALSE);
3851
3852	/*
3853	 *	Optimize for the case that the region is contained
3854	 *	in a single map entry.
3855	 */
3856	if (entry != (vm_map_entry_t *) NULL)
3857		*entry = cur;
3858	if (end <= cur->vme_end)
3859		return (TRUE);
3860
3861	/*
3862	 * 	If the region is not wholly contained within a
3863	 * 	single entry, walk the entries looking for holes.
3864	 */
3865	prev = cur->vme_end;
3866	cur = cur->vme_next;
3867	while ((cur != vm_map_to_entry(map)) && (prev == cur->vme_start)) {
3868		if (end <= cur->vme_end)
3869			return (TRUE);
3870		prev = cur->vme_end;
3871		cur = cur->vme_next;
3872	}
3873	return (FALSE);
3874}
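
/*
 *	Illustrative sketch (assumption, not part of the build): a caller
 *	that needs the range to be fully backed before operating on it.
 *	Per the comment above, the map must already be locked for reading;
 *	"map", "start" and "end" are hypothetical:
 *
 *		vm_map_entry_t	first;
 *
 *		vm_map_lock_read(map);
 *		if (!vm_map_range_check(map, start, end, &first)) {
 *			vm_map_unlock_read(map);
 *			return KERN_INVALID_ADDRESS;
 *		}
 *		(walk the entries starting at "first", then unlock)
 */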
3875
3876/*
3877 *	vm_map_submap:		[ kernel use only ]
3878 *
3879 *	Mark the given range as handled by a subordinate map.
3880 *
3881 *	This range must have been created with vm_map_find using
3882 *	the vm_submap_object, and no other operations may have been
3883 *	performed on this range prior to calling vm_map_submap.
3884 *
3885 *	Only a limited number of operations can be performed
3886 *	within this range after calling vm_map_submap:
3887 *		vm_fault
3888 *	[Don't try vm_map_copyin!]
3889 *
3890 *	To remove a submapping, one must first remove the
3891 *	range from the superior map, and then destroy the
3892 *	submap (if desired).  [Better yet, don't try it.]
3893 */
3894kern_return_t
3895vm_map_submap(
3896	vm_map_t	map,
3897	vm_map_offset_t	start,
3898	vm_map_offset_t	end,
3899	vm_map_t	submap,
3900	vm_map_offset_t	offset,
3901#ifdef NO_NESTED_PMAP
3902	__unused
3903#endif	/* NO_NESTED_PMAP */
3904	boolean_t	use_pmap)
3905{
3906	vm_map_entry_t		entry;
3907	register kern_return_t	result = KERN_INVALID_ARGUMENT;
3908	register vm_object_t	object;
3909
3910	vm_map_lock(map);
3911
3912	if (! vm_map_lookup_entry(map, start, &entry)) {
3913		entry = entry->vme_next;
3914	}
3915
3916	if (entry == vm_map_to_entry(map) ||
3917	    entry->is_sub_map) {
3918		vm_map_unlock(map);
3919		return KERN_INVALID_ARGUMENT;
3920	}
3921
3922	vm_map_clip_start(map, entry, start);
3923	vm_map_clip_end(map, entry, end);
3924
3925	if ((entry->vme_start == start) && (entry->vme_end == end) &&
3926	    (!entry->is_sub_map) &&
3927	    ((object = entry->object.vm_object) == vm_submap_object) &&
3928	    (object->resident_page_count == 0) &&
3929	    (object->copy == VM_OBJECT_NULL) &&
3930	    (object->shadow == VM_OBJECT_NULL) &&
3931	    (!object->pager_created)) {
3932		entry->offset = (vm_object_offset_t)offset;
3933		entry->object.vm_object = VM_OBJECT_NULL;
3934		vm_object_deallocate(object);
3935		entry->is_sub_map = TRUE;
3936		entry->use_pmap = FALSE;
3937		entry->object.sub_map = submap;
3938		vm_map_reference(submap);
3939		if (submap->mapped_in_other_pmaps == FALSE &&
3940		    vm_map_pmap(submap) != PMAP_NULL &&
3941		    vm_map_pmap(submap) != vm_map_pmap(map)) {
3942			/*
3943			 * This submap is being mapped in a map
3944			 * that uses a different pmap.
3945			 * Set its "mapped_in_other_pmaps" flag
3946			 * to indicate that we now need to
3947			 * remove mappings from all pmaps rather
3948			 * than just the submap's pmap.
3949			 */
3950			submap->mapped_in_other_pmaps = TRUE;
3951		}
3952
3953#ifndef NO_NESTED_PMAP
3954		if (use_pmap) {
3955			/* nest if platform code will allow */
3956			if(submap->pmap == NULL) {
3957				ledger_t ledger = map->pmap->ledger;
3958				submap->pmap = pmap_create(ledger,
3959						(vm_map_size_t) 0, FALSE);
3960				if(submap->pmap == PMAP_NULL) {
3961					vm_map_unlock(map);
3962					return(KERN_NO_SPACE);
3963				}
3964			}
3965			result = pmap_nest(map->pmap,
3966					   (entry->object.sub_map)->pmap,
3967					   (addr64_t)start,
3968					   (addr64_t)start,
3969					   (uint64_t)(end - start));
3970			if(result)
3971				panic("vm_map_submap: pmap_nest failed, rc = %08X\n", result);
3972			entry->use_pmap = TRUE;
3973		}
3974#else	/* NO_NESTED_PMAP */
3975		pmap_remove(map->pmap, (addr64_t)start, (addr64_t)end);
3976#endif	/* NO_NESTED_PMAP */
3977		result = KERN_SUCCESS;
3978	}
3979	vm_map_unlock(map);
3980
3981	return(result);
3982}
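
/*
 *	Illustrative sketch (assumption, not part of the build): installing
 *	a previously created submap over a range that was reserved with the
 *	vm_submap_object, as required above.  "parent_map", "sub_map",
 *	"start" and "end" are hypothetical; offset 0 maps the submap from
 *	its beginning, and use_pmap == TRUE asks the platform to nest the
 *	submap's pmap if it can:
 *
 *		kern_return_t	kr;
 *
 *		kr = vm_map_submap(parent_map, start, end,
 *				   sub_map,
 *				   (vm_map_offset_t) 0,
 *				   TRUE);
 *		if (kr != KERN_SUCCESS)
 *			(the range did not satisfy the requirements above)
 */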
3983
3984/*
3985 *	vm_map_protect:
3986 *
3987 *	Sets the protection of the specified address
3988 *	region in the target map.  If "set_max" is
3989 *	specified, the maximum protection is to be set;
3990 *	otherwise, only the current protection is affected.
3991 */
3992kern_return_t
3993vm_map_protect(
3994	register vm_map_t	map,
3995	register vm_map_offset_t	start,
3996	register vm_map_offset_t	end,
3997	register vm_prot_t	new_prot,
3998	register boolean_t	set_max)
3999{
4000	register vm_map_entry_t		current;
4001	register vm_map_offset_t	prev;
4002	vm_map_entry_t			entry;
4003	vm_prot_t			new_max;
4004
4005	XPR(XPR_VM_MAP,
4006	    "vm_map_protect, 0x%X start 0x%X end 0x%X, new 0x%X %d",
4007	    map, start, end, new_prot, set_max);
4008
4009	vm_map_lock(map);
4010
4011	/* LP64todo - remove this check when vm_map_commpage64()
4012	 * no longer has to stuff in a map_entry for the commpage
4013	 * above the map's max_offset.
4014	 */
4015	if (start >= map->max_offset) {
4016		vm_map_unlock(map);
4017		return(KERN_INVALID_ADDRESS);
4018	}
4019
4020	while(1) {
4021		/*
4022		 * 	Lookup the entry.  If it doesn't start in a valid
4023		 *	entry, return an error.
4024		 */
4025		if (! vm_map_lookup_entry(map, start, &entry)) {
4026			vm_map_unlock(map);
4027			return(KERN_INVALID_ADDRESS);
4028		}
4029
4030		if (entry->superpage_size && (start & (SUPERPAGE_SIZE-1))) { /* extend request to whole entry */
4031			start = SUPERPAGE_ROUND_DOWN(start);
4032			continue;
4033		}
4034		break;
4035 	}
4036	if (entry->superpage_size)
4037 		end = SUPERPAGE_ROUND_UP(end);
4038
4039	/*
4040	 *	Make a first pass to check for protection and address
4041	 *	violations.
4042	 */
4043
4044	current = entry;
4045	prev = current->vme_start;
4046	while ((current != vm_map_to_entry(map)) &&
4047	       (current->vme_start < end)) {
4048
4049		/*
4050		 * If there is a hole, return an error.
4051		 */
4052		if (current->vme_start != prev) {
4053			vm_map_unlock(map);
4054			return(KERN_INVALID_ADDRESS);
4055		}
4056
4057		new_max = current->max_protection;
4058		if(new_prot & VM_PROT_COPY) {
4059			new_max |= VM_PROT_WRITE;
4060			if ((new_prot & (new_max | VM_PROT_COPY)) != new_prot) {
4061				vm_map_unlock(map);
4062				return(KERN_PROTECTION_FAILURE);
4063			}
4064		} else {
4065			if ((new_prot & new_max) != new_prot) {
4066				vm_map_unlock(map);
4067				return(KERN_PROTECTION_FAILURE);
4068			}
4069		}
4070
4071
4072		prev = current->vme_end;
4073		current = current->vme_next;
4074	}
4075	if (end > prev) {
4076		vm_map_unlock(map);
4077		return(KERN_INVALID_ADDRESS);
4078	}
4079
4080	/*
4081	 *	Go back and fix up protections.
4082	 *	Clip to start here if the range starts within
4083	 *	the entry.
4084	 */
4085
4086	current = entry;
4087	if (current != vm_map_to_entry(map)) {
4088		/* clip and unnest if necessary */
4089		vm_map_clip_start(map, current, start);
4090	}
4091
4092	while ((current != vm_map_to_entry(map)) &&
4093	       (current->vme_start < end)) {
4094
4095		vm_prot_t	old_prot;
4096
4097		vm_map_clip_end(map, current, end);
4098
4099		if (current->is_sub_map) {
4100			/* clipping did unnest if needed */
4101			assert(!current->use_pmap);
4102		}
4103
4104		old_prot = current->protection;
4105
4106		if(new_prot & VM_PROT_COPY) {
4107			/* caller is asking specifically to copy the      */
4108			/* mapped data; this implies that max protection  */
4109			/* will include write.  Caller must be prepared   */
4110			/* for loss of shared memory communication in the */
4111			/* target area after taking this step */
4112
4113			if (current->is_sub_map == FALSE && current->object.vm_object == VM_OBJECT_NULL){
4114				current->object.vm_object = vm_object_allocate((vm_map_size_t)(current->vme_end - current->vme_start));
4115				current->offset = 0;
4116				assert(current->use_pmap);
4117			}
4118			current->needs_copy = TRUE;
4119			current->max_protection |= VM_PROT_WRITE;
4120		}
4121
4122		if (set_max)
4123			current->protection =
4124				(current->max_protection =
4125				 new_prot & ~VM_PROT_COPY) &
4126				old_prot;
4127		else
4128			current->protection = new_prot & ~VM_PROT_COPY;
4129
4130		/*
4131		 *	Update physical map if necessary.
4132		 *	If the request is to turn off write protection,
4133		 *	we won't do it for real (in pmap). This is because
4134		 *	it would cause copy-on-write to fail.  We've already
4135		 *	set the new protection in the map, so if a
4136		 *	write-protect fault occurred, it will be fixed up
4137		 *	properly, COW or not.
4138		 */
4139		if (current->protection != old_prot) {
4140			/* Look one level in: we support nested pmaps */
4141			/* from mapped submaps which are direct entries */
4142			/* in our map */
4143
4144			vm_prot_t prot;
4145
4146			prot = current->protection & ~VM_PROT_WRITE;
4147
4148			if (override_nx(map, current->alias) && prot)
4149			        prot |= VM_PROT_EXECUTE;
4150
4151			if (current->is_sub_map && current->use_pmap) {
4152				pmap_protect(current->object.sub_map->pmap,
4153					     current->vme_start,
4154					     current->vme_end,
4155					     prot);
4156			} else {
4157				pmap_protect(map->pmap,
4158					     current->vme_start,
4159					     current->vme_end,
4160					     prot);
4161			}
4162		}
4163		current = current->vme_next;
4164	}
4165
4166	current = entry;
4167	while ((current != vm_map_to_entry(map)) &&
4168	       (current->vme_start <= end)) {
4169		vm_map_simplify_entry(map, current);
4170		current = current->vme_next;
4171	}
4172
4173	vm_map_unlock(map);
4174	return(KERN_SUCCESS);
4175}
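
/*
 *	Illustrative sketch (assumption, not part of the build): making a
 *	hypothetical range read-only.  With set_max == FALSE only the
 *	current protection changes; the maximum protection is untouched,
 *	so the range may later be made writable again (up to its maximum):
 *
 *		kern_return_t	kr;
 *
 *		kr = vm_map_protect(map, start, end,
 *				    VM_PROT_READ,
 *				    FALSE);
 *		if (kr == KERN_PROTECTION_FAILURE)
 *			(the request exceeded the maximum protection)
 */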
4176
4177/*
4178 *	vm_map_inherit:
4179 *
4180 *	Sets the inheritance of the specified address
4181 *	range in the target map.  Inheritance
4182 *	affects how the map will be shared with
4183 *	child maps at the time of vm_map_fork.
4184 */
4185kern_return_t
4186vm_map_inherit(
4187	register vm_map_t	map,
4188	register vm_map_offset_t	start,
4189	register vm_map_offset_t	end,
4190	register vm_inherit_t	new_inheritance)
4191{
4192	register vm_map_entry_t	entry;
4193	vm_map_entry_t	temp_entry;
4194
4195	vm_map_lock(map);
4196
4197	VM_MAP_RANGE_CHECK(map, start, end);
4198
4199	if (vm_map_lookup_entry(map, start, &temp_entry)) {
4200		entry = temp_entry;
4201	}
4202	else {
4203		temp_entry = temp_entry->vme_next;
4204		entry = temp_entry;
4205	}
4206
4207	/* first check entire range for submaps which can't support the */
4208	/* given inheritance. */
4209	while ((entry != vm_map_to_entry(map)) && (entry->vme_start < end)) {
4210		if(entry->is_sub_map) {
4211			if(new_inheritance == VM_INHERIT_COPY) {
4212				vm_map_unlock(map);
4213				return(KERN_INVALID_ARGUMENT);
4214			}
4215		}
4216
4217		entry = entry->vme_next;
4218	}
4219
4220	entry = temp_entry;
4221	if (entry != vm_map_to_entry(map)) {
4222		/* clip and unnest if necessary */
4223		vm_map_clip_start(map, entry, start);
4224	}
4225
4226	while ((entry != vm_map_to_entry(map)) && (entry->vme_start < end)) {
4227		vm_map_clip_end(map, entry, end);
4228		if (entry->is_sub_map) {
4229			/* clip did unnest if needed */
4230			assert(!entry->use_pmap);
4231		}
4232
4233		entry->inheritance = new_inheritance;
4234
4235		entry = entry->vme_next;
4236	}
4237
4238	vm_map_unlock(map);
4239	return(KERN_SUCCESS);
4240}
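
/*
 *	Illustrative sketch (assumption, not part of the build): marking a
 *	hypothetical range so that it is not passed on to child maps
 *	created at vm_map_fork time.  VM_INHERIT_SHARE and VM_INHERIT_COPY
 *	are used the same way, except that VM_INHERIT_COPY is rejected for
 *	submap entries (see the check above):
 *
 *		kern_return_t	kr;
 *
 *		kr = vm_map_inherit(map, start, end, VM_INHERIT_NONE);
 */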
4241
4242/*
4243 * Update the accounting for the amount of wired memory in this map.  If the user has
4244 * exceeded the defined limits, then we fail.  Wiring on behalf of the kernel never fails.
4245 */
4246
4247static kern_return_t
4248add_wire_counts(
4249	vm_map_t	map,
4250	vm_map_entry_t	entry,
4251	boolean_t	user_wire)
4252{
4253	vm_map_size_t	size;
4254
4255	if (user_wire) {
4256		unsigned int total_wire_count =  vm_page_wire_count + vm_lopage_free_count;
4257
4258		/*
4259		 * We're wiring memory at the request of the user.  Check if this is the first time the user is wiring
4260		 * this map entry.
4261		 */
4262
4263		if (entry->user_wired_count == 0) {
4264			size = entry->vme_end - entry->vme_start;
4265
4266			/*
4267			 * Since this is the first time the user is wiring this map entry, check to see if we're
4268			 * exceeding the user wire limits.  There is a per-map limit, which is the smaller of the
4269			 * process's rlimit and the global vm_user_wire_limit.  There is also a system-wide limit
4270			 * on the amount of memory all users can wire.  If the user is over either limit, then
4271			 * we fail.
4272			 */
4273
4274			if(size + map->user_wire_size > MIN(map->user_wire_limit, vm_user_wire_limit) ||
4275			   size + ptoa_64(total_wire_count) > vm_global_user_wire_limit ||
4276		    	   size + ptoa_64(total_wire_count) > max_mem - vm_global_no_user_wire_amount)
4277				return KERN_RESOURCE_SHORTAGE;
4278
4279			/*
4280			 * The first time the user wires an entry, we also increment the wired_count and add this to
4281			 * the total that has been wired in the map.
4282			 */
4283
4284			if (entry->wired_count >= MAX_WIRE_COUNT)
4285				return KERN_FAILURE;
4286
4287			entry->wired_count++;
4288			map->user_wire_size += size;
4289		}
4290
4291		if (entry->user_wired_count >= MAX_WIRE_COUNT)
4292			return KERN_FAILURE;
4293
4294		entry->user_wired_count++;
4295
4296	} else {
4297
4298		/*
4299		 * The kernel's wiring the memory.  Just bump the count and continue.
4300		 */
4301
4302		if (entry->wired_count >= MAX_WIRE_COUNT)
4303			panic("vm_map_wire: too many wirings");
4304
4305		entry->wired_count++;
4306	}
4307
4308	return KERN_SUCCESS;
4309}
4310
4311/*
4312 * Update the memory wiring accounting now that the given map entry is being unwired.
4313 */
4314
4315static void
4316subtract_wire_counts(
4317	vm_map_t	map,
4318	vm_map_entry_t	entry,
4319	boolean_t	user_wire)
4320{
4321
4322	if (user_wire) {
4323
4324		/*
4325		 * We're unwiring memory at the request of the user.  See if we're removing the last user wire reference.
4326		 */
4327
4328		if (entry->user_wired_count == 1) {
4329
4330			/*
4331			 * We're removing the last user wire reference.  Decrement the wired_count and the total
4332			 * user wired memory for this map.
4333			 */
4334
4335			assert(entry->wired_count >= 1);
4336			entry->wired_count--;
4337			map->user_wire_size -= entry->vme_end - entry->vme_start;
4338		}
4339
4340		assert(entry->user_wired_count >= 1);
4341		entry->user_wired_count--;
4342
4343	} else {
4344
4345		/*
4346		 * The kernel is unwiring the memory.   Just update the count.
4347		 */
4348
4349		assert(entry->wired_count >= 1);
4350		entry->wired_count--;
4351	}
4352}
4353
4354/*
4355 *	vm_map_wire:
4356 *
4357 *	Sets the pageability of the specified address range in the
4358 *	target map as wired.  Regions specified as not pageable require
4359 *	locked-down physical memory and physical page maps.  The
4360 *	access_type variable indicates types of accesses that must not
4361 *	generate page faults.  This is checked against protection of
4362 *	memory being locked-down.
4363 *
4364 *	The map must not be locked, but a reference must remain to the
4365 *	map throughout the call.
4366 */
4367static kern_return_t
4368vm_map_wire_nested(
4369	register vm_map_t	map,
4370	register vm_map_offset_t	start,
4371	register vm_map_offset_t	end,
4372	register vm_prot_t	access_type,
4373	boolean_t		user_wire,
4374	pmap_t			map_pmap,
4375	vm_map_offset_t		pmap_addr,
4376	ppnum_t			*physpage_p)
4377{
4378	register vm_map_entry_t	entry;
4379	struct vm_map_entry	*first_entry, tmp_entry;
4380	vm_map_t		real_map;
4381	register vm_map_offset_t	s,e;
4382	kern_return_t		rc;
4383	boolean_t		need_wakeup;
4384	boolean_t		main_map = FALSE;
4385	wait_interrupt_t	interruptible_state;
4386	thread_t		cur_thread;
4387	unsigned int		last_timestamp;
4388	vm_map_size_t		size;
4389	boolean_t		wire_and_extract;
4390
4391	wire_and_extract = FALSE;
4392	if (physpage_p != NULL) {
4393		/*
4394		 * The caller wants the physical page number of the
4395		 * wired page.  We return only one physical page number
4396		 * so this works for only one page at a time.
4397		 */
4398		if ((end - start) != PAGE_SIZE) {
4399			return KERN_INVALID_ARGUMENT;
4400		}
4401		wire_and_extract = TRUE;
4402		*physpage_p = 0;
4403	}
4404
4405	vm_map_lock(map);
4406	if(map_pmap == NULL)
4407		main_map = TRUE;
4408	last_timestamp = map->timestamp;
4409
4410	VM_MAP_RANGE_CHECK(map, start, end);
4411	assert(page_aligned(start));
4412	assert(page_aligned(end));
4413	assert(VM_MAP_PAGE_ALIGNED(start, VM_MAP_PAGE_MASK(map)));
4414	assert(VM_MAP_PAGE_ALIGNED(end, VM_MAP_PAGE_MASK(map)));
4415	if (start == end) {
4416		/* We wired what the caller asked for, zero pages */
4417		vm_map_unlock(map);
4418		return KERN_SUCCESS;
4419	}
4420
4421	need_wakeup = FALSE;
4422	cur_thread = current_thread();
4423
4424	s = start;
4425	rc = KERN_SUCCESS;
4426
4427	if (vm_map_lookup_entry(map, s, &first_entry)) {
4428		entry = first_entry;
4429		/*
4430		 * vm_map_clip_start will be done later.
4431		 * We don't want to unnest any nested submaps here !
4432		 */
4433	} else {
4434		/* Start address is not in map */
4435		rc = KERN_INVALID_ADDRESS;
4436		goto done;
4437	}
4438
4439	while ((entry != vm_map_to_entry(map)) && (s < end)) {
4440		/*
4441		 * At this point, we have wired from "start" to "s".
4442		 * We still need to wire from "s" to "end".
4443		 *
4444		 * "entry" hasn't been clipped, so it could start before "s"
4445		 * and/or end after "end".
4446		 */
4447
4448		/* "e" is how far we want to wire in this entry */
4449		e = entry->vme_end;
4450		if (e > end)
4451			e = end;
4452
4453		/*
4454		 * If another thread is wiring/unwiring this entry then
4455		 * block after informing other thread to wake us up.
4456		 */
4457		if (entry->in_transition) {
4458			wait_result_t wait_result;
4459
4460			/*
4461			 * We have not clipped the entry.  Make sure that
4462			 * the start address is in range so that the lookup
4463			 * below will succeed.
4464			 * "s" is the current starting point: we've already
4465			 * wired from "start" to "s" and we still have
4466			 * to wire from "s" to "end".
4467			 */
4468
4469			entry->needs_wakeup = TRUE;
4470
4471			/*
4472			 * wake up anybody waiting on entries that we have
4473			 * already wired.
4474			 */
4475			if (need_wakeup) {
4476				vm_map_entry_wakeup(map);
4477				need_wakeup = FALSE;
4478			}
4479			/*
4480			 * User wiring is interruptible
4481			 */
4482			wait_result = vm_map_entry_wait(map,
4483							(user_wire) ? THREAD_ABORTSAFE :
4484							THREAD_UNINT);
4485			if (user_wire && wait_result ==	THREAD_INTERRUPTED) {
4486				/*
4487				 * undo the wirings we have done so far
4488				 * We do not clear the needs_wakeup flag,
4489				 * because we cannot tell if we were the
4490				 * only one waiting.
4491				 */
4492				rc = KERN_FAILURE;
4493				goto done;
4494			}
4495
4496			/*
4497			 * Cannot avoid a lookup here; reset the timestamp.
4498			 */
4499			last_timestamp = map->timestamp;
4500
4501			/*
4502			 * The entry could have been clipped, so look it up again.
4503			 * The worst that can happen is that it no longer exists.
4504			 */
4505			if (!vm_map_lookup_entry(map, s, &first_entry)) {
4506				/*
4507				 * User: undo everything up to the previous
4508				 * entry.  Let vm_map_unwire worry about
4509				 * checking the validity of the range.
4510				 */
4511				rc = KERN_FAILURE;
4512				goto done;
4513			}
4514			entry = first_entry;
4515			continue;
4516		}
4517
4518		if (entry->is_sub_map) {
4519			vm_map_offset_t	sub_start;
4520			vm_map_offset_t	sub_end;
4521			vm_map_offset_t	local_start;
4522			vm_map_offset_t	local_end;
4523			pmap_t		pmap;
4524
4525			if (wire_and_extract) {
4526				/*
4527				 * Wiring would result in copy-on-write
4528				 * which would not be compatible with
4529				 * the sharing we have with the original
4530				 * provider of this memory.
4531				 */
4532				rc = KERN_INVALID_ARGUMENT;
4533				goto done;
4534			}
4535
4536			vm_map_clip_start(map, entry, s);
4537			vm_map_clip_end(map, entry, end);
4538
4539			sub_start = entry->offset;
4540			sub_end = entry->vme_end;
4541			sub_end += entry->offset - entry->vme_start;
4542
4543			local_end = entry->vme_end;
4544			if(map_pmap == NULL) {
4545				vm_object_t		object;
4546				vm_object_offset_t	offset;
4547				vm_prot_t		prot;
4548				boolean_t		wired;
4549				vm_map_entry_t		local_entry;
4550				vm_map_version_t	 version;
4551				vm_map_t		lookup_map;
4552
4553				if(entry->use_pmap) {
4554					pmap = entry->object.sub_map->pmap;
4555					/* the ppc implementation requires that */
4556					/* the submap's pmap address ranges line */
4557					/* up with the parent map */
4558#ifdef notdef
4559					pmap_addr = sub_start;
4560#endif
4561					pmap_addr = s;
4562				} else {
4563					pmap = map->pmap;
4564					pmap_addr = s;
4565				}
4566
4567				if (entry->wired_count) {
4568					if ((rc = add_wire_counts(map, entry, user_wire)) != KERN_SUCCESS)
4569						goto done;
4570
4571					/*
4572					 * The map was not unlocked:
4573					 * no need to re-lookup.
4574					 * Just go directly to the next entry.
4575					 */
4576					entry = entry->vme_next;
4577					s = entry->vme_start;
4578					continue;
4579
4580				}
4581
4582				/* call vm_map_lookup_locked to */
4583				/* cause any needs copy to be   */
4584				/* evaluated */
4585				local_start = entry->vme_start;
4586				lookup_map = map;
4587				vm_map_lock_write_to_read(map);
4588				if(vm_map_lookup_locked(
4589					   &lookup_map, local_start,
4590					   access_type,
4591					   OBJECT_LOCK_EXCLUSIVE,
4592					   &version, &object,
4593					   &offset, &prot, &wired,
4594					   NULL,
4595					   &real_map)) {
4596
4597					vm_map_unlock_read(lookup_map);
4598					vm_map_unwire(map, start,
4599						      s, user_wire);
4600					return(KERN_FAILURE);
4601				}
4602				vm_object_unlock(object);
4603				if(real_map != lookup_map)
4604					vm_map_unlock(real_map);
4605				vm_map_unlock_read(lookup_map);
4606				vm_map_lock(map);
4607
4608				/* we unlocked, so must re-lookup */
4609				if (!vm_map_lookup_entry(map,
4610							 local_start,
4611							 &local_entry)) {
4612					rc = KERN_FAILURE;
4613					goto done;
4614				}
4615
4616				/*
4617				 * entry could have been "simplified",
4618				 * so re-clip
4619				 */
4620				entry = local_entry;
4621				assert(s == local_start);
4622				vm_map_clip_start(map, entry, s);
4623				vm_map_clip_end(map, entry, end);
4624				/* re-compute "e" */
4625				e = entry->vme_end;
4626				if (e > end)
4627					e = end;
4628
4629				/* did we have a change of type? */
4630				if (!entry->is_sub_map) {
4631					last_timestamp = map->timestamp;
4632					continue;
4633				}
4634			} else {
4635				local_start = entry->vme_start;
4636				pmap = map_pmap;
4637			}
4638
4639			if ((rc = add_wire_counts(map, entry, user_wire)) != KERN_SUCCESS)
4640				goto done;
4641
4642			entry->in_transition = TRUE;
4643
4644			vm_map_unlock(map);
4645			rc = vm_map_wire_nested(entry->object.sub_map,
4646						sub_start, sub_end,
4647						access_type,
4648						user_wire, pmap, pmap_addr,
4649						NULL);
4650			vm_map_lock(map);
4651
4652			/*
4653			 * Find the entry again.  It could have been clipped
4654			 * after we unlocked the map.
4655			 */
4656			if (!vm_map_lookup_entry(map, local_start,
4657						 &first_entry))
4658				panic("vm_map_wire: re-lookup failed");
4659			entry = first_entry;
4660
4661			assert(local_start == s);
4662			/* re-compute "e" */
4663			e = entry->vme_end;
4664			if (e > end)
4665				e = end;
4666
4667			last_timestamp = map->timestamp;
4668			while ((entry != vm_map_to_entry(map)) &&
4669			       (entry->vme_start < e)) {
4670				assert(entry->in_transition);
4671				entry->in_transition = FALSE;
4672				if (entry->needs_wakeup) {
4673					entry->needs_wakeup = FALSE;
4674					need_wakeup = TRUE;
4675				}
4676				if (rc != KERN_SUCCESS) {/* from vm_*_wire */
4677					subtract_wire_counts(map, entry, user_wire);
4678				}
4679				entry = entry->vme_next;
4680			}
4681			if (rc != KERN_SUCCESS) {	/* from vm_*_wire */
4682				goto done;
4683			}
4684
4685			/* no need to relookup again */
4686			s = entry->vme_start;
4687			continue;
4688		}
4689
4690		/*
4691		 * If this entry is already wired then increment
4692		 * the appropriate wire reference count.
4693		 */
4694		if (entry->wired_count) {
4695
4696			if ((entry->protection & access_type) != access_type) {
4697				/* found a protection problem */
4698
4699				/*
4700				 * XXX FBDP
4701				 * We should always return an error
4702				 * in this case but since we didn't
4703				 * enforce it before, let's do
4704				 * it only for the new "wire_and_extract"
4705				 * code path for now...
4706				 */
4707				if (wire_and_extract) {
4708					rc = KERN_PROTECTION_FAILURE;
4709					goto done;
4710				}
4711			}
4712
4713			/*
4714			 * entry is already wired down, get our reference
4715			 * after clipping to our range.
4716			 */
4717			vm_map_clip_start(map, entry, s);
4718			vm_map_clip_end(map, entry, end);
4719
4720			if ((rc = add_wire_counts(map, entry, user_wire)) != KERN_SUCCESS)
4721				goto done;
4722
4723			if (wire_and_extract) {
4724				vm_object_t		object;
4725				vm_object_offset_t	offset;
4726				vm_page_t		m;
4727
4728				/*
4729				 * We don't have to "wire" the page again
4730				 * but we still have to "extract" its
4731				 * physical page number, after some sanity
4732				 * checks.
4733				 */
4734				assert((entry->vme_end - entry->vme_start)
4735				       == PAGE_SIZE);
4736				assert(!entry->needs_copy);
4737				assert(!entry->is_sub_map);
4738				assert(entry->object.vm_object);
4739				if (((entry->vme_end - entry->vme_start)
4740				     != PAGE_SIZE) ||
4741				    entry->needs_copy ||
4742				    entry->is_sub_map ||
4743				    entry->object.vm_object == VM_OBJECT_NULL) {
4744					rc = KERN_INVALID_ARGUMENT;
4745					goto done;
4746				}
4747
4748				object = entry->object.vm_object;
4749				offset = entry->offset;
4750				/* need exclusive lock to update m->dirty */
4751				if (entry->protection & VM_PROT_WRITE) {
4752					vm_object_lock(object);
4753				} else {
4754					vm_object_lock_shared(object);
4755				}
4756				m = vm_page_lookup(object, offset);
4757				assert(m != VM_PAGE_NULL);
4758				assert(m->wire_count);
4759				if (m != VM_PAGE_NULL && m->wire_count) {
4760					*physpage_p = m->phys_page;
4761					if (entry->protection & VM_PROT_WRITE) {
4762						vm_object_lock_assert_exclusive(
4763							m->object);
4764						m->dirty = TRUE;
4765					}
4766				} else {
4767					/* not already wired !? */
4768					*physpage_p = 0;
4769				}
4770				vm_object_unlock(object);
4771			}
4772
4773			/* map was not unlocked: no need to relookup */
4774			entry = entry->vme_next;
4775			s = entry->vme_start;
4776			continue;
4777		}
4778
4779		/*
4780		 * Unwired entry or wire request transmitted via submap
4781		 */
4782
4783
4784		/*
4785		 * Perform actions of vm_map_lookup that need the write
4786		 * lock on the map: create a shadow object for a
4787		 * copy-on-write region, or an object for a zero-fill
4788		 * region.
4789		 */
4790		size = entry->vme_end - entry->vme_start;
4791		/*
4792		 * If wiring a copy-on-write page, we need to copy it now
4793		 * even if we're only (currently) requesting read access.
4794		 * This is aggressive, but once it's wired we can't move it.
4795		 */
4796		if (entry->needs_copy) {
4797			if (wire_and_extract) {
4798				/*
4799				 * We're supposed to share with the original
4800				 * provider so should not be "needs_copy"
4801				 */
4802				rc = KERN_INVALID_ARGUMENT;
4803				goto done;
4804			}
4805
4806			vm_object_shadow(&entry->object.vm_object,
4807					 &entry->offset, size);
4808			entry->needs_copy = FALSE;
4809		} else if (entry->object.vm_object == VM_OBJECT_NULL) {
4810			if (wire_and_extract) {
4811				/*
4812				 * We're supposed to share with the original
4813				 * provider so should already have an object.
4814				 */
4815				rc = KERN_INVALID_ARGUMENT;
4816				goto done;
4817			}
4818			entry->object.vm_object = vm_object_allocate(size);
4819			entry->offset = (vm_object_offset_t)0;
4820			assert(entry->use_pmap);
4821		}
4822
4823		vm_map_clip_start(map, entry, s);
4824		vm_map_clip_end(map, entry, end);
4825
4826		/* re-compute "e" */
4827		e = entry->vme_end;
4828		if (e > end)
4829			e = end;
4830
4831		/*
4832		 * Check for holes and protection mismatch.
4833		 * Holes: Next entry should be contiguous unless this
4834		 *	  is the end of the region.
4835		 * Protection: Access requested must be allowed, unless
4836		 *	wiring is by protection class
4837		 */
4838		if ((entry->vme_end < end) &&
4839		    ((entry->vme_next == vm_map_to_entry(map)) ||
4840		     (entry->vme_next->vme_start > entry->vme_end))) {
4841			/* found a hole */
4842			rc = KERN_INVALID_ADDRESS;
4843			goto done;
4844		}
4845		if ((entry->protection & access_type) != access_type) {
4846			/* found a protection problem */
4847			rc = KERN_PROTECTION_FAILURE;
4848			goto done;
4849		}
4850
4851		assert(entry->wired_count == 0 && entry->user_wired_count == 0);
4852
4853		if ((rc = add_wire_counts(map, entry, user_wire)) != KERN_SUCCESS)
4854			goto done;
4855
4856		entry->in_transition = TRUE;
4857
4858		/*
4859		 * This entry might get split once we unlock the map.
4860		 * In vm_fault_wire(), we need the current range as
4861		 * defined by this entry.  In order for this to work
4862		 * along with a simultaneous clip operation, we make a
4863		 * temporary copy of this entry and use that for the
4864		 * wiring.  Note that the underlying objects do not
4865		 * change during a clip.
4866		 */
4867		tmp_entry = *entry;
4868
4869		/*
4870		 * The in_transition state guarantees that the entry
4871		 * (or entries for this range, if a split occurred) will be
4872		 * there when the map lock is acquired for the second time.
4873		 */
4874		vm_map_unlock(map);
4875
4876		if (!user_wire && cur_thread != THREAD_NULL)
4877			interruptible_state = thread_interrupt_level(THREAD_UNINT);
4878		else
4879			interruptible_state = THREAD_UNINT;
4880
4881		if(map_pmap)
4882			rc = vm_fault_wire(map,
4883					   &tmp_entry, map_pmap, pmap_addr,
4884					   physpage_p);
4885		else
4886			rc = vm_fault_wire(map,
4887					   &tmp_entry, map->pmap,
4888					   tmp_entry.vme_start,
4889					   physpage_p);
4890
4891		if (!user_wire && cur_thread != THREAD_NULL)
4892			thread_interrupt_level(interruptible_state);
4893
4894		vm_map_lock(map);
4895
4896		if (last_timestamp+1 != map->timestamp) {
4897			/*
4898			 * Find the entry again.  It could have been clipped
4899			 * after we unlocked the map.
4900			 */
4901			if (!vm_map_lookup_entry(map, tmp_entry.vme_start,
4902						 &first_entry))
4903				panic("vm_map_wire: re-lookup failed");
4904
4905			entry = first_entry;
4906		}
4907
4908		last_timestamp = map->timestamp;
4909
4910		while ((entry != vm_map_to_entry(map)) &&
4911		       (entry->vme_start < tmp_entry.vme_end)) {
4912			assert(entry->in_transition);
4913			entry->in_transition = FALSE;
4914			if (entry->needs_wakeup) {
4915				entry->needs_wakeup = FALSE;
4916				need_wakeup = TRUE;
4917			}
4918			if (rc != KERN_SUCCESS) {	/* from vm_*_wire */
4919				subtract_wire_counts(map, entry, user_wire);
4920			}
4921			entry = entry->vme_next;
4922		}
4923
4924		if (rc != KERN_SUCCESS) {		/* from vm_*_wire */
4925			goto done;
4926		}
4927
4928		s = entry->vme_start;
4929	} /* end while loop through map entries */
4930
4931done:
4932	if (rc == KERN_SUCCESS) {
4933		/* repair any damage we may have made to the VM map */
4934		vm_map_simplify_range(map, start, end);
4935	}
4936
4937	vm_map_unlock(map);
4938
4939	/*
4940	 * wake up anybody waiting on entries we wired.
4941	 */
4942	if (need_wakeup)
4943		vm_map_entry_wakeup(map);
4944
4945	if (rc != KERN_SUCCESS) {
4946		/* undo what has been wired so far */
4947		vm_map_unwire(map, start, s, user_wire);
4948		if (physpage_p) {
4949			*physpage_p = 0;
4950		}
4951	}
4952
4953	return rc;
4954
4955}
4956
4957kern_return_t
4958vm_map_wire(
4959	register vm_map_t	map,
4960	register vm_map_offset_t	start,
4961	register vm_map_offset_t	end,
4962	register vm_prot_t	access_type,
4963	boolean_t		user_wire)
4964{
4965
4966	kern_return_t	kret;
4967
4968	kret = vm_map_wire_nested(map, start, end, access_type,
4969				  user_wire, (pmap_t)NULL, 0, NULL);
4970	return kret;
4971}
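
/*
 *	Illustrative sketch (assumption, not part of the build): wiring a
 *	hypothetical page-aligned range on behalf of a user request.  The
 *	access_type lists the faults that must be precluded, so asking for
 *	VM_PROT_READ | VM_PROT_WRITE requires that the range be writable:
 *
 *		kern_return_t	kr;
 *
 *		kr = vm_map_wire(map, start, end,
 *				 VM_PROT_READ | VM_PROT_WRITE,
 *				 TRUE);
 *		if (kr == KERN_RESOURCE_SHORTAGE)
 *			(a user wire limit in add_wire_counts() was hit)
 */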
4972
4973kern_return_t
4974vm_map_wire_and_extract(
4975	vm_map_t	map,
4976	vm_map_offset_t	start,
4977	vm_prot_t	access_type,
4978	boolean_t	user_wire,
4979	ppnum_t		*physpage_p)
4980{
4981
4982	kern_return_t	kret;
4983
4984	kret = vm_map_wire_nested(map,
4985				  start,
4986				  start+VM_MAP_PAGE_SIZE(map),
4987				  access_type,
4988				  user_wire,
4989				  (pmap_t)NULL,
4990				  0,
4991				  physpage_p);
4992	if (kret != KERN_SUCCESS &&
4993	    physpage_p != NULL) {
4994		*physpage_p = 0;
4995	}
4996	return kret;
4997}
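
/*
 *	Illustrative sketch (assumption, not part of the build): wiring the
 *	single page at a hypothetical "page_addr" and retrieving its
 *	physical page number.  The wired range is exactly one map page, as
 *	vm_map_wire_nested() requires whenever a physpage_p is supplied:
 *
 *		ppnum_t		phys = 0;
 *		kern_return_t	kr;
 *
 *		kr = vm_map_wire_and_extract(map, page_addr,
 *					     VM_PROT_READ | VM_PROT_WRITE,
 *					     FALSE,
 *					     &phys);
 *		if (kr == KERN_SUCCESS)
 *			("phys" now holds the page's physical page number)
 */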
4998
4999/*
5000 *	vm_map_unwire:
5001 *
5002 *	Sets the pageability of the specified address range in the target
5003 *	map as pageable.  Regions specified must have been wired previously.
5004 *
5005 *	The map must not be locked, but a reference must remain to the map
5006 *	throughout the call.
5007 *
5008 *	The kernel will panic on failures.  User unwire ignores holes and
5009 *	unwired or in-transition entries to avoid losing memory by leaving
5010 *	it unwired.
5011 */
5012static kern_return_t
5013vm_map_unwire_nested(
5014	register vm_map_t	map,
5015	register vm_map_offset_t	start,
5016	register vm_map_offset_t	end,
5017	boolean_t		user_wire,
5018	pmap_t			map_pmap,
5019	vm_map_offset_t		pmap_addr)
5020{
5021	register vm_map_entry_t	entry;
5022	struct vm_map_entry	*first_entry, tmp_entry;
5023	boolean_t		need_wakeup;
5024	boolean_t		main_map = FALSE;
5025	unsigned int		last_timestamp;
5026
5027	vm_map_lock(map);
5028	if(map_pmap == NULL)
5029		main_map = TRUE;
5030	last_timestamp = map->timestamp;
5031
5032	VM_MAP_RANGE_CHECK(map, start, end);
5033	assert(page_aligned(start));
5034	assert(page_aligned(end));
5035	assert(VM_MAP_PAGE_ALIGNED(start, VM_MAP_PAGE_MASK(map)));
5036	assert(VM_MAP_PAGE_ALIGNED(end, VM_MAP_PAGE_MASK(map)));
5037
5038	if (start == end) {
5039		/* We unwired what the caller asked for: zero pages */
5040		vm_map_unlock(map);
5041		return KERN_SUCCESS;
5042	}
5043
5044	if (vm_map_lookup_entry(map, start, &first_entry)) {
5045		entry = first_entry;
5046		/*
5047		 * vm_map_clip_start will be done later.
5048		 * We don't want to unnest any nested sub maps here !
5049		 */
5050	}
5051	else {
5052		if (!user_wire) {
5053			panic("vm_map_unwire: start not found");
5054		}
5055		/*	Start address is not in map. */
5056		vm_map_unlock(map);
5057		return(KERN_INVALID_ADDRESS);
5058	}
5059
5060	if (entry->superpage_size) {
5061		/* superpages are always wired */
5062		vm_map_unlock(map);
5063		return KERN_INVALID_ADDRESS;
5064	}
5065
5066	need_wakeup = FALSE;
5067	while ((entry != vm_map_to_entry(map)) && (entry->vme_start < end)) {
5068		if (entry->in_transition) {
5069			/*
5070			 * 1)
5071			 * Another thread is wiring down this entry.  Note
5072			 * that, if it were not for the other thread, we would
5073			 * be unwiring an unwired entry, which is not
5074			 * permitted.  And if we wait, we would end up
5075			 * unwiring memory we did not wire.
5076			 *
5077			 * 2)
5078			 * Another thread is unwiring this entry.  We did not
5079			 * have a reference to it because, if we did, this
5080			 * entry would not be getting unwired now.
5081			 */
5082			if (!user_wire) {
5083				/*
5084				 * XXX FBDP
5085				 * This could happen:  there could be some
5086				 * overlapping vslock/vsunlock operations
5087				 * going on.
5088				 * We should probably just wait and retry,
5089				 * but then we have to be careful that this
5090				 * entry could get "simplified" after
5091				 * "in_transition" gets unset and before
5092				 * we re-lookup the entry, so we would
5093				 * have to re-clip the entry to avoid
5094				 * re-unwiring what we have already unwired...
5095				 * See vm_map_wire_nested().
5096				 *
5097				 * Or we could just ignore "in_transition"
5098				 * here and proceed to decrement the wired
5099				 * count(s) on this entry.  That should be fine
5100				 * as long as "wired_count" doesn't drop all
5101				 * the way to 0 (and we should panic if THAT
5102				 * happens).
5103				 */
5104				panic("vm_map_unwire: in_transition entry");
5105			}
5106
5107			entry = entry->vme_next;
5108			continue;
5109		}
5110
5111		if (entry->is_sub_map) {
5112			vm_map_offset_t	sub_start;
5113			vm_map_offset_t	sub_end;
5114			vm_map_offset_t	local_end;
5115			pmap_t		pmap;
5116
5117			vm_map_clip_start(map, entry, start);
5118			vm_map_clip_end(map, entry, end);
5119
5120			sub_start = entry->offset;
5121			sub_end = entry->vme_end - entry->vme_start;
5122			sub_end += entry->offset;
5123			local_end = entry->vme_end;
5124			if(map_pmap == NULL) {
5125				if(entry->use_pmap) {
5126					pmap = entry->object.sub_map->pmap;
5127					pmap_addr = sub_start;
5128				} else {
5129					pmap = map->pmap;
5130					pmap_addr = start;
5131				}
5132				if (entry->wired_count == 0 ||
5133				    (user_wire && entry->user_wired_count == 0)) {
5134					if (!user_wire)
5135						panic("vm_map_unwire: entry is unwired");
5136					entry = entry->vme_next;
5137					continue;
5138				}
5139
5140				/*
5141				 * Check for holes
5142				 * Holes: Next entry should be contiguous unless
5143				 * this is the end of the region.
5144				 */
5145				if (((entry->vme_end < end) &&
5146				     ((entry->vme_next == vm_map_to_entry(map)) ||
5147				      (entry->vme_next->vme_start
5148				       > entry->vme_end)))) {
5149					if (!user_wire)
5150						panic("vm_map_unwire: non-contiguous region");
5151/*
5152					entry = entry->vme_next;
5153					continue;
5154*/
5155				}
5156
5157				subtract_wire_counts(map, entry, user_wire);
5158
5159				if (entry->wired_count != 0) {
5160					entry = entry->vme_next;
5161					continue;
5162				}
5163
5164				entry->in_transition = TRUE;
5165				tmp_entry = *entry;/* see comment in vm_map_wire() */
5166
5167				/*
5168				 * We can unlock the map now. The in_transition state
5169				 * guarantees the existence of the entry.
5170				 */
5171				vm_map_unlock(map);
5172				vm_map_unwire_nested(entry->object.sub_map,
5173						     sub_start, sub_end, user_wire, pmap, pmap_addr);
5174				vm_map_lock(map);
5175
5176				if (last_timestamp+1 != map->timestamp) {
5177					/*
5178					 * Find the entry again.  It could have been
5179					 * clipped or deleted after we unlocked the map.
5180					 */
5181					if (!vm_map_lookup_entry(map,
5182								 tmp_entry.vme_start,
5183								 &first_entry)) {
5184						if (!user_wire)
5185							panic("vm_map_unwire: re-lookup failed");
5186						entry = first_entry->vme_next;
5187					} else
5188						entry = first_entry;
5189				}
5190				last_timestamp = map->timestamp;
5191
5192				/*
5193				 * clear transition bit for all constituent entries
5194				 * that were in the original entry (saved in
5195				 * tmp_entry).  Also check for waiters.
5196				 */
5197				while ((entry != vm_map_to_entry(map)) &&
5198				       (entry->vme_start < tmp_entry.vme_end)) {
5199					assert(entry->in_transition);
5200					entry->in_transition = FALSE;
5201					if (entry->needs_wakeup) {
5202						entry->needs_wakeup = FALSE;
5203						need_wakeup = TRUE;
5204					}
5205					entry = entry->vme_next;
5206				}
5207				continue;
5208			} else {
5209				vm_map_unlock(map);
5210				vm_map_unwire_nested(entry->object.sub_map,
5211						     sub_start, sub_end, user_wire, map_pmap,
5212						     pmap_addr);
5213				vm_map_lock(map);
5214
5215				if (last_timestamp+1 != map->timestamp) {
5216					/*
5217					 * Find the entry again.  It could have been
5218					 * clipped or deleted after we unlocked the map.
5219					 */
5220					if (!vm_map_lookup_entry(map,
5221								 tmp_entry.vme_start,
5222								 &first_entry)) {
5223						if (!user_wire)
5224							panic("vm_map_unwire: re-lookup failed");
5225						entry = first_entry->vme_next;
5226					} else
5227						entry = first_entry;
5228				}
5229				last_timestamp = map->timestamp;
5230			}
5231		}
5232
5233
5234		if ((entry->wired_count == 0) ||
5235		    (user_wire && entry->user_wired_count == 0)) {
5236			if (!user_wire)
5237				panic("vm_map_unwire: entry is unwired");
5238
5239			entry = entry->vme_next;
5240			continue;
5241		}
5242
5243		assert(entry->wired_count > 0 &&
5244		       (!user_wire || entry->user_wired_count > 0));
5245
5246		vm_map_clip_start(map, entry, start);
5247		vm_map_clip_end(map, entry, end);
5248
5249		/*
5250		 * Check for holes
5251		 * Holes: Next entry should be contiguous unless
5252		 *	  this is the end of the region.
5253		 */
5254		if (((entry->vme_end < end) &&
5255		     ((entry->vme_next == vm_map_to_entry(map)) ||
5256		      (entry->vme_next->vme_start > entry->vme_end)))) {
5257
5258			if (!user_wire)
5259				panic("vm_map_unwire: non-contiguous region");
5260			entry = entry->vme_next;
5261			continue;
5262		}
5263
5264		subtract_wire_counts(map, entry, user_wire);
5265
5266		if (entry->wired_count != 0) {
5267			entry = entry->vme_next;
5268			continue;
5269		}
5270
5271		if(entry->zero_wired_pages) {
5272			entry->zero_wired_pages = FALSE;
5273		}
5274
5275		entry->in_transition = TRUE;
5276		tmp_entry = *entry;	/* see comment in vm_map_wire() */
5277
5278		/*
5279		 * We can unlock the map now. The in_transition state
5280		 * guarantees the existence of the entry.
5281		 */
5282		vm_map_unlock(map);
5283		if(map_pmap) {
5284			vm_fault_unwire(map,
5285					&tmp_entry, FALSE, map_pmap, pmap_addr);
5286		} else {
5287			vm_fault_unwire(map,
5288					&tmp_entry, FALSE, map->pmap,
5289					tmp_entry.vme_start);
5290		}
5291		vm_map_lock(map);
5292
5293		if (last_timestamp+1 != map->timestamp) {
5294			/*
5295			 * Find the entry again.  It could have been clipped
5296			 * or deleted after we unlocked the map.
5297			 */
5298			if (!vm_map_lookup_entry(map, tmp_entry.vme_start,
5299						 &first_entry)) {
5300				if (!user_wire)
5301					panic("vm_map_unwire: re-lookup failed");
5302				entry = first_entry->vme_next;
5303			} else
5304				entry = first_entry;
5305		}
5306		last_timestamp = map->timestamp;
5307
5308		/*
5309		 * clear transition bit for all constituent entries that
5310		 * were in the original entry (saved in tmp_entry).  Also
5311		 * check for waiters.
5312		 */
5313		while ((entry != vm_map_to_entry(map)) &&
5314		       (entry->vme_start < tmp_entry.vme_end)) {
5315			assert(entry->in_transition);
5316			entry->in_transition = FALSE;
5317			if (entry->needs_wakeup) {
5318				entry->needs_wakeup = FALSE;
5319				need_wakeup = TRUE;
5320			}
5321			entry = entry->vme_next;
5322		}
5323	}
5324
5325	/*
5326	 * We might have fragmented the address space when we wired this
5327	 * range of addresses.  Attempt to re-coalesce these VM map entries
5328	 * with their neighbors now that they're no longer wired.
5329	 * Under some circumstances, address space fragmentation can
5330	 * prevent VM object shadow chain collapsing, which can cause
5331	 * swap space leaks.
5332	 */
5333	vm_map_simplify_range(map, start, end);
5334
5335	vm_map_unlock(map);
5336	/*
5337	 * wake up anybody waiting on entries that we have unwired.
5338	 */
5339	if (need_wakeup)
5340		vm_map_entry_wakeup(map);
5341	return(KERN_SUCCESS);
5342
5343}
5344
5345kern_return_t
5346vm_map_unwire(
5347	register vm_map_t	map,
5348	register vm_map_offset_t	start,
5349	register vm_map_offset_t	end,
5350	boolean_t		user_wire)
5351{
5352	return vm_map_unwire_nested(map, start, end,
5353				    user_wire, (pmap_t)NULL, 0);
5354}
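
/*
 *	Illustrative sketch (assumption, not part of the build): undoing a
 *	user wiring of the same hypothetical range that was passed to
 *	vm_map_wire() above.  user_wire must match the value used when the
 *	range was wired so that the right wire counts are decremented:
 *
 *		kern_return_t	kr;
 *
 *		kr = vm_map_unwire(map, start, end, TRUE);
 */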
5355
5356
5357/*
5358 *	vm_map_entry_delete:	[ internal use only ]
5359 *
5360 *	Deallocate the given entry from the target map.
5361 */
5362static void
5363vm_map_entry_delete(
5364	register vm_map_t	map,
5365	register vm_map_entry_t	entry)
5366{
5367	register vm_map_offset_t	s, e;
5368	register vm_object_t	object;
5369	register vm_map_t	submap;
5370
5371	s = entry->vme_start;
5372	e = entry->vme_end;
5373	assert(page_aligned(s));
5374	assert(page_aligned(e));
5375	if (entry->map_aligned == TRUE) {
5376		assert(VM_MAP_PAGE_ALIGNED(s, VM_MAP_PAGE_MASK(map)));
5377		assert(VM_MAP_PAGE_ALIGNED(e, VM_MAP_PAGE_MASK(map)));
5378	}
5379	assert(entry->wired_count == 0);
5380	assert(entry->user_wired_count == 0);
5381	assert(!entry->permanent);
5382
5383	if (entry->is_sub_map) {
5384		object = NULL;
5385		submap = entry->object.sub_map;
5386	} else {
5387		submap = NULL;
5388		object = entry->object.vm_object;
5389	}
5390
5391	vm_map_store_entry_unlink(map, entry);
5392	map->size -= e - s;
5393
5394	vm_map_entry_dispose(map, entry);
5395
5396	vm_map_unlock(map);
5397	/*
5398	 *	Deallocate the object only after removing all
5399	 *	pmap entries pointing to its pages.
5400	 */
5401	if (submap)
5402		vm_map_deallocate(submap);
5403	else
5404		vm_object_deallocate(object);
5405
5406}
5407
5408void
5409vm_map_submap_pmap_clean(
5410	vm_map_t	map,
5411	vm_map_offset_t	start,
5412	vm_map_offset_t	end,
5413	vm_map_t	sub_map,
5414	vm_map_offset_t	offset)
5415{
5416	vm_map_offset_t	submap_start;
5417	vm_map_offset_t	submap_end;
5418	vm_map_size_t	remove_size;
5419	vm_map_entry_t	entry;
5420
5421	submap_end = offset + (end - start);
5422	submap_start = offset;
5423
5424	vm_map_lock_read(sub_map);
5425	if(vm_map_lookup_entry(sub_map, offset, &entry)) {
5426
5427		remove_size = (entry->vme_end - entry->vme_start);
5428		if(offset > entry->vme_start)
5429			remove_size -= offset - entry->vme_start;
5430
5431
5432		if(submap_end < entry->vme_end) {
5433			remove_size -=
5434				entry->vme_end - submap_end;
5435		}
5436		if(entry->is_sub_map) {
5437			vm_map_submap_pmap_clean(
5438				sub_map,
5439				start,
5440				start + remove_size,
5441				entry->object.sub_map,
5442				entry->offset);
5443		} else {
5444
5445			if((map->mapped_in_other_pmaps) && (map->ref_count)
5446			   && (entry->object.vm_object != NULL)) {
5447				vm_object_pmap_protect(
5448					entry->object.vm_object,
5449					entry->offset+(offset-entry->vme_start),
5450					remove_size,
5451					PMAP_NULL,
5452					entry->vme_start,
5453					VM_PROT_NONE);
5454			} else {
5455				pmap_remove(map->pmap,
5456					    (addr64_t)start,
5457					    (addr64_t)(start + remove_size));
5458			}
5459		}
5460	}
5461
5462	entry = entry->vme_next;
5463
5464	while((entry != vm_map_to_entry(sub_map))
5465	      && (entry->vme_start < submap_end)) {
5466		remove_size = (entry->vme_end - entry->vme_start);
5467		if(submap_end < entry->vme_end) {
5468			remove_size -= entry->vme_end - submap_end;
5469		}
5470		if(entry->is_sub_map) {
5471			vm_map_submap_pmap_clean(
5472				sub_map,
5473				(start + entry->vme_start) - offset,
5474				((start + entry->vme_start) - offset) + remove_size,
5475				entry->object.sub_map,
5476				entry->offset);
5477		} else {
5478			if((map->mapped_in_other_pmaps) && (map->ref_count)
5479			   && (entry->object.vm_object != NULL)) {
5480				vm_object_pmap_protect(
5481					entry->object.vm_object,
5482					entry->offset,
5483					remove_size,
5484					PMAP_NULL,
5485					entry->vme_start,
5486					VM_PROT_NONE);
5487			} else {
5488				pmap_remove(map->pmap,
5489					    (addr64_t)((start + entry->vme_start)
5490						       - offset),
5491					    (addr64_t)(((start + entry->vme_start)
5492							- offset) + remove_size));
5493			}
5494		}
5495		entry = entry->vme_next;
5496	}
5497	vm_map_unlock_read(sub_map);
5498	return;
5499}
5500
5501/*
5502 *	vm_map_delete:	[ internal use only ]
5503 *
5504 *	Deallocates the given address range from the target map.
5505 *	Removes all user wirings. Unwires one kernel wiring if
5506 *	VM_MAP_REMOVE_KUNWIRE is set.  Waits for kernel wirings to go
5507 *	away if VM_MAP_REMOVE_WAIT_FOR_KWIRE is set.  Sleeps
5508 *	interruptibly if VM_MAP_REMOVE_INTERRUPTIBLE is set.
5509 *
5510 *	This routine is called with map locked and leaves map locked.
5511 */
5512static kern_return_t
5513vm_map_delete(
5514	vm_map_t		map,
5515	vm_map_offset_t		start,
5516	vm_map_offset_t		end,
5517	int			flags,
5518	vm_map_t		zap_map)
5519{
5520	vm_map_entry_t		entry, next;
5521	struct	 vm_map_entry	*first_entry, tmp_entry;
5522	register vm_map_offset_t s;
5523	register vm_object_t	object;
5524	boolean_t		need_wakeup;
5525	unsigned int		last_timestamp = ~0; /* unlikely value */
5526	int			interruptible;
5527
5528	interruptible = (flags & VM_MAP_REMOVE_INTERRUPTIBLE) ?
5529		THREAD_ABORTSAFE : THREAD_UNINT;
5530
5531	/*
5532	 * All our DMA I/O operations in IOKit are currently done by
5533	 * wiring through the map entries of the task requesting the I/O.
5534	 * Because of this, we must always wait for kernel wirings
5535	 * to go away on the entries before deleting them.
5536	 *
5537	 * Any caller who wants to actually remove a kernel wiring
5538	 * should explicitly set the VM_MAP_REMOVE_KUNWIRE flag to
5539	 * properly remove one wiring instead of blasting through
5540	 * them all.
5541	 */
5542	flags |= VM_MAP_REMOVE_WAIT_FOR_KWIRE;
5543
5544	while(1) {
5545		/*
5546		 *	Find the start of the region, and clip it
5547		 */
5548		if (vm_map_lookup_entry(map, start, &first_entry)) {
5549			entry = first_entry;
5550			if (map == kalloc_map &&
5551			    (entry->vme_start != start ||
5552			     entry->vme_end != end)) {
5553				panic("vm_map_delete(%p,0x%llx,0x%llx): "
5554				      "mismatched entry %p [0x%llx:0x%llx]\n",
5555				      map,
5556				      (uint64_t)start,
5557				      (uint64_t)end,
5558				      entry,
5559				      (uint64_t)entry->vme_start,
5560				      (uint64_t)entry->vme_end);
5561			}
5562			if (entry->superpage_size && (start & ~SUPERPAGE_MASK)) { /* extend request to whole entry */
5563				start = SUPERPAGE_ROUND_DOWN(start);
5564				continue;
5565			}
5566			if (start == entry->vme_start) {
5567				/*
5568				 * No need to clip.  We don't want to cause
5569				 * any unnecessary unnesting in this case...
5570				 */
5571			} else {
5572				if ((flags & VM_MAP_REMOVE_NO_MAP_ALIGN) &&
5573				    entry->map_aligned &&
5574				    !VM_MAP_PAGE_ALIGNED(
5575					    start,
5576					    VM_MAP_PAGE_MASK(map))) {
5577					/*
5578					 * The entry will no longer be
5579					 * map-aligned after clipping
5580					 * and the caller said it's OK.
5581					 */
5582					entry->map_aligned = FALSE;
5583				}
5584				if (map == kalloc_map) {
5585					panic("vm_map_delete(%p,0x%llx,0x%llx):"
5586					      " clipping %p at 0x%llx\n",
5587					      map,
5588					      (uint64_t)start,
5589					      (uint64_t)end,
5590					      entry,
5591					      (uint64_t)start);
5592				}
5593				vm_map_clip_start(map, entry, start);
5594			}
5595
5596			/*
5597			 *	Fix the lookup hint now, rather than each
5598			 *	time through the loop.
5599			 */
5600			SAVE_HINT_MAP_WRITE(map, entry->vme_prev);
5601		} else {
5602			if (map->pmap == kernel_pmap &&
5603			    map->ref_count != 0) {
5604				panic("vm_map_delete(%p,0x%llx,0x%llx): "
5605				      "no map entry at 0x%llx\n",
5606				      map,
5607				      (uint64_t)start,
5608				      (uint64_t)end,
5609				      (uint64_t)start);
5610			}
5611			entry = first_entry->vme_next;
5612		}
5613		break;
5614	}
5615	if (entry->superpage_size)
5616		end = SUPERPAGE_ROUND_UP(end);
5617
5618	need_wakeup = FALSE;
5619	/*
5620	 *	Step through all entries in this region
5621	 */
5622	s = entry->vme_start;
5623	while ((entry != vm_map_to_entry(map)) && (s < end)) {
5624		/*
5625		 * At this point, we have deleted all the memory entries
5626		 * between "start" and "s".  We still need to delete
5627		 * all memory entries between "s" and "end".
5628		 * While we were blocked and the map was unlocked, some
5629		 * new memory entries could have been re-allocated between
5630		 * "start" and "s" and we don't want to mess with those.
5631		 * Some of those entries could even have been re-assembled
5632		 * with an entry after "s" (in vm_map_simplify_entry()), so
5633		 * we may have to vm_map_clip_start() again.
5634		 */
5635
5636		if (entry->vme_start >= s) {
5637			/*
5638			 * This entry starts on or after "s"
5639			 * so no need to clip its start.
5640			 */
5641		} else {
5642			/*
5643			 * This entry has been re-assembled by a
5644			 * vm_map_simplify_entry().  We need to
5645			 * re-clip its start.
5646			 */
5647			if ((flags & VM_MAP_REMOVE_NO_MAP_ALIGN) &&
5648			    entry->map_aligned &&
5649			    !VM_MAP_PAGE_ALIGNED(s,
5650						 VM_MAP_PAGE_MASK(map))) {
5651				/*
5652				 * The entry will no longer be map-aligned
5653				 * after clipping and the caller said it's OK.
5654				 */
5655				entry->map_aligned = FALSE;
5656			}
5657			if (map == kalloc_map) {
5658				panic("vm_map_delete(%p,0x%llx,0x%llx): "
5659				      "clipping %p at 0x%llx\n",
5660				      map,
5661				      (uint64_t)start,
5662				      (uint64_t)end,
5663				      entry,
5664				      (uint64_t)s);
5665			}
5666			vm_map_clip_start(map, entry, s);
5667		}
5668		if (entry->vme_end <= end) {
5669			/*
5670			 * This entry is going away completely, so no need
5671			 * to clip and possibly cause an unnecessary unnesting.
5672			 */
5673		} else {
5674			if ((flags & VM_MAP_REMOVE_NO_MAP_ALIGN) &&
5675			    entry->map_aligned &&
5676			    !VM_MAP_PAGE_ALIGNED(end,
5677						 VM_MAP_PAGE_MASK(map))) {
5678				/*
5679				 * The entry will no longer be map-aligned
5680				 * after clipping and the caller said it's OK.
5681				 */
5682				entry->map_aligned = FALSE;
5683			}
5684			if (map == kalloc_map) {
5685				panic("vm_map_delete(%p,0x%llx,0x%llx): "
5686				      "clipping %p at 0x%llx\n",
5687				      map,
5688				      (uint64_t)start,
5689				      (uint64_t)end,
5690				      entry,
5691				      (uint64_t)end);
5692			}
5693			vm_map_clip_end(map, entry, end);
5694		}
5695
5696		if (entry->permanent) {
5697			panic("attempt to remove permanent VM map entry "
5698			      "%p [0x%llx:0x%llx]\n",
5699			      entry, (uint64_t) s, (uint64_t) end);
5700		}
5701
5702
5703		if (entry->in_transition) {
5704			wait_result_t wait_result;
5705
5706			/*
5707			 * Another thread is wiring/unwiring this entry.
5708			 * Let the other thread know we are waiting.
5709			 */
5710			assert(s == entry->vme_start);
5711			entry->needs_wakeup = TRUE;
5712
5713			/*
5714			 * wake up anybody waiting on entries that we have
5715			 * already unwired/deleted.
5716			 */
5717			if (need_wakeup) {
5718				vm_map_entry_wakeup(map);
5719				need_wakeup = FALSE;
5720			}
5721
5722			wait_result = vm_map_entry_wait(map, interruptible);
5723
5724			if (interruptible &&
5725			    wait_result == THREAD_INTERRUPTED) {
5726				/*
5727				 * We do not clear the needs_wakeup flag,
5728				 * since we cannot tell if we were the only one.
5729				 */
5730				return KERN_ABORTED;
5731			}
5732
5733			/*
5734			 * The entry could have been clipped or it
5735			 * may not exist anymore.  Look it up again.
5736			 */
5737			if (!vm_map_lookup_entry(map, s, &first_entry)) {
5738				assert((map != kernel_map) &&
5739				       (!entry->is_sub_map));
5740				/*
5741				 * User: use the next entry
5742				 */
5743				entry = first_entry->vme_next;
5744				s = entry->vme_start;
5745			} else {
5746				entry = first_entry;
5747				SAVE_HINT_MAP_WRITE(map, entry->vme_prev);
5748			}
5749			last_timestamp = map->timestamp;
5750			continue;
5751		} /* end in_transition */
5752
5753		if (entry->wired_count) {
5754			boolean_t	user_wire;
5755
5756			user_wire = entry->user_wired_count > 0;
5757
5758			/*
5759			 * 	Remove a kernel wiring if requested
5760			 */
5761			if (flags & VM_MAP_REMOVE_KUNWIRE) {
5762				entry->wired_count--;
5763			}
5764
5765			/*
5766			 *	Remove all user wirings for proper accounting
5767			 */
5768			if (entry->user_wired_count > 0) {
5769				while (entry->user_wired_count)
5770					subtract_wire_counts(map, entry, user_wire);
5771			}
5772
5773			if (entry->wired_count != 0) {
5774				assert(map != kernel_map);
5775				/*
5776				 * Cannot continue.  Typical case is when
5777				 * a user thread has physical io pending
5778				 * on this page.  Either wait for the
5779				 * kernel wiring to go away or return an
5780				 * error.
5781				 */
5782				if (flags & VM_MAP_REMOVE_WAIT_FOR_KWIRE) {
5783					wait_result_t wait_result;
5784
5785					assert(s == entry->vme_start);
5786					entry->needs_wakeup = TRUE;
5787					wait_result = vm_map_entry_wait(map,
5788									interruptible);
5789
5790					if (interruptible &&
5791					    wait_result == THREAD_INTERRUPTED) {
5792						/*
5793						 * We do not clear the
5794						 * needs_wakeup flag, since we
5795						 * cannot tell if we were the
5796						 * only one.
5797						 */
5798						return KERN_ABORTED;
5799					}
5800
5801					/*
5802					 * The entry could have been clipped or
5803					 * it may not exist anymore.  Look it
5804					 * up again.
5805					 */
5806					if (!vm_map_lookup_entry(map, s,
5807								 &first_entry)) {
5808						assert(map != kernel_map);
5809						/*
5810						 * User: use the next entry
5811						 */
5812						entry = first_entry->vme_next;
5813						s = entry->vme_start;
5814					} else {
5815						entry = first_entry;
5816						SAVE_HINT_MAP_WRITE(map, entry->vme_prev);
5817					}
5818					last_timestamp = map->timestamp;
5819					continue;
5820				}
5821				else {
5822					return KERN_FAILURE;
5823				}
5824			}
5825
5826			entry->in_transition = TRUE;
5827			/*
5828			 * copy current entry.  see comment in vm_map_wire()
5829			 */
5830			tmp_entry = *entry;
5831			assert(s == entry->vme_start);
5832
5833			/*
5834			 * We can unlock the map now. The in_transition
5835			 * state guarantees existence of the entry.
5836			 */
5837			vm_map_unlock(map);
5838
5839			if (tmp_entry.is_sub_map) {
5840				vm_map_t sub_map;
5841				vm_map_offset_t sub_start, sub_end;
5842				pmap_t pmap;
5843				vm_map_offset_t pmap_addr;
5844
5845
5846				sub_map = tmp_entry.object.sub_map;
5847				sub_start = tmp_entry.offset;
5848				sub_end = sub_start + (tmp_entry.vme_end -
5849						       tmp_entry.vme_start);
5850				if (tmp_entry.use_pmap) {
5851					pmap = sub_map->pmap;
5852					pmap_addr = tmp_entry.vme_start;
5853				} else {
5854					pmap = map->pmap;
5855					pmap_addr = tmp_entry.vme_start;
5856				}
5857				(void) vm_map_unwire_nested(sub_map,
5858							    sub_start, sub_end,
5859							    user_wire,
5860							    pmap, pmap_addr);
5861			} else {
5862
5863				if (tmp_entry.object.vm_object == kernel_object) {
5864					pmap_protect_options(
5865						map->pmap,
5866						tmp_entry.vme_start,
5867						tmp_entry.vme_end,
5868						VM_PROT_NONE,
5869						PMAP_OPTIONS_REMOVE,
5870						NULL);
5871				}
5872				vm_fault_unwire(map, &tmp_entry,
5873						tmp_entry.object.vm_object == kernel_object,
5874						map->pmap, tmp_entry.vme_start);
5875			}
5876
5877			vm_map_lock(map);
5878
5879			if (last_timestamp+1 != map->timestamp) {
5880				/*
5881				 * Find the entry again.  It could have
5882				 * been clipped after we unlocked the map.
5883				 */
5884				if (!vm_map_lookup_entry(map, s, &first_entry)){
5885					assert((map != kernel_map) &&
5886					       (!entry->is_sub_map));
5887					first_entry = first_entry->vme_next;
5888					s = first_entry->vme_start;
5889				} else {
5890					SAVE_HINT_MAP_WRITE(map, entry->vme_prev);
5891				}
5892			} else {
5893				SAVE_HINT_MAP_WRITE(map, entry->vme_prev);
5894				first_entry = entry;
5895			}
5896
5897			last_timestamp = map->timestamp;
5898
5899			entry = first_entry;
5900			while ((entry != vm_map_to_entry(map)) &&
5901			       (entry->vme_start < tmp_entry.vme_end)) {
5902				assert(entry->in_transition);
5903				entry->in_transition = FALSE;
5904				if (entry->needs_wakeup) {
5905					entry->needs_wakeup = FALSE;
5906					need_wakeup = TRUE;
5907				}
5908				entry = entry->vme_next;
5909			}
5910			/*
5911			 * We have unwired the entry(s).  Go back and
5912			 * delete them.
5913			 */
5914			entry = first_entry;
5915			continue;
5916		}
5917
5918		/* entry is unwired */
5919		assert(entry->wired_count == 0);
5920		assert(entry->user_wired_count == 0);
5921
5922		assert(s == entry->vme_start);
5923
5924		if (flags & VM_MAP_REMOVE_NO_PMAP_CLEANUP) {
5925			/*
5926			 * XXX with the VM_MAP_REMOVE_SAVE_ENTRIES flag to
5927			 * vm_map_delete(), some map entries might have been
5928			 * transferred to a "zap_map", which doesn't have a
5929			 * pmap.  The original pmap has already been flushed
5930			 * in the vm_map_delete() call targeting the original
5931			 * map, but when we get to destroying the "zap_map",
5932			 * we don't have any pmap to flush, so let's just skip
5933			 * all this.
5934			 */
5935		} else if (entry->is_sub_map) {
5936			if (entry->use_pmap) {
5937#ifndef NO_NESTED_PMAP
5938				pmap_unnest(map->pmap,
5939					    (addr64_t)entry->vme_start,
5940					    entry->vme_end - entry->vme_start);
5941#endif	/* NO_NESTED_PMAP */
5942				if ((map->mapped_in_other_pmaps) && (map->ref_count)) {
5943					/* clean up parent map/maps */
5944					vm_map_submap_pmap_clean(
5945						map, entry->vme_start,
5946						entry->vme_end,
5947						entry->object.sub_map,
5948						entry->offset);
5949				}
5950			} else {
5951				vm_map_submap_pmap_clean(
5952					map, entry->vme_start, entry->vme_end,
5953					entry->object.sub_map,
5954					entry->offset);
5955			}
5956		} else if (entry->object.vm_object != kernel_object &&
5957			   entry->object.vm_object != compressor_object) {
5958			object = entry->object.vm_object;
5959			if ((map->mapped_in_other_pmaps) && (map->ref_count)) {
5960				vm_object_pmap_protect_options(
5961					object, entry->offset,
5962					entry->vme_end - entry->vme_start,
5963					PMAP_NULL,
5964					entry->vme_start,
5965					VM_PROT_NONE,
5966					PMAP_OPTIONS_REMOVE);
5967			} else if ((entry->object.vm_object !=
5968				    VM_OBJECT_NULL) ||
5969				   (map->pmap == kernel_pmap)) {
5970				/* Remove translations associated
5971				 * with this range unless the entry
5972				 * does not have an object, or
5973				 * it's the kernel map or a descendant
5974				 * since the platform could potentially
5975				 * create "backdoor" mappings invisible
5976				 * to the VM. It is expected that
5977				 * objectless, non-kernel ranges
5978				 * do not have such VM invisible
5979				 * translations.
5980				 */
5981				pmap_remove_options(map->pmap,
5982						    (addr64_t)entry->vme_start,
5983						    (addr64_t)entry->vme_end,
5984						    PMAP_OPTIONS_REMOVE);
5985			}
5986		}
5987
5988		if (entry->iokit_acct) {
5989			/* alternate accounting */
5990			vm_map_iokit_unmapped_region(map,
5991						     (entry->vme_end -
5992						      entry->vme_start));
5993			entry->iokit_acct = FALSE;
5994		}
5995
5996		/*
5997		 * All pmap mappings for this map entry must have been
5998		 * cleared by now.
5999		 */
6000#if DEBUG
6001		assert(vm_map_pmap_is_empty(map,
6002					    entry->vme_start,
6003					    entry->vme_end));
6004#endif /* DEBUG */
6005
6006		next = entry->vme_next;
6007
6008		if (map->pmap == kernel_pmap &&
6009		    map->ref_count != 0 &&
6010		    entry->vme_end < end &&
6011		    (next == vm_map_to_entry(map) ||
6012		     next->vme_start != entry->vme_end)) {
6013			panic("vm_map_delete(%p,0x%llx,0x%llx): "
6014			      "hole after %p at 0x%llx\n",
6015			      map,
6016			      (uint64_t)start,
6017			      (uint64_t)end,
6018			      entry,
6019			      (uint64_t)entry->vme_end);
6020		}
6021
6022		s = next->vme_start;
6023		last_timestamp = map->timestamp;
6024
6025		if ((flags & VM_MAP_REMOVE_SAVE_ENTRIES) &&
6026		    zap_map != VM_MAP_NULL) {
6027			vm_map_size_t entry_size;
6028			/*
6029			 * The caller wants to save the affected VM map entries
6030			 * into the "zap_map".  The caller will take care of
6031			 * these entries.
6032			 */
6033			/* unlink the entry from "map" ... */
6034			vm_map_store_entry_unlink(map, entry);
6035			/* ... and add it to the end of the "zap_map" */
6036			vm_map_store_entry_link(zap_map,
6037					  vm_map_last_entry(zap_map),
6038					  entry);
6039			entry_size = entry->vme_end - entry->vme_start;
6040			map->size -= entry_size;
6041			zap_map->size += entry_size;
6042			/* we didn't unlock the map, so no timestamp increase */
6043			last_timestamp--;
6044		} else {
6045			vm_map_entry_delete(map, entry);
6046			/* vm_map_entry_delete unlocks the map */
6047			vm_map_lock(map);
6048		}
6049
6050		entry = next;
6051
6052		if(entry == vm_map_to_entry(map)) {
6053			break;
6054		}
6055		if (last_timestamp+1 != map->timestamp) {
6056			/*
6057			 * We are responsible for deleting everything
6058			 * from the given space.  If someone has interfered,
6059			 * we pick up where we left off.  Back-fills should
6060			 * be all right for anyone except vm_map_delete(),
6061			 * and we have to assume that the task has been fully
6062			 * disabled before we get here.
6063			 */
6064			if (!vm_map_lookup_entry(map, s, &entry)){
6065				entry = entry->vme_next;
6066				s = entry->vme_start;
6067			} else {
6068				SAVE_HINT_MAP_WRITE(map, entry->vme_prev);
6069			}
6070			/*
6071			 * Others can not only allocate behind us, we can
6072			 * also see entries coalesce while we don't hold the map lock.
6073			 */
6074			if(entry == vm_map_to_entry(map)) {
6075				break;
6076			}
6077		}
6078		last_timestamp = map->timestamp;
6079	}
6080
6081	if (map->wait_for_space)
6082		thread_wakeup((event_t) map);
6083	/*
6084	 * wake up anybody waiting on entries that we have already deleted.
6085	 */
6086	if (need_wakeup)
6087		vm_map_entry_wakeup(map);
6088
6089	return KERN_SUCCESS;
6090}
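/*
 *	Editor's illustrative sketch (not part of the original source): the
 *	VM_MAP_REMOVE_SAVE_ENTRIES path above lets a caller unlink entries
 *	while holding the map lock and defer the real teardown until the
 *	lock has been dropped.  Roughly, with "zap_map", "addr" and "size"
 *	as placeholder names:
 *
 *		vm_map_t	zap_map;
 *
 *		zap_map = vm_map_create(PMAP_NULL, addr, addr + size,
 *					map->hdr.entries_pageable);
 *		vm_map_lock(map);
 *		(void) vm_map_delete(map, addr, addr + size,
 *				     VM_MAP_REMOVE_SAVE_ENTRIES,
 *				     zap_map);
 *		vm_map_unlock(map);
 *		...
 *		vm_map_destroy(zap_map, VM_MAP_REMOVE_NO_PMAP_CLEANUP);
 *
 *	The later vm_map_destroy() of the zap map is where the
 *	VM_MAP_REMOVE_NO_PMAP_CLEANUP case documented above applies, since
 *	the zap map carries no pmap of its own to flush.
 */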
6091
6092/*
6093 *	vm_map_remove:
6094 *
6095 *	Remove the given address range from the target map.
6096 *	This is the exported form of vm_map_delete.
6097 */
6098kern_return_t
6099vm_map_remove(
6100	register vm_map_t	map,
6101	register vm_map_offset_t	start,
6102	register vm_map_offset_t	end,
6103	register boolean_t	flags)
6104{
6105	register kern_return_t	result;
6106
6107	vm_map_lock(map);
6108	VM_MAP_RANGE_CHECK(map, start, end);
6109	/*
6110	 * For the zone_map, the kernel controls the allocation/freeing of memory.
6111	 * Any free to the zone_map should be within the bounds of the map and
6112	 * should free up memory. If the VM_MAP_RANGE_CHECK() silently converts a
6113	 * free to the zone_map into a no-op, there is a problem and we should
6114	 * panic.
6115	 */
6116	if ((map == zone_map) && (start == end))
6117		panic("Nothing being freed to the zone_map. start = end = %p\n", (void *)start);
6118	result = vm_map_delete(map, start, end, flags, VM_MAP_NULL);
6119	vm_map_unlock(map);
6120
6121	return(result);
6122}
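/*
 *	Editor's illustrative sketch (not part of the original source): a
 *	minimal removal of a range through the exported interface above.
 *	"map", "addr" and "size" are placeholder names.
 *
 *		kern_return_t	kr;
 *
 *		kr = vm_map_remove(map,
 *				   vm_map_trunc_page(addr,
 *						     VM_MAP_PAGE_MASK(map)),
 *				   vm_map_round_page(addr + size,
 *						     VM_MAP_PAGE_MASK(map)),
 *				   VM_MAP_NO_FLAGS);
 *
 *	KERN_ABORTED is only possible when VM_MAP_REMOVE_INTERRUPTIBLE is
 *	passed; with VM_MAP_NO_FLAGS the wait for kernel wirings is
 *	uninterruptible, since vm_map_delete() always forces
 *	VM_MAP_REMOVE_WAIT_FOR_KWIRE.
 */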
6123
6124
6125/*
6126 *	Routine:	vm_map_copy_discard
6127 *
6128 *	Description:
6129 *		Dispose of a map copy object (returned by
6130 *		vm_map_copyin).
6131 */
6132void
6133vm_map_copy_discard(
6134	vm_map_copy_t	copy)
6135{
6136	if (copy == VM_MAP_COPY_NULL)
6137		return;
6138
6139	switch (copy->type) {
6140	case VM_MAP_COPY_ENTRY_LIST:
6141		while (vm_map_copy_first_entry(copy) !=
6142		       vm_map_copy_to_entry(copy)) {
6143			vm_map_entry_t	entry = vm_map_copy_first_entry(copy);
6144
6145			vm_map_copy_entry_unlink(copy, entry);
6146			if (entry->is_sub_map) {
6147				vm_map_deallocate(entry->object.sub_map);
6148			} else {
6149				vm_object_deallocate(entry->object.vm_object);
6150			}
6151			vm_map_copy_entry_dispose(copy, entry);
6152		}
6153		break;
6154	case VM_MAP_COPY_OBJECT:
6155		vm_object_deallocate(copy->cpy_object);
6156		break;
6157	case VM_MAP_COPY_KERNEL_BUFFER:
6158
6159		/*
6160		 * The vm_map_copy_t and possibly the data buffer were
6161		 * allocated by a single call to kalloc(), i.e. the
6162		 * vm_map_copy_t was not allocated out of the zone.
6163		 */
6164		kfree(copy, copy->cpy_kalloc_size);
6165		return;
6166	}
6167	zfree(vm_map_copy_zone, copy);
6168}
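/*
 *	Editor's illustrative sketch (not part of the original source): a
 *	typical place vm_map_copy_discard() gets called is an error path
 *	where out-of-line data that has already been copied in will not be
 *	handed anywhere else.  "copy" and "kr" are placeholders for such a
 *	pending vm_map_copy_t and the failing return code:
 *
 *		if (kr != KERN_SUCCESS && copy != VM_MAP_COPY_NULL) {
 *			vm_map_copy_discard(copy);
 *			copy = VM_MAP_COPY_NULL;
 *		}
 *
 *	Discarding is the only correct way to drop a copy object on the
 *	floor; simply freeing it would leak the object or sub-map
 *	references held by its entries.
 */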
6169
6170/*
6171 *	Routine:	vm_map_copy_copy
6172 *
6173 *	Description:
6174 *			Move the information in a map copy object to
6175 *			a new map copy object, leaving the old one
6176 *			empty.
6177 *
6178 *			This is used by kernel routines that need
6179 *			to look at out-of-line data (in copyin form)
6180 *			before deciding whether to return SUCCESS.
6181 *			If the routine returns FAILURE, the original
6182 *			copy object will be deallocated; therefore,
6183 *			these routines must make a copy of the copy
6184 *			object and leave the original empty so that
6185 *			deallocation will not fail.
6186 */
6187vm_map_copy_t
6188vm_map_copy_copy(
6189	vm_map_copy_t	copy)
6190{
6191	vm_map_copy_t	new_copy;
6192
6193	if (copy == VM_MAP_COPY_NULL)
6194		return VM_MAP_COPY_NULL;
6195
6196	/*
6197	 * Allocate a new copy object, and copy the information
6198	 * from the old one into it.
6199	 */
6200
6201	new_copy = (vm_map_copy_t) zalloc(vm_map_copy_zone);
6202	*new_copy = *copy;
6203
6204	if (copy->type == VM_MAP_COPY_ENTRY_LIST) {
6205		/*
6206		 * The links in the entry chain must be
6207		 * changed to point to the new copy object.
6208		 */
6209		vm_map_copy_first_entry(copy)->vme_prev
6210			= vm_map_copy_to_entry(new_copy);
6211		vm_map_copy_last_entry(copy)->vme_next
6212			= vm_map_copy_to_entry(new_copy);
6213	}
6214
6215	/*
6216	 * Change the old copy object into one that contains
6217	 * nothing to be deallocated.
6218	 */
6219	copy->type = VM_MAP_COPY_OBJECT;
6220	copy->cpy_object = VM_OBJECT_NULL;
6221
6222	/*
6223	 * Return the new object.
6224	 */
6225	return new_copy;
6226}
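/*
 *	Editor's illustrative sketch (not part of the original source): the
 *	"peek before committing" pattern that the description above refers
 *	to.  A kernel routine that wants to inspect copyin data before
 *	deciding on its return value might do roughly this ("examine_data"
 *	is a hypothetical helper, not a real function):
 *
 *		vm_map_copy_t	private_copy;
 *
 *		private_copy = vm_map_copy_copy(copy);
 *		if (!examine_data(private_copy)) {
 *			vm_map_copy_discard(private_copy);
 *			return KERN_FAILURE;
 *		}
 *
 *	When KERN_FAILURE propagates back, the caller's deallocation of the
 *	original "copy" is harmless, because vm_map_copy_copy() left it as
 *	an empty VM_MAP_COPY_OBJECT with a null object.
 */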
6227
6228static kern_return_t
6229vm_map_overwrite_submap_recurse(
6230	vm_map_t	dst_map,
6231	vm_map_offset_t	dst_addr,
6232	vm_map_size_t	dst_size)
6233{
6234	vm_map_offset_t	dst_end;
6235	vm_map_entry_t	tmp_entry;
6236	vm_map_entry_t	entry;
6237	kern_return_t	result;
6238	boolean_t	encountered_sub_map = FALSE;
6239
6240
6241
6242	/*
6243	 *	Verify that the destination is all writeable
6244	 *	initially.  We have to trunc the destination
6245	 *	address and round the copy size or we'll end up
6246	 *	splitting entries in strange ways.
6247	 */
6248
6249	dst_end = vm_map_round_page(dst_addr + dst_size,
6250				    VM_MAP_PAGE_MASK(dst_map));
6251	vm_map_lock(dst_map);
6252
6253start_pass_1:
6254	if (!vm_map_lookup_entry(dst_map, dst_addr, &tmp_entry)) {
6255		vm_map_unlock(dst_map);
6256		return(KERN_INVALID_ADDRESS);
6257	}
6258
6259	vm_map_clip_start(dst_map,
6260			  tmp_entry,
6261			  vm_map_trunc_page(dst_addr,
6262					    VM_MAP_PAGE_MASK(dst_map)));
6263	if (tmp_entry->is_sub_map) {
6264		/* clipping did unnest if needed */
6265		assert(!tmp_entry->use_pmap);
6266	}
6267
6268	for (entry = tmp_entry;;) {
6269		vm_map_entry_t	next;
6270
6271		next = entry->vme_next;
6272		while(entry->is_sub_map) {
6273			vm_map_offset_t	sub_start;
6274			vm_map_offset_t	sub_end;
6275			vm_map_offset_t	local_end;
6276
6277			if (entry->in_transition) {
6278				/*
6279				 * Say that we are waiting, and wait for entry.
6280				 */
6281                        	entry->needs_wakeup = TRUE;
6282                        	vm_map_entry_wait(dst_map, THREAD_UNINT);
6283
6284				goto start_pass_1;
6285			}
6286
6287			encountered_sub_map = TRUE;
6288			sub_start = entry->offset;
6289
6290			if(entry->vme_end < dst_end)
6291				sub_end = entry->vme_end;
6292			else
6293				sub_end = dst_end;
6294			sub_end -= entry->vme_start;
6295			sub_end += entry->offset;
6296			local_end = entry->vme_end;
6297			vm_map_unlock(dst_map);
6298
6299			result = vm_map_overwrite_submap_recurse(
6300				entry->object.sub_map,
6301				sub_start,
6302				sub_end - sub_start);
6303
6304			if(result != KERN_SUCCESS)
6305				return result;
6306			if (dst_end <= entry->vme_end)
6307				return KERN_SUCCESS;
6308			vm_map_lock(dst_map);
6309			if(!vm_map_lookup_entry(dst_map, local_end,
6310						&tmp_entry)) {
6311				vm_map_unlock(dst_map);
6312				return(KERN_INVALID_ADDRESS);
6313			}
6314			entry = tmp_entry;
6315			next = entry->vme_next;
6316		}
6317
6318		if ( ! (entry->protection & VM_PROT_WRITE)) {
6319			vm_map_unlock(dst_map);
6320			return(KERN_PROTECTION_FAILURE);
6321		}
6322
6323		/*
6324		 *	If the entry is in transition, we must wait
6325		 *	for it to exit that state.  Anything could happen
6326		 *	when we unlock the map, so start over.
6327		 */
6328                if (entry->in_transition) {
6329
6330                        /*
6331                         * Say that we are waiting, and wait for entry.
6332                         */
6333                        entry->needs_wakeup = TRUE;
6334                        vm_map_entry_wait(dst_map, THREAD_UNINT);
6335
6336			goto start_pass_1;
6337		}
6338
6339/*
6340 *		our range is contained completely within this map entry
6341 */
6342		if (dst_end <= entry->vme_end) {
6343			vm_map_unlock(dst_map);
6344			return KERN_SUCCESS;
6345		}
6346/*
6347 *		check that range specified is contiguous region
6348 */
6349		if ((next == vm_map_to_entry(dst_map)) ||
6350		    (next->vme_start != entry->vme_end)) {
6351			vm_map_unlock(dst_map);
6352			return(KERN_INVALID_ADDRESS);
6353		}
6354
6355		/*
6356		 *	Check for permanent objects in the destination.
6357		 */
6358		if ((entry->object.vm_object != VM_OBJECT_NULL) &&
6359		    ((!entry->object.vm_object->internal) ||
6360		     (entry->object.vm_object->true_share))) {
6361			if(encountered_sub_map) {
6362				vm_map_unlock(dst_map);
6363				return(KERN_FAILURE);
6364			}
6365		}
6366
6367
6368		entry = next;
6369	}/* for */
6370	vm_map_unlock(dst_map);
6371	return(KERN_SUCCESS);
6372}
6373
6374/*
6375 *	Routine:	vm_map_copy_overwrite
6376 *
6377 *	Description:
6378 *		Copy the memory described by the map copy
6379 *		object (copy; returned by vm_map_copyin) onto
6380 *		the specified destination region (dst_map, dst_addr).
6381 *		The destination must be writeable.
6382 *
6383 *		Unlike vm_map_copyout, this routine actually
6384 *		writes over previously-mapped memory.  If the
6385 *		previous mapping was to a permanent (user-supplied)
6386 *		memory object, it is preserved.
6387 *
6388 *		The attributes (protection and inheritance) of the
6389 *		destination region are preserved.
6390 *
6391 *		If successful, consumes the copy object.
6392 *		Otherwise, the caller is responsible for it.
6393 *
6394 *	Implementation notes:
6395 *		To overwrite aligned temporary virtual memory, it is
6396 *		sufficient to remove the previous mapping and insert
6397 *		the new copy.  This replacement is done either on
6398 *		the whole region (if no permanent virtual memory
6399 *		objects are embedded in the destination region) or
6400 *		in individual map entries.
6401 *
6402 *		To overwrite permanent virtual memory, it is necessary
6403 *		to copy each page, as the external memory management
6404 *		interface currently does not provide any optimizations.
6405 *
6406 *		Unaligned memory also has to be copied.  It is possible
6407 *		to use 'vm_trickery' to copy the aligned data.  This is
6408 *		not done but not hard to implement.
6409 *
6410 *		Once a page of permanent memory has been overwritten,
6411 *		it is impossible to interrupt this function; otherwise,
6412 *		the call would be neither atomic nor location-independent.
6413 *		The kernel-state portion of a user thread must be
6414 *		interruptible.
6415 *
6416 *		It may be expensive to forward all requests that might
6417 *		overwrite permanent memory (vm_write, vm_copy) to
6418 *		uninterruptible kernel threads.  This routine may be
6419 *		called by interruptible threads; however, success is
6420 *		not guaranteed -- if the request cannot be performed
6421 *		atomically and interruptibly, an error indication is
6422 *		returned.
6423 */
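/*
 *	Editor's illustrative sketch (not part of the original source): the
 *	copyin/copy_overwrite round trip that the notes above have in mind
 *	for requests like vm_write() or vm_copy().  Map and address names
 *	are placeholders.
 *
 *		vm_map_copy_t	copy;
 *		kern_return_t	kr;
 *
 *		kr = vm_map_copyin(src_map, src_addr, size, FALSE, &copy);
 *		if (kr != KERN_SUCCESS)
 *			return kr;
 *		kr = vm_map_copy_overwrite(dst_map, dst_addr, copy,
 *					   interruptible);
 *		if (kr != KERN_SUCCESS)
 *			vm_map_copy_discard(copy);
 *		return kr;
 *
 *	If the destination contains permanent (user-supplied) objects and
 *	"interruptible" is TRUE, the overwrite refuses with KERN_FAILURE
 *	rather than risk a partially visible, non-atomic copy.
 */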
6424
6425static kern_return_t
6426vm_map_copy_overwrite_nested(
6427	vm_map_t		dst_map,
6428	vm_map_address_t	dst_addr,
6429	vm_map_copy_t		copy,
6430	boolean_t		interruptible,
6431	pmap_t			pmap,
6432	boolean_t		discard_on_success)
6433{
6434	vm_map_offset_t		dst_end;
6435	vm_map_entry_t		tmp_entry;
6436	vm_map_entry_t		entry;
6437	kern_return_t		kr;
6438	boolean_t		aligned = TRUE;
6439	boolean_t		contains_permanent_objects = FALSE;
6440	boolean_t		encountered_sub_map = FALSE;
6441	vm_map_offset_t		base_addr;
6442	vm_map_size_t		copy_size;
6443	vm_map_size_t		total_size;
6444
6445
6446	/*
6447	 *	Check for null copy object.
6448	 */
6449
6450	if (copy == VM_MAP_COPY_NULL)
6451		return(KERN_SUCCESS);
6452
6453	/*
6454	 *	Check for special kernel buffer allocated
6455	 *	by new_ipc_kmsg_copyin.
6456	 */
6457
6458	if (copy->type == VM_MAP_COPY_KERNEL_BUFFER) {
6459		return(vm_map_copyout_kernel_buffer(
6460			       dst_map, &dst_addr,
6461			       copy, TRUE, discard_on_success));
6462	}
6463
6464	/*
6465	 *      Only works for entry lists at the moment.  Will
6466	 *	support page lists later.
6467	 */
6468
6469	assert(copy->type == VM_MAP_COPY_ENTRY_LIST);
6470
6471	if (copy->size == 0) {
6472		if (discard_on_success)
6473			vm_map_copy_discard(copy);
6474		return(KERN_SUCCESS);
6475	}
6476
6477	/*
6478	 *	Verify that the destination is all writeable
6479	 *	initially.  We have to trunc the destination
6480	 *	address and round the copy size or we'll end up
6481	 *	splitting entries in strange ways.
6482	 */
6483
6484	if (!VM_MAP_PAGE_ALIGNED(copy->size,
6485				 VM_MAP_PAGE_MASK(dst_map)) ||
6486	    !VM_MAP_PAGE_ALIGNED(copy->offset,
6487				 VM_MAP_PAGE_MASK(dst_map)) ||
6488	    !VM_MAP_PAGE_ALIGNED(dst_addr,
6489				 VM_MAP_PAGE_MASK(dst_map)))
6490	{
6491		aligned = FALSE;
6492		dst_end = vm_map_round_page(dst_addr + copy->size,
6493					    VM_MAP_PAGE_MASK(dst_map));
6494	} else {
6495		dst_end = dst_addr + copy->size;
6496	}
6497
6498	vm_map_lock(dst_map);
6499
6500	/* LP64todo - remove this check when vm_map_commpage64()
6501	 * no longer has to stuff in a map_entry for the commpage
6502	 * above the map's max_offset.
6503	 */
6504	if (dst_addr >= dst_map->max_offset) {
6505		vm_map_unlock(dst_map);
6506		return(KERN_INVALID_ADDRESS);
6507	}
6508
6509start_pass_1:
6510	if (!vm_map_lookup_entry(dst_map, dst_addr, &tmp_entry)) {
6511		vm_map_unlock(dst_map);
6512		return(KERN_INVALID_ADDRESS);
6513	}
6514	vm_map_clip_start(dst_map,
6515			  tmp_entry,
6516			  vm_map_trunc_page(dst_addr,
6517					    VM_MAP_PAGE_MASK(dst_map)));
6518	for (entry = tmp_entry;;) {
6519		vm_map_entry_t	next = entry->vme_next;
6520
6521		while(entry->is_sub_map) {
6522			vm_map_offset_t	sub_start;
6523			vm_map_offset_t	sub_end;
6524			vm_map_offset_t	local_end;
6525
6526                	if (entry->in_transition) {
6527
6528				/*
6529				 * Say that we are waiting, and wait for entry.
6530				 */
6531                        	entry->needs_wakeup = TRUE;
6532                        	vm_map_entry_wait(dst_map, THREAD_UNINT);
6533
6534				goto start_pass_1;
6535			}
6536
6537			local_end = entry->vme_end;
6538		        if (!(entry->needs_copy)) {
6539				/* if needs_copy is set we are a COW   */
6540				/* submap; in such a case we just      */
6541				/* replace it, so there is no need for */
6542				/* the following check.                */
6543				encountered_sub_map = TRUE;
6544				sub_start = entry->offset;
6545
6546				if(entry->vme_end < dst_end)
6547					sub_end = entry->vme_end;
6548				else
6549					sub_end = dst_end;
6550				sub_end -= entry->vme_start;
6551				sub_end += entry->offset;
6552				vm_map_unlock(dst_map);
6553
6554				kr = vm_map_overwrite_submap_recurse(
6555					entry->object.sub_map,
6556					sub_start,
6557					sub_end - sub_start);
6558				if(kr != KERN_SUCCESS)
6559					return kr;
6560				vm_map_lock(dst_map);
6561			}
6562
6563			if (dst_end <= entry->vme_end)
6564				goto start_overwrite;
6565			if(!vm_map_lookup_entry(dst_map, local_end,
6566						&entry)) {
6567				vm_map_unlock(dst_map);
6568				return(KERN_INVALID_ADDRESS);
6569			}
6570			next = entry->vme_next;
6571		}
6572
6573		if ( ! (entry->protection & VM_PROT_WRITE)) {
6574			vm_map_unlock(dst_map);
6575			return(KERN_PROTECTION_FAILURE);
6576		}
6577
6578		/*
6579		 *	If the entry is in transition, we must wait
6580		 *	for it to exit that state.  Anything could happen
6581		 *	when we unlock the map, so start over.
6582		 */
6583                if (entry->in_transition) {
6584
6585                        /*
6586                         * Say that we are waiting, and wait for entry.
6587                         */
6588                        entry->needs_wakeup = TRUE;
6589                        vm_map_entry_wait(dst_map, THREAD_UNINT);
6590
6591			goto start_pass_1;
6592		}
6593
6594/*
6595 *		our range is contained completely within this map entry
6596 */
6597		if (dst_end <= entry->vme_end)
6598			break;
6599/*
6600 *		check that range specified is contiguous region
6601 */
6602		if ((next == vm_map_to_entry(dst_map)) ||
6603		    (next->vme_start != entry->vme_end)) {
6604			vm_map_unlock(dst_map);
6605			return(KERN_INVALID_ADDRESS);
6606		}
6607
6608
6609		/*
6610		 *	Check for permanent objects in the destination.
6611		 */
6612		if ((entry->object.vm_object != VM_OBJECT_NULL) &&
6613		    ((!entry->object.vm_object->internal) ||
6614		     (entry->object.vm_object->true_share))) {
6615			contains_permanent_objects = TRUE;
6616		}
6617
6618		entry = next;
6619	}/* for */
6620
6621start_overwrite:
6622	/*
6623	 *	If there are permanent objects in the destination, then
6624	 *	the copy cannot be interrupted.
6625	 */
6626
6627	if (interruptible && contains_permanent_objects) {
6628		vm_map_unlock(dst_map);
6629		return(KERN_FAILURE);	/* XXX */
6630	}
6631
6632	/*
6633 	 *
6634	 *	Make a second pass, overwriting the data
6635	 *	At the beginning of each loop iteration,
6636	 *	the next entry to be overwritten is "tmp_entry"
6637	 *	(initially, the value returned from the lookup above),
6638	 *	and the starting address expected in that entry
6639	 *	is "start".
6640	 */
6641
6642	total_size = copy->size;
6643	if(encountered_sub_map) {
6644		copy_size = 0;
6645		/* re-calculate tmp_entry since we've had the map */
6646		/* unlocked */
6647		if (!vm_map_lookup_entry( dst_map, dst_addr, &tmp_entry)) {
6648			vm_map_unlock(dst_map);
6649			return(KERN_INVALID_ADDRESS);
6650		}
6651	} else {
6652		copy_size = copy->size;
6653	}
6654
6655	base_addr = dst_addr;
6656	while(TRUE) {
6657		/* deconstruct the copy object and do it in parts, */
6658		/* but only in the sub_map, interruptible case */
6659		vm_map_entry_t	copy_entry;
6660		vm_map_entry_t	previous_prev = VM_MAP_ENTRY_NULL;
6661		vm_map_entry_t	next_copy = VM_MAP_ENTRY_NULL;
6662		int		nentries;
6663		int		remaining_entries = 0;
6664		vm_map_offset_t	new_offset = 0;
6665
6666		for (entry = tmp_entry; copy_size == 0;) {
6667			vm_map_entry_t	next;
6668
6669			next = entry->vme_next;
6670
6671			/* tmp_entry and base address are moved along */
6672			/* each time we encounter a sub-map.  Otherwise */
6673			/* entry can outpace tmp_entry, and the copy_size */
6674			/* may reflect the distance between them. */
6675			/* If the current entry is found to be in transition, */
6676			/* we will start over at the beginning or at the last */
6677			/* encountered submap, as dictated by base_addr, */
6678			/* and we will zero copy_size accordingly. */
6679			if (entry->in_transition) {
6680                       		/*
6681                       		 * Say that we are waiting, and wait for entry.
6682                       		 */
6683                       		entry->needs_wakeup = TRUE;
6684                       		vm_map_entry_wait(dst_map, THREAD_UNINT);
6685
6686				if(!vm_map_lookup_entry(dst_map, base_addr,
6687							&tmp_entry)) {
6688					vm_map_unlock(dst_map);
6689					return(KERN_INVALID_ADDRESS);
6690				}
6691				copy_size = 0;
6692				entry = tmp_entry;
6693				continue;
6694			}
6695			if(entry->is_sub_map) {
6696				vm_map_offset_t	sub_start;
6697				vm_map_offset_t	sub_end;
6698				vm_map_offset_t	local_end;
6699
6700				if (entry->needs_copy) {
6701					/* if this is a COW submap, */
6702					/* just back the range with an */
6703					/* anonymous entry */
6704					if(entry->vme_end < dst_end)
6705						sub_end = entry->vme_end;
6706					else
6707						sub_end = dst_end;
6708					if(entry->vme_start < base_addr)
6709						sub_start = base_addr;
6710					else
6711						sub_start = entry->vme_start;
6712					vm_map_clip_end(
6713						dst_map, entry, sub_end);
6714					vm_map_clip_start(
6715						dst_map, entry, sub_start);
6716					assert(!entry->use_pmap);
6717					entry->is_sub_map = FALSE;
6718					vm_map_deallocate(
6719						entry->object.sub_map);
6720					entry->object.sub_map = NULL;
6721					entry->is_shared = FALSE;
6722					entry->needs_copy = FALSE;
6723					entry->offset = 0;
6724					/*
6725					 * XXX FBDP
6726					 * We should propagate the protections
6727					 * of the submap entry here instead
6728					 * of forcing them to VM_PROT_ALL...
6729					 * Or better yet, we should inherit
6730					 * the protection of the copy_entry.
6731					 */
6732					entry->protection = VM_PROT_ALL;
6733					entry->max_protection = VM_PROT_ALL;
6734					entry->wired_count = 0;
6735					entry->user_wired_count = 0;
6736					if(entry->inheritance
6737					   == VM_INHERIT_SHARE)
6738						entry->inheritance = VM_INHERIT_COPY;
6739					continue;
6740				}
6741				/* first take care of any non-sub_map */
6742				/* entries to send */
6743				if(base_addr < entry->vme_start) {
6744					/* stuff to send */
6745					copy_size =
6746						entry->vme_start - base_addr;
6747					break;
6748				}
6749				sub_start = entry->offset;
6750
6751				if(entry->vme_end < dst_end)
6752					sub_end = entry->vme_end;
6753				else
6754					sub_end = dst_end;
6755				sub_end -= entry->vme_start;
6756				sub_end += entry->offset;
6757				local_end = entry->vme_end;
6758				vm_map_unlock(dst_map);
6759				copy_size = sub_end - sub_start;
6760
6761				/* adjust the copy object */
6762				if (total_size > copy_size) {
6763					vm_map_size_t	local_size = 0;
6764					vm_map_size_t	entry_size;
6765
6766					nentries = 1;
6767					new_offset = copy->offset;
6768					copy_entry = vm_map_copy_first_entry(copy);
6769					while(copy_entry !=
6770					      vm_map_copy_to_entry(copy)){
6771						entry_size = copy_entry->vme_end -
6772							copy_entry->vme_start;
6773						if((local_size < copy_size) &&
6774						   ((local_size + entry_size)
6775						    >= copy_size)) {
6776							vm_map_copy_clip_end(copy,
6777									     copy_entry,
6778									     copy_entry->vme_start +
6779									     (copy_size - local_size));
6780							entry_size = copy_entry->vme_end -
6781								copy_entry->vme_start;
6782							local_size += entry_size;
6783							new_offset += entry_size;
6784						}
6785						if(local_size >= copy_size) {
6786							next_copy = copy_entry->vme_next;
6787							copy_entry->vme_next =
6788								vm_map_copy_to_entry(copy);
6789							previous_prev =
6790								copy->cpy_hdr.links.prev;
6791							copy->cpy_hdr.links.prev = copy_entry;
6792							copy->size = copy_size;
6793							remaining_entries =
6794								copy->cpy_hdr.nentries;
6795							remaining_entries -= nentries;
6796							copy->cpy_hdr.nentries = nentries;
6797							break;
6798						} else {
6799							local_size += entry_size;
6800							new_offset += entry_size;
6801							nentries++;
6802						}
6803						copy_entry = copy_entry->vme_next;
6804					}
6805				}
6806
6807				if((entry->use_pmap) && (pmap == NULL)) {
6808					kr = vm_map_copy_overwrite_nested(
6809						entry->object.sub_map,
6810						sub_start,
6811						copy,
6812						interruptible,
6813						entry->object.sub_map->pmap,
6814						TRUE);
6815				} else if (pmap != NULL) {
6816					kr = vm_map_copy_overwrite_nested(
6817						entry->object.sub_map,
6818						sub_start,
6819						copy,
6820						interruptible, pmap,
6821						TRUE);
6822				} else {
6823					kr = vm_map_copy_overwrite_nested(
6824						entry->object.sub_map,
6825						sub_start,
6826						copy,
6827						interruptible,
6828						dst_map->pmap,
6829						TRUE);
6830				}
6831				if(kr != KERN_SUCCESS) {
6832					if(next_copy != NULL) {
6833						copy->cpy_hdr.nentries +=
6834							remaining_entries;
6835						copy->cpy_hdr.links.prev->vme_next =
6836							next_copy;
6837						copy->cpy_hdr.links.prev
6838							= previous_prev;
6839						copy->size = total_size;
6840					}
6841					return kr;
6842				}
6843				if (dst_end <= local_end) {
6844					return(KERN_SUCCESS);
6845				}
6846				/* otherwise copy no longer exists, it was */
6847				/* destroyed after successful copy_overwrite */
6848			        copy = (vm_map_copy_t)
6849					zalloc(vm_map_copy_zone);
6850				vm_map_copy_first_entry(copy) =
6851					vm_map_copy_last_entry(copy) =
6852					vm_map_copy_to_entry(copy);
6853				copy->type = VM_MAP_COPY_ENTRY_LIST;
6854				copy->offset = new_offset;
6855
6856				/*
6857				 * XXX FBDP
6858				 * this does not seem to deal with
6859				 * the VM map store (R&B tree)
6860				 */
6861
6862				total_size -= copy_size;
6863				copy_size = 0;
6864				/* put back remainder of copy in container */
6865				if(next_copy != NULL) {
6866					copy->cpy_hdr.nentries = remaining_entries;
6867					copy->cpy_hdr.links.next = next_copy;
6868					copy->cpy_hdr.links.prev = previous_prev;
6869					copy->size = total_size;
6870					next_copy->vme_prev =
6871						vm_map_copy_to_entry(copy);
6872					next_copy = NULL;
6873				}
6874				base_addr = local_end;
6875				vm_map_lock(dst_map);
6876				if(!vm_map_lookup_entry(dst_map,
6877							local_end, &tmp_entry)) {
6878					vm_map_unlock(dst_map);
6879					return(KERN_INVALID_ADDRESS);
6880				}
6881				entry = tmp_entry;
6882				continue;
6883			}
6884			if (dst_end <= entry->vme_end) {
6885				copy_size = dst_end - base_addr;
6886				break;
6887			}
6888
6889			if ((next == vm_map_to_entry(dst_map)) ||
6890			    (next->vme_start != entry->vme_end)) {
6891				vm_map_unlock(dst_map);
6892				return(KERN_INVALID_ADDRESS);
6893			}
6894
6895			entry = next;
6896		}/* for */
6897
6898		next_copy = NULL;
6899		nentries = 1;
6900
6901		/* adjust the copy object */
6902		if (total_size > copy_size) {
6903			vm_map_size_t	local_size = 0;
6904			vm_map_size_t	entry_size;
6905
6906			new_offset = copy->offset;
6907			copy_entry = vm_map_copy_first_entry(copy);
6908			while(copy_entry != vm_map_copy_to_entry(copy)) {
6909				entry_size = copy_entry->vme_end -
6910					copy_entry->vme_start;
6911				if((local_size < copy_size) &&
6912				   ((local_size + entry_size)
6913				    >= copy_size)) {
6914					vm_map_copy_clip_end(copy, copy_entry,
6915							     copy_entry->vme_start +
6916							     (copy_size - local_size));
6917					entry_size = copy_entry->vme_end -
6918						copy_entry->vme_start;
6919					local_size += entry_size;
6920					new_offset += entry_size;
6921				}
6922				if(local_size >= copy_size) {
6923					next_copy = copy_entry->vme_next;
6924					copy_entry->vme_next =
6925						vm_map_copy_to_entry(copy);
6926					previous_prev =
6927						copy->cpy_hdr.links.prev;
6928					copy->cpy_hdr.links.prev = copy_entry;
6929					copy->size = copy_size;
6930					remaining_entries =
6931						copy->cpy_hdr.nentries;
6932					remaining_entries -= nentries;
6933					copy->cpy_hdr.nentries = nentries;
6934					break;
6935				} else {
6936					local_size += entry_size;
6937					new_offset += entry_size;
6938					nentries++;
6939				}
6940				copy_entry = copy_entry->vme_next;
6941			}
6942		}
6943
6944		if (aligned) {
6945			pmap_t	local_pmap;
6946
6947			if(pmap)
6948				local_pmap = pmap;
6949			else
6950				local_pmap = dst_map->pmap;
6951
6952			if ((kr =  vm_map_copy_overwrite_aligned(
6953				     dst_map, tmp_entry, copy,
6954				     base_addr, local_pmap)) != KERN_SUCCESS) {
6955				if(next_copy != NULL) {
6956					copy->cpy_hdr.nentries +=
6957						remaining_entries;
6958				        copy->cpy_hdr.links.prev->vme_next =
6959						next_copy;
6960			       		copy->cpy_hdr.links.prev =
6961						previous_prev;
6962					copy->size += copy_size;
6963				}
6964				return kr;
6965			}
6966			vm_map_unlock(dst_map);
6967		} else {
6968			/*
6969			 * Performance gain:
6970			 *
6971			 * If the copy and dst address are misaligned but share the
6972			 * same offset within the page, we can copy_not_aligned the
6973			 * misaligned parts and copy the rest aligned.  If they are
6974			 * aligned but len is unaligned, we simply need to copy
6975			 * the end bit unaligned.  We'll need to split the misaligned
6976			 * bits of the region in this case!
6977			 */
6978			/* ALWAYS UNLOCKS THE dst_map MAP */
6979			kr = vm_map_copy_overwrite_unaligned(
6980				dst_map,
6981				tmp_entry,
6982				copy,
6983				base_addr,
6984				discard_on_success);
6985			if (kr != KERN_SUCCESS) {
6986				if(next_copy != NULL) {
6987					copy->cpy_hdr.nentries +=
6988						remaining_entries;
6989			       		copy->cpy_hdr.links.prev->vme_next =
6990						next_copy;
6991			       		copy->cpy_hdr.links.prev =
6992						previous_prev;
6993					copy->size += copy_size;
6994				}
6995				return kr;
6996			}
6997		}
6998		total_size -= copy_size;
6999		if(total_size == 0)
7000			break;
7001		base_addr += copy_size;
7002		copy_size = 0;
7003		copy->offset = new_offset;
7004		if(next_copy != NULL) {
7005			copy->cpy_hdr.nentries = remaining_entries;
7006			copy->cpy_hdr.links.next = next_copy;
7007			copy->cpy_hdr.links.prev = previous_prev;
7008			next_copy->vme_prev = vm_map_copy_to_entry(copy);
7009			copy->size = total_size;
7010		}
7011		vm_map_lock(dst_map);
7012		while(TRUE) {
7013			if (!vm_map_lookup_entry(dst_map,
7014						 base_addr, &tmp_entry)) {
7015				vm_map_unlock(dst_map);
7016				return(KERN_INVALID_ADDRESS);
7017			}
7018			if (tmp_entry->in_transition) {
7019				tmp_entry->needs_wakeup = TRUE;
7020				vm_map_entry_wait(dst_map, THREAD_UNINT);
7021			} else {
7022				break;
7023			}
7024		}
7025		vm_map_clip_start(dst_map,
7026				  tmp_entry,
7027				  vm_map_trunc_page(base_addr,
7028						    VM_MAP_PAGE_MASK(dst_map)));
7029
7030		entry = tmp_entry;
7031	} /* while */
7032
7033	/*
7034	 *	Throw away the vm_map_copy object
7035	 */
7036	if (discard_on_success)
7037		vm_map_copy_discard(copy);
7038
7039	return(KERN_SUCCESS);
7040}/* vm_map_copy_overwrite */
7041
7042kern_return_t
7043vm_map_copy_overwrite(
7044	vm_map_t	dst_map,
7045	vm_map_offset_t	dst_addr,
7046	vm_map_copy_t	copy,
7047	boolean_t	interruptible)
7048{
7049	vm_map_size_t	head_size, tail_size;
7050	vm_map_copy_t	head_copy, tail_copy;
7051	vm_map_offset_t	head_addr, tail_addr;
7052	vm_map_entry_t	entry;
7053	kern_return_t	kr;
7054
7055	head_size = 0;
7056	tail_size = 0;
7057	head_copy = NULL;
7058	tail_copy = NULL;
7059	head_addr = 0;
7060	tail_addr = 0;
7061
7062	if (interruptible ||
7063	    copy == VM_MAP_COPY_NULL ||
7064	    copy->type != VM_MAP_COPY_ENTRY_LIST) {
7065		/*
7066		 * We can't split the "copy" map if we're interruptible
7067		 * or if we don't have a "copy" map...
7068		 */
7069	blunt_copy:
7070		return vm_map_copy_overwrite_nested(dst_map,
7071						    dst_addr,
7072						    copy,
7073						    interruptible,
7074						    (pmap_t) NULL,
7075						    TRUE);
7076	}
7077
7078	if (copy->size < 3 * PAGE_SIZE) {
7079		/*
7080		 * Too small to bother with optimizing...
7081		 */
7082		goto blunt_copy;
7083	}
7084
7085	if ((dst_addr & VM_MAP_PAGE_MASK(dst_map)) !=
7086	    (copy->offset & VM_MAP_PAGE_MASK(dst_map))) {
7087		/*
7088		 * Incompatible mis-alignment of source and destination...
7089		 */
7090		goto blunt_copy;
7091	}
7092
7093	/*
7094	 * Proper alignment or identical mis-alignment at the beginning.
7095	 * Let's try and do a small unaligned copy first (if needed)
7096	 * and then an aligned copy for the rest.
7097	 */
7098	if (!page_aligned(dst_addr)) {
7099		head_addr = dst_addr;
7100		head_size = (VM_MAP_PAGE_SIZE(dst_map) -
7101			     (copy->offset & VM_MAP_PAGE_MASK(dst_map)));
7102	}
7103	if (!page_aligned(copy->offset + copy->size)) {
7104		/*
7105		 * Mis-alignment at the end.
7106		 * Do an aligned copy up to the last page and
7107		 * then an unaligned copy for the remaining bytes.
7108		 */
7109		tail_size = ((copy->offset + copy->size) &
7110			     VM_MAP_PAGE_MASK(dst_map));
7111		tail_addr = dst_addr + copy->size - tail_size;
7112	}
7113
7114	if (head_size + tail_size == copy->size) {
7115		/*
7116		 * It's all unaligned, no optimization possible...
7117		 */
7118		goto blunt_copy;
7119	}
7120
7121	/*
7122	 * Can't optimize if there are any submaps in the
7123	 * destination due to the way we free the "copy" map
7124	 * progressively in vm_map_copy_overwrite_nested()
7125	 * in that case.
7126	 */
7127	vm_map_lock_read(dst_map);
7128	if (! vm_map_lookup_entry(dst_map, dst_addr, &entry)) {
7129		vm_map_unlock_read(dst_map);
7130		goto blunt_copy;
7131	}
7132	for (;
7133	     (entry != vm_map_copy_to_entry(copy) &&
7134	      entry->vme_start < dst_addr + copy->size);
7135	     entry = entry->vme_next) {
7136		if (entry->is_sub_map) {
7137			vm_map_unlock_read(dst_map);
7138			goto blunt_copy;
7139		}
7140	}
7141	vm_map_unlock_read(dst_map);
7142
7143	if (head_size) {
7144		/*
7145		 * Unaligned copy of the first "head_size" bytes, to reach
7146		 * a page boundary.
7147		 */
7148
7149		/*
7150		 * Extract "head_copy" out of "copy".
7151		 */
7152		head_copy = (vm_map_copy_t) zalloc(vm_map_copy_zone);
7153		vm_map_copy_first_entry(head_copy) =
7154			vm_map_copy_to_entry(head_copy);
7155		vm_map_copy_last_entry(head_copy) =
7156			vm_map_copy_to_entry(head_copy);
7157		head_copy->type = VM_MAP_COPY_ENTRY_LIST;
7158		head_copy->cpy_hdr.nentries = 0;
7159		head_copy->cpy_hdr.entries_pageable =
7160			copy->cpy_hdr.entries_pageable;
7161		vm_map_store_init(&head_copy->cpy_hdr);
7162
7163		head_copy->offset = copy->offset;
7164		head_copy->size = head_size;
7165
7166		copy->offset += head_size;
7167		copy->size -= head_size;
7168
7169		entry = vm_map_copy_first_entry(copy);
7170		vm_map_copy_clip_end(copy, entry, copy->offset);
7171		vm_map_copy_entry_unlink(copy, entry);
7172		vm_map_copy_entry_link(head_copy,
7173				       vm_map_copy_to_entry(head_copy),
7174				       entry);
7175
7176		/*
7177		 * Do the unaligned copy.
7178		 */
7179		kr = vm_map_copy_overwrite_nested(dst_map,
7180						  head_addr,
7181						  head_copy,
7182						  interruptible,
7183						  (pmap_t) NULL,
7184						  FALSE);
7185		if (kr != KERN_SUCCESS)
7186			goto done;
7187	}
7188
7189	if (tail_size) {
7190		/*
7191		 * Extract "tail_copy" out of "copy".
7192		 */
7193		tail_copy = (vm_map_copy_t) zalloc(vm_map_copy_zone);
7194		vm_map_copy_first_entry(tail_copy) =
7195			vm_map_copy_to_entry(tail_copy);
7196		vm_map_copy_last_entry(tail_copy) =
7197			vm_map_copy_to_entry(tail_copy);
7198		tail_copy->type = VM_MAP_COPY_ENTRY_LIST;
7199		tail_copy->cpy_hdr.nentries = 0;
7200		tail_copy->cpy_hdr.entries_pageable =
7201			copy->cpy_hdr.entries_pageable;
7202		vm_map_store_init(&tail_copy->cpy_hdr);
7203
7204		tail_copy->offset = copy->offset + copy->size - tail_size;
7205		tail_copy->size = tail_size;
7206
7207		copy->size -= tail_size;
7208
7209		entry = vm_map_copy_last_entry(copy);
7210		vm_map_copy_clip_start(copy, entry, tail_copy->offset);
7211		entry = vm_map_copy_last_entry(copy);
7212		vm_map_copy_entry_unlink(copy, entry);
7213		vm_map_copy_entry_link(tail_copy,
7214				       vm_map_copy_last_entry(tail_copy),
7215				       entry);
7216	}
7217
7218	/*
7219	 * Copy most (or possibly all) of the data.
7220	 */
7221	kr = vm_map_copy_overwrite_nested(dst_map,
7222					  dst_addr + head_size,
7223					  copy,
7224					  interruptible,
7225					  (pmap_t) NULL,
7226					  FALSE);
7227	if (kr != KERN_SUCCESS) {
7228		goto done;
7229	}
7230
7231	if (tail_size) {
7232		kr = vm_map_copy_overwrite_nested(dst_map,
7233						  tail_addr,
7234						  tail_copy,
7235						  interruptible,
7236						  (pmap_t) NULL,
7237						  FALSE);
7238	}
7239
7240done:
7241	assert(copy->type == VM_MAP_COPY_ENTRY_LIST);
7242	if (kr == KERN_SUCCESS) {
7243		/*
7244		 * Discard all the copy maps.
7245		 */
7246		if (head_copy) {
7247			vm_map_copy_discard(head_copy);
7248			head_copy = NULL;
7249		}
7250		vm_map_copy_discard(copy);
7251		if (tail_copy) {
7252			vm_map_copy_discard(tail_copy);
7253			tail_copy = NULL;
7254		}
7255	} else {
7256		/*
7257		 * Re-assemble the original copy map.
7258		 */
7259		if (head_copy) {
7260			entry = vm_map_copy_first_entry(head_copy);
7261			vm_map_copy_entry_unlink(head_copy, entry);
7262			vm_map_copy_entry_link(copy,
7263					       vm_map_copy_to_entry(copy),
7264					       entry);
7265			copy->offset -= head_size;
7266			copy->size += head_size;
7267			vm_map_copy_discard(head_copy);
7268			head_copy = NULL;
7269		}
7270		if (tail_copy) {
7271			entry = vm_map_copy_last_entry(tail_copy);
7272			vm_map_copy_entry_unlink(tail_copy, entry);
7273			vm_map_copy_entry_link(copy,
7274					       vm_map_copy_last_entry(copy),
7275					       entry);
7276			copy->size += tail_size;
7277			vm_map_copy_discard(tail_copy);
7278			tail_copy = NULL;
7279		}
7280	}
7281	return kr;
7282}
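/*
 *	Editor's worked example (not part of the original source), assuming
 *	4 KB map pages: for dst_addr = 0x7000500, copy->offset with the same
 *	low bits 0x500, and copy->size = 0x3000, the split above gives
 *
 *		head_addr = 0x7000500, head_size = 0x1000 - 0x500 = 0xB00
 *		tail_size = (0x500 + 0x3000) & 0xFFF = 0x500
 *		tail_addr = 0x7000500 + 0x3000 - 0x500 = 0x7003000
 *
 *	so the middle 0x2000 bytes ([0x7001000, 0x7003000)) are handled by
 *	the aligned path and only the 0xB00-byte head and the 0x500-byte
 *	tail fall back to physical copying.
 */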
7283
7284
7285/*
7286 *	Routine: vm_map_copy_overwrite_unaligned	[internal use only]
7287 *
7288 *	Description:
7289 *	Physically copy unaligned data
7290 *
7291 *	Implementation:
7292 *	Unaligned parts of pages have to be physically copied.  We use
7293 *	a modified form of vm_fault_copy (which understands non-aligned
7294 *	page offsets and sizes) to do the copy.  We attempt to copy as
7295 *	much memory in one go as possible; however, vm_fault_copy copies
7296 *	within one memory object, so we have to take the smallest of "amount
7297 *	left", "source object data size" and "target object data size".  With
7298 *	unaligned data we don't need to split regions, therefore the source
7299 *	(copy) object should be one map entry; the target range may, however,
7300 *	be split over multiple map entries.  In any event we are pessimistic
7301 *	about these assumptions.
7302 *
7303 *	Assumptions:
7304 *	dst_map is locked on entry and is returned locked on success,
7305 *	unlocked on error.
7306 */
7307
7308static kern_return_t
7309vm_map_copy_overwrite_unaligned(
7310	vm_map_t	dst_map,
7311	vm_map_entry_t	entry,
7312	vm_map_copy_t	copy,
7313	vm_map_offset_t	start,
7314	boolean_t	discard_on_success)
7315{
7316	vm_map_entry_t		copy_entry;
7317	vm_map_entry_t		copy_entry_next;
7318	vm_map_version_t	version;
7319	vm_object_t		dst_object;
7320	vm_object_offset_t	dst_offset;
7321	vm_object_offset_t	src_offset;
7322	vm_object_offset_t	entry_offset;
7323	vm_map_offset_t		entry_end;
7324	vm_map_size_t		src_size,
7325				dst_size,
7326				copy_size,
7327				amount_left;
7328	kern_return_t		kr = KERN_SUCCESS;
7329
7330
7331	copy_entry = vm_map_copy_first_entry(copy);
7332
7333	vm_map_lock_write_to_read(dst_map);
7334
7335	src_offset = copy->offset - vm_object_trunc_page(copy->offset);
7336	amount_left = copy->size;
7337/*
7338 *	unaligned so we never clipped this entry, we need the offset into
7339 *	the vm_object not just the data.
7340 */
7341	while (amount_left > 0) {
7342
7343		if (entry == vm_map_to_entry(dst_map)) {
7344			vm_map_unlock_read(dst_map);
7345			return KERN_INVALID_ADDRESS;
7346		}
7347
7348		/* "start" must be within the current map entry */
7349		assert ((start>=entry->vme_start) && (start<entry->vme_end));
7350
7351		dst_offset = start - entry->vme_start;
7352
7353		dst_size = entry->vme_end - start;
7354
7355		src_size = copy_entry->vme_end -
7356			(copy_entry->vme_start + src_offset);
7357
7358		if (dst_size < src_size) {
7359/*
7360 *			we can only copy dst_size bytes before
7361 *			we have to get the next destination entry
7362 */
7363			copy_size = dst_size;
7364		} else {
7365/*
7366 *			we can only copy src_size bytes before
7367 *			we have to get the next source copy entry
7368 */
7369			copy_size = src_size;
7370		}
7371
7372		if (copy_size > amount_left) {
7373			copy_size = amount_left;
7374		}
7375/*
7376 *		Entry needs copy: create a shadow object for the
7377 *		copy-on-write region.
7378 */
7379		if (entry->needs_copy &&
7380		    ((entry->protection & VM_PROT_WRITE) != 0))
7381		{
7382			if (vm_map_lock_read_to_write(dst_map)) {
7383				vm_map_lock_read(dst_map);
7384				goto RetryLookup;
7385			}
7386			vm_object_shadow(&entry->object.vm_object,
7387					 &entry->offset,
7388					 (vm_map_size_t)(entry->vme_end
7389							 - entry->vme_start));
7390			entry->needs_copy = FALSE;
7391			vm_map_lock_write_to_read(dst_map);
7392		}
7393		dst_object = entry->object.vm_object;
7394/*
7395 *		unlike with the virtual (aligned) copy we're going
7396 *		to fault on it therefore we need a target object.
7397 */
7398                if (dst_object == VM_OBJECT_NULL) {
7399			if (vm_map_lock_read_to_write(dst_map)) {
7400				vm_map_lock_read(dst_map);
7401				goto RetryLookup;
7402			}
7403			dst_object = vm_object_allocate((vm_map_size_t)
7404							entry->vme_end - entry->vme_start);
7405			entry->object.vm_object = dst_object;
7406			entry->offset = 0;
7407			assert(entry->use_pmap);
7408			vm_map_lock_write_to_read(dst_map);
7409		}
7410/*
7411 *		Take an object reference and unlock map. The "entry" may
7412 *		disappear or change when the map is unlocked.
7413 */
7414		vm_object_reference(dst_object);
7415		version.main_timestamp = dst_map->timestamp;
7416		entry_offset = entry->offset;
7417		entry_end = entry->vme_end;
7418		vm_map_unlock_read(dst_map);
7419/*
7420 *		Copy as much as possible in one pass
7421 */
7422		kr = vm_fault_copy(
7423			copy_entry->object.vm_object,
7424			copy_entry->offset + src_offset,
7425			&copy_size,
7426			dst_object,
7427			entry_offset + dst_offset,
7428			dst_map,
7429			&version,
7430			THREAD_UNINT );
7431
7432		start += copy_size;
7433		src_offset += copy_size;
7434		amount_left -= copy_size;
7435/*
7436 *		Release the object reference
7437 */
7438		vm_object_deallocate(dst_object);
7439/*
7440 *		If a hard error occurred, return it now
7441 */
7442		if (kr != KERN_SUCCESS)
7443			return kr;
7444
7445		if ((copy_entry->vme_start + src_offset) == copy_entry->vme_end
7446		    || amount_left == 0)
7447		{
7448/*
7449 *			all done with this copy entry, dispose.
7450 */
7451			copy_entry_next = copy_entry->vme_next;
7452
7453			if (discard_on_success) {
7454				vm_map_copy_entry_unlink(copy, copy_entry);
7455				assert(!copy_entry->is_sub_map);
7456				vm_object_deallocate(
7457					copy_entry->object.vm_object);
7458				vm_map_copy_entry_dispose(copy, copy_entry);
7459			}
7460
7461			if (copy_entry_next == vm_map_copy_to_entry(copy) &&
7462			    amount_left) {
7463/*
7464 *				not finished copying but ran out of source
7465 */
7466				return KERN_INVALID_ADDRESS;
7467			}
7468
7469			copy_entry = copy_entry_next;
7470
7471			src_offset = 0;
7472		}
7473
7474		if (amount_left == 0)
7475			return KERN_SUCCESS;
7476
7477		vm_map_lock_read(dst_map);
7478		if (version.main_timestamp == dst_map->timestamp) {
7479			if (start == entry_end) {
7480/*
7481 *				destination region is split.  Use the version
7482 *				information to avoid a lookup in the normal
7483 *				case.
7484 */
7485				entry = entry->vme_next;
7486/*
7487 *				should be contiguous. Fail if we encounter
7488 *				a hole in the destination.
7489 */
7490				if (start != entry->vme_start) {
7491					vm_map_unlock_read(dst_map);
7492					return KERN_INVALID_ADDRESS;
7493				}
7494			}
7495		} else {
7496/*
7497 *			Map version check failed.
7498 *			we must lookup the entry because somebody
7499 *			might have changed the map behind our backs.
7500 */
7501		RetryLookup:
7502			if (!vm_map_lookup_entry(dst_map, start, &entry))
7503			{
7504				vm_map_unlock_read(dst_map);
7505				return KERN_INVALID_ADDRESS;
7506			}
7507		}
7508	}/* while */
7509
7510	return KERN_SUCCESS;
7511}/* vm_map_copy_overwrite_unaligned */
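/*
 *	Editor's worked example (not part of the original source) of the
 *	chunking rule above: with amount_left = 0x1800, a destination entry
 *	that has dst_size = 0x2000 bytes left, and a source copy entry with
 *	src_size = 0x0C00 bytes left, one vm_fault_copy() pass moves
 *
 *		copy_size = min(dst_size, src_size, amount_left)
 *			  = min(0x2000, 0x0C00, 0x1800) = 0x0C00
 *
 *	bytes, after which the exhausted copy entry is disposed of and the
 *	loop continues with the next one for the remaining 0x0C00 bytes.
 */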
7512
7513/*
7514 *	Routine: vm_map_copy_overwrite_aligned	[internal use only]
7515 *
7516 *	Description:
7517 *	Does all the vm_trickery possible for whole pages.
7518 *
7519 *	Implementation:
7520 *
7521 *	If there are no permanent objects in the destination,
7522 *	and the source and destination map entry zones match,
7523 *	and the destination map entry is not shared,
7524 *	then the map entries can be deleted and replaced
7525 *	with those from the copy.  The following code is the
7526 *	basic idea of what to do, but there are lots of annoying
7527 *	little details about getting protection and inheritance
7528 *	right.  Should add protection, inheritance, and sharing checks
7529 *	to the above pass and make sure that no wiring is involved.
7530 */
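
/*
 * Illustrative sketch (not compiled): the test that decides whether the
 * destination entry's memory can simply be thrown away and replaced with
 * the copy entry's object.  The helper name is hypothetical; the real
 * check is done inline in vm_map_copy_overwrite_aligned() below.
 */
#if 0
static boolean_t
vm_map_copy_overwrite_can_replace(vm_map_entry_t entry)
{
	vm_object_t	object;

	object = entry->object.vm_object;
	return ((!entry->is_shared &&
		 (object == VM_OBJECT_NULL ||
		  (object->internal && !object->true_share))) ||
		entry->needs_copy);
}
#endif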
7531
7532int vm_map_copy_overwrite_aligned_src_not_internal = 0;
7533int vm_map_copy_overwrite_aligned_src_not_symmetric = 0;
7534int vm_map_copy_overwrite_aligned_src_large = 0;
7535
7536static kern_return_t
7537vm_map_copy_overwrite_aligned(
7538	vm_map_t	dst_map,
7539	vm_map_entry_t	tmp_entry,
7540	vm_map_copy_t	copy,
7541	vm_map_offset_t	start,
7542	__unused pmap_t	pmap)
7543{
7544	vm_object_t	object;
7545	vm_map_entry_t	copy_entry;
7546	vm_map_size_t	copy_size;
7547	vm_map_size_t	size;
7548	vm_map_entry_t	entry;
7549
7550	while ((copy_entry = vm_map_copy_first_entry(copy))
7551	       != vm_map_copy_to_entry(copy))
7552	{
7553		copy_size = (copy_entry->vme_end - copy_entry->vme_start);
7554
7555		entry = tmp_entry;
7556		if (entry->is_sub_map) {
7557			/* unnested when clipped earlier */
7558			assert(!entry->use_pmap);
7559		}
7560		if (entry == vm_map_to_entry(dst_map)) {
7561			vm_map_unlock(dst_map);
7562			return KERN_INVALID_ADDRESS;
7563		}
7564		size = (entry->vme_end - entry->vme_start);
7565		/*
7566		 *	Make sure that no holes popped up in the
7567		 *	address map, and that the protection is
7568		 *	still valid, in case the map was unlocked
7569		 *	earlier.
7570		 */
7571
7572		if ((entry->vme_start != start) || ((entry->is_sub_map)
7573						    && !entry->needs_copy)) {
7574			vm_map_unlock(dst_map);
7575			return(KERN_INVALID_ADDRESS);
7576		}
7577		assert(entry != vm_map_to_entry(dst_map));
7578
7579		/*
7580		 *	Check protection again
7581		 */
7582
7583		if ( ! (entry->protection & VM_PROT_WRITE)) {
7584			vm_map_unlock(dst_map);
7585			return(KERN_PROTECTION_FAILURE);
7586		}
7587
7588		/*
7589		 *	Adjust to source size first
7590		 */
7591
7592		if (copy_size < size) {
7593			if (entry->map_aligned &&
7594			    !VM_MAP_PAGE_ALIGNED(entry->vme_start + copy_size,
7595						 VM_MAP_PAGE_MASK(dst_map))) {
7596				/* no longer map-aligned */
7597				entry->map_aligned = FALSE;
7598			}
7599			vm_map_clip_end(dst_map, entry, entry->vme_start + copy_size);
7600			size = copy_size;
7601		}
7602
7603		/*
7604		 *	Adjust to destination size
7605		 */
7606
7607		if (size < copy_size) {
7608			vm_map_copy_clip_end(copy, copy_entry,
7609					     copy_entry->vme_start + size);
7610			copy_size = size;
7611		}
7612
7613		assert((entry->vme_end - entry->vme_start) == size);
7614		assert((tmp_entry->vme_end - tmp_entry->vme_start) == size);
7615		assert((copy_entry->vme_end - copy_entry->vme_start) == size);
7616
7617		/*
7618		 *	If the destination contains temporary unshared memory,
7619		 *	we can perform the copy by throwing it away and
7620		 *	installing the source data.
7621		 */
7622
7623		object = entry->object.vm_object;
7624		if ((!entry->is_shared &&
7625		     ((object == VM_OBJECT_NULL) ||
7626		      (object->internal && !object->true_share))) ||
7627		    entry->needs_copy) {
7628			vm_object_t	old_object = entry->object.vm_object;
7629			vm_object_offset_t	old_offset = entry->offset;
7630			vm_object_offset_t	offset;
7631
7632			/*
7633			 * Ensure that the source and destination aren't
7634			 * identical
7635			 */
7636			if (old_object == copy_entry->object.vm_object &&
7637			    old_offset == copy_entry->offset) {
7638				vm_map_copy_entry_unlink(copy, copy_entry);
7639				vm_map_copy_entry_dispose(copy, copy_entry);
7640
7641				if (old_object != VM_OBJECT_NULL)
7642					vm_object_deallocate(old_object);
7643
7644				start = tmp_entry->vme_end;
7645				tmp_entry = tmp_entry->vme_next;
7646				continue;
7647			}
7648
7649#define __TRADEOFF1_OBJ_SIZE (64 * 1024 * 1024)	/* 64 MB */
7650#define __TRADEOFF1_COPY_SIZE (128 * 1024)	/* 128 KB */
7651			if (copy_entry->object.vm_object != VM_OBJECT_NULL &&
7652			    copy_entry->object.vm_object->vo_size >= __TRADEOFF1_OBJ_SIZE &&
7653			    copy_size <= __TRADEOFF1_COPY_SIZE) {
7654				/*
7655				 * Virtual vs. Physical copy tradeoff #1.
7656				 *
7657				 * Copying only a few pages out of a large
7658				 * object:  do a physical copy instead of
7659				 * a virtual copy, to avoid possibly keeping
7660				 * the entire large object alive because of
7661				 * those few copy-on-write pages.
7662				 */
7663				vm_map_copy_overwrite_aligned_src_large++;
7664				goto slow_copy;
7665			}
7666
7667			if (entry->alias >= VM_MEMORY_MALLOC &&
7668			    entry->alias <= VM_MEMORY_MALLOC_LARGE_REUSED) {
7669				vm_object_t new_object, new_shadow;
7670
7671				/*
7672				 * We're about to map something over a mapping
7673				 * established by malloc()...
7674				 */
7675				new_object = copy_entry->object.vm_object;
7676				if (new_object != VM_OBJECT_NULL) {
7677					vm_object_lock_shared(new_object);
7678				}
7679				while (new_object != VM_OBJECT_NULL &&
7680				       !new_object->true_share &&
7681				       new_object->copy_strategy == MEMORY_OBJECT_COPY_SYMMETRIC &&
7682				       new_object->internal) {
7683					new_shadow = new_object->shadow;
7684					if (new_shadow == VM_OBJECT_NULL) {
7685						break;
7686					}
7687					vm_object_lock_shared(new_shadow);
7688					vm_object_unlock(new_object);
7689					new_object = new_shadow;
7690				}
7691				if (new_object != VM_OBJECT_NULL) {
7692					if (!new_object->internal) {
7693						/*
7694						 * The new mapping is backed
7695						 * by an external object.  We
7696						 * don't want malloc'ed memory
7697						 * to be replaced with such a
7698						 * non-anonymous mapping, so
7699						 * let's go off the optimized
7700						 * path...
7701						 */
7702						vm_map_copy_overwrite_aligned_src_not_internal++;
7703						vm_object_unlock(new_object);
7704						goto slow_copy;
7705					}
7706					if (new_object->true_share ||
7707					    new_object->copy_strategy != MEMORY_OBJECT_COPY_SYMMETRIC) {
7708						/*
7709						 * Same if there's a "true_share"
7710						 * object in the shadow chain, or
7711						 * an object with a non-default
7712						 * (SYMMETRIC) copy strategy.
7713						 */
7714						vm_map_copy_overwrite_aligned_src_not_symmetric++;
7715						vm_object_unlock(new_object);
7716						goto slow_copy;
7717					}
7718					vm_object_unlock(new_object);
7719				}
7720				/*
7721				 * The new mapping is still backed by
7722				 * anonymous (internal) memory, so it's
7723				 * OK to substitute it for the original
7724				 * malloc() mapping.
7725				 */
7726			}
7727
7728			if (old_object != VM_OBJECT_NULL) {
7729				if(entry->is_sub_map) {
7730					if(entry->use_pmap) {
7731#ifndef NO_NESTED_PMAP
7732						pmap_unnest(dst_map->pmap,
7733							    (addr64_t)entry->vme_start,
7734							    entry->vme_end - entry->vme_start);
7735#endif	/* NO_NESTED_PMAP */
7736						if(dst_map->mapped_in_other_pmaps) {
7737							/* clean up parent */
7738							/* map/maps */
7739							vm_map_submap_pmap_clean(
7740								dst_map, entry->vme_start,
7741								entry->vme_end,
7742								entry->object.sub_map,
7743								entry->offset);
7744						}
7745					} else {
7746						vm_map_submap_pmap_clean(
7747							dst_map, entry->vme_start,
7748							entry->vme_end,
7749							entry->object.sub_map,
7750							entry->offset);
7751					}
7752				   	vm_map_deallocate(
7753						entry->object.sub_map);
7754			   	} else {
7755					if(dst_map->mapped_in_other_pmaps) {
7756						vm_object_pmap_protect_options(
7757							entry->object.vm_object,
7758							entry->offset,
7759							entry->vme_end
7760							- entry->vme_start,
7761							PMAP_NULL,
7762							entry->vme_start,
7763							VM_PROT_NONE,
7764							PMAP_OPTIONS_REMOVE);
7765					} else {
7766						pmap_remove_options(
7767							dst_map->pmap,
7768							(addr64_t)(entry->vme_start),
7769							(addr64_t)(entry->vme_end),
7770							PMAP_OPTIONS_REMOVE);
7771					}
7772					vm_object_deallocate(old_object);
7773			   	}
7774			}
7775
7776			entry->is_sub_map = FALSE;
7777			entry->object = copy_entry->object;
7778			object = entry->object.vm_object;
7779			entry->needs_copy = copy_entry->needs_copy;
7780			entry->wired_count = 0;
7781			entry->user_wired_count = 0;
7782			offset = entry->offset = copy_entry->offset;
7783
7784			vm_map_copy_entry_unlink(copy, copy_entry);
7785			vm_map_copy_entry_dispose(copy, copy_entry);
7786
7787			/*
7788			 * We could try to push pages into the pmap at this point, BUT
7789			 * this optimization only saved on average 2 us per page if ALL
7790			 * the pages in the source were currently mapped and ALL the
7791			 * pages in the dest were touched.  If fewer than 2/3 of the
7792			 * pages were touched, this optimization actually cost more cycles.
7793			 * It also puts a lot of pressure on the pmap layer w/r to mapping structures.
7794			 */
7795
7796			/*
7797			 *	Set up for the next iteration.  The map
7798			 *	has not been unlocked, so the next
7799			 *	address should be at the end of this
7800			 *	entry, and the next map entry should be
7801			 *	the one following it.
7802			 */
7803
7804			start = tmp_entry->vme_end;
7805			tmp_entry = tmp_entry->vme_next;
7806		} else {
7807			vm_map_version_t	version;
7808			vm_object_t		dst_object;
7809			vm_object_offset_t	dst_offset;
7810			kern_return_t		r;
7811
7812		slow_copy:
7813			if (entry->needs_copy) {
7814				vm_object_shadow(&entry->object.vm_object,
7815						 &entry->offset,
7816						 (entry->vme_end -
7817						  entry->vme_start));
7818				entry->needs_copy = FALSE;
7819			}
7820
7821			dst_object = entry->object.vm_object;
7822			dst_offset = entry->offset;
7823
7824			/*
7825			 *	Take an object reference, and record
7826			 *	the map version information so that the
7827			 *	map can be safely unlocked.
7828			 */
7829
7830			if (dst_object == VM_OBJECT_NULL) {
7831				/*
7832				 * We would usually have just taken the
7833				 * optimized path above if the destination
7834				 * object has not been allocated yet.  But we
7835				 * now disable that optimization if the copy
7836				 * entry's object is not backed by anonymous
7837				 * memory to avoid replacing malloc'ed
7838				 * (i.e. re-usable) anonymous memory with a
7839				 * not-so-anonymous mapping.
7840				 * So we have to handle this case here and
7841				 * allocate a new VM object for this map entry.
7842				 */
7843				dst_object = vm_object_allocate(
7844					entry->vme_end - entry->vme_start);
7845				dst_offset = 0;
7846				entry->object.vm_object = dst_object;
7847				entry->offset = dst_offset;
7848				assert(entry->use_pmap);
7849
7850			}
7851
7852			vm_object_reference(dst_object);
7853
7854			/* account for unlock bumping up timestamp */
7855			version.main_timestamp = dst_map->timestamp + 1;
7856
7857			vm_map_unlock(dst_map);
7858
7859			/*
7860			 *	Copy as much as possible in one pass
7861			 */
7862
7863			copy_size = size;
7864			r = vm_fault_copy(
7865				copy_entry->object.vm_object,
7866				copy_entry->offset,
7867				&copy_size,
7868				dst_object,
7869				dst_offset,
7870				dst_map,
7871				&version,
7872				THREAD_UNINT );
7873
7874			/*
7875			 *	Release the object reference
7876			 */
7877
7878			vm_object_deallocate(dst_object);
7879
7880			/*
7881			 *	If a hard error occurred, return it now
7882			 */
7883
7884			if (r != KERN_SUCCESS)
7885				return(r);
7886
7887			if (copy_size != 0) {
7888				/*
7889				 *	Dispose of the copied region
7890				 */
7891
7892				vm_map_copy_clip_end(copy, copy_entry,
7893						     copy_entry->vme_start + copy_size);
7894				vm_map_copy_entry_unlink(copy, copy_entry);
7895				vm_object_deallocate(copy_entry->object.vm_object);
7896				vm_map_copy_entry_dispose(copy, copy_entry);
7897			}
7898
7899			/*
7900			 *	Pick up in the destination map where we left off.
7901			 *
7902			 *	Use the version information to avoid a lookup
7903			 *	in the normal case.
7904			 */
7905
7906			start += copy_size;
7907			vm_map_lock(dst_map);
7908			if (version.main_timestamp == dst_map->timestamp &&
7909			    copy_size != 0) {
7910				/* We can safely use saved tmp_entry value */
7911
7912				if (tmp_entry->map_aligned &&
7913				    !VM_MAP_PAGE_ALIGNED(
7914					    start,
7915					    VM_MAP_PAGE_MASK(dst_map))) {
7916					/* no longer map-aligned */
7917					tmp_entry->map_aligned = FALSE;
7918				}
7919				vm_map_clip_end(dst_map, tmp_entry, start);
7920				tmp_entry = tmp_entry->vme_next;
7921			} else {
7922				/* Must do lookup of tmp_entry */
7923
7924				if (!vm_map_lookup_entry(dst_map, start, &tmp_entry)) {
7925					vm_map_unlock(dst_map);
7926					return(KERN_INVALID_ADDRESS);
7927				}
7928				if (tmp_entry->map_aligned &&
7929				    !VM_MAP_PAGE_ALIGNED(
7930					    start,
7931					    VM_MAP_PAGE_MASK(dst_map))) {
7932					/* no longer map-aligned */
7933					tmp_entry->map_aligned = FALSE;
7934				}
7935				vm_map_clip_start(dst_map, tmp_entry, start);
7936			}
7937		}
7938	}/* while */
7939
7940	return(KERN_SUCCESS);
7941}/* vm_map_copy_overwrite_aligned */
7942
7943/*
7944 *	Routine: vm_map_copyin_kernel_buffer [internal use only]
7945 *
7946 *	Description:
7947 *		Copy in data to a kernel buffer from space in the
7948 *		source map. The original space may be optionally
7949 *		deallocated.
7950 *
7951 *		If successful, returns a new copy object.
7952 */
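
/*
 * Illustrative sketch (not compiled): how vm_map_copyin_common(), later in
 * this file, dispatches small copies to this routine, and what the
 * resulting copy object looks like.
 */
#if 0
	if ((len < msg_ool_size_small) && !use_maxprot)
		return vm_map_copyin_kernel_buffer(src_map, src_addr, len,
						   src_destroy, copy_result);

	/*
	 * On success the copy is self-contained:
	 *	copy->type      == VM_MAP_COPY_KERNEL_BUFFER
	 *	copy->cpy_kdata == (void *) (copy + 1)	(data stored inline)
	 */
#endif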
7953static kern_return_t
7954vm_map_copyin_kernel_buffer(
7955	vm_map_t	src_map,
7956	vm_map_offset_t	src_addr,
7957	vm_map_size_t	len,
7958	boolean_t	src_destroy,
7959	vm_map_copy_t	*copy_result)
7960{
7961	kern_return_t kr;
7962	vm_map_copy_t copy;
7963	vm_size_t kalloc_size;
7964
7965	if ((vm_size_t) len != len) {
7966		/* "len" is too big and doesn't fit in a "vm_size_t" */
7967		return KERN_RESOURCE_SHORTAGE;
7968	}
7969	kalloc_size = (vm_size_t) (sizeof(struct vm_map_copy) + len);
7970	assert((vm_map_size_t) kalloc_size == sizeof (struct vm_map_copy) + len);
7971
7972	copy = (vm_map_copy_t) kalloc(kalloc_size);
7973	if (copy == VM_MAP_COPY_NULL) {
7974		return KERN_RESOURCE_SHORTAGE;
7975	}
7976	copy->type = VM_MAP_COPY_KERNEL_BUFFER;
7977	copy->size = len;
7978	copy->offset = 0;
7979	copy->cpy_kdata = (void *) (copy + 1);
7980	copy->cpy_kalloc_size = kalloc_size;
7981
7982	kr = copyinmap(src_map, src_addr, copy->cpy_kdata, (vm_size_t) len);
7983	if (kr != KERN_SUCCESS) {
7984		kfree(copy, kalloc_size);
7985		return kr;
7986	}
7987	if (src_destroy) {
7988		(void) vm_map_remove(
7989			src_map,
7990			vm_map_trunc_page(src_addr,
7991					  VM_MAP_PAGE_MASK(src_map)),
7992			vm_map_round_page(src_addr + len,
7993					  VM_MAP_PAGE_MASK(src_map)),
7994			(VM_MAP_REMOVE_INTERRUPTIBLE |
7995			 VM_MAP_REMOVE_WAIT_FOR_KWIRE |
7996			 ((src_map == kernel_map) ? VM_MAP_REMOVE_KUNWIRE : 0)));
7997	}
7998	*copy_result = copy;
7999	return KERN_SUCCESS;
8000}
8001
8002/*
8003 *	Routine: vm_map_copyout_kernel_buffer	[internal use only]
8004 *
8005 *	Description:
8006 *		Copy out data from a kernel buffer into space in the
8007 *		destination map. The space may be optionally dynamically
8008 *		allocated.
8009 *
8010 *		If successful, consumes the copy object.
8011 *		Otherwise, the caller is responsible for it.
8012 */
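
/*
 * Illustrative sketch (not compiled): how vm_map_copyout_internal(), later
 * in this file, hands kernel-buffer copies to this routine; "overwrite" is
 * FALSE there, so fresh space is allocated in the destination map.
 */
#if 0
	if (copy->type == VM_MAP_COPY_KERNEL_BUFFER) {
		return vm_map_copyout_kernel_buffer(dst_map, dst_addr,
						    copy, FALSE,
						    consume_on_success);
	}
#endif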
8013static int vm_map_copyout_kernel_buffer_failures = 0;
8014static kern_return_t
8015vm_map_copyout_kernel_buffer(
8016	vm_map_t		map,
8017	vm_map_address_t	*addr,	/* IN/OUT */
8018	vm_map_copy_t		copy,
8019	boolean_t		overwrite,
8020	boolean_t		consume_on_success)
8021{
8022	kern_return_t kr = KERN_SUCCESS;
8023	thread_t thread = current_thread();
8024
8025	if (!overwrite) {
8026
8027		/*
8028		 * Allocate space in the target map for the data
8029		 */
8030		*addr = 0;
8031		kr = vm_map_enter(map,
8032				  addr,
8033				  vm_map_round_page(copy->size,
8034						    VM_MAP_PAGE_MASK(map)),
8035				  (vm_map_offset_t) 0,
8036				  VM_FLAGS_ANYWHERE,
8037				  VM_OBJECT_NULL,
8038				  (vm_object_offset_t) 0,
8039				  FALSE,
8040				  VM_PROT_DEFAULT,
8041				  VM_PROT_ALL,
8042				  VM_INHERIT_DEFAULT);
8043		if (kr != KERN_SUCCESS)
8044			return kr;
8045	}
8046
8047	/*
8048	 * Copyout the data from the kernel buffer to the target map.
8049	 */
8050	if (thread->map == map) {
8051
8052		/*
8053		 * If the target map is the current map, just do
8054		 * the copy.
8055		 */
8056		assert((vm_size_t) copy->size == copy->size);
8057		if (copyout(copy->cpy_kdata, *addr, (vm_size_t) copy->size)) {
8058			kr = KERN_INVALID_ADDRESS;
8059		}
8060	}
8061	else {
8062		vm_map_t oldmap;
8063
8064		/*
8065		 * If the target map is another map, assume the
8066		 * target's address space identity for the duration
8067		 * of the copy.
8068		 */
8069		vm_map_reference(map);
8070		oldmap = vm_map_switch(map);
8071
8072		assert((vm_size_t) copy->size == copy->size);
8073		if (copyout(copy->cpy_kdata, *addr, (vm_size_t) copy->size)) {
8074			vm_map_copyout_kernel_buffer_failures++;
8075			kr = KERN_INVALID_ADDRESS;
8076		}
8077
8078		(void) vm_map_switch(oldmap);
8079		vm_map_deallocate(map);
8080	}
8081
8082	if (kr != KERN_SUCCESS) {
8083		/* the copy failed, clean up */
8084		if (!overwrite) {
8085			/*
8086			 * Deallocate the space we allocated in the target map.
8087			 */
8088			(void) vm_map_remove(
8089				map,
8090				vm_map_trunc_page(*addr,
8091						  VM_MAP_PAGE_MASK(map)),
8092				vm_map_round_page((*addr +
8093						   vm_map_round_page(copy->size,
8094								     VM_MAP_PAGE_MASK(map))),
8095						  VM_MAP_PAGE_MASK(map)),
8096				VM_MAP_NO_FLAGS);
8097			*addr = 0;
8098		}
8099	} else {
8100		/* copy was successful, discard the copy structure */
8101		if (consume_on_success) {
8102			kfree(copy, copy->cpy_kalloc_size);
8103		}
8104	}
8105
8106	return kr;
8107}
8108
8109/*
8110 *	Macro:		vm_map_copy_insert
8111 *
8112 *	Description:
8113 *		Link a copy chain ("copy") into a map at the
8114 *		specified location (after "where").
8115 *	Side effects:
8116 *		The copy chain is destroyed.
8117 *	Warning:
8118 *		The arguments are evaluated multiple times.
8119 */
8120#define	vm_map_copy_insert(map, where, copy)				\
8121MACRO_BEGIN								\
8122	vm_map_store_copy_insert(map, where, copy);	  \
8123	zfree(vm_map_copy_zone, copy);		\
8124MACRO_END
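
/*
 * Example (illustrative only): because the macro arguments are expanded
 * more than once, callers pass plain variables, as vm_map_copyout_internal()
 * does further down:
 *
 *	vm_map_copy_insert(dst_map, last, copy);
 */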
8125
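/*
 *	Routine:	vm_map_copy_remap
 *
 *	Description:
 *		Insert the entries of a copy chain ("copy") into a map
 *		after "where", shifting their addresses by "adjustment"
 *		and applying the given protections and inheritance.
 *		Unlike vm_map_copy_insert, the copy chain itself is left
 *		intact; each entry's object gets an extra reference.
 */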
8126void
8127vm_map_copy_remap(
8128	vm_map_t	map,
8129	vm_map_entry_t	where,
8130	vm_map_copy_t	copy,
8131	vm_map_offset_t	adjustment,
8132	vm_prot_t	cur_prot,
8133	vm_prot_t	max_prot,
8134	vm_inherit_t	inheritance)
8135{
8136	vm_map_entry_t	copy_entry, new_entry;
8137
8138	for (copy_entry = vm_map_copy_first_entry(copy);
8139	     copy_entry != vm_map_copy_to_entry(copy);
8140	     copy_entry = copy_entry->vme_next) {
8141		/* get a new VM map entry for the map */
8142		new_entry = vm_map_entry_create(map,
8143						!map->hdr.entries_pageable);
8144		/* copy the "copy entry" to the new entry */
8145		vm_map_entry_copy(new_entry, copy_entry);
8146		/* adjust "start" and "end" */
8147		new_entry->vme_start += adjustment;
8148		new_entry->vme_end += adjustment;
8149		/* clear some attributes */
8150		new_entry->inheritance = inheritance;
8151		new_entry->protection = cur_prot;
8152		new_entry->max_protection = max_prot;
8153		new_entry->behavior = VM_BEHAVIOR_DEFAULT;
8154		/* take an extra reference on the entry's "object" */
8155		if (new_entry->is_sub_map) {
8156			assert(!new_entry->use_pmap); /* not nested */
8157			vm_map_lock(new_entry->object.sub_map);
8158			vm_map_reference(new_entry->object.sub_map);
8159			vm_map_unlock(new_entry->object.sub_map);
8160		} else {
8161			vm_object_reference(new_entry->object.vm_object);
8162		}
8163		/* insert the new entry in the map */
8164		vm_map_store_entry_link(map, where, new_entry);
8165		/* continue inserting the "copy entries" after the new entry */
8166		where = new_entry;
8167	}
8168}
8169
8170/*
8171 *	Routine:	vm_map_copyout
8172 *
8173 *	Description:
8174 *		Copy out a copy chain ("copy") into newly-allocated
8175 *		space in the destination map.
8176 *
8177 *		If successful, consumes the copy object.
8178 *		Otherwise, the caller is responsible for it.
8179 */
8180
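/*
 * Illustrative sketch (not compiled): a typical pairing of vm_map_copyin()
 * and vm_map_copyout().  "src_map", "dst_map", "src_addr" and "len" are
 * placeholders for whatever maps and range the caller is working with.
 */
#if 0
	vm_map_copy_t		copy;
	vm_map_address_t	dst_addr;
	kern_return_t		kr;

	kr = vm_map_copyin(src_map, src_addr, len,
			   FALSE,		/* src_destroy */
			   &copy);
	if (kr == KERN_SUCCESS) {
		kr = vm_map_copyout(dst_map, &dst_addr, copy);
		if (kr != KERN_SUCCESS) {
			/* not consumed on failure: the caller discards it */
			vm_map_copy_discard(copy);
		}
	}
#endif
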
8181kern_return_t
8182vm_map_copyout(
8183	vm_map_t		dst_map,
8184	vm_map_address_t	*dst_addr,	/* OUT */
8185	vm_map_copy_t		copy)
8186{
8187	return vm_map_copyout_internal(dst_map, dst_addr, copy,
8188				       TRUE, /* consume_on_success */
8189				       VM_PROT_DEFAULT,
8190				       VM_PROT_ALL,
8191				       VM_INHERIT_DEFAULT);
8192}
8193
8194kern_return_t
8195vm_map_copyout_internal(
8196	vm_map_t		dst_map,
8197	vm_map_address_t	*dst_addr,	/* OUT */
8198	vm_map_copy_t		copy,
8199	boolean_t		consume_on_success,
8200	vm_prot_t		cur_protection,
8201	vm_prot_t		max_protection,
8202	vm_inherit_t		inheritance)
8203{
8204	vm_map_size_t		size;
8205	vm_map_size_t		adjustment;
8206	vm_map_offset_t		start;
8207	vm_object_offset_t	vm_copy_start;
8208	vm_map_entry_t		last;
8209	vm_map_entry_t		entry;
8210
8211	/*
8212	 *	Check for null copy object.
8213	 */
8214
8215	if (copy == VM_MAP_COPY_NULL) {
8216		*dst_addr = 0;
8217		return(KERN_SUCCESS);
8218	}
8219
8220	/*
8221	 *	Check for special copy object, created
8222	 *	by vm_map_copyin_object.
8223	 */
8224
8225	if (copy->type == VM_MAP_COPY_OBJECT) {
8226		vm_object_t 		object = copy->cpy_object;
8227		kern_return_t 		kr;
8228		vm_object_offset_t	offset;
8229
8230		offset = vm_object_trunc_page(copy->offset);
8231		size = vm_map_round_page((copy->size +
8232					  (vm_map_size_t)(copy->offset -
8233							  offset)),
8234					 VM_MAP_PAGE_MASK(dst_map));
8235		*dst_addr = 0;
8236		kr = vm_map_enter(dst_map, dst_addr, size,
8237				  (vm_map_offset_t) 0, VM_FLAGS_ANYWHERE,
8238				  object, offset, FALSE,
8239				  VM_PROT_DEFAULT, VM_PROT_ALL,
8240				  VM_INHERIT_DEFAULT);
8241		if (kr != KERN_SUCCESS)
8242			return(kr);
8243		/* Account for non-pagealigned copy object */
8244		*dst_addr += (vm_map_offset_t)(copy->offset - offset);
8245		if (consume_on_success)
8246			zfree(vm_map_copy_zone, copy);
8247		return(KERN_SUCCESS);
8248	}
8249
8250	/*
8251	 *	Check for special kernel buffer allocated
8252	 *	by new_ipc_kmsg_copyin.
8253	 */
8254
8255	if (copy->type == VM_MAP_COPY_KERNEL_BUFFER) {
8256		return vm_map_copyout_kernel_buffer(dst_map, dst_addr,
8257						    copy, FALSE,
8258						    consume_on_success);
8259	}
8260
8261
8262	/*
8263	 *	Find space for the data
8264	 */
8265
8266	vm_copy_start = vm_map_trunc_page((vm_map_size_t)copy->offset,
8267					  VM_MAP_COPY_PAGE_MASK(copy));
8268	size = vm_map_round_page((vm_map_size_t)copy->offset + copy->size,
8269				 VM_MAP_COPY_PAGE_MASK(copy))
8270		- vm_copy_start;
8271
8272
8273StartAgain: ;
8274
8275	vm_map_lock(dst_map);
8276	if( dst_map->disable_vmentry_reuse == TRUE) {
8277		VM_MAP_HIGHEST_ENTRY(dst_map, entry, start);
8278		last = entry;
8279	} else {
8280		assert(first_free_is_valid(dst_map));
8281		start = ((last = dst_map->first_free) == vm_map_to_entry(dst_map)) ?
8282		vm_map_min(dst_map) : last->vme_end;
8283		start = vm_map_round_page(start,
8284					  VM_MAP_PAGE_MASK(dst_map));
8285	}
8286
8287	while (TRUE) {
8288		vm_map_entry_t	next = last->vme_next;
8289		vm_map_offset_t	end = start + size;
8290
8291		if ((end > dst_map->max_offset) || (end < start)) {
8292			if (dst_map->wait_for_space) {
8293				if (size <= (dst_map->max_offset - dst_map->min_offset)) {
8294					assert_wait((event_t) dst_map,
8295						    THREAD_INTERRUPTIBLE);
8296					vm_map_unlock(dst_map);
8297					thread_block(THREAD_CONTINUE_NULL);
8298					goto StartAgain;
8299				}
8300			}
8301			vm_map_unlock(dst_map);
8302			return(KERN_NO_SPACE);
8303		}
8304
8305		if ((next == vm_map_to_entry(dst_map)) ||
8306		    (next->vme_start >= end))
8307			break;
8308
8309		last = next;
8310		start = last->vme_end;
8311		start = vm_map_round_page(start,
8312					  VM_MAP_PAGE_MASK(dst_map));
8313	}
8314
8315	adjustment = start - vm_copy_start;
8316	if (! consume_on_success) {
8317		/*
8318		 * We're not allowed to consume "copy", so we'll have to
8319		 * copy its map entries into the destination map below.
8320		 * No need to re-allocate map entries from the correct
8321		 * (pageable or not) zone, since we'll get new map entries
8322		 * during the transfer.
8323		 * We'll also adjust the map entries' "start" and "end"
8324		 * during the transfer, to keep "copy"'s entries consistent
8325		 * with its "offset".
8326		 */
8327		goto after_adjustments;
8328	}
8329
8330	/*
8331	 *	Since we're going to just drop the map
8332	 *	entries from the copy into the destination
8333	 *	map, they must come from the same pool.
8334	 */
8335
8336	if (copy->cpy_hdr.entries_pageable != dst_map->hdr.entries_pageable) {
8337		/*
8338		 * Mismatches occur when dealing with the default
8339		 * pager.
8340		 */
8341		zone_t		old_zone;
8342		vm_map_entry_t	next, new;
8343
8344		/*
8345		 * Find the zone that the copies were allocated from
8346		 */
8347
8348		entry = vm_map_copy_first_entry(copy);
8349
8350		/*
8351		 * Reinitialize the copy so that vm_map_copy_entry_link
8352		 * will work.
8353		 */
8354		vm_map_store_copy_reset(copy, entry);
8355		copy->cpy_hdr.entries_pageable = dst_map->hdr.entries_pageable;
8356
8357		/*
8358		 * Copy each entry.
8359		 */
8360		while (entry != vm_map_copy_to_entry(copy)) {
8361			new = vm_map_copy_entry_create(copy, !copy->cpy_hdr.entries_pageable);
8362			vm_map_entry_copy_full(new, entry);
8363			assert(!new->iokit_acct);
8364			if (new->is_sub_map) {
8365				/* clr address space specifics */
8366				new->use_pmap = FALSE;
8367			}
8368			vm_map_copy_entry_link(copy,
8369					       vm_map_copy_last_entry(copy),
8370					       new);
8371			next = entry->vme_next;
8372			old_zone = entry->from_reserved_zone ? vm_map_entry_reserved_zone : vm_map_entry_zone;
8373			zfree(old_zone, entry);
8374			entry = next;
8375		}
8376	}
8377
8378	/*
8379	 *	Adjust the addresses in the copy chain, and
8380	 *	reset the region attributes.
8381	 */
8382
8383	for (entry = vm_map_copy_first_entry(copy);
8384	     entry != vm_map_copy_to_entry(copy);
8385	     entry = entry->vme_next) {
8386		if (VM_MAP_PAGE_SHIFT(dst_map) == PAGE_SHIFT) {
8387			/*
8388			 * We're injecting this copy entry into a map that
8389			 * has the standard page alignment, so clear
8390			 * "map_aligned" (which might have been inherited
8391			 * from the original map entry).
8392			 */
8393			entry->map_aligned = FALSE;
8394		}
8395
8396		entry->vme_start += adjustment;
8397		entry->vme_end += adjustment;
8398
8399		if (entry->map_aligned) {
8400			assert(VM_MAP_PAGE_ALIGNED(entry->vme_start,
8401						   VM_MAP_PAGE_MASK(dst_map)));
8402			assert(VM_MAP_PAGE_ALIGNED(entry->vme_end,
8403						   VM_MAP_PAGE_MASK(dst_map)));
8404		}
8405
8406		entry->inheritance = VM_INHERIT_DEFAULT;
8407		entry->protection = VM_PROT_DEFAULT;
8408		entry->max_protection = VM_PROT_ALL;
8409		entry->behavior = VM_BEHAVIOR_DEFAULT;
8410
8411		/*
8412		 * If the entry is now wired,
8413		 * map the pages into the destination map.
8414		 */
8415		if (entry->wired_count != 0) {
8416			register vm_map_offset_t va;
8417			vm_object_offset_t	 offset;
8418			register vm_object_t object;
8419			vm_prot_t prot;
8420			int	type_of_fault;
8421
8422			object = entry->object.vm_object;
8423			offset = entry->offset;
8424			va = entry->vme_start;
8425
8426			pmap_pageable(dst_map->pmap,
8427				      entry->vme_start,
8428				      entry->vme_end,
8429				      TRUE);
8430
8431			while (va < entry->vme_end) {
8432				register vm_page_t	m;
8433
8434				/*
8435				 * Look up the page in the object.
8436				 * Assert that the page will be found in the
8437				 * top object:
8438				 * either
8439				 *	the object was newly created by
8440				 *	vm_object_copy_slowly, and has
8441				 *	copies of all of the pages from
8442				 *	the source object
8443				 * or
8444				 *	the object was moved from the old
8445				 *	map entry; because the old map
8446				 *	entry was wired, all of the pages
8447				 *	were in the top-level object.
8448				 *	(XXX not true if we wire pages for
8449				 *	 reading)
8450				 */
8451				vm_object_lock(object);
8452
8453				m = vm_page_lookup(object, offset);
8454				if (m == VM_PAGE_NULL || !VM_PAGE_WIRED(m) ||
8455				    m->absent)
8456					panic("vm_map_copyout: wiring %p", m);
8457
8458				/*
8459				 * ENCRYPTED SWAP:
8460				 * The page is assumed to be wired here, so it
8461				 * shouldn't be encrypted.  Otherwise, we
8462				 * couldn't enter it in the page table, since
8463				 * we don't want the user to see the encrypted
8464				 * data.
8465				 */
8466				ASSERT_PAGE_DECRYPTED(m);
8467
8468				prot = entry->protection;
8469
8470				if (override_nx(dst_map, entry->alias) && prot)
8471				        prot |= VM_PROT_EXECUTE;
8472
8473				type_of_fault = DBG_CACHE_HIT_FAULT;
8474
8475				vm_fault_enter(m, dst_map->pmap, va, prot, prot,
8476					       VM_PAGE_WIRED(m), FALSE, FALSE,
8477					       FALSE, entry->alias,
8478					       ((entry->iokit_acct ||
8479						 (!entry->is_sub_map &&
8480						  !entry->use_pmap))
8481						? PMAP_OPTIONS_ALT_ACCT
8482						: 0),
8483					       NULL, &type_of_fault);
8484
8485				vm_object_unlock(object);
8486
8487				offset += PAGE_SIZE_64;
8488				va += PAGE_SIZE;
8489			}
8490		}
8491	}
8492
8493after_adjustments:
8494
8495	/*
8496	 *	Correct the page alignment for the result
8497	 */
8498
8499	*dst_addr = start + (copy->offset - vm_copy_start);
8500
8501	/*
8502	 *	Update the hints and the map size
8503	 */
8504
8505	if (consume_on_success) {
8506		SAVE_HINT_MAP_WRITE(dst_map, vm_map_copy_last_entry(copy));
8507	} else {
8508		SAVE_HINT_MAP_WRITE(dst_map, last);
8509	}
8510
8511	dst_map->size += size;
8512
8513	/*
8514	 *	Link in the copy
8515	 */
8516
8517	if (consume_on_success) {
8518		vm_map_copy_insert(dst_map, last, copy);
8519	} else {
8520		vm_map_copy_remap(dst_map, last, copy, adjustment,
8521				  cur_protection, max_protection,
8522				  inheritance);
8523	}
8524
8525	vm_map_unlock(dst_map);
8526
8527	/*
8528	 * XXX	If wiring_required, call vm_map_pageable
8529	 */
8530
8531	return(KERN_SUCCESS);
8532}
8533
8534/*
8535 *	Routine:	vm_map_copyin
8536 *
8537 *	Description:
8538 *		see vm_map_copyin_common.  Exported via Unsupported.exports.
8539 *
8540 */
8541
8542#undef vm_map_copyin
8543
8544kern_return_t
8545vm_map_copyin(
8546	vm_map_t			src_map,
8547	vm_map_address_t	src_addr,
8548	vm_map_size_t		len,
8549	boolean_t			src_destroy,
8550	vm_map_copy_t		*copy_result)	/* OUT */
8551{
8552	return(vm_map_copyin_common(src_map, src_addr, len, src_destroy,
8553					FALSE, copy_result, FALSE));
8554}
8555
8556/*
8557 *	Routine:	vm_map_copyin_common
8558 *
8559 *	Description:
8560 *		Copy the specified region (src_addr, len) from the
8561 *		source address space (src_map), possibly removing
8562 *		the region from the source address space (src_destroy).
8563 *
8564 *	Returns:
8565 *		A vm_map_copy_t object (copy_result), suitable for
8566 *		insertion into another address space (using vm_map_copyout)
8567 *		or copying over another address space region (using
8568 *		vm_map_copy_overwrite).  If the copy is unused, it
8569 *		should be destroyed (using vm_map_copy_discard).
8570 *
8571 *	In/out conditions:
8572 *		The source map should not be locked on entry.
8573 */
8574
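/*
 * Illustrative sketch (not compiled): copying a region out of a task's map
 * and tearing the source down in the same call via "src_destroy".
 * "task_map", "region_addr" and "region_len" are placeholders.
 */
#if 0
	vm_map_copy_t	copy;
	kern_return_t	kr;

	kr = vm_map_copyin_common(task_map, region_addr, region_len,
				  TRUE,		/* src_destroy */
				  FALSE,	/* src_volatile (unused) */
				  &copy,
				  FALSE);	/* use_maxprot */
	if (kr == KERN_SUCCESS) {
		/*
		 * "copy" now holds the data; hand it to vm_map_copyout()
		 * or release it with vm_map_copy_discard().
		 */
	}
#endif
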
8575typedef struct submap_map {
8576	vm_map_t	parent_map;
8577	vm_map_offset_t	base_start;
8578	vm_map_offset_t	base_end;
8579	vm_map_size_t	base_len;
8580	struct submap_map *next;
8581} submap_map_t;
8582
8583kern_return_t
8584vm_map_copyin_common(
8585	vm_map_t	src_map,
8586	vm_map_address_t src_addr,
8587	vm_map_size_t	len,
8588	boolean_t	src_destroy,
8589	__unused boolean_t	src_volatile,
8590	vm_map_copy_t	*copy_result,	/* OUT */
8591	boolean_t	use_maxprot)
8592{
8593	vm_map_entry_t	tmp_entry;	/* Result of last map lookup --
8594					 * in multi-level lookup, this
8595					 * entry contains the actual
8596					 * vm_object/offset.
8597					 */
8598	register
8599	vm_map_entry_t	new_entry = VM_MAP_ENTRY_NULL;	/* Map entry for copy */
8600
8601	vm_map_offset_t	src_start;	/* Start of current entry --
8602					 * where copy is taking place now
8603					 */
8604	vm_map_offset_t	src_end;	/* End of entire region to be
8605					 * copied */
8606	vm_map_offset_t src_base;
8607	vm_map_t	base_map = src_map;
8608	boolean_t	map_share=FALSE;
8609	submap_map_t	*parent_maps = NULL;
8610
8611	register
8612	vm_map_copy_t	copy;		/* Resulting copy */
8613	vm_map_address_t copy_addr;
8614	vm_map_size_t	copy_size;
8615
8616	/*
8617	 *	Check for copies of zero bytes.
8618	 */
8619
8620	if (len == 0) {
8621		*copy_result = VM_MAP_COPY_NULL;
8622		return(KERN_SUCCESS);
8623	}
8624
8625	/*
8626	 *	Check that the end address doesn't overflow
8627	 */
8628	src_end = src_addr + len;
8629	if (src_end < src_addr)
8630		return KERN_INVALID_ADDRESS;
8631
8632	/*
8633	 * If the copy is sufficiently small, use a kernel buffer instead
8634	 * of making a virtual copy.  The theory being that the cost of
8635	 * setting up VM (and taking C-O-W faults) dominates the copy costs
8636	 * for small regions.
8637	 */
8638	if ((len < msg_ool_size_small) && !use_maxprot)
8639		return vm_map_copyin_kernel_buffer(src_map, src_addr, len,
8640						   src_destroy, copy_result);
8641
8642	/*
8643	 *	Compute (page aligned) start and end of region
8644	 */
8645	src_start = vm_map_trunc_page(src_addr,
8646				      VM_MAP_PAGE_MASK(src_map));
8647	src_end = vm_map_round_page(src_end,
8648				    VM_MAP_PAGE_MASK(src_map));
8649
8650	XPR(XPR_VM_MAP, "vm_map_copyin_common map 0x%x addr 0x%x len 0x%x dest %d\n", src_map, src_addr, len, src_destroy, 0);
8651
8652	/*
8653	 *	Allocate a header element for the list.
8654	 *
8655	 *	Use the start and end in the header to
8656	 *	remember the endpoints prior to rounding.
8657	 */
8658
8659	copy = (vm_map_copy_t) zalloc(vm_map_copy_zone);
8660	vm_map_copy_first_entry(copy) =
8661		vm_map_copy_last_entry(copy) = vm_map_copy_to_entry(copy);
8662	copy->type = VM_MAP_COPY_ENTRY_LIST;
8663	copy->cpy_hdr.nentries = 0;
8664	copy->cpy_hdr.entries_pageable = TRUE;
8665#if 00
8666	copy->cpy_hdr.page_shift = src_map->hdr.page_shift;
8667#else
8668	/*
8669	 * The copy entries can be broken down for a variety of reasons,
8670	 * so we can't guarantee that they will remain map-aligned...
8671	 * Will need to adjust the first copy_entry's "vme_start" and
8672	 * the last copy_entry's "vme_end" to be rounded to PAGE_MASK
8673	 * rather than the original map's alignment.
8674	 */
8675	copy->cpy_hdr.page_shift = PAGE_SHIFT;
8676#endif
8677
8678	vm_map_store_init( &(copy->cpy_hdr) );
8679
8680	copy->offset = src_addr;
8681	copy->size = len;
8682
8683	new_entry = vm_map_copy_entry_create(copy, !copy->cpy_hdr.entries_pageable);
8684
8685#define	RETURN(x)						\
8686	MACRO_BEGIN						\
8687	vm_map_unlock(src_map);					\
8688	if(src_map != base_map)					\
8689		vm_map_deallocate(src_map);			\
8690	if (new_entry != VM_MAP_ENTRY_NULL)			\
8691		vm_map_copy_entry_dispose(copy,new_entry);	\
8692	vm_map_copy_discard(copy);				\
8693	{							\
8694		submap_map_t	*_ptr;				\
8695								\
8696		for(_ptr = parent_maps; _ptr != NULL; _ptr = parent_maps) { \
8697			parent_maps=parent_maps->next;		\
8698			if (_ptr->parent_map != base_map)	\
8699				vm_map_deallocate(_ptr->parent_map);	\
8700			kfree(_ptr, sizeof(submap_map_t));	\
8701		}						\
8702	}							\
8703	MACRO_RETURN(x);					\
8704	MACRO_END
8705
8706	/*
8707	 *	Find the beginning of the region.
8708	 */
8709
8710 	vm_map_lock(src_map);
8711
8712	/*
8713	 * Lookup the original "src_addr" rather than the truncated
8714	 * "src_start", in case "src_start" falls in a non-map-aligned
8715	 * map entry *before* the map entry that contains "src_addr"...
8716	 */
8717	if (!vm_map_lookup_entry(src_map, src_addr, &tmp_entry))
8718		RETURN(KERN_INVALID_ADDRESS);
8719	if(!tmp_entry->is_sub_map) {
8720		/*
8721		 * ... but clip to the map-rounded "src_start" rather than
8722		 * "src_addr" to preserve map-alignment.  We'll adjust the
8723		 * first copy entry at the end, if needed.
8724		 */
8725		vm_map_clip_start(src_map, tmp_entry, src_start);
8726	}
8727	if (src_start < tmp_entry->vme_start) {
8728		/*
8729		 * Move "src_start" up to the start of the
8730		 * first map entry to copy.
8731		 */
8732		src_start = tmp_entry->vme_start;
8733	}
8734	/* set for later submap fix-up */
8735	copy_addr = src_start;
8736
8737	/*
8738	 *	Go through entries until we get to the end.
8739	 */
8740
8741	while (TRUE) {
8742		register
8743		vm_map_entry_t	src_entry = tmp_entry;	/* Top-level entry */
8744		vm_map_size_t	src_size;		/* Size of source
8745							 * map entry (in both
8746							 * maps)
8747							 */
8748
8749		register
8750		vm_object_t		src_object;	/* Object to copy */
8751		vm_object_offset_t	src_offset;
8752
8753		boolean_t	src_needs_copy;		/* Should source map
8754							 * be made read-only
8755							 * for copy-on-write?
8756							 */
8757
8758		boolean_t	new_entry_needs_copy;	/* Will new entry be COW? */
8759
8760		boolean_t	was_wired;		/* Was source wired? */
8761		vm_map_version_t version;		/* Version before locks
8762							 * dropped to make copy
8763							 */
8764		kern_return_t	result;			/* Return value from
8765							 * copy_strategically.
8766							 */
8767		while(tmp_entry->is_sub_map) {
8768			vm_map_size_t submap_len;
8769			submap_map_t *ptr;
8770
8771			ptr = (submap_map_t *)kalloc(sizeof(submap_map_t));
8772			ptr->next = parent_maps;
8773			parent_maps = ptr;
8774			ptr->parent_map = src_map;
8775			ptr->base_start = src_start;
8776			ptr->base_end = src_end;
8777			submap_len = tmp_entry->vme_end - src_start;
8778			if(submap_len > (src_end-src_start))
8779				submap_len = src_end-src_start;
8780			ptr->base_len = submap_len;
8781
8782			src_start -= tmp_entry->vme_start;
8783			src_start += tmp_entry->offset;
8784			src_end = src_start + submap_len;
8785			src_map = tmp_entry->object.sub_map;
8786			vm_map_lock(src_map);
8787			/* keep an outstanding reference for all maps in */
8788			/* the parents tree except the base map */
8789			vm_map_reference(src_map);
8790			vm_map_unlock(ptr->parent_map);
8791			if (!vm_map_lookup_entry(
8792				    src_map, src_start, &tmp_entry))
8793				RETURN(KERN_INVALID_ADDRESS);
8794			map_share = TRUE;
8795			if(!tmp_entry->is_sub_map)
8796				vm_map_clip_start(src_map, tmp_entry, src_start);
8797			src_entry = tmp_entry;
8798		}
8799		/* we are now in the lowest level submap... */
8800
8801		if ((tmp_entry->object.vm_object != VM_OBJECT_NULL) &&
8802		    (tmp_entry->object.vm_object->phys_contiguous)) {
8803			/* This is not supported for now.  In the    */
8804			/* future we will need to detect the         */
8805			/* phys_contiguous condition and then        */
8806			/* upgrade copy_slowly to do a physical copy */
8807			/* from the device-memory-based object.  We  */
8808			/* can piggy-back off of the "was_wired"     */
8809			/* boolean to set up the proper handling.    */
8810			RETURN(KERN_PROTECTION_FAILURE);
8811		}
8812		/*
8813		 *	Create a new address map entry to hold the result.
8814		 *	Fill in the fields from the appropriate source entries.
8815		 *	We must unlock the source map to do this if we need
8816		 *	to allocate a map entry.
8817		 */
8818		if (new_entry == VM_MAP_ENTRY_NULL) {
8819			version.main_timestamp = src_map->timestamp;
8820			vm_map_unlock(src_map);
8821
8822			new_entry = vm_map_copy_entry_create(copy, !copy->cpy_hdr.entries_pageable);
8823
8824			vm_map_lock(src_map);
8825			if ((version.main_timestamp + 1) != src_map->timestamp) {
8826				if (!vm_map_lookup_entry(src_map, src_start,
8827							 &tmp_entry)) {
8828					RETURN(KERN_INVALID_ADDRESS);
8829				}
8830				if (!tmp_entry->is_sub_map)
8831					vm_map_clip_start(src_map, tmp_entry, src_start);
8832				continue; /* restart w/ new tmp_entry */
8833			}
8834		}
8835
8836		/*
8837		 *	Verify that the region can be read.
8838		 */
8839		if (((src_entry->protection & VM_PROT_READ) == VM_PROT_NONE &&
8840		     !use_maxprot) ||
8841		    (src_entry->max_protection & VM_PROT_READ) == 0)
8842			RETURN(KERN_PROTECTION_FAILURE);
8843
8844		/*
8845		 *	Clip against the endpoints of the entire region.
8846		 */
8847
8848		vm_map_clip_end(src_map, src_entry, src_end);
8849
8850		src_size = src_entry->vme_end - src_start;
8851		src_object = src_entry->object.vm_object;
8852		src_offset = src_entry->offset;
8853		was_wired = (src_entry->wired_count != 0);
8854
8855		vm_map_entry_copy(new_entry, src_entry);
8856		if (new_entry->is_sub_map) {
8857			/* clr address space specifics */
8858			new_entry->use_pmap = FALSE;
8859		}
8860
8861		/*
8862		 *	Attempt non-blocking copy-on-write optimizations.
8863		 */
8864
8865		if (src_destroy &&
8866		    (src_object == VM_OBJECT_NULL ||
8867		     (src_object->internal && !src_object->true_share
8868		      && !map_share))) {
8869			/*
8870			 * If we are destroying the source, and the object
8871			 * is internal, we can move the object reference
8872			 * from the source to the copy.  The copy is
8873			 * copy-on-write only if the source is.
8874			 * We make another reference to the object, because
8875			 * destroying the source entry will deallocate it.
8876			 */
8877			vm_object_reference(src_object);
8878
8879			/*
8880			 * Copy is always unwired.  vm_map_copy_entry
8881			 * set its wired count to zero.
8882			 */
8883
8884			goto CopySuccessful;
8885		}
8886
8887
8888	RestartCopy:
8889		XPR(XPR_VM_MAP, "vm_map_copyin_common src_obj 0x%x ent 0x%x obj 0x%x was_wired %d\n",
8890		    src_object, new_entry, new_entry->object.vm_object,
8891		    was_wired, 0);
8892		if ((src_object == VM_OBJECT_NULL ||
8893		     (!was_wired && !map_share && !tmp_entry->is_shared)) &&
8894		    vm_object_copy_quickly(
8895			    &new_entry->object.vm_object,
8896			    src_offset,
8897			    src_size,
8898			    &src_needs_copy,
8899			    &new_entry_needs_copy)) {
8900
8901			new_entry->needs_copy = new_entry_needs_copy;
8902
8903			/*
8904			 *	Handle copy-on-write obligations
8905			 */
8906
8907			if (src_needs_copy && !tmp_entry->needs_copy) {
8908			        vm_prot_t prot;
8909
8910				prot = src_entry->protection & ~VM_PROT_WRITE;
8911
8912				if (override_nx(src_map, src_entry->alias) && prot)
8913				        prot |= VM_PROT_EXECUTE;
8914
8915				vm_object_pmap_protect(
8916					src_object,
8917					src_offset,
8918					src_size,
8919			      		(src_entry->is_shared ?
8920					 PMAP_NULL
8921					 : src_map->pmap),
8922					src_entry->vme_start,
8923					prot);
8924
8925				tmp_entry->needs_copy = TRUE;
8926			}
8927
8928			/*
8929			 *	The map has never been unlocked, so it's safe
8930			 *	to move to the next entry rather than doing
8931			 *	another lookup.
8932			 */
8933
8934			goto CopySuccessful;
8935		}
8936
8937		/*
8938		 *	Take an object reference, so that we may
8939		 *	release the map lock(s).
8940		 */
8941
8942		assert(src_object != VM_OBJECT_NULL);
8943		vm_object_reference(src_object);
8944
8945		/*
8946		 *	Record the timestamp for later verification.
8947		 *	Unlock the map.
8948		 */
8949
8950		version.main_timestamp = src_map->timestamp;
8951		vm_map_unlock(src_map);	/* Increments timestamp once! */
8952
8953		/*
8954		 *	Perform the copy
8955		 */
8956
8957		if (was_wired) {
8958		CopySlowly:
8959			vm_object_lock(src_object);
8960			result = vm_object_copy_slowly(
8961				src_object,
8962				src_offset,
8963				src_size,
8964				THREAD_UNINT,
8965				&new_entry->object.vm_object);
8966			new_entry->offset = 0;
8967			new_entry->needs_copy = FALSE;
8968
8969		}
8970		else if (src_object->copy_strategy == MEMORY_OBJECT_COPY_SYMMETRIC &&
8971			 (tmp_entry->is_shared  || map_share)) {
8972		  	vm_object_t new_object;
8973
8974			vm_object_lock_shared(src_object);
8975			new_object = vm_object_copy_delayed(
8976				src_object,
8977				src_offset,
8978				src_size,
8979				TRUE);
8980			if (new_object == VM_OBJECT_NULL)
8981			  	goto CopySlowly;
8982
8983			new_entry->object.vm_object = new_object;
8984			new_entry->needs_copy = TRUE;
8985			assert(!new_entry->iokit_acct);
8986			assert(new_object->purgable == VM_PURGABLE_DENY);
8987			new_entry->use_pmap = TRUE;
8988			result = KERN_SUCCESS;
8989
8990		} else {
8991			result = vm_object_copy_strategically(src_object,
8992							      src_offset,
8993							      src_size,
8994							      &new_entry->object.vm_object,
8995							      &new_entry->offset,
8996							      &new_entry_needs_copy);
8997
8998			new_entry->needs_copy = new_entry_needs_copy;
8999		}
9000
9001		if (result != KERN_SUCCESS &&
9002		    result != KERN_MEMORY_RESTART_COPY) {
9003			vm_map_lock(src_map);
9004			RETURN(result);
9005		}
9006
9007		/*
9008		 *	Throw away the extra reference
9009		 */
9010
9011		vm_object_deallocate(src_object);
9012
9013		/*
9014		 *	Verify that the map has not substantially
9015		 *	changed while the copy was being made.
9016		 */
9017
9018		vm_map_lock(src_map);
9019
9020		if ((version.main_timestamp + 1) == src_map->timestamp)
9021			goto VerificationSuccessful;
9022
9023		/*
9024		 *	Simple version comparison failed.
9025		 *
9026		 *	Retry the lookup and verify that the
9027		 *	same object/offset are still present.
9028		 *
9029		 *	[Note: a memory manager that colludes with
9030		 *	the calling task can detect that we have
9031		 *	cheated.  While the map was unlocked, the
9032		 *	mapping could have been changed and restored.]
9033		 */
9034
9035		if (!vm_map_lookup_entry(src_map, src_start, &tmp_entry)) {
9036			if (result != KERN_MEMORY_RESTART_COPY) {
9037				vm_object_deallocate(new_entry->object.vm_object);
9038				new_entry->object.vm_object = VM_OBJECT_NULL;
9039				assert(!new_entry->iokit_acct);
9040				new_entry->use_pmap = TRUE;
9041			}
9042			RETURN(KERN_INVALID_ADDRESS);
9043		}
9044
9045		src_entry = tmp_entry;
9046		vm_map_clip_start(src_map, src_entry, src_start);
9047
9048		if ((((src_entry->protection & VM_PROT_READ) == VM_PROT_NONE) &&
9049		     !use_maxprot) ||
9050		    ((src_entry->max_protection & VM_PROT_READ) == 0))
9051			goto VerificationFailed;
9052
9053		if (src_entry->vme_end < new_entry->vme_end) {
9054			assert(VM_MAP_PAGE_ALIGNED(src_entry->vme_end,
9055						   VM_MAP_COPY_PAGE_MASK(copy)));
9056			new_entry->vme_end = src_entry->vme_end;
9057			src_size = new_entry->vme_end - src_start;
9058		}
9059
9060		if ((src_entry->object.vm_object != src_object) ||
9061		    (src_entry->offset != src_offset) ) {
9062
9063			/*
9064			 *	Verification failed.
9065			 *
9066			 *	Start over with this top-level entry.
9067			 */
9068
9069		VerificationFailed: ;
9070
9071			vm_object_deallocate(new_entry->object.vm_object);
9072			tmp_entry = src_entry;
9073			continue;
9074		}
9075
9076		/*
9077		 *	Verification succeeded.
9078		 */
9079
9080	VerificationSuccessful: ;
9081
9082		if (result == KERN_MEMORY_RESTART_COPY)
9083			goto RestartCopy;
9084
9085		/*
9086		 *	Copy succeeded.
9087		 */
9088
9089	CopySuccessful: ;
9090
9091		/*
9092		 *	Link in the new copy entry.
9093		 */
9094
9095		vm_map_copy_entry_link(copy, vm_map_copy_last_entry(copy),
9096				       new_entry);
9097
9098		/*
9099		 *	Determine whether the entire region
9100		 *	has been copied.
9101		 */
9102		src_base = src_start;
9103		src_start = new_entry->vme_end;
9104		new_entry = VM_MAP_ENTRY_NULL;
9105		while ((src_start >= src_end) && (src_end != 0)) {
9106			submap_map_t	*ptr;
9107
9108			if (src_map == base_map) {
9109				/* back to the top */
9110				break;
9111			}
9112
9113			ptr = parent_maps;
9114			assert(ptr != NULL);
9115			parent_maps = parent_maps->next;
9116
9117			/* fix up the damage we did in that submap */
9118			vm_map_simplify_range(src_map,
9119					      src_base,
9120					      src_end);
9121
9122			vm_map_unlock(src_map);
9123			vm_map_deallocate(src_map);
9124			vm_map_lock(ptr->parent_map);
9125			src_map = ptr->parent_map;
9126			src_base = ptr->base_start;
9127			src_start = ptr->base_start + ptr->base_len;
9128			src_end = ptr->base_end;
9129			if (!vm_map_lookup_entry(src_map,
9130						 src_start,
9131						 &tmp_entry) &&
9132			    (src_end > src_start)) {
9133				RETURN(KERN_INVALID_ADDRESS);
9134			}
9135			kfree(ptr, sizeof(submap_map_t));
9136			if (parent_maps == NULL)
9137				map_share = FALSE;
9138			src_entry = tmp_entry->vme_prev;
9139		}
9140
9141		if ((VM_MAP_PAGE_SHIFT(src_map) != PAGE_SHIFT) &&
9142		    (src_start >= src_addr + len) &&
9143		    (src_addr + len != 0)) {
9144			/*
9145			 * Stop copying now, even though we haven't reached
9146			 * "src_end".  We'll adjust the end of the last copy
9147			 * entry at the end, if needed.
9148			 *
9149			 * If src_map's alignment is different from the
9150			 * system's page-alignment, there could be
9151			 * extra non-map-aligned map entries between
9152			 * the original (non-rounded) "src_addr + len"
9153			 * and the rounded "src_end".
9154			 * We do not want to copy those map entries since
9155			 * they're not part of the copied range.
9156			 */
9157			break;
9158		}
9159
9160		if ((src_start >= src_end) && (src_end != 0))
9161			break;
9162
9163		/*
9164		 *	Verify that there are no gaps in the region
9165		 */
9166
9167		tmp_entry = src_entry->vme_next;
9168		if ((tmp_entry->vme_start != src_start) ||
9169		    (tmp_entry == vm_map_to_entry(src_map))) {
9170			RETURN(KERN_INVALID_ADDRESS);
9171		}
9172	}
9173
9174	/*
9175	 * If the source should be destroyed, do it now, since the
9176	 * copy was successful.
9177	 */
9178	if (src_destroy) {
9179		(void) vm_map_delete(
9180			src_map,
9181			vm_map_trunc_page(src_addr,
9182					  VM_MAP_PAGE_MASK(src_map)),
9183			src_end,
9184			((src_map == kernel_map) ?
9185			 VM_MAP_REMOVE_KUNWIRE :
9186			 VM_MAP_NO_FLAGS),
9187			VM_MAP_NULL);
9188	} else {
9189		/* fix up the damage we did in the base map */
9190		vm_map_simplify_range(
9191			src_map,
9192			vm_map_trunc_page(src_addr,
9193					  VM_MAP_PAGE_MASK(src_map)),
9194			vm_map_round_page(src_end,
9195					  VM_MAP_PAGE_MASK(src_map)));
9196	}
9197
9198	vm_map_unlock(src_map);
9199
9200	if (VM_MAP_PAGE_SHIFT(src_map) != PAGE_SHIFT) {
9201		vm_map_offset_t original_start, original_offset, original_end;
9202
9203		assert(VM_MAP_COPY_PAGE_MASK(copy) == PAGE_MASK);
9204
9205		/* adjust alignment of first copy_entry's "vme_start" */
9206		tmp_entry = vm_map_copy_first_entry(copy);
9207		if (tmp_entry != vm_map_copy_to_entry(copy)) {
9208			vm_map_offset_t adjustment;
9209
9210			original_start = tmp_entry->vme_start;
9211			original_offset = tmp_entry->offset;
9212
9213			/* map-align the start of the first copy entry... */
9214			adjustment = (tmp_entry->vme_start -
9215				      vm_map_trunc_page(
9216					      tmp_entry->vme_start,
9217					      VM_MAP_PAGE_MASK(src_map)));
9218			tmp_entry->vme_start -= adjustment;
9219			tmp_entry->offset -= adjustment;
9220			copy_addr -= adjustment;
9221			assert(tmp_entry->vme_start < tmp_entry->vme_end);
9222			/* ... adjust for mis-aligned start of copy range */
9223			adjustment =
9224				(vm_map_trunc_page(copy->offset,
9225						   PAGE_MASK) -
9226				 vm_map_trunc_page(copy->offset,
9227						   VM_MAP_PAGE_MASK(src_map)));
9228			if (adjustment) {
9229				assert(page_aligned(adjustment));
9230				assert(adjustment < VM_MAP_PAGE_SIZE(src_map));
9231				tmp_entry->vme_start += adjustment;
9232				tmp_entry->offset += adjustment;
9233				copy_addr += adjustment;
9234				assert(tmp_entry->vme_start < tmp_entry->vme_end);
9235			}
9236
9237			/*
9238			 * Assert that the adjustments haven't exposed
9239			 * more than was originally copied...
9240			 */
9241			assert(tmp_entry->vme_start >= original_start);
9242			assert(tmp_entry->offset >= original_offset);
9243			/*
9244			 * ... and that it did not adjust outside of
9245			 * a single 16K page.
9246			 */
9247			assert(vm_map_trunc_page(tmp_entry->vme_start,
9248						 VM_MAP_PAGE_MASK(src_map)) ==
9249			       vm_map_trunc_page(original_start,
9250						 VM_MAP_PAGE_MASK(src_map)));
9251		}
9252
9253		/* adjust alignment of last copy_entry's "vme_end" */
9254		tmp_entry = vm_map_copy_last_entry(copy);
9255		if (tmp_entry != vm_map_copy_to_entry(copy)) {
9256			vm_map_offset_t adjustment;
9257
9258			original_end = tmp_entry->vme_end;
9259
9260			/* map-align the end of the last copy entry... */
9261			tmp_entry->vme_end =
9262				vm_map_round_page(tmp_entry->vme_end,
9263						  VM_MAP_PAGE_MASK(src_map));
9264			/* ... adjust for mis-aligned end of copy range */
9265			adjustment =
9266				(vm_map_round_page((copy->offset +
9267						    copy->size),
9268						   VM_MAP_PAGE_MASK(src_map)) -
9269				 vm_map_round_page((copy->offset +
9270						    copy->size),
9271						   PAGE_MASK));
9272			if (adjustment) {
9273				assert(page_aligned(adjustment));
9274				assert(adjustment < VM_MAP_PAGE_SIZE(src_map));
9275				tmp_entry->vme_end -= adjustment;
9276				assert(tmp_entry->vme_start < tmp_entry->vme_end);
9277			}
9278
9279			/*
9280			 * Assert that the adjustments haven't exposed
9281			 * more than was originally copied...
9282			 */
9283			assert(tmp_entry->vme_end <= original_end);
9284			/*
9285			 * ... and that it did not adjust outside of
9286			 * a single 16K page.
9287			 */
9288			assert(vm_map_round_page(tmp_entry->vme_end,
9289						 VM_MAP_PAGE_MASK(src_map)) ==
9290			       vm_map_round_page(original_end,
9291						 VM_MAP_PAGE_MASK(src_map)));
9292		}
9293	}
9294
9295	/* Fix-up start and end points in copy.  This is necessary */
9296	/* when the various entries in the copy object were picked */
9297	/* up from different sub-maps */
9298
9299	tmp_entry = vm_map_copy_first_entry(copy);
9300	copy_size = 0; /* compute actual size */
9301	while (tmp_entry != vm_map_copy_to_entry(copy)) {
9302		assert(VM_MAP_PAGE_ALIGNED(
9303			       copy_addr + (tmp_entry->vme_end -
9304					    tmp_entry->vme_start),
9305			       VM_MAP_COPY_PAGE_MASK(copy)));
9306		assert(VM_MAP_PAGE_ALIGNED(
9307			       copy_addr,
9308			       VM_MAP_COPY_PAGE_MASK(copy)));
9309
9310		/*
9311		 * The copy_entries will be injected directly into the
9312		 * destination map and might not be "map aligned" there...
9313		 */
9314		tmp_entry->map_aligned = FALSE;
9315
9316		tmp_entry->vme_end = copy_addr +
9317			(tmp_entry->vme_end - tmp_entry->vme_start);
9318		tmp_entry->vme_start = copy_addr;
9319		assert(tmp_entry->vme_start < tmp_entry->vme_end);
9320		copy_addr += tmp_entry->vme_end - tmp_entry->vme_start;
9321		copy_size += tmp_entry->vme_end - tmp_entry->vme_start;
9322		tmp_entry = (struct vm_map_entry *)tmp_entry->vme_next;
9323	}
9324
9325	if (VM_MAP_PAGE_SHIFT(src_map) != PAGE_SHIFT &&
9326	    copy_size < copy->size) {
9327		/*
9328		 * The actual size of the VM map copy is smaller than what
9329		 * was requested by the caller.  This must be because some
9330		 * PAGE_SIZE-sized pages are missing at the end of the last
9331		 * VM_MAP_PAGE_SIZE(src_map)-sized chunk of the range.
9332		 * The caller might not have been aware of those missing
9333		 * pages and might not want to be aware of it, which is
9334		 * fine as long as they don't try to access (and crash on)
9335		 * those missing pages.
9336		 * Let's adjust the size of the "copy", to avoid failing
9337		 * in vm_map_copyout() or vm_map_copy_overwrite().
9338		 */
9339		assert(vm_map_round_page(copy_size,
9340					 VM_MAP_PAGE_MASK(src_map)) ==
9341		       vm_map_round_page(copy->size,
9342					 VM_MAP_PAGE_MASK(src_map)));
9343		copy->size = copy_size;
9344	}
9345
9346	*copy_result = copy;
9347	return(KERN_SUCCESS);
9348
9349#undef	RETURN
9350}
9351
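/*
 *	Routine:	vm_map_copy_extract
 *
 *	Description:
 *		Create a copy object whose entries share the given region
 *		of the source map (via vm_map_remap_extract) instead of
 *		copying its contents.  The effective protections of the
 *		extracted range are returned in "cur_prot" and "max_prot".
 */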
9352kern_return_t
9353vm_map_copy_extract(
9354	vm_map_t		src_map,
9355	vm_map_address_t	src_addr,
9356	vm_map_size_t		len,
9357	vm_map_copy_t		*copy_result,	/* OUT */
9358	vm_prot_t		*cur_prot,	/* OUT */
9359	vm_prot_t		*max_prot)
9360{
9361	vm_map_offset_t	src_start, src_end;
9362	vm_map_copy_t	copy;
9363	kern_return_t	kr;
9364
9365	/*
9366	 *	Check for copies of zero bytes.
9367	 */
9368
9369	if (len == 0) {
9370		*copy_result = VM_MAP_COPY_NULL;
9371		return(KERN_SUCCESS);
9372	}
9373
9374	/*
9375	 *	Check that the end address doesn't overflow
9376	 */
9377	src_end = src_addr + len;
9378	if (src_end < src_addr)
9379		return KERN_INVALID_ADDRESS;
9380
9381	/*
9382	 *	Compute (page aligned) start and end of region
9383	 */
9384	src_start = vm_map_trunc_page(src_addr, PAGE_MASK);
9385	src_end = vm_map_round_page(src_end, PAGE_MASK);
9386
9387	/*
9388	 *	Allocate a header element for the list.
9389	 *
9390	 *	Use the start and end in the header to
9391	 *	remember the endpoints prior to rounding.
9392	 */
9393
9394	copy = (vm_map_copy_t) zalloc(vm_map_copy_zone);
9395	vm_map_copy_first_entry(copy) =
9396		vm_map_copy_last_entry(copy) = vm_map_copy_to_entry(copy);
9397	copy->type = VM_MAP_COPY_ENTRY_LIST;
9398	copy->cpy_hdr.nentries = 0;
9399	copy->cpy_hdr.entries_pageable = TRUE;
9400
9401	vm_map_store_init(&copy->cpy_hdr);
9402
9403	copy->offset = 0;
9404	copy->size = len;
9405
9406	kr = vm_map_remap_extract(src_map,
9407				  src_addr,
9408				  len,
9409				  FALSE, /* copy */
9410				  &copy->cpy_hdr,
9411				  cur_prot,
9412				  max_prot,
9413				  VM_INHERIT_SHARE,
9414				  TRUE); /* pageable */
9415	if (kr != KERN_SUCCESS) {
9416		vm_map_copy_discard(copy);
9417		return kr;
9418	}
9419
9420	*copy_result = copy;
9421	return KERN_SUCCESS;
9422}
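/*
 *	Hypothetical usage sketch for vm_map_copy_extract(), illustrative
 *	only and not part of the build: extract an entry-list copy of "len"
 *	bytes of "src_map" and inject it into another map.  The destination
 *	handling and error policy here are assumptions.
 *
 *		vm_map_copy_t		copy;
 *		vm_prot_t		cur_prot, max_prot;
 *		vm_map_address_t	dst_addr;
 *		kern_return_t		kr;
 *
 *		kr = vm_map_copy_extract(src_map, src_addr, len,
 *					 &copy, &cur_prot, &max_prot);
 *		if (kr == KERN_SUCCESS) {
 *			kr = vm_map_copyout(dst_map, &dst_addr, copy);
 *			if (kr != KERN_SUCCESS)
 *				vm_map_copy_discard(copy);
 *		}
 */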
9423
9424/*
9425 *	vm_map_copyin_object:
9426 *
9427 *	Create a copy object from an object.
9428 *	Our caller donates an object reference.
9429 */
9430
9431kern_return_t
9432vm_map_copyin_object(
9433	vm_object_t		object,
9434	vm_object_offset_t	offset,	/* offset of region in object */
9435	vm_object_size_t	size,	/* size of region in object */
9436	vm_map_copy_t	*copy_result)	/* OUT */
9437{
9438	vm_map_copy_t	copy;		/* Resulting copy */
9439
9440	/*
9441	 *	We drop the object into a special copy object
9442	 *	that contains the object directly.
9443	 */
9444
9445	copy = (vm_map_copy_t) zalloc(vm_map_copy_zone);
9446	copy->type = VM_MAP_COPY_OBJECT;
9447	copy->cpy_object = object;
9448	copy->offset = offset;
9449	copy->size = size;
9450
9451	*copy_result = copy;
9452	return(KERN_SUCCESS);
9453}
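/*
 *	Minimal hypothetical sketch (not part of the build): wrap an object
 *	reference the caller owns so it can travel as a vm_map_copy_t, e.g.
 *	to be handed to vm_map_copyout() later.  The extra reference taken
 *	here is the one being donated; "object" and "size" are assumptions.
 *
 *		vm_map_copy_t	copy;
 *
 *		vm_object_reference(object);
 *		(void) vm_map_copyin_object(object, 0,
 *					    (vm_object_size_t) size, &copy);
 */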
9454
9455static void
9456vm_map_fork_share(
9457	vm_map_t	old_map,
9458	vm_map_entry_t	old_entry,
9459	vm_map_t	new_map)
9460{
9461	vm_object_t 	object;
9462	vm_map_entry_t 	new_entry;
9463
9464	/*
9465	 *	New sharing code.  New map entry
9466	 *	references original object.  Internal
9467	 *	objects use asynchronous copy algorithm for
9468	 *	future copies.  First make sure we have
9469	 *	the right object.  If we need a shadow,
9470	 *	or someone else already has one, then
9471	 *	make a new shadow and share it.
9472	 */
9473
9474	object = old_entry->object.vm_object;
9475	if (old_entry->is_sub_map) {
9476		assert(old_entry->wired_count == 0);
9477#ifndef NO_NESTED_PMAP
9478		if(old_entry->use_pmap) {
9479			kern_return_t	result;
9480
9481			result = pmap_nest(new_map->pmap,
9482					   (old_entry->object.sub_map)->pmap,
9483					   (addr64_t)old_entry->vme_start,
9484					   (addr64_t)old_entry->vme_start,
9485					   (uint64_t)(old_entry->vme_end - old_entry->vme_start));
9486			if(result)
9487				panic("vm_map_fork_share: pmap_nest failed!");
9488		}
9489#endif	/* NO_NESTED_PMAP */
9490	} else if (object == VM_OBJECT_NULL) {
9491		object = vm_object_allocate((vm_map_size_t)(old_entry->vme_end -
9492							    old_entry->vme_start));
9493		old_entry->offset = 0;
9494		old_entry->object.vm_object = object;
9495		old_entry->use_pmap = TRUE;
9496		assert(!old_entry->needs_copy);
9497	} else if (object->copy_strategy !=
9498		   MEMORY_OBJECT_COPY_SYMMETRIC) {
9499
9500		/*
9501		 *	We are already using an asymmetric
9502		 *	copy, and therefore we already have
9503		 *	the right object.
9504		 */
9505
9506		assert(! old_entry->needs_copy);
9507	}
9508	else if (old_entry->needs_copy ||	/* case 1 */
9509		 object->shadowed ||		/* case 2 */
9510		 (!object->true_share && 	/* case 3 */
9511		  !old_entry->is_shared &&
9512		  (object->vo_size >
9513		   (vm_map_size_t)(old_entry->vme_end -
9514				   old_entry->vme_start)))) {
9515
9516		/*
9517		 *	We need to create a shadow.
9518		 *	There are three cases here.
9519		 *	In the first case, we need to
9520		 *	complete a deferred symmetrical
9521		 *	copy that we participated in.
9522		 *	In the second and third cases,
9523		 *	we need to create the shadow so
9524		 *	that changes that we make to the
9525		 *	object do not interfere with
9526		 *	any symmetrical copies which
9527		 *	have occurred (case 2) or which
9528		 *	might occur (case 3).
9529		 *
9530		 *	The first case is when we had
9531		 *	deferred shadow object creation
9532		 *	via the entry->needs_copy mechanism.
9533		 *	This mechanism only works when
9534		 *	only one entry points to the source
9535		 *	object, and we are about to create
9536		 *	a second entry pointing to the
9537		 *	same object. The problem is that
9538		 *	there is no way of mapping from
9539		 *	an object to the entries pointing
9540		 *	to it. (Deferred shadow creation
9541		 *	works with one entry because it occurs
9542		 *	at fault time, and we walk from the
9543		 *	entry to the object when handling
9544		 *	the fault.)
9545		 *
9546		 *	The second case is when the object
9547		 *	to be shared has already been copied
9548		 *	with a symmetric copy, but we point
9549		 *	directly to the object without
9550		 *	needs_copy set in our entry. (This
9551		 *	can happen because different ranges
9552		 *	of an object can be pointed to by
9553		 *	different entries. In particular,
9554		 *	a single entry pointing to an object
9555		 *	can be split by a call to vm_inherit,
9556		 *	which, combined with task_create, can
9557		 *	result in the different entries
9558		 *	having different needs_copy values.)
9559		 *	The shadowed flag in the object allows
9560		 *	us to detect this case. The problem
9561		 *	with this case is that if this object
9562		 *	has or will have shadows, then we
9563		 *	must not perform an asymmetric copy
9564		 *	of this object, since such a copy
9565		 *	allows the object to be changed, which
9566		 *	will break the previous symmetrical
9567		 *	copies (which rely upon the object
9568		 *	not changing). In a sense, the shadowed
9569		 *	flag says "don't change this object".
9570		 *	We fix this by creating a shadow
9571		 *	object for this object, and sharing
9572		 *	that. This works because we are free
9573		 *	to change the shadow object (and thus
9574		 *	to use an asymmetric copy strategy);
9575		 *	this is also semantically correct,
9576		 *	since this object is temporary, and
9577		 *	therefore a copy of the object is
9578		 *	as good as the object itself. (This
9579		 *	is not true for permanent objects,
9580		 *	since the pager needs to see changes,
9581		 *	which won't happen if the changes
9582		 *	are made to a copy.)
9583		 *
9584		 *	The third case is when the object
9585		 *	to be shared has parts sticking
9586		 *	outside of the entry we're working
9587		 *	with, and thus may in the future
9588		 *	be subject to a symmetrical copy.
9589		 *	(This is a preemptive version of
9590		 *	case 2.)
9591		 */
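		/*
		 * Condensed restatement of the three cases above (summary
		 * only, no new logic):
		 *	case 1: old_entry->needs_copy
		 *		-> finish the deferred symmetric copy now
		 *	case 2: object->shadowed
		 *		-> protect symmetric copies already taken
		 *	case 3: object bigger than this entry's range
		 *		-> pre-empt symmetric copies that may follow
		 */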
9592		vm_object_shadow(&old_entry->object.vm_object,
9593				 &old_entry->offset,
9594				 (vm_map_size_t) (old_entry->vme_end -
9595						  old_entry->vme_start));
9596
9597		/*
9598		 *	If we're making a shadow for reasons other
9599		 *	than copy-on-write, then we have
9600		 *	to remove write permission.
9601		 */
9602
9603		if (!old_entry->needs_copy &&
9604		    (old_entry->protection & VM_PROT_WRITE)) {
9605		        vm_prot_t prot;
9606
9607			prot = old_entry->protection & ~VM_PROT_WRITE;
9608
9609			if (override_nx(old_map, old_entry->alias) && prot)
9610			        prot |= VM_PROT_EXECUTE;
9611
9612			if (old_map->mapped_in_other_pmaps) {
9613				vm_object_pmap_protect(
9614					old_entry->object.vm_object,
9615					old_entry->offset,
9616					(old_entry->vme_end -
9617					 old_entry->vme_start),
9618					PMAP_NULL,
9619					old_entry->vme_start,
9620					prot);
9621			} else {
9622				pmap_protect(old_map->pmap,
9623					     old_entry->vme_start,
9624					     old_entry->vme_end,
9625					     prot);
9626			}
9627		}
9628
9629		old_entry->needs_copy = FALSE;
9630		object = old_entry->object.vm_object;
9631	}
9632
9633
9634	/*
9635	 *	If object was using a symmetric copy strategy,
9636	 *	change its copy strategy to the default
9637	 *	asymmetric copy strategy, which is copy_delay
9638	 *	in the non-norma case and copy_call in the
9639	 *	norma case. Bump the reference count for the
9640	 *	new entry.
9641	 */
9642
9643	if(old_entry->is_sub_map) {
9644		vm_map_lock(old_entry->object.sub_map);
9645		vm_map_reference(old_entry->object.sub_map);
9646		vm_map_unlock(old_entry->object.sub_map);
9647	} else {
9648		vm_object_lock(object);
9649		vm_object_reference_locked(object);
9650		if (object->copy_strategy == MEMORY_OBJECT_COPY_SYMMETRIC) {
9651			object->copy_strategy = MEMORY_OBJECT_COPY_DELAY;
9652		}
9653		vm_object_unlock(object);
9654	}
9655
9656	/*
9657	 *	Clone the entry, using object ref from above.
9658	 *	Mark both entries as shared.
9659	 */
9660
9661	new_entry = vm_map_entry_create(new_map, FALSE); /* Never the kernel
9662							  * map or descendants */
9663	vm_map_entry_copy(new_entry, old_entry);
9664	old_entry->is_shared = TRUE;
9665	new_entry->is_shared = TRUE;
9666
9667	/*
9668	 *	Insert the entry into the new map -- we
9669	 *	know we're inserting at the end of the new
9670	 *	map.
9671	 */
9672
9673	vm_map_store_entry_link(new_map, vm_map_last_entry(new_map), new_entry);
9674
9675	/*
9676	 *	Update the physical map
9677	 */
9678
9679	if (old_entry->is_sub_map) {
9680		/* Bill Angell pmap support goes here */
9681	} else {
9682		pmap_copy(new_map->pmap, old_map->pmap, new_entry->vme_start,
9683			  old_entry->vme_end - old_entry->vme_start,
9684			  old_entry->vme_start);
9685	}
9686}
9687
9688static boolean_t
9689vm_map_fork_copy(
9690	vm_map_t	old_map,
9691	vm_map_entry_t	*old_entry_p,
9692	vm_map_t	new_map)
9693{
9694	vm_map_entry_t old_entry = *old_entry_p;
9695	vm_map_size_t entry_size = old_entry->vme_end - old_entry->vme_start;
9696	vm_map_offset_t start = old_entry->vme_start;
9697	vm_map_copy_t copy;
9698	vm_map_entry_t last = vm_map_last_entry(new_map);
9699
9700	vm_map_unlock(old_map);
9701	/*
9702	 *	Use maxprot version of copyin because we
9703	 *	care about whether this memory can ever
9704	 *	be accessed, not just whether it's accessible
9705	 *	right now.
9706	 */
9707	if (vm_map_copyin_maxprot(old_map, start, entry_size, FALSE, &copy)
9708	    != KERN_SUCCESS) {
9709		/*
9710		 *	The map might have changed while it
9711		 *	was unlocked, check it again.  Skip
9712		 *	any blank space or permanently
9713		 *	unreadable region.
9714		 */
9715		vm_map_lock(old_map);
9716		if (!vm_map_lookup_entry(old_map, start, &last) ||
9717		    (last->max_protection & VM_PROT_READ) == VM_PROT_NONE) {
9718			last = last->vme_next;
9719		}
9720		*old_entry_p = last;
9721
9722		/*
9723		 * XXX	For some error returns, want to
9724		 * XXX	skip to the next element.  Note
9725		 *	that INVALID_ADDRESS and
9726		 *	PROTECTION_FAILURE are handled above.
9727		 */
9728
9729		return FALSE;
9730	}
9731
9732	/*
9733	 *	Insert the copy into the new map
9734	 */
9735
9736	vm_map_copy_insert(new_map, last, copy);
9737
9738	/*
9739	 *	Pick up the traversal at the end of
9740	 *	the copied region.
9741	 */
9742
9743	vm_map_lock(old_map);
9744	start += entry_size;
9745	if (! vm_map_lookup_entry(old_map, start, &last)) {
9746		last = last->vme_next;
9747	} else {
9748		if (last->vme_start == start) {
9749			/*
9750			 * No need to clip here and we don't
9751			 * want to cause any unnecessary
9752			 * unnesting...
9753			 */
9754		} else {
9755			vm_map_clip_start(old_map, last, start);
9756		}
9757	}
9758	*old_entry_p = last;
9759
9760	return TRUE;
9761}
9762
9763/*
9764 *	vm_map_fork:
9765 *
9766 *	Create and return a new map based on the old
9767 *	map, according to the inheritance values on the
9768 *	regions in that map.
9769 *
9770 *	The source map must not be locked.
9771 */
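/*
 *	Illustrative user-space sketch (not part of the build): the switch
 *	below acts on per-entry inheritance values that a task can set ahead
 *	of time with mach_vm_inherit().  The buffer name and size are
 *	hypothetical.
 *
 *		#include <mach/mach.h>
 *		#include <mach/mach_vm.h>
 *
 *		// keep this region out of any child created by fork():
 *		mach_vm_inherit(mach_task_self(),
 *				(mach_vm_address_t) buf, buf_size,
 *				VM_INHERIT_NONE);
 *		// VM_INHERIT_SHARE        -> vm_map_fork_share() below,
 *		// VM_INHERIT_COPY (default) -> copy-on-write copy.
 */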
9772vm_map_t
9773vm_map_fork(
9774	ledger_t	ledger,
9775	vm_map_t	old_map)
9776{
9777	pmap_t		new_pmap;
9778	vm_map_t	new_map;
9779	vm_map_entry_t	old_entry;
9780	vm_map_size_t	new_size = 0, entry_size;
9781	vm_map_entry_t	new_entry;
9782	boolean_t	src_needs_copy;
9783	boolean_t	new_entry_needs_copy;
9784
9785	new_pmap = pmap_create(ledger, (vm_map_size_t) 0,
9786#if defined(__i386__) || defined(__x86_64__)
9787			       old_map->pmap->pm_task_map != TASK_MAP_32BIT
9788#else
9789#error Unknown architecture.
9790#endif
9791			       );
9792
9793	vm_map_reference_swap(old_map);
9794	vm_map_lock(old_map);
9795
9796	new_map = vm_map_create(new_pmap,
9797				old_map->min_offset,
9798				old_map->max_offset,
9799				old_map->hdr.entries_pageable);
9800	/* inherit the parent map's page size */
9801	vm_map_set_page_shift(new_map, VM_MAP_PAGE_SHIFT(old_map));
9802	for (
9803		old_entry = vm_map_first_entry(old_map);
9804		old_entry != vm_map_to_entry(old_map);
9805		) {
9806
9807		entry_size = old_entry->vme_end - old_entry->vme_start;
9808
9809		switch (old_entry->inheritance) {
9810		case VM_INHERIT_NONE:
9811			break;
9812
9813		case VM_INHERIT_SHARE:
9814			vm_map_fork_share(old_map, old_entry, new_map);
9815			new_size += entry_size;
9816			break;
9817
9818		case VM_INHERIT_COPY:
9819
9820			/*
9821			 *	Inline the copy_quickly case;
9822			 *	upon failure, fall back on call
9823			 *	to vm_map_fork_copy.
9824			 */
9825
9826			if(old_entry->is_sub_map)
9827				break;
9828			if ((old_entry->wired_count != 0) ||
9829			    ((old_entry->object.vm_object != NULL) &&
9830			     (old_entry->object.vm_object->true_share))) {
9831				goto slow_vm_map_fork_copy;
9832			}
9833
9834			new_entry = vm_map_entry_create(new_map, FALSE); /* never the kernel map or descendants */
9835			vm_map_entry_copy(new_entry, old_entry);
9836			if (new_entry->is_sub_map) {
9837				/* clear address space specifics */
9838				new_entry->use_pmap = FALSE;
9839			}
9840
9841			if (! vm_object_copy_quickly(
9842				    &new_entry->object.vm_object,
9843				    old_entry->offset,
9844				    (old_entry->vme_end -
9845				     old_entry->vme_start),
9846				    &src_needs_copy,
9847				    &new_entry_needs_copy)) {
9848				vm_map_entry_dispose(new_map, new_entry);
9849				goto slow_vm_map_fork_copy;
9850			}
9851
9852			/*
9853			 *	Handle copy-on-write obligations
9854			 */
9855
9856			if (src_needs_copy && !old_entry->needs_copy) {
9857			        vm_prot_t prot;
9858
9859				prot = old_entry->protection & ~VM_PROT_WRITE;
9860
9861				if (override_nx(old_map, old_entry->alias) && prot)
9862				        prot |= VM_PROT_EXECUTE;
9863
9864				vm_object_pmap_protect(
9865					old_entry->object.vm_object,
9866					old_entry->offset,
9867					(old_entry->vme_end -
9868					 old_entry->vme_start),
9869					((old_entry->is_shared
9870					  || old_map->mapped_in_other_pmaps)
9871					 ? PMAP_NULL :
9872					 old_map->pmap),
9873					old_entry->vme_start,
9874					prot);
9875
9876				old_entry->needs_copy = TRUE;
9877			}
9878			new_entry->needs_copy = new_entry_needs_copy;
9879
9880			/*
9881			 *	Insert the entry at the end
9882			 *	of the map.
9883			 */
9884
9885			vm_map_store_entry_link(new_map, vm_map_last_entry(new_map),
9886					  new_entry);
9887			new_size += entry_size;
9888			break;
9889
9890		slow_vm_map_fork_copy:
9891			if (vm_map_fork_copy(old_map, &old_entry, new_map)) {
9892				new_size += entry_size;
9893			}
9894			continue;
9895		}
9896		old_entry = old_entry->vme_next;
9897	}
9898
9899
9900	new_map->size = new_size;
9901	vm_map_unlock(old_map);
9902	vm_map_deallocate(old_map);
9903
9904	return(new_map);
9905}
9906
9907/*
9908 * vm_map_exec:
9909 *
9910 * 	Set up the "new_map" with the proper execution environment according
9911 *	to the type of executable (platform, 64-bit, chroot environment).
9912 *	Map the comm page and shared region, etc...
9913 */
9914kern_return_t
9915vm_map_exec(
9916	vm_map_t	new_map,
9917	task_t		task,
9918	void		*fsroot,
9919	cpu_type_t	cpu)
9920{
9921	SHARED_REGION_TRACE_DEBUG(
9922		("shared_region: task %p: vm_map_exec(%p,%p,%p,0x%x): ->\n",
9923		 (void *)VM_KERNEL_ADDRPERM(current_task()),
9924		 (void *)VM_KERNEL_ADDRPERM(new_map),
9925		 (void *)VM_KERNEL_ADDRPERM(task),
9926		 (void *)VM_KERNEL_ADDRPERM(fsroot),
9927		 cpu));
9928	(void) vm_commpage_enter(new_map, task);
9929	(void) vm_shared_region_enter(new_map, task, fsroot, cpu);
9930	SHARED_REGION_TRACE_DEBUG(
9931		("shared_region: task %p: vm_map_exec(%p,%p,%p,0x%x): <-\n",
9932		 (void *)VM_KERNEL_ADDRPERM(current_task()),
9933		 (void *)VM_KERNEL_ADDRPERM(new_map),
9934		 (void *)VM_KERNEL_ADDRPERM(task),
9935		 (void *)VM_KERNEL_ADDRPERM(fsroot),
9936		 cpu));
9937	return KERN_SUCCESS;
9938}
9939
9940/*
9941 *	vm_map_lookup_locked:
9942 *
9943 *	Finds the VM object, offset, and
9944 *	protection for a given virtual address in the
9945 *	specified map, assuming a page fault of the
9946 *	type specified.
9947 *
9948 *	Returns the (object, offset, protection) for
9949 *	this address, whether it is wired down, and whether
9950 *	this map has the only reference to the data in question.
9951 *	In order to later verify this lookup, a "version"
9952 *	is returned.
9953 *
9954 *	The map MUST be locked by the caller and WILL be
9955 *	locked on exit.  In order to guarantee the
9956 *	existence of the returned object, it is returned
9957 *	locked.
9958 *
9959 *	If a lookup is requested with "write protection"
9960 *	specified, the map may be changed to perform virtual
9961 *	copying operations, although the data referenced will
9962 *	remain the same.
9963 */
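/*
 *	Hypothetical caller sketch, illustrative only (names, error handling
 *	and the treatment of "real_map" are assumptions): the usual pattern
 *	is lookup, save the version, drop the map lock while working on the
 *	returned object, then revalidate before trusting the entry again.
 *
 *		vm_map_version_t	version;
 *		vm_object_t		object;
 *		vm_object_offset_t	offset;
 *		vm_prot_t		prot;
 *		boolean_t		wired;
 *		vm_map_t		real_map;
 *
 *		vm_map_lock_read(map);
 *		kr = vm_map_lookup_locked(&map, vaddr, VM_PROT_READ,
 *					  OBJECT_LOCK_EXCLUSIVE, &version,
 *					  &object, &offset, &prot, &wired,
 *					  NULL, &real_map);
 *		// on success: "object" comes back locked, the map is still
 *		// read-locked, and "version" records map->timestamp; unlock,
 *		// do the work, then check vm_map_verify(map, &version).
 */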
9964kern_return_t
9965vm_map_lookup_locked(
9966	vm_map_t		*var_map,	/* IN/OUT */
9967	vm_map_offset_t		vaddr,
9968	vm_prot_t		fault_type,
9969	int			object_lock_type,
9970	vm_map_version_t	*out_version,	/* OUT */
9971	vm_object_t		*object,	/* OUT */
9972	vm_object_offset_t	*offset,	/* OUT */
9973	vm_prot_t		*out_prot,	/* OUT */
9974	boolean_t		*wired,		/* OUT */
9975	vm_object_fault_info_t	fault_info,	/* OUT */
9976	vm_map_t		*real_map)
9977{
9978	vm_map_entry_t			entry;
9979	register vm_map_t		map = *var_map;
9980	vm_map_t			old_map = *var_map;
9981	vm_map_t			cow_sub_map_parent = VM_MAP_NULL;
9982	vm_map_offset_t			cow_parent_vaddr = 0;
9983	vm_map_offset_t			old_start = 0;
9984	vm_map_offset_t			old_end = 0;
9985	register vm_prot_t		prot;
9986	boolean_t			mask_protections;
9987	boolean_t			force_copy;
9988	vm_prot_t			original_fault_type;
9989
9990	/*
9991	 * VM_PROT_MASK means that the caller wants us to use "fault_type"
9992	 * as a mask against the mapping's actual protections, not as an
9993	 * absolute value.
9994	 */
9995	mask_protections = (fault_type & VM_PROT_IS_MASK) ? TRUE : FALSE;
9996	force_copy = (fault_type & VM_PROT_COPY) ? TRUE : FALSE;
9997	fault_type &= VM_PROT_ALL;
9998	original_fault_type = fault_type;
9999
10000	*real_map = map;
10001
10002RetryLookup:
10003	fault_type = original_fault_type;
10004
10005	/*
10006	 *	If the map has an interesting hint, try it before calling
10007	 *	the full-blown lookup routine.
10008	 */
10009	entry = map->hint;
10010
10011	if ((entry == vm_map_to_entry(map)) ||
10012	    (vaddr < entry->vme_start) || (vaddr >= entry->vme_end)) {
10013		vm_map_entry_t	tmp_entry;
10014
10015		/*
10016		 *	Entry was either not a valid hint, or the vaddr
10017		 *	was not contained in the entry, so do a full lookup.
10018		 */
10019		if (!vm_map_lookup_entry(map, vaddr, &tmp_entry)) {
10020			if((cow_sub_map_parent) && (cow_sub_map_parent != map))
10021				vm_map_unlock(cow_sub_map_parent);
10022			if((*real_map != map)
10023			   && (*real_map != cow_sub_map_parent))
10024				vm_map_unlock(*real_map);
10025			return KERN_INVALID_ADDRESS;
10026		}
10027
10028		entry = tmp_entry;
10029	}
10030	if(map == old_map) {
10031		old_start = entry->vme_start;
10032		old_end = entry->vme_end;
10033	}
10034
10035	/*
10036	 *	Handle submaps.  Drop lock on upper map, submap is
10037	 *	returned locked.
10038	 */
10039
10040submap_recurse:
10041	if (entry->is_sub_map) {
10042		vm_map_offset_t		local_vaddr;
10043		vm_map_offset_t		end_delta;
10044		vm_map_offset_t		start_delta;
10045		vm_map_entry_t		submap_entry;
10046		boolean_t		mapped_needs_copy=FALSE;
10047
10048		local_vaddr = vaddr;
10049
10050		if ((entry->use_pmap && !(fault_type & VM_PROT_WRITE))) {
10051			/* if real_map equals map we unlock below */
10052			if ((*real_map != map) &&
10053			    (*real_map != cow_sub_map_parent))
10054				vm_map_unlock(*real_map);
10055			*real_map = entry->object.sub_map;
10056		}
10057
10058		if(entry->needs_copy && (fault_type & VM_PROT_WRITE)) {
10059			if (!mapped_needs_copy) {
10060				if (vm_map_lock_read_to_write(map)) {
10061					vm_map_lock_read(map);
10062					*real_map = map;
10063					goto RetryLookup;
10064				}
10065				vm_map_lock_read(entry->object.sub_map);
10066				*var_map = entry->object.sub_map;
10067				cow_sub_map_parent = map;
10068				/* reset base to map before cow object */
10069				/* this is the map which will accept   */
10070				/* the new cow object */
10071				old_start = entry->vme_start;
10072				old_end = entry->vme_end;
10073				cow_parent_vaddr = vaddr;
10074				mapped_needs_copy = TRUE;
10075			} else {
10076				vm_map_lock_read(entry->object.sub_map);
10077				*var_map = entry->object.sub_map;
10078				if((cow_sub_map_parent != map) &&
10079				   (*real_map != map))
10080					vm_map_unlock(map);
10081			}
10082		} else {
10083			vm_map_lock_read(entry->object.sub_map);
10084			*var_map = entry->object.sub_map;
10085			/* leave map locked if it is a target */
10086			/* cow sub_map above; otherwise, just */
10087			/* follow the maps down to the object */
10088			/* here we unlock knowing we are not  */
10089			/* revisiting the map.  */
10090			if((*real_map != map) && (map != cow_sub_map_parent))
10091				vm_map_unlock_read(map);
10092		}
10093
10094		map = *var_map;
10095
10096		/* calculate the offset in the submap for vaddr */
10097		local_vaddr = (local_vaddr - entry->vme_start) + entry->offset;
10098
10099	RetrySubMap:
10100		if(!vm_map_lookup_entry(map, local_vaddr, &submap_entry)) {
10101			if((cow_sub_map_parent) && (cow_sub_map_parent != map)){
10102				vm_map_unlock(cow_sub_map_parent);
10103			}
10104			if((*real_map != map)
10105			   && (*real_map != cow_sub_map_parent)) {
10106				vm_map_unlock(*real_map);
10107			}
10108			*real_map = map;
10109			return KERN_INVALID_ADDRESS;
10110		}
10111
10112		/* find the attenuated shadow of the underlying object */
10113		/* on our target map */
10114
10115		/* In English: the submap object may extend beyond the    */
10116		/* region mapped by the entry, or may only fill a portion */
10117		/* of it.  For our purposes, we only care if the object   */
10118		/* doesn't fill it.  In this case the area which will     */
10119		/* ultimately be clipped in the top map will only need    */
10120		/* to be as big as the portion of the underlying entry    */
10121		/* which is mapped */
10122		start_delta = submap_entry->vme_start > entry->offset ?
10123			submap_entry->vme_start - entry->offset : 0;
10124
10125		end_delta =
10126			(entry->offset + start_delta + (old_end - old_start)) <=
10127			submap_entry->vme_end ?
10128			0 : (entry->offset +
10129			     (old_end - old_start))
10130			- submap_entry->vme_end;
10131
10132		old_start += start_delta;
10133		old_end -= end_delta;
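		/*
		 * Worked example with made-up numbers: if this entry maps
		 * the submap at entry->offset 0x4000, the parent range is
		 * 0x10000 bytes (old_end - old_start), and submap_entry
		 * covers [0x6000, 0xC000) of the submap, then
		 *	start_delta = 0x6000 - 0x4000 = 0x2000
		 *	end_delta   = (0x4000 + 0x10000) - 0xC000 = 0x8000
		 * and the trimmed [old_start, old_end) window shrinks to
		 * 0x6000 bytes -- exactly the part backed by submap_entry.
		 */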
10134
10135		if(submap_entry->is_sub_map) {
10136			entry = submap_entry;
10137			vaddr = local_vaddr;
10138			goto submap_recurse;
10139		}
10140
10141		if(((fault_type & VM_PROT_WRITE) && cow_sub_map_parent)) {
10142
10143			vm_object_t	sub_object, copy_object;
10144			vm_object_offset_t copy_offset;
10145			vm_map_offset_t	local_start;
10146			vm_map_offset_t	local_end;
10147			boolean_t		copied_slowly = FALSE;
10148
10149			if (vm_map_lock_read_to_write(map)) {
10150				vm_map_lock_read(map);
10151				old_start -= start_delta;
10152				old_end += end_delta;
10153				goto RetrySubMap;
10154			}
10155
10156
10157			sub_object = submap_entry->object.vm_object;
10158			if (sub_object == VM_OBJECT_NULL) {
10159				sub_object =
10160					vm_object_allocate(
10161						(vm_map_size_t)
10162						(submap_entry->vme_end -
10163						 submap_entry->vme_start));
10164				submap_entry->object.vm_object = sub_object;
10165				submap_entry->offset = 0;
10166			}
10167			local_start =  local_vaddr -
10168				(cow_parent_vaddr - old_start);
10169			local_end = local_vaddr +
10170				(old_end - cow_parent_vaddr);
10171			vm_map_clip_start(map, submap_entry, local_start);
10172			vm_map_clip_end(map, submap_entry, local_end);
10173			if (submap_entry->is_sub_map) {
10174				/* unnesting was done when clipping */
10175				assert(!submap_entry->use_pmap);
10176			}
10177
10178			/* This is the COW case, let's connect */
10179			/* an entry in our space to the underlying */
10180			/* object in the submap, bypassing the  */
10181			/* submap. */
10182
10183
10184			if(submap_entry->wired_count != 0 ||
10185			   (sub_object->copy_strategy ==
10186			    MEMORY_OBJECT_COPY_NONE)) {
10187				vm_object_lock(sub_object);
10188				vm_object_copy_slowly(sub_object,
10189						      submap_entry->offset,
10190						      (submap_entry->vme_end -
10191						       submap_entry->vme_start),
10192						      FALSE,
10193						      &copy_object);
10194				copied_slowly = TRUE;
10195			} else {
10196
10197				/* set up shadow object */
10198				copy_object = sub_object;
10199				vm_object_reference(copy_object);
10200				sub_object->shadowed = TRUE;
10201				submap_entry->needs_copy = TRUE;
10202
10203				prot = submap_entry->protection & ~VM_PROT_WRITE;
10204
10205				if (override_nx(old_map, submap_entry->alias) && prot)
10206				        prot |= VM_PROT_EXECUTE;
10207
10208				vm_object_pmap_protect(
10209					sub_object,
10210					submap_entry->offset,
10211					submap_entry->vme_end -
10212					submap_entry->vme_start,
10213					(submap_entry->is_shared
10214					 || map->mapped_in_other_pmaps) ?
10215					PMAP_NULL : map->pmap,
10216					submap_entry->vme_start,
10217					prot);
10218			}
10219
10220			/*
10221			 * Adjust the fault offset to the submap entry.
10222			 */
10223			copy_offset = (local_vaddr -
10224				       submap_entry->vme_start +
10225				       submap_entry->offset);
10226
10227			/* This works differently from the */
10228			/* normal submap case. We go back  */
10229			/* to the parent of the cow map and*/
10230			/* clip out the target portion of  */
10231			/* the sub_map, substituting the   */
10232			/* new copy object.                */
10233
10234			vm_map_unlock(map);
10235			local_start = old_start;
10236			local_end = old_end;
10237			map = cow_sub_map_parent;
10238			*var_map = cow_sub_map_parent;
10239			vaddr = cow_parent_vaddr;
10240			cow_sub_map_parent = NULL;
10241
10242			if(!vm_map_lookup_entry(map,
10243						vaddr, &entry)) {
10244				vm_object_deallocate(
10245					copy_object);
10246				vm_map_lock_write_to_read(map);
10247				return KERN_INVALID_ADDRESS;
10248			}
10249
10250			/* clip out the portion of space */
10251			/* mapped by the sub map which   */
10252			/* corresponds to the underlying */
10253			/* object */
10254
10255			/*
10256			 * Clip (and unnest) the smallest nested chunk
10257			 * possible around the faulting address...
10258			 */
10259			local_start = vaddr & ~(pmap_nesting_size_min - 1);
10260			local_end = local_start + pmap_nesting_size_min;
10261			/*
10262			 * ... but don't go beyond the "old_start" to "old_end"
10263			 * range, to avoid spanning over another VM region
10264			 * with a possibly different VM object and/or offset.
10265			 */
10266			if (local_start < old_start) {
10267				local_start = old_start;
10268			}
10269			if (local_end > old_end) {
10270				local_end = old_end;
10271			}
10272			/*
10273			 * Adjust copy_offset to the start of the range.
10274			 */
10275			copy_offset -= (vaddr - local_start);
10276
10277			vm_map_clip_start(map, entry, local_start);
10278			vm_map_clip_end(map, entry, local_end);
10279			if (entry->is_sub_map) {
10280				/* unnesting was done when clipping */
10281				assert(!entry->use_pmap);
10282			}
10283
10284			/* substitute copy object for */
10285			/* shared map entry           */
10286			vm_map_deallocate(entry->object.sub_map);
10287			assert(!entry->iokit_acct);
10288			entry->is_sub_map = FALSE;
10289			entry->use_pmap = TRUE;
10290			entry->object.vm_object = copy_object;
10291
10292			/* propagate the submap entry's protections */
10293			entry->protection |= submap_entry->protection;
10294			entry->max_protection |= submap_entry->max_protection;
10295
10296			if(copied_slowly) {
10297				entry->offset = local_start - old_start;
10298				entry->needs_copy = FALSE;
10299				entry->is_shared = FALSE;
10300			} else {
10301				entry->offset = copy_offset;
10302				entry->needs_copy = TRUE;
10303				if(entry->inheritance == VM_INHERIT_SHARE)
10304					entry->inheritance = VM_INHERIT_COPY;
10305				if (map != old_map)
10306					entry->is_shared = TRUE;
10307			}
10308			if(entry->inheritance == VM_INHERIT_SHARE)
10309				entry->inheritance = VM_INHERIT_COPY;
10310
10311			vm_map_lock_write_to_read(map);
10312		} else {
10313			if((cow_sub_map_parent)
10314			   && (cow_sub_map_parent != *real_map)
10315			   && (cow_sub_map_parent != map)) {
10316				vm_map_unlock(cow_sub_map_parent);
10317			}
10318			entry = submap_entry;
10319			vaddr = local_vaddr;
10320		}
10321	}
10322
10323	/*
10324	 *	Check whether this task is allowed to have
10325	 *	this page.
10326	 */
10327
10328	prot = entry->protection;
10329
10330	if (override_nx(old_map, entry->alias) && prot) {
10331	        /*
10332		 * HACK -- if not a stack, then allow execution
10333		 */
10334	        prot |= VM_PROT_EXECUTE;
10335	}
10336
10337	if (mask_protections) {
10338		fault_type &= prot;
10339		if (fault_type == VM_PROT_NONE) {
10340			goto protection_failure;
10341		}
10342	}
10343	if ((fault_type & (prot)) != fault_type) {
10344	protection_failure:
10345		if (*real_map != map) {
10346			vm_map_unlock(*real_map);
10347		}
10348		*real_map = map;
10349
10350		if ((fault_type & VM_PROT_EXECUTE) && prot)
10351		        log_stack_execution_failure((addr64_t)vaddr, prot);
10352
10353		DTRACE_VM2(prot_fault, int, 1, (uint64_t *), NULL);
10354		return KERN_PROTECTION_FAILURE;
10355	}
10356
10357	/*
10358	 *	If this page is not pageable, we have to get
10359	 *	it for all possible accesses.
10360	 */
10361
10362	*wired = (entry->wired_count != 0);
10363	if (*wired)
10364	        fault_type = prot;
10365
10366	/*
10367	 *	If the entry was copy-on-write, we either shadow it now or just demote the permissions allowed.
10368	 */
10369
10370	if (entry->needs_copy) {
10371	    	/*
10372		 *	If we want to write the page, we may as well
10373		 *	handle that now since we've got the map locked.
10374		 *
10375		 *	If we don't need to write the page, we just
10376		 *	demote the permissions allowed.
10377		 */
10378
10379		if ((fault_type & VM_PROT_WRITE) || *wired || force_copy) {
10380			/*
10381			 *	Make a new object, and place it in the
10382			 *	object chain.  Note that no new references
10383			 *	have appeared -- one just moved from the
10384			 *	map to the new object.
10385			 */
10386
10387			if (vm_map_lock_read_to_write(map)) {
10388				vm_map_lock_read(map);
10389				goto RetryLookup;
10390			}
10391			vm_object_shadow(&entry->object.vm_object,
10392					 &entry->offset,
10393					 (vm_map_size_t) (entry->vme_end -
10394							  entry->vme_start));
10395
10396			entry->object.vm_object->shadowed = TRUE;
10397			entry->needs_copy = FALSE;
10398			vm_map_lock_write_to_read(map);
10399		}
10400		else {
10401			/*
10402			 *	We're attempting to read a copy-on-write
10403			 *	page -- don't allow writes.
10404			 */
10405
10406			prot &= (~VM_PROT_WRITE);
10407		}
10408	}
10409
10410	/*
10411	 *	Create an object if necessary.
10412	 */
10413	if (entry->object.vm_object == VM_OBJECT_NULL) {
10414
10415		if (vm_map_lock_read_to_write(map)) {
10416			vm_map_lock_read(map);
10417			goto RetryLookup;
10418		}
10419
10420		entry->object.vm_object = vm_object_allocate(
10421			(vm_map_size_t)(entry->vme_end - entry->vme_start));
10422		entry->offset = 0;
10423		vm_map_lock_write_to_read(map);
10424	}
10425
10426	/*
10427	 *	Return the object/offset from this entry.  If the entry
10428	 *	was copy-on-write or empty, it has been fixed up.  Also
10429	 *	return the protection.
10430	 */
10431
10432        *offset = (vaddr - entry->vme_start) + entry->offset;
10433        *object = entry->object.vm_object;
10434	*out_prot = prot;
10435
10436	if (fault_info) {
10437		fault_info->interruptible = THREAD_UNINT; /* for now... */
10438		/* ... the caller will change "interruptible" if needed */
10439	        fault_info->cluster_size = 0;
10440		fault_info->user_tag = entry->alias;
10441		fault_info->pmap_options = 0;
10442		if (entry->iokit_acct ||
10443		    (!entry->is_sub_map && !entry->use_pmap)) {
10444			fault_info->pmap_options |= PMAP_OPTIONS_ALT_ACCT;
10445		}
10446	        fault_info->behavior = entry->behavior;
10447		fault_info->lo_offset = entry->offset;
10448		fault_info->hi_offset = (entry->vme_end - entry->vme_start) + entry->offset;
10449		fault_info->no_cache  = entry->no_cache;
10450		fault_info->stealth = FALSE;
10451		fault_info->io_sync = FALSE;
10452		fault_info->cs_bypass = (entry->used_for_jit)? TRUE : FALSE;
10453		fault_info->mark_zf_absent = FALSE;
10454		fault_info->batch_pmap_op = FALSE;
10455	}
10456
10457	/*
10458	 *	Lock the object to prevent it from disappearing
10459	 */
10460	if (object_lock_type == OBJECT_LOCK_EXCLUSIVE)
10461	        vm_object_lock(*object);
10462	else
10463	        vm_object_lock_shared(*object);
10464
10465	/*
10466	 *	Save the version number
10467	 */
10468
10469	out_version->main_timestamp = map->timestamp;
10470
10471	return KERN_SUCCESS;
10472}
10473
10474
10475/*
10476 *	vm_map_verify:
10477 *
10478 *	Verifies that the map in question has not changed
10479 *	since the given version.  If successful, the map
10480 *	will not change until vm_map_verify_done() is called.
10481 */
10482boolean_t
10483vm_map_verify(
10484	register vm_map_t		map,
10485	register vm_map_version_t	*version)	/* REF */
10486{
10487	boolean_t	result;
10488
10489	vm_map_lock_read(map);
10490	result = (map->timestamp == version->main_timestamp);
10491
10492	if (!result)
10493		vm_map_unlock_read(map);
10494
10495	return(result);
10496}
10497
10498/*
10499 *	vm_map_verify_done:
10500 *
10501 *	Releases locks acquired by a vm_map_verify.
10502 *
10503 *	This is now a macro in vm/vm_map.h.  It does a
10504 *	vm_map_unlock_read on the map.
10505 */
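/*
 *	Minimal usage sketch (illustrative only): the pair behaves like an
 *	optimistic read-side transaction on the map.
 *
 *		if (vm_map_verify(map, &version)) {
 *			// unchanged since the lookup: cached entry data is
 *			// still good, and the map is left read-locked here
 *			vm_map_verify_done(map, &version);
 *		} else {
 *			// map changed and is not locked here: redo the lookup
 *		}
 */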
10506
10507
10508/*
10509 *	TEMPORARYTEMPORARYTEMPORARYTEMPORARYTEMPORARYTEMPORARY
10510 *	Goes away after regular vm_region_recurse function migrates to
10511 *	64 bits
10512 *	vm_region_recurse: A form of vm_region which follows the
10513 *	submaps in a target map
10514 *
10515 */
10516
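/*
 *	Hypothetical user-space sketch (illustrative only, not part of the
 *	build): walking a task's regions, descending into submaps, via the
 *	MIG routine that lands here.  The task port and walk policy are
 *	assumptions.
 *
 *		#include <mach/mach.h>
 *		#include <mach/mach_vm.h>
 *
 *		mach_vm_address_t addr = 0;
 *		mach_vm_size_t    size;
 *		natural_t         depth = 0;
 *		vm_region_submap_info_data_64_t info;
 *		mach_msg_type_number_t count = VM_REGION_SUBMAP_INFO_COUNT_64;
 *
 *		while (mach_vm_region_recurse(task, &addr, &size, &depth,
 *					      (vm_region_recurse_info_t)&info,
 *					      &count) == KERN_SUCCESS) {
 *			if (info.is_submap) {
 *				depth++;	// descend into the submap
 *			} else {
 *				// report [addr, addr + size), protection, ...
 *				addr += size;
 *			}
 *			count = VM_REGION_SUBMAP_INFO_COUNT_64;
 *		}
 */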
10517kern_return_t
10518vm_map_region_recurse_64(
10519	vm_map_t		 map,
10520	vm_map_offset_t	*address,		/* IN/OUT */
10521	vm_map_size_t		*size,			/* OUT */
10522	natural_t	 	*nesting_depth,	/* IN/OUT */
10523	vm_region_submap_info_64_t	submap_info,	/* IN/OUT */
10524	mach_msg_type_number_t	*count)	/* IN/OUT */
10525{
10526	mach_msg_type_number_t	original_count;
10527	vm_region_extended_info_data_t	extended;
10528	vm_map_entry_t			tmp_entry;
10529	vm_map_offset_t			user_address;
10530	unsigned int			user_max_depth;
10531
10532	/*
10533	 * "curr_entry" is the VM map entry preceding or including the
10534	 * address we're looking for.
10535	 * "curr_map" is the map or sub-map containing "curr_entry".
10536	 * "curr_address" is the equivalent of the top map's "user_address"
10537	 * in the current map.
10538	 * "curr_offset" is the accumulated offset of "curr_map" in the
10539	 * target task's address space.
10540	 * "curr_depth" is the depth of "curr_map" in the chain of
10541	 * sub-maps.
10542	 *
10543	 * "curr_max_below" and "curr_max_above" limit the range (around
10544	 * "curr_address") we should take into account in the current (sub)map.
10545	 * They limit the range to what's visible through the map entries
10546	 * we've traversed from the top map to the current map.
10547	 *
10548	 */
10549	vm_map_entry_t			curr_entry;
10550	vm_map_address_t		curr_address;
10551	vm_map_offset_t			curr_offset;
10552	vm_map_t			curr_map;
10553	unsigned int			curr_depth;
10554	vm_map_offset_t			curr_max_below, curr_max_above;
10555	vm_map_offset_t			curr_skip;
10556
10557	/*
10558	 * "next_" is the same as "curr_" but for the VM region immediately
10559	 * after the address we're looking for.  We need to keep track of this
10560	 * too because we want to return info about that region if the
10561	 * address we're looking for is not mapped.
10562	 */
10563	vm_map_entry_t			next_entry;
10564	vm_map_offset_t			next_offset;
10565	vm_map_offset_t			next_address;
10566	vm_map_t			next_map;
10567	unsigned int			next_depth;
10568	vm_map_offset_t			next_max_below, next_max_above;
10569	vm_map_offset_t			next_skip;
10570
10571	boolean_t			look_for_pages;
10572	vm_region_submap_short_info_64_t short_info;
10573
10574	if (map == VM_MAP_NULL) {
10575		/* no address space to work on */
10576		return KERN_INVALID_ARGUMENT;
10577	}
10578
10579
10580	if (*count < VM_REGION_SUBMAP_SHORT_INFO_COUNT_64) {
10581		/*
10582		 * "info" structure is not big enough and
10583		 * would overflow
10584		 */
10585		return KERN_INVALID_ARGUMENT;
10586	}
10587
10588	original_count = *count;
10589
10590	if (original_count < VM_REGION_SUBMAP_INFO_V0_COUNT_64) {
10591		*count = VM_REGION_SUBMAP_SHORT_INFO_COUNT_64;
10592		look_for_pages = FALSE;
10593		short_info = (vm_region_submap_short_info_64_t) submap_info;
10594		submap_info = NULL;
10595	} else {
10596		look_for_pages = TRUE;
10597		*count = VM_REGION_SUBMAP_INFO_V0_COUNT_64;
10598		short_info = NULL;
10599
10600		if (original_count >= VM_REGION_SUBMAP_INFO_V1_COUNT_64) {
10601			*count = VM_REGION_SUBMAP_INFO_V1_COUNT_64;
10602		}
10603	}
10604
10605	user_address = *address;
10606	user_max_depth = *nesting_depth;
10607
10608	curr_entry = NULL;
10609	curr_map = map;
10610	curr_address = user_address;
10611	curr_offset = 0;
10612	curr_skip = 0;
10613	curr_depth = 0;
10614	curr_max_above = ((vm_map_offset_t) -1) - curr_address;
10615	curr_max_below = curr_address;
10616
10617	next_entry = NULL;
10618	next_map = NULL;
10619	next_address = 0;
10620	next_offset = 0;
10621	next_skip = 0;
10622	next_depth = 0;
10623	next_max_above = (vm_map_offset_t) -1;
10624	next_max_below = (vm_map_offset_t) -1;
10625
10626	if (not_in_kdp) {
10627		vm_map_lock_read(curr_map);
10628	}
10629
10630	for (;;) {
10631		if (vm_map_lookup_entry(curr_map,
10632					curr_address,
10633					&tmp_entry)) {
10634			/* tmp_entry contains the address we're looking for */
10635			curr_entry = tmp_entry;
10636		} else {
10637			vm_map_offset_t skip;
10638			/*
10639			 * The address is not mapped.  "tmp_entry" is the
10640			 * map entry preceding the address.  We want the next
10641			 * one, if it exists.
10642			 */
10643			curr_entry = tmp_entry->vme_next;
10644
10645			if (curr_entry == vm_map_to_entry(curr_map) ||
10646			    (curr_entry->vme_start >=
10647			     curr_address + curr_max_above)) {
10648				/* no next entry at this level: stop looking */
10649				if (not_in_kdp) {
10650					vm_map_unlock_read(curr_map);
10651				}
10652				curr_entry = NULL;
10653				curr_map = NULL;
10654				curr_offset = 0;
10655				curr_depth = 0;
10656				curr_max_above = 0;
10657				curr_max_below = 0;
10658				break;
10659			}
10660
10661			/* adjust current address and offset */
10662			skip = curr_entry->vme_start - curr_address;
10663			curr_address = curr_entry->vme_start;
10664			curr_skip = skip;
10665			curr_offset += skip;
10666			curr_max_above -= skip;
10667			curr_max_below = 0;
10668		}
10669
10670		/*
10671		 * Is the next entry at this level closer to the address (or
10672		 * deeper in the submap chain) than the one we had
10673		 * so far ?
10674		 */
10675		tmp_entry = curr_entry->vme_next;
10676		if (tmp_entry == vm_map_to_entry(curr_map)) {
10677			/* no next entry at this level */
10678		} else if (tmp_entry->vme_start >=
10679			   curr_address + curr_max_above) {
10680			/*
10681			 * tmp_entry is beyond the scope of what we mapped of
10682			 * this submap in the upper level: ignore it.
10683			 */
10684		} else if ((next_entry == NULL) ||
10685			   (tmp_entry->vme_start + curr_offset <=
10686			    next_entry->vme_start + next_offset)) {
10687			/*
10688			 * We didn't have a "next_entry" or this one is
10689			 * closer to the address we're looking for:
10690			 * use this "tmp_entry" as the new "next_entry".
10691			 */
10692			if (next_entry != NULL) {
10693				/* unlock the last "next_map" */
10694				if (next_map != curr_map && not_in_kdp) {
10695					vm_map_unlock_read(next_map);
10696				}
10697			}
10698			next_entry = tmp_entry;
10699			next_map = curr_map;
10700			next_depth = curr_depth;
10701			next_address = next_entry->vme_start;
10702			next_skip = curr_skip;
10703			next_offset = curr_offset;
10704			next_offset += (next_address - curr_address);
10705			next_max_above = MIN(next_max_above, curr_max_above);
10706			next_max_above = MIN(next_max_above,
10707					     next_entry->vme_end - next_address);
10708			next_max_below = MIN(next_max_below, curr_max_below);
10709			next_max_below = MIN(next_max_below,
10710					     next_address - next_entry->vme_start);
10711		}
10712
10713		/*
10714		 * "curr_max_{above,below}" allow us to keep track of the
10715		 * portion of the submap that is actually mapped at this level:
10716		 * the rest of that submap is irrelevant to us, since it's not
10717		 * mapped here.
10718		 * The relevant portion of the map starts at
10719		 * "curr_entry->offset" up to the size of "curr_entry".
10720		 */
10721		curr_max_above = MIN(curr_max_above,
10722				     curr_entry->vme_end - curr_address);
10723		curr_max_below = MIN(curr_max_below,
10724				     curr_address - curr_entry->vme_start);
10725
10726		if (!curr_entry->is_sub_map ||
10727		    curr_depth >= user_max_depth) {
10728			/*
10729			 * We hit a leaf map or we reached the maximum depth
10730			 * we could, so stop looking.  Keep the current map
10731			 * locked.
10732			 */
10733			break;
10734		}
10735
10736		/*
10737		 * Get down to the next submap level.
10738		 */
10739
10740		/*
10741		 * Lock the next level and unlock the current level,
10742		 * unless we need to keep it locked to access the "next_entry"
10743		 * later.
10744		 */
10745		if (not_in_kdp) {
10746			vm_map_lock_read(curr_entry->object.sub_map);
10747		}
10748		if (curr_map == next_map) {
10749			/* keep "next_map" locked in case we need it */
10750		} else {
10751			/* release this map */
10752			if (not_in_kdp)
10753				vm_map_unlock_read(curr_map);
10754		}
10755
10756		/*
10757		 * Adjust the offset.  "curr_entry" maps the submap
10758		 * at relative address "curr_entry->vme_start" in the
10759		 * curr_map but skips the first "curr_entry->offset"
10760		 * bytes of the submap.
10761		 * "curr_offset" always represents the offset of a virtual
10762		 * address in the curr_map relative to the absolute address
10763		 * space (i.e. the top-level VM map).
10764		 */
10765		curr_offset +=
10766			(curr_entry->offset - curr_entry->vme_start);
10767		curr_address = user_address + curr_offset;
10768		/* switch to the submap */
10769		curr_map = curr_entry->object.sub_map;
10770		curr_depth++;
10771		curr_entry = NULL;
10772	}
10773
10774	if (curr_entry == NULL) {
10775		/* no VM region contains the address... */
10776		if (next_entry == NULL) {
10777			/* ... and no VM region follows it either */
10778			return KERN_INVALID_ADDRESS;
10779		}
10780		/* ... gather info about the next VM region */
10781		curr_entry = next_entry;
10782		curr_map = next_map;	/* still locked ... */
10783		curr_address = next_address;
10784		curr_skip = next_skip;
10785		curr_offset = next_offset;
10786		curr_depth = next_depth;
10787		curr_max_above = next_max_above;
10788		curr_max_below = next_max_below;
10789		if (curr_map == map) {
10790			user_address = curr_address;
10791		}
10792	} else {
10793		/* we won't need "next_entry" after all */
10794		if (next_entry != NULL) {
10795			/* release "next_map" */
10796			if (next_map != curr_map && not_in_kdp) {
10797				vm_map_unlock_read(next_map);
10798			}
10799		}
10800	}
10801	next_entry = NULL;
10802	next_map = NULL;
10803	next_offset = 0;
10804	next_skip = 0;
10805	next_depth = 0;
10806	next_max_below = -1;
10807	next_max_above = -1;
10808
10809	*nesting_depth = curr_depth;
10810	*size = curr_max_above + curr_max_below;
10811	*address = user_address + curr_skip - curr_max_below;
10812
10813// LP64todo: all the current tools are 32bit, obviously never worked for 64b
10814// so probably should be a real 32b ID vs. ptr.
10815// Current users just check for equality
10816#define INFO_MAKE_OBJECT_ID(p)	((uint32_t)(uintptr_t)VM_KERNEL_ADDRPERM(p))
10817
10818	if (look_for_pages) {
10819		submap_info->user_tag = curr_entry->alias;
10820		submap_info->offset = curr_entry->offset;
10821		submap_info->protection = curr_entry->protection;
10822		submap_info->inheritance = curr_entry->inheritance;
10823		submap_info->max_protection = curr_entry->max_protection;
10824		submap_info->behavior = curr_entry->behavior;
10825		submap_info->user_wired_count = curr_entry->user_wired_count;
10826		submap_info->is_submap = curr_entry->is_sub_map;
10827		submap_info->object_id = INFO_MAKE_OBJECT_ID(curr_entry->object.vm_object);
10828	} else {
10829		short_info->user_tag = curr_entry->alias;
10830		short_info->offset = curr_entry->offset;
10831		short_info->protection = curr_entry->protection;
10832		short_info->inheritance = curr_entry->inheritance;
10833		short_info->max_protection = curr_entry->max_protection;
10834		short_info->behavior = curr_entry->behavior;
10835		short_info->user_wired_count = curr_entry->user_wired_count;
10836		short_info->is_submap = curr_entry->is_sub_map;
10837		short_info->object_id = INFO_MAKE_OBJECT_ID(curr_entry->object.vm_object);
10838	}
10839
10840	extended.pages_resident = 0;
10841	extended.pages_swapped_out = 0;
10842	extended.pages_shared_now_private = 0;
10843	extended.pages_dirtied = 0;
10844	extended.pages_reusable = 0;
10845	extended.external_pager = 0;
10846	extended.shadow_depth = 0;
10847
10848	if (not_in_kdp) {
10849		if (!curr_entry->is_sub_map) {
10850			vm_map_offset_t range_start, range_end;
10851			range_start = MAX((curr_address - curr_max_below),
10852					  curr_entry->vme_start);
10853			range_end = MIN((curr_address + curr_max_above),
10854					curr_entry->vme_end);
10855			vm_map_region_walk(curr_map,
10856					   range_start,
10857					   curr_entry,
10858					   (curr_entry->offset +
10859					    (range_start -
10860					     curr_entry->vme_start)),
10861					   range_end - range_start,
10862					   &extended,
10863					   look_for_pages, VM_REGION_EXTENDED_INFO_COUNT);
10864			if (extended.external_pager &&
10865			    extended.ref_count == 2 &&
10866			    extended.share_mode == SM_SHARED) {
10867				extended.share_mode = SM_PRIVATE;
10868			}
10869		} else {
10870			if (curr_entry->use_pmap) {
10871				extended.share_mode = SM_TRUESHARED;
10872			} else {
10873				extended.share_mode = SM_PRIVATE;
10874			}
10875			extended.ref_count =
10876				curr_entry->object.sub_map->ref_count;
10877		}
10878	}
10879
10880	if (look_for_pages) {
10881		submap_info->pages_resident = extended.pages_resident;
10882		submap_info->pages_swapped_out = extended.pages_swapped_out;
10883		submap_info->pages_shared_now_private =
10884			extended.pages_shared_now_private;
10885		submap_info->pages_dirtied = extended.pages_dirtied;
10886		submap_info->external_pager = extended.external_pager;
10887		submap_info->shadow_depth = extended.shadow_depth;
10888		submap_info->share_mode = extended.share_mode;
10889		submap_info->ref_count = extended.ref_count;
10890
10891		if (original_count >= VM_REGION_SUBMAP_INFO_V1_COUNT_64) {
10892			submap_info->pages_reusable = extended.pages_reusable;
10893		}
10894	} else {
10895		short_info->external_pager = extended.external_pager;
10896		short_info->shadow_depth = extended.shadow_depth;
10897		short_info->share_mode = extended.share_mode;
10898		short_info->ref_count = extended.ref_count;
10899	}
10900
10901	if (not_in_kdp) {
10902		vm_map_unlock_read(curr_map);
10903	}
10904
10905	return KERN_SUCCESS;
10906}
10907
10908/*
10909 *	vm_region:
10910 *
10911 *	User call to obtain information about a region in
10912 *	a task's address map.  Several info flavors are
10913 *	supported; see the flavor switch below.
10914 *
10915 *	XXX The reserved and behavior fields cannot be filled
10916 *	    in until the vm merge from the IK is completed, and
10917 *	    vm_reserve is implemented.
10918 */
10919
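/*
 *	Hypothetical user-space sketch (illustrative only): the BASIC_INFO_64
 *	flavor handled below is typically reached through mach_vm_region().
 *	The task port is an assumption.
 *
 *		#include <mach/mach.h>
 *		#include <mach/mach_vm.h>
 *
 *		mach_vm_address_t addr = 0;
 *		mach_vm_size_t    size;
 *		vm_region_basic_info_data_64_t info;
 *		mach_msg_type_number_t count = VM_REGION_BASIC_INFO_COUNT_64;
 *		mach_port_t object_name;
 *
 *		kern_return_t kr = mach_vm_region(task, &addr, &size,
 *						  VM_REGION_BASIC_INFO_64,
 *						  (vm_region_info_t)&info,
 *						  &count, &object_name);
 *		// on success: [addr, addr + size) with info.protection, etc.
 */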
10920kern_return_t
10921vm_map_region(
10922	vm_map_t		 map,
10923	vm_map_offset_t	*address,		/* IN/OUT */
10924	vm_map_size_t		*size,			/* OUT */
10925	vm_region_flavor_t	 flavor,		/* IN */
10926	vm_region_info_t	 info,			/* OUT */
10927	mach_msg_type_number_t	*count,	/* IN/OUT */
10928	mach_port_t		*object_name)		/* OUT */
10929{
10930	vm_map_entry_t		tmp_entry;
10931	vm_map_entry_t		entry;
10932	vm_map_offset_t		start;
10933
10934	if (map == VM_MAP_NULL)
10935		return(KERN_INVALID_ARGUMENT);
10936
10937	switch (flavor) {
10938
10939	case VM_REGION_BASIC_INFO:
10940		/* legacy for old 32-bit objects info */
10941	{
10942		vm_region_basic_info_t	basic;
10943
10944		if (*count < VM_REGION_BASIC_INFO_COUNT)
10945			return(KERN_INVALID_ARGUMENT);
10946
10947		basic = (vm_region_basic_info_t) info;
10948		*count = VM_REGION_BASIC_INFO_COUNT;
10949
10950		vm_map_lock_read(map);
10951
10952		start = *address;
10953		if (!vm_map_lookup_entry(map, start, &tmp_entry)) {
10954			if ((entry = tmp_entry->vme_next) == vm_map_to_entry(map)) {
10955				vm_map_unlock_read(map);
10956				return(KERN_INVALID_ADDRESS);
10957			}
10958		} else {
10959			entry = tmp_entry;
10960		}
10961
10962		start = entry->vme_start;
10963
10964		basic->offset = (uint32_t)entry->offset;
10965		basic->protection = entry->protection;
10966		basic->inheritance = entry->inheritance;
10967		basic->max_protection = entry->max_protection;
10968		basic->behavior = entry->behavior;
10969		basic->user_wired_count = entry->user_wired_count;
10970		basic->reserved = entry->is_sub_map;
10971		*address = start;
10972		*size = (entry->vme_end - start);
10973
10974		if (object_name) *object_name = IP_NULL;
10975		if (entry->is_sub_map) {
10976			basic->shared = FALSE;
10977		} else {
10978			basic->shared = entry->is_shared;
10979		}
10980
10981		vm_map_unlock_read(map);
10982		return(KERN_SUCCESS);
10983	}
10984
10985	case VM_REGION_BASIC_INFO_64:
10986	{
10987		vm_region_basic_info_64_t	basic;
10988
10989		if (*count < VM_REGION_BASIC_INFO_COUNT_64)
10990			return(KERN_INVALID_ARGUMENT);
10991
10992		basic = (vm_region_basic_info_64_t) info;
10993		*count = VM_REGION_BASIC_INFO_COUNT_64;
10994
10995		vm_map_lock_read(map);
10996
10997		start = *address;
10998		if (!vm_map_lookup_entry(map, start, &tmp_entry)) {
10999			if ((entry = tmp_entry->vme_next) == vm_map_to_entry(map)) {
11000				vm_map_unlock_read(map);
11001				return(KERN_INVALID_ADDRESS);
11002			}
11003		} else {
11004			entry = tmp_entry;
11005		}
11006
11007		start = entry->vme_start;
11008
11009		basic->offset = entry->offset;
11010		basic->protection = entry->protection;
11011		basic->inheritance = entry->inheritance;
11012		basic->max_protection = entry->max_protection;
11013		basic->behavior = entry->behavior;
11014		basic->user_wired_count = entry->user_wired_count;
11015		basic->reserved = entry->is_sub_map;
11016		*address = start;
11017		*size = (entry->vme_end - start);
11018
11019		if (object_name) *object_name = IP_NULL;
11020		if (entry->is_sub_map) {
11021			basic->shared = FALSE;
11022		} else {
11023			basic->shared = entry->is_shared;
11024		}
11025
11026		vm_map_unlock_read(map);
11027		return(KERN_SUCCESS);
11028	}
11029	case VM_REGION_EXTENDED_INFO:
11030		if (*count < VM_REGION_EXTENDED_INFO_COUNT)
11031			return(KERN_INVALID_ARGUMENT);
11032		/*fallthru*/
11033	case VM_REGION_EXTENDED_INFO__legacy:
11034		if (*count < VM_REGION_EXTENDED_INFO_COUNT__legacy)
11035			return KERN_INVALID_ARGUMENT;
11036
11037	{
11038		vm_region_extended_info_t	extended;
11039		mach_msg_type_number_t original_count;
11040
11041		extended = (vm_region_extended_info_t) info;
11042
11043		vm_map_lock_read(map);
11044
11045		start = *address;
11046		if (!vm_map_lookup_entry(map, start, &tmp_entry)) {
11047			if ((entry = tmp_entry->vme_next) == vm_map_to_entry(map)) {
11048				vm_map_unlock_read(map);
11049				return(KERN_INVALID_ADDRESS);
11050			}
11051		} else {
11052			entry = tmp_entry;
11053		}
11054		start = entry->vme_start;
11055
11056		extended->protection = entry->protection;
11057		extended->user_tag = entry->alias;
11058		extended->pages_resident = 0;
11059		extended->pages_swapped_out = 0;
11060		extended->pages_shared_now_private = 0;
11061		extended->pages_dirtied = 0;
11062		extended->external_pager = 0;
11063		extended->shadow_depth = 0;
11064
11065		original_count = *count;
11066		if (flavor == VM_REGION_EXTENDED_INFO__legacy) {
11067			*count = VM_REGION_EXTENDED_INFO_COUNT__legacy;
11068		} else {
11069			extended->pages_reusable = 0;
11070			*count = VM_REGION_EXTENDED_INFO_COUNT;
11071		}
11072
11073		vm_map_region_walk(map, start, entry, entry->offset, entry->vme_end - start, extended, TRUE, *count);
11074
11075		if (extended->external_pager && extended->ref_count == 2 && extended->share_mode == SM_SHARED)
11076			extended->share_mode = SM_PRIVATE;
11077
11078		if (object_name)
11079			*object_name = IP_NULL;
11080		*address = start;
11081		*size = (entry->vme_end - start);
11082
11083		vm_map_unlock_read(map);
11084		return(KERN_SUCCESS);
11085	}
11086	case VM_REGION_TOP_INFO:
11087	{
11088		vm_region_top_info_t	top;
11089
11090		if (*count < VM_REGION_TOP_INFO_COUNT)
11091			return(KERN_INVALID_ARGUMENT);
11092
11093		top = (vm_region_top_info_t) info;
11094		*count = VM_REGION_TOP_INFO_COUNT;
11095
11096		vm_map_lock_read(map);
11097
11098		start = *address;
11099		if (!vm_map_lookup_entry(map, start, &tmp_entry)) {
11100			if ((entry = tmp_entry->vme_next) == vm_map_to_entry(map)) {
11101				vm_map_unlock_read(map);
11102				return(KERN_INVALID_ADDRESS);
11103			}
11104		} else {
11105			entry = tmp_entry;
11106
11107		}
11108		start = entry->vme_start;
11109
11110		top->private_pages_resident = 0;
11111		top->shared_pages_resident = 0;
11112
11113		vm_map_region_top_walk(entry, top);
11114
11115		if (object_name)
11116			*object_name = IP_NULL;
11117		*address = start;
11118		*size = (entry->vme_end - start);
11119
11120		vm_map_unlock_read(map);
11121		return(KERN_SUCCESS);
11122	}
11123	default:
11124		return(KERN_INVALID_ARGUMENT);
11125	}
11126}
11127
11128#define OBJ_RESIDENT_COUNT(obj, entry_size)				\
11129	MIN((entry_size),						\
11130	    ((obj)->all_reusable ?					\
11131	     (obj)->wired_page_count :					\
11132	     (obj)->resident_page_count - (obj)->reusable_page_count))
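/*
 * Illustrative arithmetic for the macro above (hypothetical numbers): an
 * object with 100 resident pages, 30 of them reusable, mapped by a 50-page
 * entry yields MIN(50, 100 - 30) = 50; if the object were all_reusable with
 * 10 wired pages, it would yield MIN(50, 10) = 10.
 */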
11133
11134void
11135vm_map_region_top_walk(
11136        vm_map_entry_t		   entry,
11137	vm_region_top_info_t       top)
11138{
11139
11140	if (entry->object.vm_object == 0 || entry->is_sub_map) {
11141		top->share_mode = SM_EMPTY;
11142		top->ref_count = 0;
11143		top->obj_id = 0;
11144		return;
11145	}
11146
11147	{
11148	        struct	vm_object *obj, *tmp_obj;
11149		int		ref_count;
11150		uint32_t	entry_size;
11151
11152		entry_size = (uint32_t) ((entry->vme_end - entry->vme_start) / PAGE_SIZE_64);
11153
11154		obj = entry->object.vm_object;
11155
11156		vm_object_lock(obj);
11157
11158		if ((ref_count = obj->ref_count) > 1 && obj->paging_in_progress)
11159			ref_count--;
11160
11161		assert(obj->reusable_page_count <= obj->resident_page_count);
11162		if (obj->shadow) {
11163			if (ref_count == 1)
11164				top->private_pages_resident =
11165					OBJ_RESIDENT_COUNT(obj, entry_size);
11166			else
11167				top->shared_pages_resident =
11168					OBJ_RESIDENT_COUNT(obj, entry_size);
11169			top->ref_count  = ref_count;
11170			top->share_mode = SM_COW;
11171
11172			while ((tmp_obj = obj->shadow)) {
11173				vm_object_lock(tmp_obj);
11174				vm_object_unlock(obj);
11175				obj = tmp_obj;
11176
11177				if ((ref_count = obj->ref_count) > 1 && obj->paging_in_progress)
11178					ref_count--;
11179
11180				assert(obj->reusable_page_count <= obj->resident_page_count);
11181				top->shared_pages_resident +=
11182					OBJ_RESIDENT_COUNT(obj, entry_size);
11183				top->ref_count += ref_count - 1;
11184			}
11185		} else {
11186			if (entry->superpage_size) {
11187				top->share_mode = SM_LARGE_PAGE;
11188				top->shared_pages_resident = 0;
11189				top->private_pages_resident = entry_size;
11190			} else if (entry->needs_copy) {
11191				top->share_mode = SM_COW;
11192				top->shared_pages_resident =
11193					OBJ_RESIDENT_COUNT(obj, entry_size);
11194			} else {
11195				if (ref_count == 1 ||
11196				    (ref_count == 2 && !(obj->pager_trusted) && !(obj->internal))) {
11197					top->share_mode = SM_PRIVATE;
11198						top->private_pages_resident =
11199							OBJ_RESIDENT_COUNT(obj,
11200									   entry_size);
11201				} else {
11202					top->share_mode = SM_SHARED;
11203					top->shared_pages_resident =
11204						OBJ_RESIDENT_COUNT(obj,
11205								  entry_size);
11206				}
11207			}
11208			top->ref_count = ref_count;
11209		}
11210		/* XXX K64: obj_id will be truncated */
11211		top->obj_id = (unsigned int) (uintptr_t)VM_KERNEL_ADDRPERM(obj);
11212
11213		vm_object_unlock(obj);
11214	}
11215}
11216
11217void
11218vm_map_region_walk(
11219	vm_map_t		   	map,
11220	vm_map_offset_t			va,
11221	vm_map_entry_t			entry,
11222	vm_object_offset_t		offset,
11223	vm_object_size_t		range,
11224	vm_region_extended_info_t	extended,
11225	boolean_t			look_for_pages,
11226	mach_msg_type_number_t count)
11227{
11228        register struct vm_object *obj, *tmp_obj;
11229	register vm_map_offset_t       last_offset;
11230	register int               i;
11231	register int               ref_count;
11232	struct vm_object	*shadow_object;
11233	int			shadow_depth;
11234
11235	if ((entry->object.vm_object == 0) ||
11236	    (entry->is_sub_map) ||
11237	    (entry->object.vm_object->phys_contiguous &&
11238	     !entry->superpage_size)) {
11239		extended->share_mode = SM_EMPTY;
11240		extended->ref_count = 0;
11241		return;
11242	}
11243
11244	if (entry->superpage_size) {
11245		extended->shadow_depth = 0;
11246		extended->share_mode = SM_LARGE_PAGE;
11247		extended->ref_count = 1;
11248		extended->external_pager = 0;
11249		extended->pages_resident = (unsigned int)(range >> PAGE_SHIFT);
11251		return;
11252	}
11253
11254	{
11255		obj = entry->object.vm_object;
11256
11257		vm_object_lock(obj);
11258
11259		if ((ref_count = obj->ref_count) > 1 && obj->paging_in_progress)
11260			ref_count--;
11261
11262		if (look_for_pages) {
11263			for (last_offset = offset + range;
11264			     offset < last_offset;
11265			     offset += PAGE_SIZE_64, va += PAGE_SIZE) {
11266					vm_map_region_look_for_page(map, va, obj,
11267								    offset, ref_count,
11268								    0, extended, count);
11269			}
11270		} else {
11271			shadow_object = obj->shadow;
11272			shadow_depth = 0;
11273
11274			if ( !(obj->pager_trusted) && !(obj->internal))
11275				extended->external_pager = 1;
11276
11277			if (shadow_object != VM_OBJECT_NULL) {
11278				vm_object_lock(shadow_object);
11279				for (;
11280				     shadow_object != VM_OBJECT_NULL;
11281				     shadow_depth++) {
11282					vm_object_t	next_shadow;
11283
11284					if ( !(shadow_object->pager_trusted) &&
11285					     !(shadow_object->internal))
11286						extended->external_pager = 1;
11287
11288					next_shadow = shadow_object->shadow;
11289					if (next_shadow) {
11290						vm_object_lock(next_shadow);
11291					}
11292					vm_object_unlock(shadow_object);
11293					shadow_object = next_shadow;
11294				}
11295			}
11296			extended->shadow_depth = shadow_depth;
11297		}
11298
11299		if (extended->shadow_depth || entry->needs_copy)
11300			extended->share_mode = SM_COW;
11301		else {
11302			if (ref_count == 1)
11303				extended->share_mode = SM_PRIVATE;
11304			else {
11305				if (obj->true_share)
11306					extended->share_mode = SM_TRUESHARED;
11307				else
11308					extended->share_mode = SM_SHARED;
11309			}
11310		}
11311		extended->ref_count = ref_count - extended->shadow_depth;
11312
11313		for (i = 0; i < extended->shadow_depth; i++) {
11314			if ((tmp_obj = obj->shadow) == 0)
11315				break;
11316			vm_object_lock(tmp_obj);
11317			vm_object_unlock(obj);
11318
11319			if ((ref_count = tmp_obj->ref_count) > 1 && tmp_obj->paging_in_progress)
11320				ref_count--;
11321
11322			extended->ref_count += ref_count;
11323			obj = tmp_obj;
11324		}
11325		vm_object_unlock(obj);
11326
11327		if (extended->share_mode == SM_SHARED) {
11328			register vm_map_entry_t	     cur;
11329			register vm_map_entry_t	     last;
11330			int      my_refs;
11331
11332			obj = entry->object.vm_object;
11333			last = vm_map_to_entry(map);
11334			my_refs = 0;
11335
11336			if ((ref_count = obj->ref_count) > 1 && obj->paging_in_progress)
11337				ref_count--;
11338			for (cur = vm_map_first_entry(map); cur != last; cur = cur->vme_next)
11339				my_refs += vm_map_region_count_obj_refs(cur, obj);
11340
11341			if (my_refs == ref_count)
11342				extended->share_mode = SM_PRIVATE_ALIASED;
11343			else if (my_refs > 1)
11344				extended->share_mode = SM_SHARED_ALIASED;
11345		}
11346	}
11347}
11348
11349
11350/* object is locked on entry and locked on return */
11351
11352
11353static void
11354vm_map_region_look_for_page(
11355	__unused vm_map_t		map,
11356	__unused vm_map_offset_t	va,
11357	vm_object_t			object,
11358	vm_object_offset_t		offset,
11359	int				max_refcnt,
11360	int				depth,
11361	vm_region_extended_info_t	extended,
11362	mach_msg_type_number_t count)
11363{
11364        register vm_page_t	p;
11365        register vm_object_t	shadow;
11366	register int            ref_count;
11367	vm_object_t		caller_object;
11368	kern_return_t		kr;

	shadow = object->shadow;
11370	caller_object = object;
11371
11372
11373	while (TRUE) {
11374
11375		if ( !(object->pager_trusted) && !(object->internal))
11376			extended->external_pager = 1;
11377
11378		if ((p = vm_page_lookup(object, offset)) != VM_PAGE_NULL) {
11379	        	if (shadow && (max_refcnt == 1))
11380		    		extended->pages_shared_now_private++;
11381
11382			if (!p->fictitious &&
11383			    (p->dirty || pmap_is_modified(p->phys_page)))
11384		    		extended->pages_dirtied++;
11385			else if (count >= VM_REGION_EXTENDED_INFO_COUNT) {
11386				if (p->reusable || p->object->all_reusable) {
11387					extended->pages_reusable++;
11388				}
11389			}
11390
11391			extended->pages_resident++;
11392
11393			if(object != caller_object)
11394				vm_object_unlock(object);
11395
11396			return;
11397		}
11398#if	MACH_PAGEMAP
11399		if (object->existence_map) {
11400	    		if (vm_external_state_get(object->existence_map, offset) == VM_EXTERNAL_STATE_EXISTS) {
11401
11402	        		extended->pages_swapped_out++;
11403
11404				if(object != caller_object)
11405					vm_object_unlock(object);
11406
11407				return;
11408	    		}
11409		} else
11410#endif /* MACH_PAGEMAP */
11411		if (object->internal &&
11412		    object->alive &&
11413		    !object->terminating &&
11414		    object->pager_ready) {
11415
11416			if (COMPRESSED_PAGER_IS_ACTIVE || DEFAULT_FREEZER_COMPRESSED_PAGER_IS_ACTIVE) {
11417				if (VM_COMPRESSOR_PAGER_STATE_GET(object,
11418								  offset)
11419				    == VM_EXTERNAL_STATE_EXISTS) {
11420					/* the pager has that page */
11421					extended->pages_swapped_out++;
11422					if (object != caller_object)
11423						vm_object_unlock(object);
11424					return;
11425				}
11426			} else {
11427				memory_object_t pager;
11428
11429				vm_object_paging_begin(object);
11430				pager = object->pager;
11431				vm_object_unlock(object);
11432
11433				kr = memory_object_data_request(
11434					pager,
11435					offset + object->paging_offset,
11436					0, /* just poke the pager */
11437					VM_PROT_READ,
11438					NULL);
11439
11440				vm_object_lock(object);
11441				vm_object_paging_end(object);
11442
11443				if (kr == KERN_SUCCESS) {
11444					/* the pager has that page */
11445					extended->pages_swapped_out++;
11446					if (object != caller_object)
11447						vm_object_unlock(object);
11448					return;
11449				}
11450			}
11451		}
11452
11453		if (shadow) {
11454			vm_object_lock(shadow);
11455
11456			if ((ref_count = shadow->ref_count) > 1 && shadow->paging_in_progress)
11457			        ref_count--;
11458
11459	    		if (++depth > extended->shadow_depth)
11460	        		extended->shadow_depth = depth;
11461
11462	    		if (ref_count > max_refcnt)
11463	        		max_refcnt = ref_count;
11464
11465			if(object != caller_object)
11466				vm_object_unlock(object);
11467
11468			offset = offset + object->vo_shadow_offset;
11469			object = shadow;
11470			shadow = object->shadow;
11471			continue;
11472		}
11473		if(object != caller_object)
11474			vm_object_unlock(object);
11475		break;
11476	}
11477}
11478
11479static int
11480vm_map_region_count_obj_refs(
11481        vm_map_entry_t    entry,
11482	vm_object_t       object)
11483{
11484        register int ref_count;
11485	register vm_object_t chk_obj;
11486	register vm_object_t tmp_obj;
11487
11488	if (entry->object.vm_object == 0)
11489		return(0);
11490
11491        if (entry->is_sub_map)
11492		return(0);
11493	else {
11494		ref_count = 0;
11495
11496		chk_obj = entry->object.vm_object;
11497		vm_object_lock(chk_obj);
11498
11499		while (chk_obj) {
11500			if (chk_obj == object)
11501				ref_count++;
11502			tmp_obj = chk_obj->shadow;
11503			if (tmp_obj)
11504				vm_object_lock(tmp_obj);
11505			vm_object_unlock(chk_obj);
11506
11507			chk_obj = tmp_obj;
11508		}
11509	}
11510	return(ref_count);
11511}
11512
11513
11514/*
11515 *	Routine:	vm_map_simplify
11516 *
11517 *	Description:
11518 *		Attempt to simplify the map representation in
11519 *		the vicinity of the given starting address.
11520 *	Note:
11521 *		This routine is intended primarily to keep the
11522 *		kernel maps more compact -- they generally don't
11523 *		benefit from the "expand a map entry" technology
11524 *		at allocation time because the adjacent entry
11525 *		is often wired down.
11526 */
11527void
11528vm_map_simplify_entry(
11529	vm_map_t	map,
11530	vm_map_entry_t	this_entry)
11531{
11532	vm_map_entry_t	prev_entry;
11533
11534	counter(c_vm_map_simplify_entry_called++);
11535
11536	prev_entry = this_entry->vme_prev;
11537
11538	if ((this_entry != vm_map_to_entry(map)) &&
11539	    (prev_entry != vm_map_to_entry(map)) &&
11540
11541	    (prev_entry->vme_end == this_entry->vme_start) &&
11542
11543	    (prev_entry->is_sub_map == this_entry->is_sub_map) &&
11544	    (prev_entry->object.vm_object == this_entry->object.vm_object) &&
11545	    ((prev_entry->offset + (prev_entry->vme_end -
11546				    prev_entry->vme_start))
11547	     == this_entry->offset) &&
11548
11549	    (prev_entry->behavior == this_entry->behavior) &&
11550	    (prev_entry->needs_copy == this_entry->needs_copy) &&
11551	    (prev_entry->protection == this_entry->protection) &&
11552	    (prev_entry->max_protection == this_entry->max_protection) &&
11553	    (prev_entry->inheritance == this_entry->inheritance) &&
11554	    (prev_entry->use_pmap == this_entry->use_pmap) &&
11555	    (prev_entry->alias == this_entry->alias) &&
11556	    (prev_entry->no_cache == this_entry->no_cache) &&
11557	    (prev_entry->permanent == this_entry->permanent) &&
11558	    (prev_entry->map_aligned == this_entry->map_aligned) &&
11559	    (prev_entry->zero_wired_pages == this_entry->zero_wired_pages) &&
11560	    (prev_entry->used_for_jit == this_entry->used_for_jit) &&
11561	    /* from_reserved_zone: OK if that field doesn't match */
11562	    (prev_entry->iokit_acct == this_entry->iokit_acct) &&
11563
11564	    (prev_entry->wired_count == this_entry->wired_count) &&
11565	    (prev_entry->user_wired_count == this_entry->user_wired_count) &&
11566
11567	    (prev_entry->in_transition == FALSE) &&
11568	    (this_entry->in_transition == FALSE) &&
11569	    (prev_entry->needs_wakeup == FALSE) &&
11570	    (this_entry->needs_wakeup == FALSE) &&
11571	    (prev_entry->is_shared == FALSE) &&
11572	    (this_entry->is_shared == FALSE) &&
11573	    (prev_entry->superpage_size == FALSE) &&
11574	    (this_entry->superpage_size == FALSE)
11575		) {
11576		vm_map_store_entry_unlink(map, prev_entry);
11577		assert(prev_entry->vme_start < this_entry->vme_end);
11578		if (prev_entry->map_aligned)
11579			assert(VM_MAP_PAGE_ALIGNED(prev_entry->vme_start,
11580						   VM_MAP_PAGE_MASK(map)));
11581		this_entry->vme_start = prev_entry->vme_start;
11582		this_entry->offset = prev_entry->offset;
11583		if (prev_entry->is_sub_map) {
11584			vm_map_deallocate(prev_entry->object.sub_map);
11585		} else {
11586			vm_object_deallocate(prev_entry->object.vm_object);
11587		}
11588		vm_map_entry_dispose(map, prev_entry);
11589		SAVE_HINT_MAP_WRITE(map, this_entry);
11590		counter(c_vm_map_simplified++);
11591	}
11592}
11593
11594void
11595vm_map_simplify(
11596	vm_map_t	map,
11597	vm_map_offset_t	start)
11598{
11599	vm_map_entry_t	this_entry;
11600
11601	vm_map_lock(map);
11602	if (vm_map_lookup_entry(map, start, &this_entry)) {
11603		vm_map_simplify_entry(map, this_entry);
11604		vm_map_simplify_entry(map, this_entry->vme_next);
11605	}
11606	counter(c_vm_map_simplify_called++);
11607	vm_map_unlock(map);
11608}
11609
11610static void
11611vm_map_simplify_range(
11612	vm_map_t	map,
11613	vm_map_offset_t	start,
11614	vm_map_offset_t	end)
11615{
11616	vm_map_entry_t	entry;
11617
11618	/*
11619	 * The map should be locked (for "write") by the caller.
11620	 */
11621
11622	if (start >= end) {
11623		/* invalid address range */
11624		return;
11625	}
11626
11627	start = vm_map_trunc_page(start,
11628				  VM_MAP_PAGE_MASK(map));
11629	end = vm_map_round_page(end,
11630				VM_MAP_PAGE_MASK(map));
11631
11632	if (!vm_map_lookup_entry(map, start, &entry)) {
11633		/* "start" is not mapped and "entry" ends before "start" */
11634		if (entry == vm_map_to_entry(map)) {
11635			/* start with first entry in the map */
11636			entry = vm_map_first_entry(map);
11637		} else {
11638			/* start with next entry */
11639			entry = entry->vme_next;
11640		}
11641	}
11642
11643	while (entry != vm_map_to_entry(map) &&
11644	       entry->vme_start <= end) {
11645		/* try and coalesce "entry" with its previous entry */
11646		vm_map_simplify_entry(map, entry);
11647		entry = entry->vme_next;
11648	}
11649}
11650
11651
11652/*
11653 *	Routine:	vm_map_machine_attribute
11654 *	Purpose:
11655 *		Provide machine-specific attributes to mappings,
11656 *		such as cachability etc. for machines that provide
11657 *		them.  NUMA architectures and machines with big/strange
11658 *		caches will use this.
11659 *	Note:
11660 *		Responsibilities for locking and checking are handled here,
11661 *		everything else in the pmap module. If any non-volatile
11662 *		information must be kept, the pmap module should handle
11663 *		it itself. [This assumes that attributes do not
11664 *		need to be inherited, which seems ok to me]
11665 */
11666kern_return_t
11667vm_map_machine_attribute(
11668	vm_map_t			map,
11669	vm_map_offset_t		start,
11670	vm_map_offset_t		end,
11671	vm_machine_attribute_t	attribute,
11672	vm_machine_attribute_val_t* value)		/* IN/OUT */
11673{
11674	kern_return_t	ret;
11675	vm_map_size_t sync_size;
11676	vm_map_entry_t entry;
11677
11678	if (start < vm_map_min(map) || end > vm_map_max(map))
11679		return KERN_INVALID_ADDRESS;
11680
11681	/* Figure how much memory we need to flush (in page increments) */
11682	sync_size = end - start;
11683
11684	vm_map_lock(map);
11685
11686	if (attribute != MATTR_CACHE) {
11687		/* If we don't have to find physical addresses, we */
11688		/* don't have to do an explicit traversal here.    */
11689		ret = pmap_attribute(map->pmap, start, end-start,
11690				     attribute, value);
11691		vm_map_unlock(map);
11692		return ret;
11693	}
11694
	ret = KERN_SUCCESS;	/* Assume it all worked */
11696
11697	while(sync_size) {
11698		if (vm_map_lookup_entry(map, start, &entry)) {
11699			vm_map_size_t	sub_size;
11700			if((entry->vme_end - start) > sync_size) {
11701				sub_size = sync_size;
11702				sync_size = 0;
11703			} else {
11704				sub_size = entry->vme_end - start;
11705				sync_size -= sub_size;
11706			}
11707			if(entry->is_sub_map) {
11708				vm_map_offset_t sub_start;
11709				vm_map_offset_t sub_end;
11710
11711				sub_start = (start - entry->vme_start)
11712					+ entry->offset;
11713				sub_end = sub_start + sub_size;
11714				vm_map_machine_attribute(
11715					entry->object.sub_map,
11716					sub_start,
11717					sub_end,
11718					attribute, value);
11719			} else {
11720				if(entry->object.vm_object) {
11721					vm_page_t		m;
11722					vm_object_t		object;
11723					vm_object_t		base_object;
11724					vm_object_t		last_object;
11725					vm_object_offset_t	offset;
11726					vm_object_offset_t	base_offset;
11727					vm_map_size_t		range;
11728					range = sub_size;
11729					offset = (start - entry->vme_start)
11730						+ entry->offset;
11731					base_offset = offset;
11732					object = entry->object.vm_object;
11733					base_object = object;
11734					last_object = NULL;
11735
11736					vm_object_lock(object);
11737
11738					while (range) {
11739						m = vm_page_lookup(
11740							object, offset);
11741
11742						if (m && !m->fictitious) {
11743						        ret =
11744								pmap_attribute_cache_sync(
11745									m->phys_page,
11746									PAGE_SIZE,
11747									attribute, value);
11748
11749						} else if (object->shadow) {
11750						        offset = offset + object->vo_shadow_offset;
11751							last_object = object;
11752							object = object->shadow;
11753							vm_object_lock(last_object->shadow);
11754							vm_object_unlock(last_object);
11755							continue;
11756						}
11757						range -= PAGE_SIZE;
11758
11759						if (base_object != object) {
11760						        vm_object_unlock(object);
11761							vm_object_lock(base_object);
11762							object = base_object;
11763						}
11764						/* Bump to the next page */
11765						base_offset += PAGE_SIZE;
11766						offset = base_offset;
11767					}
11768					vm_object_unlock(object);
11769				}
11770			}
11771			start += sub_size;
11772		} else {
11773			vm_map_unlock(map);
11774			return KERN_FAILURE;
11775		}
11776
11777	}
11778
11779	vm_map_unlock(map);
11780
11781	return ret;
11782}
11783
11784/*
11785 *	vm_map_behavior_set:
11786 *
11787 *	Sets the paging reference behavior of the specified address
11788 *	range in the target map.  Paging reference behavior affects
11789 *	how pagein operations resulting from faults on the map will be
11790 *	clustered.
11791 */
11792kern_return_t
11793vm_map_behavior_set(
11794	vm_map_t	map,
11795	vm_map_offset_t	start,
11796	vm_map_offset_t	end,
11797	vm_behavior_t	new_behavior)
11798{
11799	register vm_map_entry_t	entry;
11800	vm_map_entry_t	temp_entry;
11801
11802	XPR(XPR_VM_MAP,
11803	    "vm_map_behavior_set, 0x%X start 0x%X end 0x%X behavior %d",
11804	    map, start, end, new_behavior, 0);
11805
11806	if (start > end ||
11807	    start < vm_map_min(map) ||
11808	    end > vm_map_max(map)) {
11809		return KERN_NO_SPACE;
11810	}
11811
11812	switch (new_behavior) {
11813
11814	/*
11815	 * This first block of behaviors all set a persistent state on the specified
11816	 * memory range.  All we have to do here is to record the desired behavior
11817	 * in the vm_map_entry_t's.
11818	 */
11819
11820	case VM_BEHAVIOR_DEFAULT:
11821	case VM_BEHAVIOR_RANDOM:
11822	case VM_BEHAVIOR_SEQUENTIAL:
11823	case VM_BEHAVIOR_RSEQNTL:
11824	case VM_BEHAVIOR_ZERO_WIRED_PAGES:
11825		vm_map_lock(map);
11826
11827		/*
11828		 *	The entire address range must be valid for the map.
11829		 * 	Note that vm_map_range_check() does a
11830		 *	vm_map_lookup_entry() internally and returns the
11831		 *	entry containing the start of the address range if
11832		 *	the entire range is valid.
11833		 */
11834		if (vm_map_range_check(map, start, end, &temp_entry)) {
11835			entry = temp_entry;
11836			vm_map_clip_start(map, entry, start);
11837		}
11838		else {
11839			vm_map_unlock(map);
11840			return(KERN_INVALID_ADDRESS);
11841		}
11842
11843		while ((entry != vm_map_to_entry(map)) && (entry->vme_start < end)) {
11844			vm_map_clip_end(map, entry, end);
11845			if (entry->is_sub_map) {
11846				assert(!entry->use_pmap);
11847			}
11848
11849			if( new_behavior == VM_BEHAVIOR_ZERO_WIRED_PAGES ) {
11850				entry->zero_wired_pages = TRUE;
11851			} else {
11852				entry->behavior = new_behavior;
11853			}
11854			entry = entry->vme_next;
11855		}
11856
11857		vm_map_unlock(map);
11858		break;
11859
11860	/*
11861	 * The rest of these are different from the above in that they cause
11862	 * an immediate action to take place as opposed to setting a behavior that
11863	 * affects future actions.
11864	 */
11865
11866	case VM_BEHAVIOR_WILLNEED:
11867		return vm_map_willneed(map, start, end);
11868
11869	case VM_BEHAVIOR_DONTNEED:
11870		return vm_map_msync(map, start, end - start, VM_SYNC_DEACTIVATE | VM_SYNC_CONTIGUOUS);
11871
11872	case VM_BEHAVIOR_FREE:
11873		return vm_map_msync(map, start, end - start, VM_SYNC_KILLPAGES | VM_SYNC_CONTIGUOUS);
11874
11875	case VM_BEHAVIOR_REUSABLE:
11876		return vm_map_reusable_pages(map, start, end);
11877
11878	case VM_BEHAVIOR_REUSE:
11879		return vm_map_reuse_pages(map, start, end);
11880
11881	case VM_BEHAVIOR_CAN_REUSE:
11882		return vm_map_can_reuse(map, start, end);
11883
11884	default:
11885		return(KERN_INVALID_ARGUMENT);
11886	}
11887
11888	return(KERN_SUCCESS);
11889}
11890
11891
11892/*
11893 * Internals for madvise(MADV_WILLNEED) system call.
11894 *
11895 * The present implementation is to do a read-ahead if the mapping corresponds
11896 * to a mapped regular file.  If it's an anonymous mapping, then we do nothing
11897 * and basically ignore the "advice" (which we are always free to do).
11898 */
11899
11900
11901static kern_return_t
11902vm_map_willneed(
11903	vm_map_t	map,
11904	vm_map_offset_t	start,
11905	vm_map_offset_t	end
11906)
11907{
11908	vm_map_entry_t 			entry;
11909	vm_object_t			object;
11910	memory_object_t			pager;
11911	struct vm_object_fault_info	fault_info;
11912	kern_return_t			kr;
11913	vm_object_size_t		len;
11914	vm_object_offset_t		offset;
11915
11916	/*
11917	 * Fill in static values in fault_info.  Several fields get ignored by the code
11918	 * we call, but we'll fill them in anyway since uninitialized fields are bad
11919	 * when it comes to future backwards compatibility.
11920	 */
11921
11922	fault_info.interruptible = THREAD_UNINT;		/* ignored value */
11923	fault_info.behavior      = VM_BEHAVIOR_SEQUENTIAL;
11924	fault_info.no_cache      = FALSE;			/* ignored value */
11925	fault_info.stealth	 = TRUE;
11926	fault_info.io_sync = FALSE;
11927	fault_info.cs_bypass = FALSE;
11928	fault_info.mark_zf_absent = FALSE;
11929	fault_info.batch_pmap_op = FALSE;
11930
11931	/*
11932	 * The MADV_WILLNEED operation doesn't require any changes to the
11933	 * vm_map_entry_t's, so the read lock is sufficient.
11934	 */
11935
11936	vm_map_lock_read(map);
11937
11938	/*
11939	 * The madvise semantics require that the address range be fully
11940	 * allocated with no holes.  Otherwise, we're required to return
11941	 * an error.
11942	 */
11943
11944	if (! vm_map_range_check(map, start, end, &entry)) {
11945		vm_map_unlock_read(map);
11946		return KERN_INVALID_ADDRESS;
11947	}
11948
11949	/*
11950	 * Examine each vm_map_entry_t in the range.
11951	 */
11952	for (; entry != vm_map_to_entry(map) && start < end; ) {
11953
11954		/*
11955		 * The first time through, the start address could be anywhere
11956		 * within the vm_map_entry we found.  So adjust the offset to
11957		 * correspond.  After that, the offset will always be zero to
11958		 * correspond to the beginning of the current vm_map_entry.
11959		 */
11960		offset = (start - entry->vme_start) + entry->offset;
11961
11962		/*
11963		 * Set the length so we don't go beyond the end of the
11964		 * map_entry or beyond the end of the range we were given.
		 * This range could also span multiple map entries, all of which
		 * map different files, so make sure we only do the right amount
11967		 * of I/O for each object.  Note that it's possible for there
11968		 * to be multiple map entries all referring to the same object
11969		 * but with different page permissions, but it's not worth
11970		 * trying to optimize that case.
11971		 */
11972		len = MIN(entry->vme_end - start, end - start);
11973
11974		if ((vm_size_t) len != len) {
11975			/* 32-bit overflow */
11976			len = (vm_size_t) (0 - PAGE_SIZE);
11977		}
11978		fault_info.cluster_size = (vm_size_t) len;
11979		fault_info.lo_offset    = offset;
11980		fault_info.hi_offset    = offset + len;
11981		fault_info.user_tag     = entry->alias;
11982		fault_info.pmap_options = 0;
11983		if (entry->iokit_acct ||
11984		    (!entry->is_sub_map && !entry->use_pmap)) {
11985			fault_info.pmap_options |= PMAP_OPTIONS_ALT_ACCT;
11986		}
11987
11988		/*
11989		 * If there's no read permission to this mapping, then just
11990		 * skip it.
11991		 */
11992		if ((entry->protection & VM_PROT_READ) == 0) {
11993			entry = entry->vme_next;
11994			start = entry->vme_start;
11995			continue;
11996		}
11997
11998		/*
11999		 * Find the file object backing this map entry.  If there is
12000		 * none, then we simply ignore the "will need" advice for this
12001		 * entry and go on to the next one.
12002		 */
12003		if ((object = find_vnode_object(entry)) == VM_OBJECT_NULL) {
12004			entry = entry->vme_next;
12005			start = entry->vme_start;
12006			continue;
12007		}
12008
12009		/*
12010		 * The data_request() could take a long time, so let's
12011		 * release the map lock to avoid blocking other threads.
12012		 */
12013		vm_map_unlock_read(map);
12014
12015		vm_object_paging_begin(object);
12016		pager = object->pager;
12017		vm_object_unlock(object);
12018
12019		/*
12020		 * Get the data from the object asynchronously.
12021		 *
12022		 * Note that memory_object_data_request() places limits on the
12023		 * amount of I/O it will do.  Regardless of the len we
12024		 * specified, it won't do more than MAX_UPL_TRANSFER_BYTES and it
12025		 * silently truncates the len to that size.  This isn't
12026		 * necessarily bad since madvise shouldn't really be used to
12027		 * page in unlimited amounts of data.  Other Unix variants
12028		 * limit the willneed case as well.  If this turns out to be an
12029		 * issue for developers, then we can always adjust the policy
12030		 * here and still be backwards compatible since this is all
12031		 * just "advice".
12032		 */
12033		kr = memory_object_data_request(
12034			pager,
12035			offset + object->paging_offset,
12036			0,	/* ignored */
12037			VM_PROT_READ,
12038			(memory_object_fault_info_t)&fault_info);
12039
12040		vm_object_lock(object);
12041		vm_object_paging_end(object);
12042		vm_object_unlock(object);
12043
12044		/*
12045		 * If we couldn't do the I/O for some reason, just give up on
12046		 * the madvise.  We still return success to the user since
12047		 * madvise isn't supposed to fail when the advice can't be
12048		 * taken.
12049		 */
12050		if (kr != KERN_SUCCESS) {
12051			return KERN_SUCCESS;
12052		}
12053
12054		start += len;
12055		if (start >= end) {
12056			/* done */
12057			return KERN_SUCCESS;
12058		}
12059
12060		/* look up next entry */
12061		vm_map_lock_read(map);
12062		if (! vm_map_lookup_entry(map, start, &entry)) {
12063			/*
12064			 * There's a new hole in the address range.
12065			 */
12066			vm_map_unlock_read(map);
12067			return KERN_INVALID_ADDRESS;
12068		}
12069	}
12070
12071	vm_map_unlock_read(map);
12072	return KERN_SUCCESS;
12073}
12074
12075static boolean_t
12076vm_map_entry_is_reusable(
12077	vm_map_entry_t entry)
12078{
12079	vm_object_t object;
12080
12081	switch (entry->alias) {
12082	case VM_MEMORY_MALLOC:
12083	case VM_MEMORY_MALLOC_SMALL:
12084	case VM_MEMORY_MALLOC_LARGE:
12085	case VM_MEMORY_REALLOC:
12086	case VM_MEMORY_MALLOC_TINY:
12087	case VM_MEMORY_MALLOC_LARGE_REUSABLE:
12088	case VM_MEMORY_MALLOC_LARGE_REUSED:
12089		/*
12090		 * This is a malloc() memory region: check if it's still
12091		 * in its original state and can be re-used for more
12092		 * malloc() allocations.
12093		 */
12094		break;
12095	default:
12096		/*
12097		 * Not a malloc() memory region: let the caller decide if
12098		 * it's re-usable.
12099		 */
12100		return TRUE;
12101	}
12102
12103	if (entry->is_shared ||
12104	    entry->is_sub_map ||
12105	    entry->in_transition ||
12106	    entry->protection != VM_PROT_DEFAULT ||
12107	    entry->max_protection != VM_PROT_ALL ||
12108	    entry->inheritance != VM_INHERIT_DEFAULT ||
12109	    entry->no_cache ||
12110	    entry->permanent ||
12111	    entry->superpage_size != FALSE ||
12112	    entry->zero_wired_pages ||
12113	    entry->wired_count != 0 ||
12114	    entry->user_wired_count != 0) {
12115		return FALSE;
12116	}
12117
12118	object = entry->object.vm_object;
12119	if (object == VM_OBJECT_NULL) {
12120		return TRUE;
12121	}
12122	if (
12123#if 0
12124		/*
12125		 * Let's proceed even if the VM object is potentially
12126		 * shared.
12127		 * We check for this later when processing the actual
12128		 * VM pages, so the contents will be safe if shared.
12129		 *
12130		 * But we can still mark this memory region as "reusable" to
12131		 * acknowledge that the caller did let us know that the memory
12132		 * could be re-used and should not be penalized for holding
12133		 * on to it.  This allows its "resident size" to not include
12134		 * the reusable range.
12135		 */
12136	    object->ref_count == 1 &&
12137#endif
12138	    object->wired_page_count == 0 &&
12139	    object->copy == VM_OBJECT_NULL &&
12140	    object->shadow == VM_OBJECT_NULL &&
12141	    object->copy_strategy == MEMORY_OBJECT_COPY_SYMMETRIC &&
12142	    object->internal &&
12143	    !object->true_share &&
12144	    object->wimg_bits == VM_WIMG_USE_DEFAULT &&
12145	    !object->code_signed) {
12146		return TRUE;
12147	}
	return FALSE;
}
12152
12153static kern_return_t
12154vm_map_reuse_pages(
12155	vm_map_t	map,
12156	vm_map_offset_t	start,
12157	vm_map_offset_t	end)
12158{
12159	vm_map_entry_t 			entry;
12160	vm_object_t			object;
12161	vm_object_offset_t		start_offset, end_offset;
12162
12163	/*
12164	 * The MADV_REUSE operation doesn't require any changes to the
12165	 * vm_map_entry_t's, so the read lock is sufficient.
12166	 */
12167
12168	vm_map_lock_read(map);
12169
12170	/*
12171	 * The madvise semantics require that the address range be fully
12172	 * allocated with no holes.  Otherwise, we're required to return
12173	 * an error.
12174	 */
12175
12176	if (!vm_map_range_check(map, start, end, &entry)) {
12177		vm_map_unlock_read(map);
12178		vm_page_stats_reusable.reuse_pages_failure++;
12179		return KERN_INVALID_ADDRESS;
12180	}
12181
12182	/*
12183	 * Examine each vm_map_entry_t in the range.
12184	 */
12185	for (; entry != vm_map_to_entry(map) && entry->vme_start < end;
12186	     entry = entry->vme_next) {
12187		/*
12188		 * Sanity check on the VM map entry.
12189		 */
12190		if (! vm_map_entry_is_reusable(entry)) {
12191			vm_map_unlock_read(map);
12192			vm_page_stats_reusable.reuse_pages_failure++;
12193			return KERN_INVALID_ADDRESS;
12194		}
12195
12196		/*
12197		 * The first time through, the start address could be anywhere
12198		 * within the vm_map_entry we found.  So adjust the offset to
12199		 * correspond.
12200		 */
12201		if (entry->vme_start < start) {
12202			start_offset = start - entry->vme_start;
12203		} else {
12204			start_offset = 0;
12205		}
12206		end_offset = MIN(end, entry->vme_end) - entry->vme_start;
12207		start_offset += entry->offset;
12208		end_offset += entry->offset;
12209
12210		object = entry->object.vm_object;
12211		if (object != VM_OBJECT_NULL) {
12212			vm_object_lock(object);
12213			vm_object_reuse_pages(object, start_offset, end_offset,
12214					      TRUE);
12215			vm_object_unlock(object);
12216		}
12217
12218		if (entry->alias == VM_MEMORY_MALLOC_LARGE_REUSABLE) {
12219			/*
12220			 * XXX
12221			 * We do not hold the VM map exclusively here.
12222			 * The "alias" field is not that critical, so it's
12223			 * safe to update it here, as long as it is the only
12224			 * one that can be modified while holding the VM map
12225			 * "shared".
12226			 */
12227			entry->alias = VM_MEMORY_MALLOC_LARGE_REUSED;
12228		}
12229	}
12230
12231	vm_map_unlock_read(map);
12232	vm_page_stats_reusable.reuse_pages_success++;
12233	return KERN_SUCCESS;
12234}
12235
12236
12237static kern_return_t
12238vm_map_reusable_pages(
12239	vm_map_t	map,
12240	vm_map_offset_t	start,
12241	vm_map_offset_t	end)
12242{
12243	vm_map_entry_t 			entry;
12244	vm_object_t			object;
12245	vm_object_offset_t		start_offset, end_offset;
12246
12247	/*
12248	 * The MADV_REUSABLE operation doesn't require any changes to the
12249	 * vm_map_entry_t's, so the read lock is sufficient.
12250	 */
12251
12252	vm_map_lock_read(map);
12253
12254	/*
12255	 * The madvise semantics require that the address range be fully
12256	 * allocated with no holes.  Otherwise, we're required to return
12257	 * an error.
12258	 */
12259
12260	if (!vm_map_range_check(map, start, end, &entry)) {
12261		vm_map_unlock_read(map);
12262		vm_page_stats_reusable.reusable_pages_failure++;
12263		return KERN_INVALID_ADDRESS;
12264	}
12265
12266	/*
12267	 * Examine each vm_map_entry_t in the range.
12268	 */
12269	for (; entry != vm_map_to_entry(map) && entry->vme_start < end;
12270	     entry = entry->vme_next) {
12271		int kill_pages = 0;
12272
12273		/*
12274		 * Sanity check on the VM map entry.
12275		 */
12276		if (! vm_map_entry_is_reusable(entry)) {
12277			vm_map_unlock_read(map);
12278			vm_page_stats_reusable.reusable_pages_failure++;
12279			return KERN_INVALID_ADDRESS;
12280		}
12281
12282		/*
12283		 * The first time through, the start address could be anywhere
12284		 * within the vm_map_entry we found.  So adjust the offset to
12285		 * correspond.
12286		 */
12287		if (entry->vme_start < start) {
12288			start_offset = start - entry->vme_start;
12289		} else {
12290			start_offset = 0;
12291		}
12292		end_offset = MIN(end, entry->vme_end) - entry->vme_start;
12293		start_offset += entry->offset;
12294		end_offset += entry->offset;
12295
12296		object = entry->object.vm_object;
12297		if (object == VM_OBJECT_NULL)
12298			continue;
12299
12300
12301		vm_object_lock(object);
12302		if (object->ref_count == 1 &&
12303		    !object->shadow &&
12304		    /*
12305		     * "iokit_acct" entries are billed for their virtual size
12306		     * (rather than for their resident pages only), so they
12307		     * wouldn't benefit from making pages reusable, and it
12308		     * would be hard to keep track of pages that are both
12309		     * "iokit_acct" and "reusable" in the pmap stats and ledgers.
12310		     */
12311		    !(entry->iokit_acct ||
12312		      (!entry->is_sub_map && !entry->use_pmap)))
12313			kill_pages = 1;
12314		else
12315			kill_pages = -1;
12316		if (kill_pages != -1) {
12317			vm_object_deactivate_pages(object,
12318						   start_offset,
12319						   end_offset - start_offset,
12320						   kill_pages,
12321						   TRUE /*reusable_pages*/);
12322		} else {
12323			vm_page_stats_reusable.reusable_pages_shared++;
12324		}
12325		vm_object_unlock(object);
12326
12327		if (entry->alias == VM_MEMORY_MALLOC_LARGE ||
12328		    entry->alias == VM_MEMORY_MALLOC_LARGE_REUSED) {
12329			/*
12330			 * XXX
12331			 * We do not hold the VM map exclusively here.
12332			 * The "alias" field is not that critical, so it's
12333			 * safe to update it here, as long as it is the only
12334			 * one that can be modified while holding the VM map
12335			 * "shared".
12336			 */
12337			entry->alias = VM_MEMORY_MALLOC_LARGE_REUSABLE;
12338		}
12339	}
12340
12341	vm_map_unlock_read(map);
12342	vm_page_stats_reusable.reusable_pages_success++;
12343	return KERN_SUCCESS;
12344}
12345
12346
12347static kern_return_t
12348vm_map_can_reuse(
12349	vm_map_t	map,
12350	vm_map_offset_t	start,
12351	vm_map_offset_t	end)
12352{
12353	vm_map_entry_t 			entry;
12354
12355	/*
12356	 * The MADV_REUSABLE operation doesn't require any changes to the
12357	 * vm_map_entry_t's, so the read lock is sufficient.
12358	 */
12359
12360	vm_map_lock_read(map);
12361
12362	/*
12363	 * The madvise semantics require that the address range be fully
12364	 * allocated with no holes.  Otherwise, we're required to return
12365	 * an error.
12366	 */
12367
12368	if (!vm_map_range_check(map, start, end, &entry)) {
12369		vm_map_unlock_read(map);
12370		vm_page_stats_reusable.can_reuse_failure++;
12371		return KERN_INVALID_ADDRESS;
12372	}
12373
12374	/*
12375	 * Examine each vm_map_entry_t in the range.
12376	 */
12377	for (; entry != vm_map_to_entry(map) && entry->vme_start < end;
12378	     entry = entry->vme_next) {
12379		/*
12380		 * Sanity check on the VM map entry.
12381		 */
12382		if (! vm_map_entry_is_reusable(entry)) {
12383			vm_map_unlock_read(map);
12384			vm_page_stats_reusable.can_reuse_failure++;
12385			return KERN_INVALID_ADDRESS;
12386		}
12387	}
12388
12389	vm_map_unlock_read(map);
12390	vm_page_stats_reusable.can_reuse_success++;
12391	return KERN_SUCCESS;
12392}
12393
12394
12395/*
12396 *	Routine:	vm_map_entry_insert
12397 *
12398 *	Descritpion:	This routine inserts a new vm_entry in a locked map.
12399 */
12400vm_map_entry_t
12401vm_map_entry_insert(
12402	vm_map_t		map,
12403	vm_map_entry_t		insp_entry,
12404	vm_map_offset_t		start,
12405	vm_map_offset_t		end,
12406	vm_object_t		object,
12407	vm_object_offset_t	offset,
12408	boolean_t		needs_copy,
12409	boolean_t		is_shared,
12410	boolean_t		in_transition,
12411	vm_prot_t		cur_protection,
12412	vm_prot_t		max_protection,
12413	vm_behavior_t		behavior,
12414	vm_inherit_t		inheritance,
12415	unsigned		wired_count,
12416	boolean_t		no_cache,
12417	boolean_t		permanent,
12418	unsigned int		superpage_size,
12419	boolean_t		clear_map_aligned,
12420	boolean_t		is_submap)
12421{
12422	vm_map_entry_t	new_entry;
12423
12424	assert(insp_entry != (vm_map_entry_t)0);
12425
12426	new_entry = vm_map_entry_create(map, !map->hdr.entries_pageable);
12427
12428	if (VM_MAP_PAGE_SHIFT(map) != PAGE_SHIFT) {
12429		new_entry->map_aligned = TRUE;
12430	} else {
12431		new_entry->map_aligned = FALSE;
12432	}
12433	if (clear_map_aligned &&
12434	    (! VM_MAP_PAGE_ALIGNED(start, VM_MAP_PAGE_MASK(map)) ||
12435	     ! VM_MAP_PAGE_ALIGNED(end, VM_MAP_PAGE_MASK(map)))) {
12436		new_entry->map_aligned = FALSE;
12437	}
12438
12439	new_entry->vme_start = start;
12440	new_entry->vme_end = end;
12441	assert(page_aligned(new_entry->vme_start));
12442	assert(page_aligned(new_entry->vme_end));
12443	if (new_entry->map_aligned) {
12444		assert(VM_MAP_PAGE_ALIGNED(new_entry->vme_start,
12445					   VM_MAP_PAGE_MASK(map)));
12446		assert(VM_MAP_PAGE_ALIGNED(new_entry->vme_end,
12447					   VM_MAP_PAGE_MASK(map)));
12448	}
12449	assert(new_entry->vme_start < new_entry->vme_end);
12450
12451	new_entry->object.vm_object = object;
12452	new_entry->offset = offset;
12453	new_entry->is_shared = is_shared;
12454	new_entry->is_sub_map = is_submap;
12455	new_entry->needs_copy = needs_copy;
12456	new_entry->in_transition = in_transition;
12457	new_entry->needs_wakeup = FALSE;
12458	new_entry->inheritance = inheritance;
12459	new_entry->protection = cur_protection;
12460	new_entry->max_protection = max_protection;
12461	new_entry->behavior = behavior;
12462	new_entry->wired_count = wired_count;
12463	new_entry->user_wired_count = 0;
12464	if (is_submap) {
12465		/*
12466		 * submap: "use_pmap" means "nested".
12467		 * default: false.
12468		 */
12469		new_entry->use_pmap = FALSE;
12470	} else {
12471		/*
12472		 * object: "use_pmap" means "use pmap accounting" for footprint.
12473		 * default: true.
12474		 */
12475		new_entry->use_pmap = TRUE;
12476	}
12477	new_entry->alias = 0;
12478	new_entry->zero_wired_pages = FALSE;
12479	new_entry->no_cache = no_cache;
12480	new_entry->permanent = permanent;
12481	if (superpage_size)
12482		new_entry->superpage_size = TRUE;
12483	else
12484		new_entry->superpage_size = FALSE;
12485	new_entry->used_for_jit = FALSE;
12486	new_entry->iokit_acct = FALSE;
12487
12488	/*
12489	 *	Insert the new entry into the list.
12490	 */
12491
12492	vm_map_store_entry_link(map, insp_entry, new_entry);
12493	map->size += end - start;
12494
12495	/*
12496	 *	Update the free space hint and the lookup hint.
12497	 */
12498
12499	SAVE_HINT_MAP_WRITE(map, new_entry);
12500	return new_entry;
12501}
12502
12503/*
12504 *	Routine:	vm_map_remap_extract
12505 *
12506 *	Descritpion:	This routine returns a vm_entry list from a map.
12507 */
12508static kern_return_t
12509vm_map_remap_extract(
12510	vm_map_t		map,
12511	vm_map_offset_t		addr,
12512	vm_map_size_t		size,
12513	boolean_t		copy,
12514	struct vm_map_header	*map_header,
12515	vm_prot_t		*cur_protection,
12516	vm_prot_t		*max_protection,
12517	/* What, no behavior? */
12518	vm_inherit_t		inheritance,
12519	boolean_t		pageable)
12520{
12521	kern_return_t		result;
12522	vm_map_size_t		mapped_size;
12523	vm_map_size_t		tmp_size;
12524	vm_map_entry_t		src_entry;     /* result of last map lookup */
12525	vm_map_entry_t		new_entry;
12526	vm_object_offset_t	offset;
12527	vm_map_offset_t		map_address;
12528	vm_map_offset_t		src_start;     /* start of entry to map */
12529	vm_map_offset_t		src_end;       /* end of region to be mapped */
12530	vm_object_t		object;
12531	vm_map_version_t	version;
12532	boolean_t		src_needs_copy;
12533	boolean_t		new_entry_needs_copy;
12534
12535	assert(map != VM_MAP_NULL);
12536	assert(size != 0);
12537	assert(size == vm_map_round_page(size, PAGE_MASK));
12538	assert(inheritance == VM_INHERIT_NONE ||
12539	       inheritance == VM_INHERIT_COPY ||
12540	       inheritance == VM_INHERIT_SHARE);
12541
12542	/*
12543	 *	Compute start and end of region.
12544	 */
12545	src_start = vm_map_trunc_page(addr, PAGE_MASK);
12546	src_end = vm_map_round_page(src_start + size, PAGE_MASK);
12547
12548
12549	/*
12550	 *	Initialize map_header.
12551	 */
12552	map_header->links.next = (struct vm_map_entry *)&map_header->links;
12553	map_header->links.prev = (struct vm_map_entry *)&map_header->links;
12554	map_header->nentries = 0;
12555	map_header->entries_pageable = pageable;
12556	map_header->page_shift = PAGE_SHIFT;
12557
12558	vm_map_store_init( map_header );
12559
12560	*cur_protection = VM_PROT_ALL;
12561	*max_protection = VM_PROT_ALL;
12562
12563	map_address = 0;
12564	mapped_size = 0;
12565	result = KERN_SUCCESS;
12566
12567	/*
12568	 *	The specified source virtual space might correspond to
12569	 *	multiple map entries, need to loop on them.
12570	 */
12571	vm_map_lock(map);
12572	while (mapped_size != size) {
12573		vm_map_size_t	entry_size;
12574
12575		/*
12576		 *	Find the beginning of the region.
12577		 */
12578		if (! vm_map_lookup_entry(map, src_start, &src_entry)) {
12579			result = KERN_INVALID_ADDRESS;
12580			break;
12581		}
12582
12583		if (src_start < src_entry->vme_start ||
12584		    (mapped_size && src_start != src_entry->vme_start)) {
12585			result = KERN_INVALID_ADDRESS;
12586			break;
12587		}
12588
12589		tmp_size = size - mapped_size;
12590		if (src_end > src_entry->vme_end)
12591			tmp_size -= (src_end - src_entry->vme_end);
12592
12593		entry_size = (vm_map_size_t)(src_entry->vme_end -
12594					     src_entry->vme_start);
12595
12596		if(src_entry->is_sub_map) {
12597			vm_map_reference(src_entry->object.sub_map);
12598			object = VM_OBJECT_NULL;
12599		} else {
12600			object = src_entry->object.vm_object;
12601			if (src_entry->iokit_acct) {
12602				/*
12603				 * This entry uses "IOKit accounting".
12604				 */
12605			} else if (object != VM_OBJECT_NULL &&
12606				   object->purgable != VM_PURGABLE_DENY) {
12607				/*
12608				 * Purgeable objects have their own accounting:
12609				 * no pmap accounting for them.
12610				 */
12611				assert(!src_entry->use_pmap);
12612			} else {
12613				/*
12614				 * Not IOKit or purgeable:
12615				 * must be accounted by pmap stats.
12616				 */
12617				assert(src_entry->use_pmap);
12618			}
12619
12620			if (object == VM_OBJECT_NULL) {
12621				object = vm_object_allocate(entry_size);
12622				src_entry->offset = 0;
12623				src_entry->object.vm_object = object;
12624			} else if (object->copy_strategy !=
12625				   MEMORY_OBJECT_COPY_SYMMETRIC) {
12626				/*
12627				 *	We are already using an asymmetric
12628				 *	copy, and therefore we already have
12629				 *	the right object.
12630				 */
12631				assert(!src_entry->needs_copy);
12632			} else if (src_entry->needs_copy || object->shadowed ||
12633				   (object->internal && !object->true_share &&
12634				    !src_entry->is_shared &&
12635				    object->vo_size > entry_size)) {
12636
12637				vm_object_shadow(&src_entry->object.vm_object,
12638						 &src_entry->offset,
12639						 entry_size);
12640
12641				if (!src_entry->needs_copy &&
12642				    (src_entry->protection & VM_PROT_WRITE)) {
12643				        vm_prot_t prot;
12644
12645				        prot = src_entry->protection & ~VM_PROT_WRITE;
12646
12647					if (override_nx(map, src_entry->alias) && prot)
12648					        prot |= VM_PROT_EXECUTE;
12649
12650					if(map->mapped_in_other_pmaps) {
12651						vm_object_pmap_protect(
12652							src_entry->object.vm_object,
12653							src_entry->offset,
12654							entry_size,
12655							PMAP_NULL,
12656							src_entry->vme_start,
12657							prot);
12658					} else {
12659						pmap_protect(vm_map_pmap(map),
12660							     src_entry->vme_start,
12661							     src_entry->vme_end,
12662							     prot);
12663					}
12664				}
12665
12666				object = src_entry->object.vm_object;
12667				src_entry->needs_copy = FALSE;
12668			}
12669
12670
12671			vm_object_lock(object);
12672			vm_object_reference_locked(object); /* object ref. for new entry */
12673			if (object->copy_strategy ==
12674			    MEMORY_OBJECT_COPY_SYMMETRIC) {
12675				object->copy_strategy =
12676					MEMORY_OBJECT_COPY_DELAY;
12677			}
12678			vm_object_unlock(object);
12679		}
12680
12681		offset = src_entry->offset + (src_start - src_entry->vme_start);
12682
12683		new_entry = _vm_map_entry_create(map_header, !map_header->entries_pageable);
12684		vm_map_entry_copy(new_entry, src_entry);
12685		if (new_entry->is_sub_map) {
12686			/* clr address space specifics */
12687			new_entry->use_pmap = FALSE;
12688		}
12689
12690		new_entry->map_aligned = FALSE;
12691
12692		new_entry->vme_start = map_address;
12693		new_entry->vme_end = map_address + tmp_size;
12694		assert(new_entry->vme_start < new_entry->vme_end);
12695		new_entry->inheritance = inheritance;
12696		new_entry->offset = offset;
12697
12698		/*
12699		 * The new region has to be copied now if required.
12700		 */
12701	RestartCopy:
12702		if (!copy) {
12703			/*
12704			 * Cannot allow an entry describing a JIT
12705			 * region to be shared across address spaces.
12706			 */
12707			if (src_entry->used_for_jit == TRUE) {
12708				result = KERN_INVALID_ARGUMENT;
12709				break;
12710			}
12711			src_entry->is_shared = TRUE;
12712			new_entry->is_shared = TRUE;
12713			if (!(new_entry->is_sub_map))
12714				new_entry->needs_copy = FALSE;
12715
12716		} else if (src_entry->is_sub_map) {
12717			/* make this a COW sub_map if not already */
12718			new_entry->needs_copy = TRUE;
12719			object = VM_OBJECT_NULL;
12720		} else if (src_entry->wired_count == 0 &&
12721			   vm_object_copy_quickly(&new_entry->object.vm_object,
12722						  new_entry->offset,
12723						  (new_entry->vme_end -
12724						   new_entry->vme_start),
12725						  &src_needs_copy,
12726						  &new_entry_needs_copy)) {
12727
12728			new_entry->needs_copy = new_entry_needs_copy;
12729			new_entry->is_shared = FALSE;
12730
12731			/*
12732			 * Handle copy_on_write semantics.
12733			 */
12734			if (src_needs_copy && !src_entry->needs_copy) {
12735			        vm_prot_t prot;
12736
12737				prot = src_entry->protection & ~VM_PROT_WRITE;
12738
12739				if (override_nx(map, src_entry->alias) && prot)
12740				        prot |= VM_PROT_EXECUTE;
12741
12742				vm_object_pmap_protect(object,
12743						       offset,
12744						       entry_size,
12745						       ((src_entry->is_shared
12746							 || map->mapped_in_other_pmaps) ?
12747							PMAP_NULL : map->pmap),
12748						       src_entry->vme_start,
12749						       prot);
12750
12751				src_entry->needs_copy = TRUE;
12752			}
12753			/*
12754			 * Throw away the old object reference of the new entry.
12755			 */
12756			vm_object_deallocate(object);
12757
12758		} else {
12759			new_entry->is_shared = FALSE;
12760
12761			/*
12762			 * The map can be safely unlocked since we
12763			 * already hold a reference on the object.
12764			 *
12765			 * Record the timestamp of the map for later
12766			 * verification, and unlock the map.
12767			 */
12768			version.main_timestamp = map->timestamp;
12769			vm_map_unlock(map); 	/* Increments timestamp once! */
12770
12771			/*
12772			 * Perform the copy.
12773			 */
12774			if (src_entry->wired_count > 0) {
12775				vm_object_lock(object);
12776				result = vm_object_copy_slowly(
12777					object,
12778					offset,
12779					entry_size,
12780					THREAD_UNINT,
12781					&new_entry->object.vm_object);
12782
12783				new_entry->offset = 0;
12784				new_entry->needs_copy = FALSE;
12785			} else {
12786				result = vm_object_copy_strategically(
12787					object,
12788					offset,
12789					entry_size,
12790					&new_entry->object.vm_object,
12791					&new_entry->offset,
12792					&new_entry_needs_copy);
12793
12794				new_entry->needs_copy = new_entry_needs_copy;
12795			}
12796
12797			/*
12798			 * Throw away the old object reference of the new entry.
12799			 */
12800			vm_object_deallocate(object);
12801
12802			if (result != KERN_SUCCESS &&
12803			    result != KERN_MEMORY_RESTART_COPY) {
12804				_vm_map_entry_dispose(map_header, new_entry);
12805				break;
12806			}
12807
12808			/*
12809			 * Verify that the map has not substantially
12810			 * changed while the copy was being made.
12811			 */
12812
12813			vm_map_lock(map);
12814			if (version.main_timestamp + 1 != map->timestamp) {
12815				/*
12816				 * Simple version comparison failed.
12817				 *
12818				 * Retry the lookup and verify that the
12819				 * same object/offset are still present.
12820				 */
12821				vm_object_deallocate(new_entry->
12822						     object.vm_object);
12823				_vm_map_entry_dispose(map_header, new_entry);
12824				if (result == KERN_MEMORY_RESTART_COPY)
12825					result = KERN_SUCCESS;
12826				continue;
12827			}
12828
12829			if (result == KERN_MEMORY_RESTART_COPY) {
12830				vm_object_reference(object);
12831				goto RestartCopy;
12832			}
12833		}
12834
12835		_vm_map_store_entry_link(map_header,
12836				   map_header->links.prev, new_entry);
12837
		/* Protections for submap mapping are irrelevant here */
12839		if( !src_entry->is_sub_map ) {
12840			*cur_protection &= src_entry->protection;
12841			*max_protection &= src_entry->max_protection;
12842		}
12843		map_address += tmp_size;
12844		mapped_size += tmp_size;
12845		src_start += tmp_size;
12846
12847	} /* end while */
12848
12849	vm_map_unlock(map);
12850	if (result != KERN_SUCCESS) {
12851		/*
12852		 * Free all allocated elements.
12853		 */
12854		for (src_entry = map_header->links.next;
12855		     src_entry != (struct vm_map_entry *)&map_header->links;
12856		     src_entry = new_entry) {
12857			new_entry = src_entry->vme_next;
12858			_vm_map_store_entry_unlink(map_header, src_entry);
12859			if (src_entry->is_sub_map) {
12860				vm_map_deallocate(src_entry->object.sub_map);
12861			} else {
12862				vm_object_deallocate(src_entry->object.vm_object);
12863			}
12864			_vm_map_entry_dispose(map_header, src_entry);
12865		}
12866	}
12867	return result;
12868}
12869
12870/*
12871 *	Routine:	vm_remap
12872 *
12873 *			Map portion of a task's address space.
12874 *			Mapped region must not overlap more than
12875 *			one vm memory object. Protections and
12876 *			inheritance attributes remain the same
12877 *			as in the original task and are	out parameters.
12878 *			Source and Target task can be identical
12879 *			Other attributes are identical as for vm_map()
12880 */
12881kern_return_t
12882vm_map_remap(
12883	vm_map_t		target_map,
12884	vm_map_address_t	*address,
12885	vm_map_size_t		size,
12886	vm_map_offset_t		mask,
12887	int			flags,
12888	vm_map_t		src_map,
12889	vm_map_offset_t		memory_address,
12890	boolean_t		copy,
12891	vm_prot_t		*cur_protection,
12892	vm_prot_t		*max_protection,
12893	vm_inherit_t		inheritance)
12894{
12895	kern_return_t		result;
12896	vm_map_entry_t		entry;
12897	vm_map_entry_t		insp_entry = VM_MAP_ENTRY_NULL;
12898	vm_map_entry_t		new_entry;
12899	struct vm_map_header	map_header;
12900	vm_map_offset_t		offset_in_mapping;
12901
12902	if (target_map == VM_MAP_NULL)
12903		return KERN_INVALID_ARGUMENT;
12904
12905	switch (inheritance) {
12906	case VM_INHERIT_NONE:
12907	case VM_INHERIT_COPY:
12908	case VM_INHERIT_SHARE:
12909		if (size != 0 && src_map != VM_MAP_NULL)
12910			break;
12911		/*FALL THRU*/
12912	default:
12913		return KERN_INVALID_ARGUMENT;
12914	}
12915
12916	/*
12917	 * If the user is requesting that we return the address of the
12918	 * first byte of the data (rather than the base of the page),
12919	 * then we use different rounding semantics: specifically,
12920	 * we assume that (memory_address, size) describes a region
12921	 * all of whose pages we must cover, rather than a base to be truncated
12922	 * down and a size to be added to that base.  So we figure out
12923	 * the highest page that the requested region includes and make
12924	 * sure that the size will cover it.
12925	 *
	 * The key example we're worried about is of the form:
12927	 *
12928	 * 		memory_address = 0x1ff0, size = 0x20
12929	 *
12930	 * With the old semantics, we round down the memory_address to 0x1000
12931	 * and round up the size to 0x1000, resulting in our covering *only*
12932	 * page 0x1000.  With the new semantics, we'd realize that the region covers
12933	 * 0x1ff0-0x2010, and compute a size of 0x2000.  Thus, we cover both page
12934	 * 0x1000 and page 0x2000 in the region we remap.
12935	 */
12936	if ((flags & VM_FLAGS_RETURN_DATA_ADDR) != 0) {
12937		offset_in_mapping = memory_address - vm_map_trunc_page(memory_address, PAGE_MASK);
12938		size = vm_map_round_page(memory_address + size - vm_map_trunc_page(memory_address, PAGE_MASK), PAGE_MASK);
12939	} else {
12940		size = vm_map_round_page(size, PAGE_MASK);
12941	}
12942
12943	result = vm_map_remap_extract(src_map, memory_address,
12944				      size, copy, &map_header,
12945				      cur_protection,
12946				      max_protection,
12947				      inheritance,
12948				      target_map->hdr.entries_pageable);
12949
12950	if (result != KERN_SUCCESS) {
12951		return result;
12952	}
12953
12954	/*
12955	 * Allocate/check a range of free virtual address
12956	 * space for the target
12957	 */
12958	*address = vm_map_trunc_page(*address,
12959				     VM_MAP_PAGE_MASK(target_map));
12960	vm_map_lock(target_map);
12961	result = vm_map_remap_range_allocate(target_map, address, size,
12962					     mask, flags, &insp_entry);
12963
12964	for (entry = map_header.links.next;
12965	     entry != (struct vm_map_entry *)&map_header.links;
12966	     entry = new_entry) {
12967		new_entry = entry->vme_next;
12968		_vm_map_store_entry_unlink(&map_header, entry);
12969		if (result == KERN_SUCCESS) {
12970			entry->vme_start += *address;
12971			entry->vme_end += *address;
12972			assert(!entry->map_aligned);
12973			vm_map_store_entry_link(target_map, insp_entry, entry);
12974			insp_entry = entry;
12975		} else {
12976			if (!entry->is_sub_map) {
12977				vm_object_deallocate(entry->object.vm_object);
12978			} else {
12979				vm_map_deallocate(entry->object.sub_map);
12980			}
12981			_vm_map_entry_dispose(&map_header, entry);
12982		}
12983	}
12984
12985	if( target_map->disable_vmentry_reuse == TRUE) {
12986		if( target_map->highest_entry_end < insp_entry->vme_end ){
12987			target_map->highest_entry_end = insp_entry->vme_end;
12988		}
12989	}
12990
12991	if (result == KERN_SUCCESS) {
12992		target_map->size += size;
12993		SAVE_HINT_MAP_WRITE(target_map, insp_entry);
12994	}
12995	vm_map_unlock(target_map);
12996
12997	if (result == KERN_SUCCESS && target_map->wiring_required)
12998		result = vm_map_wire(target_map, *address,
12999				     *address + size, *cur_protection, TRUE);
13000
13001	/*
13002	 * If requested, return the address of the data pointed to by the
13003	 * request, rather than the base of the resulting page.
13004	 */
13005	if ((flags & VM_FLAGS_RETURN_DATA_ADDR) != 0) {
13006		*address += offset_in_mapping;
13007	}
13008
13009	return result;
13010}
13011
13012/*
13013 *	Routine:	vm_map_remap_range_allocate
13014 *
13015 *	Description:
13016 *		Allocate a range in the specified virtual address map.
13017 *		returns the address and the map entry just before the allocated
13018 *		range
13019 *
13020 *	Map must be locked.
13021 */
13022
13023static kern_return_t
13024vm_map_remap_range_allocate(
13025	vm_map_t		map,
13026	vm_map_address_t	*address,	/* IN/OUT */
13027	vm_map_size_t		size,
13028	vm_map_offset_t		mask,
13029	int			flags,
13030	vm_map_entry_t		*map_entry)	/* OUT */
13031{
13032	vm_map_entry_t	entry;
13033	vm_map_offset_t	start;
13034	vm_map_offset_t	end;
13035	kern_return_t	kr;
13036
13037StartAgain: ;
13038
13039	start = *address;
13040
13041	if (flags & VM_FLAGS_ANYWHERE)
13042	{
13043		/*
13044		 *	Calculate the first possible address.
13045		 */
13046
13047		if (start < map->min_offset)
13048			start = map->min_offset;
13049		if (start > map->max_offset)
13050			return(KERN_NO_SPACE);
13051
13052		/*
13053		 *	Look for the first possible address;
13054		 *	if there's already something at this
13055		 *	address, we have to start after it.
13056		 */
13057
13058		if( map->disable_vmentry_reuse == TRUE) {
13059			VM_MAP_HIGHEST_ENTRY(map, entry, start);
13060		} else {
13061			assert(first_free_is_valid(map));
13062			if (start == map->min_offset) {
13063				if ((entry = map->first_free) != vm_map_to_entry(map))
13064					start = entry->vme_end;
13065			} else {
13066				vm_map_entry_t	tmp_entry;
13067				if (vm_map_lookup_entry(map, start, &tmp_entry))
13068					start = tmp_entry->vme_end;
13069				entry = tmp_entry;
13070			}
13071			start = vm_map_round_page(start,
13072						  VM_MAP_PAGE_MASK(map));
13073		}
13074
13075		/*
13076		 *	In any case, the "entry" always precedes
13077		 *	the proposed new region throughout the
13078		 *	loop:
13079		 */
13080
13081		while (TRUE) {
13082			register vm_map_entry_t	next;
13083
13084			/*
13085			 *	Find the end of the proposed new region.
13086			 *	Be sure we didn't go beyond the end, or
13087			 *	wrap around the address.
13088			 */
13089
13090			end = ((start + mask) & ~mask);
13091			end = vm_map_round_page(end,
13092						VM_MAP_PAGE_MASK(map));
13093			if (end < start)
13094				return(KERN_NO_SPACE);
13095			start = end;
13096			end += size;
13097
13098			if ((end > map->max_offset) || (end < start)) {
13099				if (map->wait_for_space) {
13100					if (size <= (map->max_offset -
13101						     map->min_offset)) {
13102						assert_wait((event_t) map, THREAD_INTERRUPTIBLE);
13103						vm_map_unlock(map);
13104						thread_block(THREAD_CONTINUE_NULL);
13105						vm_map_lock(map);
13106						goto StartAgain;
13107					}
13108				}
13109
13110				return(KERN_NO_SPACE);
13111			}
13112
13113			/*
13114			 *	If there are no more entries, we must win.
13115			 */
13116
13117			next = entry->vme_next;
13118			if (next == vm_map_to_entry(map))
13119				break;
13120
13121			/*
13122			 *	If there is another entry, it must be
13123			 *	after the end of the potential new region.
13124			 */
13125
13126			if (next->vme_start >= end)
13127				break;
13128
13129			/*
13130			 *	Didn't fit -- move to the next entry.
13131			 */
13132
13133			entry = next;
13134			start = entry->vme_end;
13135		}
13136		*address = start;
13137	} else {
13138		vm_map_entry_t		temp_entry;
13139
13140		/*
13141		 *	Verify that:
13142		 *		the address doesn't itself violate
13143		 *		the mask requirement.
13144		 */
13145
13146		if ((start & mask) != 0)
13147			return(KERN_NO_SPACE);
13148
13149
13150		/*
13151		 *	...	the address is within bounds
13152		 */
13153
13154		end = start + size;
13155
13156		if ((start < map->min_offset) ||
13157		    (end > map->max_offset) ||
13158		    (start >= end)) {
13159			return(KERN_INVALID_ADDRESS);
13160		}
13161
13162		/*
13163		 * If we're asked to overwrite whatever was mapped in that
13164		 * range, first deallocate that range.
13165		 */
13166		if (flags & VM_FLAGS_OVERWRITE) {
13167			vm_map_t zap_map;
13168
13169			/*
13170			 * We use a "zap_map" to avoid having to unlock
13171			 * the "map" in vm_map_delete(), which would compromise
13172			 * the atomicity of the "deallocate" and then "remap"
13173			 * combination.
13174			 */
13175			zap_map = vm_map_create(PMAP_NULL,
13176						start,
13177						end,
13178						map->hdr.entries_pageable);
13179			if (zap_map == VM_MAP_NULL) {
13180				return KERN_RESOURCE_SHORTAGE;
13181			}
13182			vm_map_set_page_shift(zap_map, VM_MAP_PAGE_SHIFT(map));
13183
13184			kr = vm_map_delete(map, start, end,
13185					   (VM_MAP_REMOVE_SAVE_ENTRIES |
13186					    VM_MAP_REMOVE_NO_MAP_ALIGN),
13187					   zap_map);
13188			if (kr == KERN_SUCCESS) {
13189				vm_map_destroy(zap_map,
13190					       VM_MAP_REMOVE_NO_PMAP_CLEANUP);
13191				zap_map = VM_MAP_NULL;
13192			}
13193		}
13194
13195		/*
13196		 *	...	the starting address isn't allocated
13197		 */
13198
13199		if (vm_map_lookup_entry(map, start, &temp_entry))
13200			return(KERN_NO_SPACE);
13201
13202		entry = temp_entry;
13203
13204		/*
13205		 *	...	the next region doesn't overlap the
13206		 *		end point.
13207		 */
13208
13209		if ((entry->vme_next != vm_map_to_entry(map)) &&
13210		    (entry->vme_next->vme_start < end))
13211			return(KERN_NO_SPACE);
13212	}
13213	*map_entry = entry;
13214	return(KERN_SUCCESS);
13215}
13216
13217/*
13218 *	vm_map_switch:
13219 *
13220 *	Set the address map for the current thread to the specified map
13221 */
13222
13223vm_map_t
13224vm_map_switch(
13225	vm_map_t	map)
13226{
13227	int		mycpu;
13228	thread_t	thread = current_thread();
13229	vm_map_t	oldmap = thread->map;
13230
13231	mp_disable_preemption();
13232	mycpu = cpu_number();
13233
13234	/*
13235	 *	Deactivate the current map and activate the requested map
13236	 */
13237	PMAP_SWITCH_USER(thread, map, mycpu);
13238
13239	mp_enable_preemption();
13240	return(oldmap);
13241}
13242
13243
13244/*
13245 *	Routine:	vm_map_write_user
13246 *
13247 *	Description:
13248 *		Copy out data from a kernel space into space in the
13249 *		destination map. The space must already exist in the
13250 *		destination map.
13251 *		NOTE:  This routine should only be called by threads
 *		which can block on a page fault, i.e. kernel-mode user
 *		threads.
13254 *
13255 */
13256kern_return_t
13257vm_map_write_user(
13258	vm_map_t		map,
13259	void			*src_p,
13260	vm_map_address_t	dst_addr,
13261	vm_size_t		size)
13262{
13263	kern_return_t	kr = KERN_SUCCESS;
13264
13265	if(current_map() == map) {
13266		if (copyout(src_p, dst_addr, size)) {
13267			kr = KERN_INVALID_ADDRESS;
13268		}
13269	} else {
13270		vm_map_t	oldmap;
13271
13272		/* take on the identity of the target map while doing */
13273		/* the transfer */
13274
13275		vm_map_reference(map);
13276		oldmap = vm_map_switch(map);
13277		if (copyout(src_p, dst_addr, size)) {
13278			kr = KERN_INVALID_ADDRESS;
13279		}
13280		vm_map_switch(oldmap);
13281		vm_map_deallocate(map);
13282	}
13283	return kr;
13284}
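
/*
 * Usage sketch (hypothetical, for illustration only): a kernel-mode caller
 * publishing a 64-bit value into another task's map with vm_map_write_user().
 * The helper name and the assumption that "uaddr" is already mapped writable
 * in "map" are illustrative, not part of this file's interface.
 */
#if 0
static kern_return_t
example_publish_counter(
	vm_map_t		map,
	vm_map_address_t	uaddr,
	uint64_t		value)
{
	/* copies sizeof(value) bytes from the kernel stack into "map" */
	return vm_map_write_user(map, &value, uaddr, sizeof (value));
}
#endif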
13285
13286/*
13287 *	Routine:	vm_map_read_user
13288 *
13289 *	Description:
13290 *		Copy in data from a user space source map into the
13291 *		kernel map. The space must already exist in the
13292 *		kernel map.
13293 *		NOTE:  This routine should only be called by threads
 *		which can block on a page fault, i.e. kernel-mode user
 *		threads.
13296 *
13297 */
13298kern_return_t
13299vm_map_read_user(
13300	vm_map_t		map,
13301	vm_map_address_t	src_addr,
13302	void			*dst_p,
13303	vm_size_t		size)
13304{
13305	kern_return_t	kr = KERN_SUCCESS;
13306
13307	if(current_map() == map) {
13308		if (copyin(src_addr, dst_p, size)) {
13309			kr = KERN_INVALID_ADDRESS;
13310		}
13311	} else {
13312		vm_map_t	oldmap;
13313
13314		/* take on the identity of the target map while doing */
13315		/* the transfer */
13316
13317		vm_map_reference(map);
13318		oldmap = vm_map_switch(map);
13319		if (copyin(src_addr, dst_p, size)) {
13320			kr = KERN_INVALID_ADDRESS;
13321		}
13322		vm_map_switch(oldmap);
13323		vm_map_deallocate(map);
13324	}
13325	return kr;
13326}
13327
13328
13329/*
13330 *	vm_map_check_protection:
13331 *
13332 *	Assert that the target map allows the specified
13333 *	privilege on the entire address region given.
13334 *	The entire region must be allocated.
13335 */
13336boolean_t
13337vm_map_check_protection(vm_map_t map, vm_map_offset_t start,
13338			vm_map_offset_t end, vm_prot_t protection)
13339{
13340	vm_map_entry_t entry;
13341	vm_map_entry_t tmp_entry;
13342
13343	vm_map_lock(map);
13344
13345	if (start < vm_map_min(map) || end > vm_map_max(map) || start > end)
13346	{
13347		vm_map_unlock(map);
13348		return (FALSE);
13349	}
13350
13351	if (!vm_map_lookup_entry(map, start, &tmp_entry)) {
13352		vm_map_unlock(map);
13353		return(FALSE);
13354	}
13355
13356	entry = tmp_entry;
13357
13358	while (start < end) {
13359		if (entry == vm_map_to_entry(map)) {
13360			vm_map_unlock(map);
13361			return(FALSE);
13362		}
13363
13364		/*
13365		 *	No holes allowed!
13366		 */
13367
13368		if (start < entry->vme_start) {
13369			vm_map_unlock(map);
13370			return(FALSE);
13371		}
13372
13373		/*
13374		 * Check protection associated with entry.
13375		 */
13376
13377		if ((entry->protection & protection) != protection) {
13378			vm_map_unlock(map);
13379			return(FALSE);
13380		}
13381
13382		/* go to next entry */
13383
13384		start = entry->vme_end;
13385		entry = entry->vme_next;
13386	}
13387	vm_map_unlock(map);
13388	return(TRUE);
13389}
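
/*
 * Usage sketch (hypothetical): reject a user-supplied range up front unless
 * the whole range is mapped with at least read permission.  The helper name
 * is illustrative only.
 */
#if 0
static kern_return_t
example_require_readable(
	vm_map_t	map,
	vm_map_offset_t	start,
	vm_map_size_t	size)
{
	if (!vm_map_check_protection(map, start, start + size, VM_PROT_READ)) {
		/* a hole, or an entry without VM_PROT_READ, was found */
		return KERN_PROTECTION_FAILURE;
	}
	return KERN_SUCCESS;
}
#endif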
13390
13391kern_return_t
13392vm_map_purgable_control(
13393	vm_map_t		map,
13394	vm_map_offset_t		address,
13395	vm_purgable_t		control,
13396	int			*state)
13397{
13398	vm_map_entry_t		entry;
13399	vm_object_t		object;
13400	kern_return_t		kr;
13401	boolean_t		was_nonvolatile;
13402
13403	/*
13404	 * Vet all the input parameters and current type and state of the
	 * underlying object.  Return with an error if anything is amiss.
13406	 */
13407	if (map == VM_MAP_NULL)
13408		return(KERN_INVALID_ARGUMENT);
13409
13410	if (control != VM_PURGABLE_SET_STATE &&
13411	    control != VM_PURGABLE_GET_STATE &&
13412	    control != VM_PURGABLE_PURGE_ALL)
13413		return(KERN_INVALID_ARGUMENT);
13414
13415	if (control == VM_PURGABLE_PURGE_ALL) {
13416		vm_purgeable_object_purge_all();
13417		return KERN_SUCCESS;
13418	}
13419
13420	if (control == VM_PURGABLE_SET_STATE &&
13421	    (((*state & ~(VM_PURGABLE_ALL_MASKS)) != 0) ||
13422	     ((*state & VM_PURGABLE_STATE_MASK) > VM_PURGABLE_STATE_MASK)))
13423		return(KERN_INVALID_ARGUMENT);
13424
13425	vm_map_lock_read(map);
13426
13427	if (!vm_map_lookup_entry(map, address, &entry) || entry->is_sub_map) {
13428
13429		/*
13430		 * Must pass a valid non-submap address.
13431		 */
13432		vm_map_unlock_read(map);
13433		return(KERN_INVALID_ADDRESS);
13434	}
13435
13436	if ((entry->protection & VM_PROT_WRITE) == 0) {
13437		/*
13438		 * Can't apply purgable controls to something you can't write.
13439		 */
13440		vm_map_unlock_read(map);
13441		return(KERN_PROTECTION_FAILURE);
13442	}
13443
13444	object = entry->object.vm_object;
13445	if (object == VM_OBJECT_NULL ||
13446	    object->purgable == VM_PURGABLE_DENY) {
13447		/*
13448		 * Object must already be present and be purgeable.
13449		 */
13450		vm_map_unlock_read(map);
13451		return KERN_INVALID_ARGUMENT;
13452	}
13453
13454	vm_object_lock(object);
13455
13456#if 00
13457	if (entry->offset != 0 ||
13458	    entry->vme_end - entry->vme_start != object->vo_size) {
13459		/*
13460		 * Can only apply purgable controls to the whole (existing)
13461		 * object at once.
13462		 */
13463		vm_map_unlock_read(map);
13464		vm_object_unlock(object);
13465		return KERN_INVALID_ARGUMENT;
13466	}
13467#endif
13468
13469	assert(!entry->is_sub_map);
13470	assert(!entry->use_pmap); /* purgeable has its own accounting */
13471
13472	vm_map_unlock_read(map);
13473
13474	was_nonvolatile = (object->purgable == VM_PURGABLE_NONVOLATILE);
13475
13476	kr = vm_object_purgable_control(object, control, state);
13477
13478	if (was_nonvolatile &&
13479	    object->purgable != VM_PURGABLE_NONVOLATILE &&
13480	    map->pmap == kernel_pmap) {
13481#if DEBUG
13482		object->vo_purgeable_volatilizer = kernel_task;
13483#endif /* DEBUG */
13484	}
13485
13486	vm_object_unlock(object);
13487
13488	return kr;
13489}
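
/*
 * Usage sketch (hypothetical): mark the purgeable object backing "address"
 * volatile so it may be reclaimed under memory pressure, then read the
 * current state back with VM_PURGABLE_GET_STATE.  The helper name is
 * illustrative only.
 */
#if 0
static kern_return_t
example_make_volatile(
	vm_map_t	map,
	vm_map_offset_t	address)
{
	kern_return_t	kr;
	int		state;

	state = VM_PURGABLE_VOLATILE;
	kr = vm_map_purgable_control(map, address,
				     VM_PURGABLE_SET_STATE, &state);
	if (kr != KERN_SUCCESS) {
		return kr;
	}
	return vm_map_purgable_control(map, address,
				       VM_PURGABLE_GET_STATE, &state);
}
#endif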
13490
13491kern_return_t
13492vm_map_page_query_internal(
13493	vm_map_t	target_map,
13494	vm_map_offset_t	offset,
13495	int		*disposition,
13496	int		*ref_count)
13497{
13498	kern_return_t			kr;
13499	vm_page_info_basic_data_t	info;
13500	mach_msg_type_number_t		count;
13501
13502	count = VM_PAGE_INFO_BASIC_COUNT;
13503	kr = vm_map_page_info(target_map,
13504			      offset,
13505			      VM_PAGE_INFO_BASIC,
13506			      (vm_page_info_t) &info,
13507			      &count);
13508	if (kr == KERN_SUCCESS) {
13509		*disposition = info.disposition;
13510		*ref_count = info.ref_count;
13511	} else {
13512		*disposition = 0;
13513		*ref_count = 0;
13514	}
13515
13516	return kr;
13517}
13518
13519kern_return_t
13520vm_map_page_info(
13521	vm_map_t		map,
13522	vm_map_offset_t		offset,
13523	vm_page_info_flavor_t	flavor,
13524	vm_page_info_t		info,
13525	mach_msg_type_number_t	*count)
13526{
13527	vm_map_entry_t		map_entry;
13528	vm_object_t		object;
13529	vm_page_t		m;
13530	kern_return_t		kr;
13531	kern_return_t		retval = KERN_SUCCESS;
13532	boolean_t		top_object;
13533	int			disposition;
13534	int 			ref_count;
13535	vm_page_info_basic_t	basic_info;
13536	int			depth;
13537	vm_map_offset_t		offset_in_page;
13538
13539	switch (flavor) {
13540	case VM_PAGE_INFO_BASIC:
13541		if (*count != VM_PAGE_INFO_BASIC_COUNT) {
13542			/*
13543			 * The "vm_page_info_basic_data" structure was not
13544			 * properly padded, so allow the size to be off by
13545			 * one to maintain backwards binary compatibility...
13546			 */
13547			if (*count != VM_PAGE_INFO_BASIC_COUNT - 1)
13548				return KERN_INVALID_ARGUMENT;
13549		}
13550		break;
13551	default:
13552		return KERN_INVALID_ARGUMENT;
13553	}
13554
13555	disposition = 0;
13556	ref_count = 0;
13557	top_object = TRUE;
13558	depth = 0;
13559
13560	retval = KERN_SUCCESS;
13561	offset_in_page = offset & PAGE_MASK;
13562	offset = vm_map_trunc_page(offset, PAGE_MASK);
13563
13564	vm_map_lock_read(map);
13565
13566	/*
13567	 * First, find the map entry covering "offset", going down
13568	 * submaps if necessary.
13569	 */
13570	for (;;) {
13571		if (!vm_map_lookup_entry(map, offset, &map_entry)) {
13572			vm_map_unlock_read(map);
13573			return KERN_INVALID_ADDRESS;
13574		}
13575		/* compute offset from this map entry's start */
13576		offset -= map_entry->vme_start;
13577		/* compute offset into this map entry's object (or submap) */
13578		offset += map_entry->offset;
13579
13580		if (map_entry->is_sub_map) {
13581			vm_map_t sub_map;
13582
13583			sub_map = map_entry->object.sub_map;
13584			vm_map_lock_read(sub_map);
13585			vm_map_unlock_read(map);
13586
13587			map = sub_map;
13588
13589			ref_count = MAX(ref_count, map->ref_count);
13590			continue;
13591		}
13592		break;
13593	}
13594
13595	object = map_entry->object.vm_object;
13596	if (object == VM_OBJECT_NULL) {
13597		/* no object -> no page */
13598		vm_map_unlock_read(map);
13599		goto done;
13600	}
13601
13602	vm_object_lock(object);
13603	vm_map_unlock_read(map);
13604
13605	/*
13606	 * Go down the VM object shadow chain until we find the page
13607	 * we're looking for.
13608	 */
13609	for (;;) {
13610		ref_count = MAX(ref_count, object->ref_count);
13611
13612		m = vm_page_lookup(object, offset);
13613
13614		if (m != VM_PAGE_NULL) {
13615			disposition |= VM_PAGE_QUERY_PAGE_PRESENT;
13616			break;
13617		} else {
13618#if MACH_PAGEMAP
13619			if (object->existence_map) {
13620				if (vm_external_state_get(object->existence_map,
13621							  offset) ==
13622				    VM_EXTERNAL_STATE_EXISTS) {
13623					/*
13624					 * this page has been paged out
13625					 */
13626				        disposition |= VM_PAGE_QUERY_PAGE_PAGED_OUT;
13627					break;
13628				}
13629			} else
13630#endif
13631			if (object->internal &&
13632			    object->alive &&
13633			    !object->terminating &&
13634			    object->pager_ready) {
13635
13636				if (COMPRESSED_PAGER_IS_ACTIVE || DEFAULT_FREEZER_COMPRESSED_PAGER_IS_ACTIVE) {
13637					if (VM_COMPRESSOR_PAGER_STATE_GET(
13638						    object,
13639						    offset)
13640					    == VM_EXTERNAL_STATE_EXISTS) {
13641						/* the pager has that page */
13642						disposition |= VM_PAGE_QUERY_PAGE_PAGED_OUT;
13643						break;
13644					}
13645				} else {
13646					memory_object_t pager;
13647
13648					vm_object_paging_begin(object);
13649					pager = object->pager;
13650					vm_object_unlock(object);
13651
13652					/*
13653					 * Ask the default pager if
13654					 * it has this page.
13655					 */
13656					kr = memory_object_data_request(
13657						pager,
13658						offset + object->paging_offset,
13659						0, /* just poke the pager */
13660						VM_PROT_READ,
13661						NULL);
13662
13663					vm_object_lock(object);
13664					vm_object_paging_end(object);
13665
13666					if (kr == KERN_SUCCESS) {
13667						/* the default pager has it */
13668						disposition |= VM_PAGE_QUERY_PAGE_PAGED_OUT;
13669						break;
13670					}
13671				}
13672			}
13673
13674			if (object->shadow != VM_OBJECT_NULL) {
13675			        vm_object_t shadow;
13676
13677				offset += object->vo_shadow_offset;
13678				shadow = object->shadow;
13679
13680				vm_object_lock(shadow);
13681				vm_object_unlock(object);
13682
13683				object = shadow;
13684				top_object = FALSE;
13685				depth++;
13686			} else {
13687//			        if (!object->internal)
13688//				        break;
13689//				retval = KERN_FAILURE;
13690//				goto done_with_object;
13691				break;
13692			}
13693		}
13694	}
	/* The ref_count is not strictly accurate; it measures the number    */
	/* of entities holding a ref on the object.  They may not be mapping */
	/* the object, or may not be mapping the section holding the         */
	/* target page, but it's still a ballpark number, and though an      */
	/* over-count, it picks up the copy-on-write cases.                  */

	/* We could also get a picture of page sharing from pmap_attributes, */
	/* but this would undercount, as only faulted-in mappings would      */
	/* show up.							      */
13704
13705	if (top_object == TRUE && object->shadow)
13706		disposition |= VM_PAGE_QUERY_PAGE_COPIED;
13707
13708	if (! object->internal)
13709		disposition |= VM_PAGE_QUERY_PAGE_EXTERNAL;
13710
13711	if (m == VM_PAGE_NULL)
13712	        goto done_with_object;
13713
13714	if (m->fictitious) {
13715		disposition |= VM_PAGE_QUERY_PAGE_FICTITIOUS;
13716		goto done_with_object;
13717	}
13718	if (m->dirty || pmap_is_modified(m->phys_page))
13719		disposition |= VM_PAGE_QUERY_PAGE_DIRTY;
13720
13721	if (m->reference || pmap_is_referenced(m->phys_page))
13722		disposition |= VM_PAGE_QUERY_PAGE_REF;
13723
13724	if (m->speculative)
13725		disposition |= VM_PAGE_QUERY_PAGE_SPECULATIVE;
13726
13727	if (m->cs_validated)
13728		disposition |= VM_PAGE_QUERY_PAGE_CS_VALIDATED;
13729	if (m->cs_tainted)
13730		disposition |= VM_PAGE_QUERY_PAGE_CS_TAINTED;
13731
13732done_with_object:
13733	vm_object_unlock(object);
13734done:
13735
13736	switch (flavor) {
13737	case VM_PAGE_INFO_BASIC:
13738		basic_info = (vm_page_info_basic_t) info;
13739		basic_info->disposition = disposition;
13740		basic_info->ref_count = ref_count;
13741		basic_info->object_id = (vm_object_id_t) (uintptr_t)
13742			VM_KERNEL_ADDRPERM(object);
13743		basic_info->offset =
13744			(memory_object_offset_t) offset + offset_in_page;
13745		basic_info->depth = depth;
13746		break;
13747	}
13748
13749	return retval;
13750}
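
/*
 * Usage sketch (hypothetical): ask whether the page containing "addr" is
 * resident, using the VM_PAGE_INFO_BASIC flavor.  The helper name is
 * illustrative only.
 */
#if 0
static boolean_t
example_page_is_resident(
	vm_map_t	map,
	vm_map_offset_t	addr)
{
	vm_page_info_basic_data_t	info;
	mach_msg_type_number_t		count;

	count = VM_PAGE_INFO_BASIC_COUNT;
	if (vm_map_page_info(map, addr, VM_PAGE_INFO_BASIC,
			     (vm_page_info_t) &info, &count) != KERN_SUCCESS) {
		return FALSE;
	}
	return (info.disposition & VM_PAGE_QUERY_PAGE_PRESENT) ? TRUE : FALSE;
}
#endif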
13751
13752/*
13753 *	vm_map_msync
13754 *
 *	Synchronises the specified memory range with its backing store
 *	image by either flushing or cleaning the contents to the appropriate
 *	memory manager, engaging in a memory object synchronize dialog with
 *	the manager.  The client doesn't return until the manager issues an
 *	m_o_s_completed message.  MIG magically converts the user task
 *	parameter to the task's address map.
13761 *
13762 *	interpretation of sync_flags
13763 *	VM_SYNC_INVALIDATE	- discard pages, only return precious
13764 *				  pages to manager.
13765 *
13766 *	VM_SYNC_INVALIDATE & (VM_SYNC_SYNCHRONOUS | VM_SYNC_ASYNCHRONOUS)
13767 *				- discard pages, write dirty or precious
13768 *				  pages back to memory manager.
13769 *
13770 *	VM_SYNC_SYNCHRONOUS | VM_SYNC_ASYNCHRONOUS
13771 *				- write dirty or precious pages back to
13772 *				  the memory manager.
13773 *
13774 *	VM_SYNC_CONTIGUOUS	- does everything normally, but if there
13775 *				  is a hole in the region, and we would
13776 *				  have returned KERN_SUCCESS, return
13777 *				  KERN_INVALID_ADDRESS instead.
13778 *
13779 *	NOTE
 *	The memory object attributes have not yet been implemented; this
 *	function will have to deal with the invalidate attribute.
13782 *
13783 *	RETURNS
13784 *	KERN_INVALID_TASK		Bad task parameter
13785 *	KERN_INVALID_ARGUMENT		both sync and async were specified.
13786 *	KERN_SUCCESS			The usual.
13787 *	KERN_INVALID_ADDRESS		There was a hole in the region.
13788 */
13789
13790kern_return_t
13791vm_map_msync(
13792	vm_map_t		map,
13793	vm_map_address_t	address,
13794	vm_map_size_t		size,
13795	vm_sync_t		sync_flags)
13796{
13797	msync_req_t		msr;
13798	msync_req_t		new_msr;
13799	queue_chain_t		req_q;	/* queue of requests for this msync */
13800	vm_map_entry_t		entry;
13801	vm_map_size_t		amount_left;
13802	vm_object_offset_t	offset;
13803	boolean_t		do_sync_req;
13804	boolean_t		had_hole = FALSE;
13805	memory_object_t		pager;
13806
13807	if ((sync_flags & VM_SYNC_ASYNCHRONOUS) &&
13808	    (sync_flags & VM_SYNC_SYNCHRONOUS))
13809		return(KERN_INVALID_ARGUMENT);
13810
	if (map == VM_MAP_NULL)
		return(KERN_INVALID_TASK);

	/*
	 * align address and size on page boundaries
	 */
	size = (vm_map_round_page(address + size,
				  VM_MAP_PAGE_MASK(map)) -
		vm_map_trunc_page(address,
				  VM_MAP_PAGE_MASK(map)));
	address = vm_map_trunc_page(address,
				    VM_MAP_PAGE_MASK(map));
13823
13824	if (size == 0)
13825		return(KERN_SUCCESS);
13826
13827	queue_init(&req_q);
13828	amount_left = size;
13829
13830	while (amount_left > 0) {
13831		vm_object_size_t	flush_size;
13832		vm_object_t		object;
13833
13834		vm_map_lock(map);
13835		if (!vm_map_lookup_entry(map,
13836					 vm_map_trunc_page(
13837						 address,
13838						 VM_MAP_PAGE_MASK(map)),
13839					 &entry)) {
13840
13841			vm_map_size_t	skip;
13842
13843			/*
13844			 * hole in the address map.
13845			 */
13846			had_hole = TRUE;
13847
13848			/*
13849			 * Check for empty map.
13850			 */
13851			if (entry == vm_map_to_entry(map) &&
13852			    entry->vme_next == entry) {
13853				vm_map_unlock(map);
13854				break;
13855			}
13856			/*
13857			 * Check that we don't wrap and that
13858			 * we have at least one real map entry.
13859			 */
13860			if ((map->hdr.nentries == 0) ||
13861			    (entry->vme_next->vme_start < address)) {
13862				vm_map_unlock(map);
13863				break;
13864			}
13865			/*
13866			 * Move up to the next entry if needed
13867			 */
13868			skip = (entry->vme_next->vme_start - address);
13869			if (skip >= amount_left)
13870				amount_left = 0;
13871			else
13872				amount_left -= skip;
13873			address = entry->vme_next->vme_start;
13874			vm_map_unlock(map);
13875			continue;
13876		}
13877
13878		offset = address - entry->vme_start;
13879
13880		/*
13881		 * do we have more to flush than is contained in this
13882		 * entry ?
		 * entry?
13884		if (amount_left + entry->vme_start + offset > entry->vme_end) {
13885			flush_size = entry->vme_end -
13886				(entry->vme_start + offset);
13887		} else {
13888			flush_size = amount_left;
13889		}
13890		amount_left -= flush_size;
13891		address += flush_size;
13892
13893		if (entry->is_sub_map == TRUE) {
13894			vm_map_t	local_map;
13895			vm_map_offset_t	local_offset;
13896
13897			local_map = entry->object.sub_map;
13898			local_offset = entry->offset;
13899			vm_map_unlock(map);
13900			if (vm_map_msync(
13901				    local_map,
13902				    local_offset,
13903				    flush_size,
13904				    sync_flags) == KERN_INVALID_ADDRESS) {
13905				had_hole = TRUE;
13906			}
13907			continue;
13908		}
13909		object = entry->object.vm_object;
13910
13911		/*
13912		 * We can't sync this object if the object has not been
13913		 * created yet
13914		 */
13915		if (object == VM_OBJECT_NULL) {
13916			vm_map_unlock(map);
13917			continue;
13918		}
13919		offset += entry->offset;
13920
13921                vm_object_lock(object);
13922
13923		if (sync_flags & (VM_SYNC_KILLPAGES | VM_SYNC_DEACTIVATE)) {
13924		        int kill_pages = 0;
13925			boolean_t reusable_pages = FALSE;
13926
13927			if (sync_flags & VM_SYNC_KILLPAGES) {
13928			        if (object->ref_count == 1 && !object->shadow)
13929				        kill_pages = 1;
13930				else
13931				        kill_pages = -1;
13932			}
13933			if (kill_pages != -1)
13934			        vm_object_deactivate_pages(object, offset,
13935							   (vm_object_size_t)flush_size, kill_pages, reusable_pages);
13936			vm_object_unlock(object);
13937			vm_map_unlock(map);
13938			continue;
13939		}
13940		/*
13941		 * We can't sync this object if there isn't a pager.
13942		 * Don't bother to sync internal objects, since there can't
13943		 * be any "permanent" storage for these objects anyway.
13944		 */
13945		if ((object->pager == MEMORY_OBJECT_NULL) ||
13946		    (object->internal) || (object->private)) {
13947			vm_object_unlock(object);
13948			vm_map_unlock(map);
13949			continue;
13950		}
13951		/*
13952		 * keep reference on the object until syncing is done
13953		 */
13954		vm_object_reference_locked(object);
13955		vm_object_unlock(object);
13956
13957		vm_map_unlock(map);
13958
13959		do_sync_req = vm_object_sync(object,
13960					     offset,
13961					     flush_size,
13962					     sync_flags & VM_SYNC_INVALIDATE,
13963					     ((sync_flags & VM_SYNC_SYNCHRONOUS) ||
13964					      (sync_flags & VM_SYNC_ASYNCHRONOUS)),
13965					     sync_flags & VM_SYNC_SYNCHRONOUS);
13966		/*
		 * only send an m_o_s if we returned pages or if the entry
		 * is writable (i.e. dirty pages may have already been sent back)
13969		 */
13970		if (!do_sync_req) {
13971			if ((sync_flags & VM_SYNC_INVALIDATE) && object->resident_page_count == 0) {
13972				/*
13973				 * clear out the clustering and read-ahead hints
13974				 */
13975				vm_object_lock(object);
13976
13977				object->pages_created = 0;
13978				object->pages_used = 0;
13979				object->sequential = 0;
13980				object->last_alloc = 0;
13981
13982				vm_object_unlock(object);
13983			}
13984			vm_object_deallocate(object);
13985			continue;
13986		}
13987		msync_req_alloc(new_msr);
13988
13989                vm_object_lock(object);
13990		offset += object->paging_offset;
13991
13992		new_msr->offset = offset;
13993		new_msr->length = flush_size;
13994		new_msr->object = object;
13995		new_msr->flag = VM_MSYNC_SYNCHRONIZING;
13996	re_iterate:
13997
13998		/*
13999		 * We can't sync this object if there isn't a pager.  The
14000		 * pager can disappear anytime we're not holding the object
14001		 * lock.  So this has to be checked anytime we goto re_iterate.
14002		 */
14003
14004		pager = object->pager;
14005
14006		if (pager == MEMORY_OBJECT_NULL) {
14007			vm_object_unlock(object);
14008			vm_object_deallocate(object);
14009			msync_req_free(new_msr);
14010			new_msr = NULL;
14011			continue;
14012		}
14013
14014		queue_iterate(&object->msr_q, msr, msync_req_t, msr_q) {
14015			/*
14016			 * need to check for overlapping entry, if found, wait
14017			 * on overlapping msr to be done, then reiterate
14018			 */
14019			msr_lock(msr);
14020			if (msr->flag == VM_MSYNC_SYNCHRONIZING &&
14021			    ((offset >= msr->offset &&
14022			      offset < (msr->offset + msr->length)) ||
14023			     (msr->offset >= offset &&
14024			      msr->offset < (offset + flush_size))))
14025			{
14026				assert_wait((event_t) msr,THREAD_INTERRUPTIBLE);
14027				msr_unlock(msr);
14028				vm_object_unlock(object);
14029				thread_block(THREAD_CONTINUE_NULL);
14030				vm_object_lock(object);
14031				goto re_iterate;
14032			}
14033			msr_unlock(msr);
14034		}/* queue_iterate */
14035
14036		queue_enter(&object->msr_q, new_msr, msync_req_t, msr_q);
14037
14038		vm_object_paging_begin(object);
14039		vm_object_unlock(object);
14040
14041		queue_enter(&req_q, new_msr, msync_req_t, req_q);
14042
14043		(void) memory_object_synchronize(
14044			pager,
14045			offset,
14046			flush_size,
14047			sync_flags & ~VM_SYNC_CONTIGUOUS);
14048
14049		vm_object_lock(object);
14050		vm_object_paging_end(object);
14051		vm_object_unlock(object);
14052	}/* while */
14053
14054	/*
	 * wait for memory_object_synchronize_completed messages from pager(s)
14056	 */
14057
14058	while (!queue_empty(&req_q)) {
14059		msr = (msync_req_t)queue_first(&req_q);
14060		msr_lock(msr);
14061		while(msr->flag != VM_MSYNC_DONE) {
14062			assert_wait((event_t) msr, THREAD_INTERRUPTIBLE);
14063			msr_unlock(msr);
14064			thread_block(THREAD_CONTINUE_NULL);
14065			msr_lock(msr);
14066		}/* while */
14067		queue_remove(&req_q, msr, msync_req_t, req_q);
14068		msr_unlock(msr);
14069		vm_object_deallocate(msr->object);
14070		msync_req_free(msr);
14071	}/* queue_iterate */
14072
14073	/* for proper msync() behaviour */
14074	if (had_hole == TRUE && (sync_flags & VM_SYNC_CONTIGUOUS))
14075		return(KERN_INVALID_ADDRESS);
14076
14077	return(KERN_SUCCESS);
14078}/* vm_msync */
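
/*
 * Usage sketch (hypothetical): synchronously push any dirty pages in
 * [addr, addr + len) back to their pagers, and report a hole in the range
 * as an error per the VM_SYNC_CONTIGUOUS semantics above.  The helper name
 * is illustrative only.
 */
#if 0
static kern_return_t
example_flush_range(
	vm_map_t		map,
	vm_map_address_t	addr,
	vm_map_size_t		len)
{
	return vm_map_msync(map, addr, len,
			    VM_SYNC_SYNCHRONOUS | VM_SYNC_CONTIGUOUS);
}
#endif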
14079
14080/*
14081 *	Routine:	convert_port_entry_to_map
14082 *	Purpose:
14083 *		Convert from a port specifying an entry or a task
14084 *		to a map. Doesn't consume the port ref; produces a map ref,
 *		which may be null.  Unlike convert_port_to_map, the
 *		port may be either task-backed or named-entry-backed.
14087 *	Conditions:
14088 *		Nothing locked.
14089 */
14090
14091
14092vm_map_t
14093convert_port_entry_to_map(
14094	ipc_port_t	port)
14095{
14096	vm_map_t map;
14097	vm_named_entry_t	named_entry;
14098	uint32_t	try_failed_count = 0;
14099
14100	if(IP_VALID(port) && (ip_kotype(port) == IKOT_NAMED_ENTRY)) {
14101		while(TRUE) {
14102			ip_lock(port);
14103			if(ip_active(port) && (ip_kotype(port)
14104					       == IKOT_NAMED_ENTRY)) {
14105				named_entry =
14106					(vm_named_entry_t)port->ip_kobject;
14107				if (!(lck_mtx_try_lock(&(named_entry)->Lock))) {
14108                       			ip_unlock(port);
14109
14110					try_failed_count++;
14111                       			mutex_pause(try_failed_count);
14112                       			continue;
14113                		}
14114				named_entry->ref_count++;
14115				lck_mtx_unlock(&(named_entry)->Lock);
14116				ip_unlock(port);
14117				if ((named_entry->is_sub_map) &&
14118				    (named_entry->protection
14119				     & VM_PROT_WRITE)) {
14120					map = named_entry->backing.map;
14121				} else {
14122					mach_destroy_memory_entry(port);
14123					return VM_MAP_NULL;
14124				}
14125				vm_map_reference_swap(map);
14126				mach_destroy_memory_entry(port);
14127				break;
14128			}
14129			else
14130				return VM_MAP_NULL;
14131		}
14132	}
14133	else
14134		map = convert_port_to_map(port);
14135
14136	return map;
14137}
14138
14139/*
14140 *	Routine:	convert_port_entry_to_object
14141 *	Purpose:
14142 *		Convert from a port specifying a named entry to an
 *		object. Doesn't consume the port ref; produces an object
 *		ref, which may be null.
14145 *	Conditions:
14146 *		Nothing locked.
14147 */
14148
14149
14150vm_object_t
14151convert_port_entry_to_object(
14152	ipc_port_t	port)
14153{
14154	vm_object_t		object = VM_OBJECT_NULL;
14155	vm_named_entry_t	named_entry;
14156	uint32_t		try_failed_count = 0;
14157
14158	if (IP_VALID(port) &&
14159	    (ip_kotype(port) == IKOT_NAMED_ENTRY)) {
14160	try_again:
14161		ip_lock(port);
14162		if (ip_active(port) &&
14163		    (ip_kotype(port) == IKOT_NAMED_ENTRY)) {
14164			named_entry = (vm_named_entry_t)port->ip_kobject;
14165			if (!(lck_mtx_try_lock(&(named_entry)->Lock))) {
14166				ip_unlock(port);
14167				try_failed_count++;
14168				mutex_pause(try_failed_count);
14169                       		goto try_again;
14170			}
14171			named_entry->ref_count++;
14172			lck_mtx_unlock(&(named_entry)->Lock);
14173			ip_unlock(port);
14174			if (!(named_entry->is_sub_map) &&
14175			    !(named_entry->is_pager) &&
14176			    !(named_entry->is_copy) &&
14177			    (named_entry->protection & VM_PROT_WRITE)) {
14178				object = named_entry->backing.object;
14179				vm_object_reference(object);
14180			}
14181			mach_destroy_memory_entry(port);
14182		}
14183	}
14184
14185	return object;
14186}
14187
14188/*
14189 * Export routines to other components for the things we access locally through
14190 * macros.
14191 */
14192#undef current_map
14193vm_map_t
14194current_map(void)
14195{
14196	return (current_map_fast());
14197}
14198
14199/*
14200 *	vm_map_reference:
14201 *
14202 *	Most code internal to the osfmk will go through a
14203 *	macro defining this.  This is always here for the
14204 *	use of other kernel components.
14205 */
14206#undef vm_map_reference
14207void
14208vm_map_reference(
14209	register vm_map_t	map)
14210{
14211	if (map == VM_MAP_NULL)
14212		return;
14213
14214	lck_mtx_lock(&map->s_lock);
14215#if	TASK_SWAPPER
14216	assert(map->res_count > 0);
14217	assert(map->ref_count >= map->res_count);
14218	map->res_count++;
14219#endif
14220	map->ref_count++;
14221	lck_mtx_unlock(&map->s_lock);
14222}
14223
14224/*
14225 *	vm_map_deallocate:
14226 *
14227 *	Removes a reference from the specified map,
14228 *	destroying it if no references remain.
14229 *	The map should not be locked.
14230 */
14231void
14232vm_map_deallocate(
14233	register vm_map_t	map)
14234{
14235	unsigned int		ref;
14236
14237	if (map == VM_MAP_NULL)
14238		return;
14239
14240	lck_mtx_lock(&map->s_lock);
14241	ref = --map->ref_count;
14242	if (ref > 0) {
14243		vm_map_res_deallocate(map);
14244		lck_mtx_unlock(&map->s_lock);
14245		return;
14246	}
14247	assert(map->ref_count == 0);
14248	lck_mtx_unlock(&map->s_lock);
14249
14250#if	TASK_SWAPPER
14251	/*
14252	 * The map residence count isn't decremented here because
14253	 * the vm_map_delete below will traverse the entire map,
14254	 * deleting entries, and the residence counts on objects
14255	 * and sharing maps will go away then.
14256	 */
14257#endif
14258
14259	vm_map_destroy(map, VM_MAP_NO_FLAGS);
14260}
14261
14262
14263void
14264vm_map_disable_NX(vm_map_t map)
14265{
14266        if (map == NULL)
14267	        return;
14268        if (map->pmap == NULL)
14269	        return;
14270
14271        pmap_disable_NX(map->pmap);
14272}
14273
14274void
14275vm_map_disallow_data_exec(vm_map_t map)
14276{
14277    if (map == NULL)
14278        return;
14279
14280    map->map_disallow_data_exec = TRUE;
14281}
14282
14283/* XXX Consider making these constants (VM_MAX_ADDRESS and MACH_VM_MAX_ADDRESS)
14284 * more descriptive.
14285 */
14286void
14287vm_map_set_32bit(vm_map_t map)
14288{
14289	map->max_offset = (vm_map_offset_t)VM_MAX_ADDRESS;
14290}
14291
14292
14293void
14294vm_map_set_64bit(vm_map_t map)
14295{
14296	map->max_offset = (vm_map_offset_t)MACH_VM_MAX_ADDRESS;
14297}
14298
14299vm_map_offset_t
14300vm_compute_max_offset(unsigned is64)
14301{
14302	return (is64 ? (vm_map_offset_t)MACH_VM_MAX_ADDRESS : (vm_map_offset_t)VM_MAX_ADDRESS);
14303}
14304
14305uint64_t
14306vm_map_get_max_aslr_slide_pages(vm_map_t map)
14307{
14308	return (1 << (vm_map_is_64bit(map) ? 16 : 8));
14309}
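
/*
 * Usage sketch (hypothetical): turn raw entropy into a page-aligned ASLR
 * slide for "map" by reducing it modulo the per-map slide budget and scaling
 * by the map's page size.  The entropy source and helper name are
 * illustrative only.
 */
#if 0
static vm_map_offset_t
example_compute_aslr_slide(
	vm_map_t	map,
	uint64_t	entropy)
{
	uint64_t	max_pages;

	max_pages = vm_map_get_max_aslr_slide_pages(map);
	return (vm_map_offset_t)((entropy % max_pages) *
				 (uint64_t) vm_map_page_size(map));
}
#endif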
14310
14311boolean_t
14312vm_map_is_64bit(
14313		vm_map_t map)
14314{
14315	return map->max_offset > ((vm_map_offset_t)VM_MAX_ADDRESS);
14316}
14317
14318boolean_t
14319vm_map_has_hard_pagezero(
14320		vm_map_t 	map,
14321		vm_map_offset_t	pagezero_size)
14322{
14323	/*
14324	 * XXX FBDP
14325	 * We should lock the VM map (for read) here but we can get away
14326	 * with it for now because there can't really be any race condition:
14327	 * the VM map's min_offset is changed only when the VM map is created
14328	 * and when the zero page is established (when the binary gets loaded),
14329	 * and this routine gets called only when the task terminates and the
14330	 * VM map is being torn down, and when a new map is created via
14331	 * load_machfile()/execve().
14332	 */
14333	return (map->min_offset >= pagezero_size);
14334}
14335
14336/*
 * Raise a VM map's maximum offset.
14338 */
14339kern_return_t
14340vm_map_raise_max_offset(
14341	vm_map_t	map,
14342	vm_map_offset_t	new_max_offset)
14343{
14344	kern_return_t	ret;
14345
14346	vm_map_lock(map);
14347	ret = KERN_INVALID_ADDRESS;
14348
14349	if (new_max_offset >= map->max_offset) {
14350		if (!vm_map_is_64bit(map)) {
14351			if (new_max_offset <= (vm_map_offset_t)VM_MAX_ADDRESS) {
14352				map->max_offset = new_max_offset;
14353				ret = KERN_SUCCESS;
14354			}
14355		} else {
14356			if (new_max_offset <= (vm_map_offset_t)MACH_VM_MAX_ADDRESS) {
14357				map->max_offset = new_max_offset;
14358				ret = KERN_SUCCESS;
14359			}
14360		}
14361	}
14362
14363	vm_map_unlock(map);
14364	return ret;
14365}
14366
14367
14368/*
 * Raise a VM map's minimum offset to strictly enforce the "page zero"
 * reservation.
14371 */
14372kern_return_t
14373vm_map_raise_min_offset(
14374	vm_map_t	map,
14375	vm_map_offset_t	new_min_offset)
14376{
14377	vm_map_entry_t	first_entry;
14378
14379	new_min_offset = vm_map_round_page(new_min_offset,
14380					   VM_MAP_PAGE_MASK(map));
14381
14382	vm_map_lock(map);
14383
14384	if (new_min_offset < map->min_offset) {
14385		/*
14386		 * Can't move min_offset backwards, as that would expose
14387		 * a part of the address space that was previously, and for
14388		 * possibly good reasons, inaccessible.
14389		 */
14390		vm_map_unlock(map);
14391		return KERN_INVALID_ADDRESS;
14392	}
14393
14394	first_entry = vm_map_first_entry(map);
14395	if (first_entry != vm_map_to_entry(map) &&
14396	    first_entry->vme_start < new_min_offset) {
14397		/*
14398		 * Some memory was already allocated below the new
		 * minimum offset.  It's too late to change it now...
14400		 */
14401		vm_map_unlock(map);
14402		return KERN_NO_SPACE;
14403	}
14404
14405	map->min_offset = new_min_offset;
14406
14407	vm_map_unlock(map);
14408
14409	return KERN_SUCCESS;
14410}
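
/*
 * Usage sketch (hypothetical): reserve a 4GB hard page zero in a freshly
 * created 64-bit map so that truncated 32-bit pointers fault rather than
 * dereference low memory.  The 4GB figure and helper name are illustrative
 * only; the actual pagezero size is established when the binary gets loaded.
 */
#if 0
static kern_return_t
example_reserve_pagezero(
	vm_map_t	map)
{
	return vm_map_raise_min_offset(map,
				       (vm_map_offset_t)(4ULL * 1024 * 1024 * 1024));
}
#endif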
14411
14412/*
14413 * Set the limit on the maximum amount of user wired memory allowed for this map.
14414 * This is basically a copy of the MEMLOCK rlimit value maintained by the BSD side of
 * the kernel.  The limits are checked on the Mach VM side, so we keep a copy here
 * and don't have to reach over to the BSD data structures.
14417 */
14418
14419void
14420vm_map_set_user_wire_limit(vm_map_t 	map,
14421			   vm_size_t	limit)
14422{
14423	map->user_wire_limit = limit;
14424}
14425
14426
14427void vm_map_switch_protect(vm_map_t	map,
14428			   boolean_t	val)
14429{
14430	vm_map_lock(map);
14431	map->switch_protect=val;
14432	vm_map_unlock(map);
14433}
14434
14435/*
14436 * IOKit has mapped a region into this map; adjust the pmap's ledgers appropriately.
14437 * phys_footprint is a composite limit consisting of iokit + physmem, so we need to
14438 * bump both counters.
14439 */
14440void
14441vm_map_iokit_mapped_region(vm_map_t map, vm_size_t bytes)
14442{
14443	pmap_t pmap = vm_map_pmap(map);
14444
14445	ledger_credit(pmap->ledger, task_ledgers.iokit_mapped, bytes);
14446	ledger_credit(pmap->ledger, task_ledgers.phys_footprint, bytes);
14447}
14448
14449void
14450vm_map_iokit_unmapped_region(vm_map_t map, vm_size_t bytes)
14451{
14452	pmap_t pmap = vm_map_pmap(map);
14453
14454	ledger_debit(pmap->ledger, task_ledgers.iokit_mapped, bytes);
14455	ledger_debit(pmap->ledger, task_ledgers.phys_footprint, bytes);
14456}
14457
14458/* Add (generate) code signature for memory range */
14459#if CONFIG_DYNAMIC_CODE_SIGNING
14460kern_return_t vm_map_sign(vm_map_t map,
14461		 vm_map_offset_t start,
14462		 vm_map_offset_t end)
14463{
14464	vm_map_entry_t entry;
14465	vm_page_t m;
14466	vm_object_t object;
14467
14468	/*
14469	 * Vet all the input parameters and current type and state of the
	 * underlying object.  Return with an error if anything is amiss.
14471	 */
14472	if (map == VM_MAP_NULL)
14473		return(KERN_INVALID_ARGUMENT);
14474
14475	vm_map_lock_read(map);
14476
14477	if (!vm_map_lookup_entry(map, start, &entry) || entry->is_sub_map) {
14478		/*
14479		 * Must pass a valid non-submap address.
14480		 */
14481		vm_map_unlock_read(map);
14482		return(KERN_INVALID_ADDRESS);
14483	}
14484
14485	if((entry->vme_start > start) || (entry->vme_end < end)) {
14486		/*
14487		 * Map entry doesn't cover the requested range. Not handling
14488		 * this situation currently.
14489		 */
14490		vm_map_unlock_read(map);
14491		return(KERN_INVALID_ARGUMENT);
14492	}
14493
14494	object = entry->object.vm_object;
14495	if (object == VM_OBJECT_NULL) {
14496		/*
14497		 * Object must already be present or we can't sign.
14498		 */
14499		vm_map_unlock_read(map);
14500		return KERN_INVALID_ARGUMENT;
14501	}
14502
14503	vm_object_lock(object);
14504	vm_map_unlock_read(map);
14505
14506	while(start < end) {
14507		uint32_t refmod;
14508
14509		m = vm_page_lookup(object, start - entry->vme_start + entry->offset );
14510		if (m==VM_PAGE_NULL) {
			/* should we try to fault a page here? we can probably
14512			 * demand it exists and is locked for this request */
14513			vm_object_unlock(object);
14514			return KERN_FAILURE;
14515		}
14516		/* deal with special page status */
14517		if (m->busy ||
14518		    (m->unusual && (m->error || m->restart || m->private || m->absent))) {
14519			vm_object_unlock(object);
14520			return KERN_FAILURE;
14521		}
14522
14523		/* Page is OK... now "validate" it */
14524		/* This is the place where we'll call out to create a code
14525		 * directory, later */
14526		m->cs_validated = TRUE;
14527
14528		/* The page is now "clean" for codesigning purposes. That means
14529		 * we don't consider it as modified (wpmapped) anymore. But
14530		 * we'll disconnect the page so we note any future modification
14531		 * attempts. */
14532		m->wpmapped = FALSE;
14533		refmod = pmap_disconnect(m->phys_page);
14534
14535		/* Pull the dirty status from the pmap, since we cleared the
14536		 * wpmapped bit */
14537		if ((refmod & VM_MEM_MODIFIED) && !m->dirty) {
14538			SET_PAGE_DIRTY(m, FALSE);
14539		}
14540
14541		/* On to the next page */
14542		start += PAGE_SIZE;
14543	}
14544	vm_object_unlock(object);
14545
14546	return KERN_SUCCESS;
14547}
14548#endif
14549
14550kern_return_t vm_map_partial_reap(vm_map_t map, unsigned int *reclaimed_resident, unsigned int *reclaimed_compressed)
14551{
14552	vm_map_entry_t	entry = VM_MAP_ENTRY_NULL;
14553	vm_map_entry_t next_entry;
14554	kern_return_t	kr = KERN_SUCCESS;
14555	vm_map_t 	zap_map;
14556
14557	vm_map_lock(map);
14558
14559	/*
14560	 * We use a "zap_map" to avoid having to unlock
14561	 * the "map" in vm_map_delete().
14562	 */
14563	zap_map = vm_map_create(PMAP_NULL,
14564				map->min_offset,
14565				map->max_offset,
14566				map->hdr.entries_pageable);
14567
14568	if (zap_map == VM_MAP_NULL) {
14569		return KERN_RESOURCE_SHORTAGE;
14570	}
14571
14572	vm_map_set_page_shift(zap_map,
14573			      VM_MAP_PAGE_SHIFT(map));
14574
14575	for (entry = vm_map_first_entry(map);
14576	     entry != vm_map_to_entry(map);
14577	     entry = next_entry) {
14578		next_entry = entry->vme_next;
14579
14580		if (entry->object.vm_object && !entry->is_sub_map && (entry->object.vm_object->internal == TRUE)
14581		    && (entry->object.vm_object->ref_count == 1)) {
14582
14583			*reclaimed_resident += entry->object.vm_object->resident_page_count;
14584			*reclaimed_compressed += vm_compressor_pager_get_count(entry->object.vm_object->pager);
14585
14586			(void)vm_map_delete(map,
14587					    entry->vme_start,
14588					    entry->vme_end,
14589					    VM_MAP_REMOVE_SAVE_ENTRIES,
14590					    zap_map);
14591		}
14592	}
14593
14594	vm_map_unlock(map);
14595
	/*
	 * Get rid of the "zap_map" and all the map entries that
	 * it may still contain.
	 */
14600        if (zap_map != VM_MAP_NULL) {
14601                vm_map_destroy(zap_map, VM_MAP_REMOVE_NO_PMAP_CLEANUP);
14602                zap_map = VM_MAP_NULL;
14603        }
14604
14605	return kr;
14606}
14607
14608#if CONFIG_FREEZE
14609
14610kern_return_t vm_map_freeze_walk(
14611             	vm_map_t map,
14612             	unsigned int *purgeable_count,
14613             	unsigned int *wired_count,
14614             	unsigned int *clean_count,
14615             	unsigned int *dirty_count,
14616             	unsigned int  dirty_budget,
14617             	boolean_t *has_shared)
14618{
14619	vm_map_entry_t entry;
14620
14621	vm_map_lock_read(map);
14622
14623	*purgeable_count = *wired_count = *clean_count = *dirty_count = 0;
14624	*has_shared = FALSE;
14625
14626	for (entry = vm_map_first_entry(map);
14627	     entry != vm_map_to_entry(map);
14628	     entry = entry->vme_next) {
14629		unsigned int purgeable, clean, dirty, wired;
14630		boolean_t shared;
14631
14632		if ((entry->object.vm_object == 0) ||
14633		    (entry->is_sub_map) ||
14634		    (entry->object.vm_object->phys_contiguous)) {
14635			continue;
14636		}
14637
14638		default_freezer_pack(&purgeable, &wired, &clean, &dirty, dirty_budget, &shared, entry->object.vm_object, NULL);
14639
14640		*purgeable_count += purgeable;
14641		*wired_count += wired;
14642		*clean_count += clean;
14643		*dirty_count += dirty;
14644
14645		if (shared) {
14646			*has_shared = TRUE;
14647		}
14648
14649		/* Adjust pageout budget and finish up if reached */
14650		if (dirty_budget) {
14651			dirty_budget -= dirty;
14652			if (dirty_budget == 0) {
14653				break;
14654			}
14655		}
14656	}
14657
14658	vm_map_unlock_read(map);
14659
14660	return KERN_SUCCESS;
14661}
14662
14663kern_return_t vm_map_freeze(
14664             	vm_map_t map,
14665             	unsigned int *purgeable_count,
14666             	unsigned int *wired_count,
14667             	unsigned int *clean_count,
14668             	unsigned int *dirty_count,
14669             	unsigned int dirty_budget,
14670             	boolean_t *has_shared)
14671{
14672	vm_map_entry_t	entry2 = VM_MAP_ENTRY_NULL;
14673	kern_return_t	kr = KERN_SUCCESS;
14674	boolean_t	default_freezer_active = TRUE;
14675
14676	*purgeable_count = *wired_count = *clean_count = *dirty_count = 0;
14677	*has_shared = FALSE;
14678
14679	/*
14680	 * We need the exclusive lock here so that we can
14681	 * block any page faults or lookups while we are
14682	 * in the middle of freezing this vm map.
14683	 */
14684	vm_map_lock(map);
14685
14686	if (COMPRESSED_PAGER_IS_ACTIVE || DEFAULT_FREEZER_COMPRESSED_PAGER_IS_ACTIVE) {
14687		default_freezer_active = FALSE;
14688	}
14689
14690	if (default_freezer_active) {
14691		if (map->default_freezer_handle == NULL) {
14692			map->default_freezer_handle = default_freezer_handle_allocate();
14693		}
14694
14695		if ((kr = default_freezer_handle_init(map->default_freezer_handle)) != KERN_SUCCESS) {
14696			/*
			 * Can happen if the default_freezer_handle passed in is NULL,
			 * or if a table has already been allocated and associated
			 * with this handle, i.e. the map is already frozen.
14700			 */
14701			goto done;
14702		}
14703	}
14704
14705	for (entry2 = vm_map_first_entry(map);
14706	     entry2 != vm_map_to_entry(map);
14707	     entry2 = entry2->vme_next) {
14708
14709		vm_object_t	src_object = entry2->object.vm_object;
14710
14711		if (entry2->object.vm_object && !entry2->is_sub_map && !entry2->object.vm_object->phys_contiguous) {
14712			/* If eligible, scan the entry, moving eligible pages over to our parent object */
14713			if (default_freezer_active) {
14714				unsigned int purgeable, clean, dirty, wired;
14715				boolean_t shared;
14716
14717				default_freezer_pack(&purgeable, &wired, &clean, &dirty, dirty_budget, &shared,
14718								src_object, map->default_freezer_handle);
14719
14720				*purgeable_count += purgeable;
14721				*wired_count += wired;
14722				*clean_count += clean;
14723				*dirty_count += dirty;
14724
14725				/* Adjust pageout budget and finish up if reached */
14726				if (dirty_budget) {
14727					dirty_budget -= dirty;
14728					if (dirty_budget == 0) {
14729						break;
14730					}
14731				}
14732
14733				if (shared) {
14734					*has_shared = TRUE;
14735				}
14736			} else {
14737				/*
14738				 * To the compressor.
14739				 */
14740				if (entry2->object.vm_object->internal == TRUE) {
14741					vm_object_pageout(entry2->object.vm_object);
14742				}
14743			}
14744		}
14745	}
14746
14747	if (default_freezer_active) {
14748		/* Finally, throw out the pages to swap */
14749		default_freezer_pageout(map->default_freezer_handle);
14750	}
14751
14752done:
14753	vm_map_unlock(map);
14754
14755	return kr;
14756}
14757
14758kern_return_t
14759vm_map_thaw(
14760	vm_map_t map)
14761{
14762	kern_return_t kr = KERN_SUCCESS;
14763
14764	if (COMPRESSED_PAGER_IS_ACTIVE || DEFAULT_FREEZER_COMPRESSED_PAGER_IS_ACTIVE) {
14765		/*
		 * We thaw on demand in the presence of the compressed pager.
14767		 */
14768		return kr;
14769	}
14770
14771	vm_map_lock(map);
14772
14773	if (map->default_freezer_handle == NULL) {
14774		/*
14775		 * This map is not in a frozen state.
14776		 */
14777		kr = KERN_FAILURE;
14778		goto out;
14779	}
14780
14781	kr = default_freezer_unpack(map->default_freezer_handle);
14782out:
14783	vm_map_unlock(map);
14784
14785	return kr;
14786}
14787#endif
14788
14789/*
14790 * vm_map_entry_should_cow_for_true_share:
14791 *
14792 * Determines if the map entry should be clipped and setup for copy-on-write
14793 * to avoid applying "true_share" to a large VM object when only a subset is
14794 * targeted.
14795 *
14796 * For now, we target only the map entries created for the Objective C
14797 * Garbage Collector, which initially have the following properties:
14798 *	- alias == VM_MEMORY_MALLOC
14799 * 	- wired_count == 0
14800 * 	- !needs_copy
14801 * and a VM object with:
14802 * 	- internal
14803 * 	- copy_strategy == MEMORY_OBJECT_COPY_SYMMETRIC
14804 * 	- !true_share
14805 * 	- vo_size == ANON_CHUNK_SIZE
14806 */
14807boolean_t
14808vm_map_entry_should_cow_for_true_share(
14809	vm_map_entry_t	entry)
14810{
14811	vm_object_t	object;
14812
14813	if (entry->is_sub_map) {
14814		/* entry does not point at a VM object */
14815		return FALSE;
14816	}
14817
14818	if (entry->needs_copy) {
14819		/* already set for copy_on_write: done! */
14820		return FALSE;
14821	}
14822
14823	if (entry->alias != VM_MEMORY_MALLOC &&
14824	    entry->alias != VM_MEMORY_MALLOC_SMALL) {
14825		/* not a malloc heap or Obj-C Garbage Collector heap */
14826		return FALSE;
14827	}
14828
14829	if (entry->wired_count) {
14830		/* wired: can't change the map entry... */
14831		vm_counters.should_cow_but_wired++;
14832		return FALSE;
14833	}
14834
14835	object = entry->object.vm_object;
14836
14837	if (object == VM_OBJECT_NULL) {
14838		/* no object yet... */
14839		return FALSE;
14840	}
14841
14842	if (!object->internal) {
14843		/* not an internal object */
14844		return FALSE;
14845	}
14846
14847	if (object->copy_strategy != MEMORY_OBJECT_COPY_SYMMETRIC) {
14848		/* not the default copy strategy */
14849		return FALSE;
14850	}
14851
14852	if (object->true_share) {
14853		/* already true_share: too late to avoid it */
14854		return FALSE;
14855	}
14856
14857	if (entry->alias == VM_MEMORY_MALLOC &&
14858	    object->vo_size != ANON_CHUNK_SIZE) {
14859		/* ... not an object created for the ObjC Garbage Collector */
14860		return FALSE;
14861	}
14862
14863	if (entry->alias == VM_MEMORY_MALLOC_SMALL &&
14864	    object->vo_size != 2048 * 4096) {
14865		/* ... not a "MALLOC_SMALL" heap */
14866		return FALSE;
14867	}
14868
14869	/*
14870	 * All the criteria match: we have a large object being targeted for "true_share".
14871	 * To limit the adverse side-effects linked with "true_share", tell the caller to
14872	 * try and avoid setting up the entire object for "true_share" by clipping the
14873	 * targeted range and setting it up for copy-on-write.
14874	 */
14875	return TRUE;
14876}
14877
14878vm_map_offset_t
14879vm_map_round_page_mask(
14880 	vm_map_offset_t	offset,
14881	vm_map_offset_t	mask)
14882{
14883	return VM_MAP_ROUND_PAGE(offset, mask);
14884}
14885
14886vm_map_offset_t
14887vm_map_trunc_page_mask(
14888	vm_map_offset_t	offset,
14889	vm_map_offset_t	mask)
14890{
14891	return VM_MAP_TRUNC_PAGE(offset, mask);
14892}
14893
14894int
14895vm_map_page_shift(
14896	vm_map_t map)
14897{
14898	return VM_MAP_PAGE_SHIFT(map);
14899}
14900
14901int
14902vm_map_page_size(
14903	vm_map_t map)
14904{
14905	return VM_MAP_PAGE_SIZE(map);
14906}
14907
14908int
14909vm_map_page_mask(
14910	vm_map_t map)
14911{
14912	return VM_MAP_PAGE_MASK(map);
14913}
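
/*
 * Usage sketch (hypothetical): compute the page-aligned bounds of an
 * arbitrary [addr, addr + size) range using the map's own page mask
 * (e.g. 0xFFF for a 4K-page map).  The helper name is illustrative only.
 */
#if 0
static void
example_page_align_range(
	vm_map_t	map,
	vm_map_offset_t	addr,
	vm_map_size_t	size,
	vm_map_offset_t	*start_p,
	vm_map_offset_t	*end_p)
{
	vm_map_offset_t	mask;

	mask = (vm_map_offset_t) vm_map_page_mask(map);
	*start_p = vm_map_trunc_page_mask(addr, mask);
	*end_p = vm_map_round_page_mask(addr + size, mask);
}
#endif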
14914
14915kern_return_t
14916vm_map_set_page_shift(
14917	vm_map_t  	map,
14918	int		pageshift)
14919{
14920	if (map->hdr.nentries != 0) {
14921		/* too late to change page size */
14922		return KERN_FAILURE;
14923	}
14924
14925	map->hdr.page_shift = pageshift;
14926
14927	return KERN_SUCCESS;
14928}
14929
14930int
14931vm_map_purge(
14932	vm_map_t	map)
14933{
14934	int		num_object_purged;
14935	vm_map_entry_t	entry;
14936	vm_map_offset_t	next_address;
14937	vm_object_t	object;
14938	int		state;
14939	kern_return_t	kr;
14940
14941	num_object_purged = 0;
14942
14943	vm_map_lock_read(map);
14944	entry = vm_map_first_entry(map);
14945	while (entry != vm_map_to_entry(map)) {
14946		if (entry->is_sub_map) {
14947			goto next;
14948		}
14949		if (! (entry->protection & VM_PROT_WRITE)) {
14950			goto next;
14951		}
14952		object = entry->object.vm_object;
14953		if (object == VM_OBJECT_NULL) {
14954			goto next;
14955		}
14956		if (object->purgable != VM_PURGABLE_VOLATILE) {
14957			goto next;
14958		}
14959
14960		vm_object_lock(object);
14961#if 00
14962		if (entry->offset != 0 ||
14963		    (entry->vme_end - entry->vme_start) != object->vo_size) {
14964			vm_object_unlock(object);
14965			goto next;
14966		}
14967#endif
14968		next_address = entry->vme_end;
14969		vm_map_unlock_read(map);
14970		state = VM_PURGABLE_EMPTY;
14971		kr = vm_object_purgable_control(object,
14972						VM_PURGABLE_SET_STATE,
14973						&state);
14974		if (kr == KERN_SUCCESS) {
14975			num_object_purged++;
14976		}
14977		vm_object_unlock(object);
14978
14979		vm_map_lock_read(map);
14980		if (vm_map_lookup_entry(map, next_address, &entry)) {
14981			continue;
14982		}
14983	next:
14984		entry = entry->vme_next;
14985	}
14986	vm_map_unlock_read(map);
14987
14988	return num_object_purged;
14989}
14990
14991kern_return_t
14992vm_map_query_volatile(
14993	vm_map_t	map,
14994	mach_vm_size_t	*volatile_virtual_size_p,
14995	mach_vm_size_t	*volatile_resident_size_p,
14996	mach_vm_size_t	*volatile_pmap_size_p)
14997{
14998	mach_vm_size_t	volatile_virtual_size;
14999	mach_vm_size_t	volatile_resident_count;
15000	mach_vm_size_t	volatile_pmap_count;
15001	mach_vm_size_t	resident_count;
15002	vm_map_entry_t	entry;
15003	vm_object_t	object;
15004
15005	/* map should be locked by caller */
15006
15007	volatile_virtual_size = 0;
15008	volatile_resident_count = 0;
15009	volatile_pmap_count = 0;
15010
15011	for (entry = vm_map_first_entry(map);
15012	     entry != vm_map_to_entry(map);
15013	     entry = entry->vme_next) {
15014		if (entry->is_sub_map) {
15015			continue;
15016		}
15017		if (! (entry->protection & VM_PROT_WRITE)) {
15018			continue;
15019		}
15020		object = entry->object.vm_object;
15021		if (object == VM_OBJECT_NULL) {
15022			continue;
15023		}
15024		if (object->purgable != VM_PURGABLE_VOLATILE) {
15025			continue;
15026		}
15027		if (entry->offset != 0) {
15028			/*
15029			 * If the map entry has been split and the object now
15030			 * appears several times in the VM map, we don't want
15031			 * to count the object's resident_page_count more than
15032			 * once.  We count it only for the first one, starting
15033			 * at offset 0 and ignore the other VM map entries.
15034			 */
15035			continue;
15036		}
15037		resident_count = object->resident_page_count;
15038		if ((entry->offset / PAGE_SIZE) >= resident_count) {
15039			resident_count = 0;
15040		} else {
15041			resident_count -= (entry->offset / PAGE_SIZE);
15042		}
15043
15044		volatile_virtual_size += entry->vme_end - entry->vme_start;
15045		volatile_resident_count += resident_count;
15046		volatile_pmap_count += pmap_query_resident(map->pmap,
15047							   entry->vme_start,
15048							   entry->vme_end);
15049	}
15050
15051	/* map is still locked on return */
15052
15053	*volatile_virtual_size_p = volatile_virtual_size;
15054	*volatile_resident_size_p = volatile_resident_count * PAGE_SIZE;
15055	*volatile_pmap_size_p = volatile_pmap_count * PAGE_SIZE;
15056
15057	return KERN_SUCCESS;
15058}
15059
15060#if VM_SCAN_FOR_SHADOW_CHAIN
15061int vm_map_shadow_max(vm_map_t map);
15062int vm_map_shadow_max(
15063	vm_map_t map)
15064{
15065	int		shadows, shadows_max;
15066	vm_map_entry_t	entry;
15067	vm_object_t	object, next_object;
15068
15069	if (map == NULL)
15070		return 0;
15071
15072	shadows_max = 0;
15073
15074	vm_map_lock_read(map);
15075
15076	for (entry = vm_map_first_entry(map);
15077	     entry != vm_map_to_entry(map);
15078	     entry = entry->vme_next) {
15079		if (entry->is_sub_map) {
15080			continue;
15081		}
15082		object = entry->object.vm_object;
15083		if (object == NULL) {
15084			continue;
15085		}
15086		vm_object_lock_shared(object);
15087		for (shadows = 0;
15088		     object->shadow != NULL;
15089		     shadows++, object = next_object) {
15090			next_object = object->shadow;
15091			vm_object_lock_shared(next_object);
15092			vm_object_unlock(object);
15093		}
15094		vm_object_unlock(object);
15095		if (shadows > shadows_max) {
15096			shadows_max = shadows;
15097		}
15098	}
15099
15100	vm_map_unlock_read(map);
15101
15102	return shadows_max;
15103}
15104#endif /* VM_SCAN_FOR_SHADOW_CHAIN */
15105