1/*
2 * Copyright (c) 2000-2012 Apple Inc. All rights reserved.
3 *
4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5 *
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. The rights granted to you under the License
10 * may not be used to create, or enable the creation or redistribution of,
11 * unlawful or unlicensed copies of an Apple operating system, or to
12 * circumvent, violate, or enable the circumvention or violation of, any
13 * terms of an Apple operating system software license agreement.
14 *
15 * Please obtain a copy of the License at
16 * http://www.opensource.apple.com/apsl/ and read it before using this file.
17 *
18 * The Original Code and all software distributed under the License are
19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23 * Please see the License for the specific language governing rights and
24 * limitations under the License.
25 *
26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27 */
28/*
29 * @OSF_COPYRIGHT@
30 */
31/*
32 * Mach Operating System
33 * Copyright (c) 1991,1990,1989,1988,1987 Carnegie Mellon University
34 * All Rights Reserved.
35 *
36 * Permission to use, copy, modify and distribute this software and its
37 * documentation is hereby granted, provided that both the copyright
38 * notice and this permission notice appear in all copies of the
39 * software, derivative works or modified versions, and any portions
40 * thereof, and that both notices appear in supporting documentation.
41 *
42 * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
43 * CONDITION.  CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND FOR
44 * ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
45 *
46 * Carnegie Mellon requests users of this software to return to
47 *
48 *  Software Distribution Coordinator  or  Software.Distribution@CS.CMU.EDU
49 *  School of Computer Science
50 *  Carnegie Mellon University
51 *  Pittsburgh PA 15213-3890
52 *
53 * any improvements or extensions that they make and grant Carnegie Mellon
54 * the rights to redistribute these changes.
55 */
56/*
57 */
58/*
59 *	File:	vm/vm_map.c
60 *	Author:	Avadis Tevanian, Jr., Michael Wayne Young
61 *	Date:	1985
62 *
63 *	Virtual memory mapping module.
64 */
65
66#include <task_swapper.h>
67#include <mach_assert.h>
68#include <libkern/OSAtomic.h>
69
70#include <mach/kern_return.h>
71#include <mach/port.h>
72#include <mach/vm_attributes.h>
73#include <mach/vm_param.h>
74#include <mach/vm_behavior.h>
75#include <mach/vm_statistics.h>
76#include <mach/memory_object.h>
77#include <mach/mach_vm.h>
78#include <machine/cpu_capabilities.h>
79#include <mach/sdt.h>
80
81#include <kern/assert.h>
82#include <kern/counters.h>
83#include <kern/kalloc.h>
84#include <kern/zalloc.h>
85
86#include <vm/cpm.h>
87#include <vm/vm_compressor_pager.h>
88#include <vm/vm_init.h>
89#include <vm/vm_fault.h>
90#include <vm/vm_map.h>
91#include <vm/vm_object.h>
92#include <vm/vm_page.h>
93#include <vm/vm_pageout.h>
94#include <vm/vm_kern.h>
95#include <ipc/ipc_port.h>
96#include <kern/sched_prim.h>
97#include <kern/misc_protos.h>
98#include <kern/xpr.h>
99
100#include <mach/vm_map_server.h>
101#include <mach/mach_host_server.h>
102#include <vm/vm_protos.h>
103#include <vm/vm_purgeable_internal.h>
104
106#include <vm/vm_shared_region.h>
107#include <vm/vm_map_store.h>
108
109extern u_int32_t random(void);	/* from <libkern/libkern.h> */
110/* Internal prototypes */
112
113static void vm_map_simplify_range(
114	vm_map_t	map,
115	vm_map_offset_t	start,
116	vm_map_offset_t	end);	/* forward */
117
118static boolean_t	vm_map_range_check(
119	vm_map_t	map,
120	vm_map_offset_t	start,
121	vm_map_offset_t	end,
122	vm_map_entry_t	*entry);
123
124static vm_map_entry_t	_vm_map_entry_create(
125	struct vm_map_header	*map_header, boolean_t map_locked);
126
127static void		_vm_map_entry_dispose(
128	struct vm_map_header	*map_header,
129	vm_map_entry_t		entry);
130
131static void		vm_map_pmap_enter(
132	vm_map_t		map,
133	vm_map_offset_t 	addr,
134	vm_map_offset_t		end_addr,
135	vm_object_t 		object,
136	vm_object_offset_t	offset,
137	vm_prot_t		protection);
138
139static void		_vm_map_clip_end(
140	struct vm_map_header	*map_header,
141	vm_map_entry_t		entry,
142	vm_map_offset_t		end);
143
144static void		_vm_map_clip_start(
145	struct vm_map_header	*map_header,
146	vm_map_entry_t		entry,
147	vm_map_offset_t		start);
148
149static void		vm_map_entry_delete(
150	vm_map_t	map,
151	vm_map_entry_t	entry);
152
153static kern_return_t	vm_map_delete(
154	vm_map_t	map,
155	vm_map_offset_t	start,
156	vm_map_offset_t	end,
157	int		flags,
158	vm_map_t	zap_map);
159
160static kern_return_t	vm_map_copy_overwrite_unaligned(
161	vm_map_t	dst_map,
162	vm_map_entry_t	entry,
163	vm_map_copy_t	copy,
164	vm_map_address_t start,
165	boolean_t	discard_on_success);
166
167static kern_return_t	vm_map_copy_overwrite_aligned(
168	vm_map_t	dst_map,
169	vm_map_entry_t	tmp_entry,
170	vm_map_copy_t	copy,
171	vm_map_offset_t start,
172	pmap_t		pmap);
173
174static kern_return_t	vm_map_copyin_kernel_buffer(
175	vm_map_t	src_map,
176	vm_map_address_t src_addr,
177	vm_map_size_t	len,
178	boolean_t	src_destroy,
179	vm_map_copy_t	*copy_result);  /* OUT */
180
181static kern_return_t	vm_map_copyout_kernel_buffer(
182	vm_map_t	map,
183	vm_map_address_t *addr,	/* IN/OUT */
184	vm_map_copy_t	copy,
185	boolean_t	overwrite,
186	boolean_t	consume_on_success);
187
188static void		vm_map_fork_share(
189	vm_map_t	old_map,
190	vm_map_entry_t	old_entry,
191	vm_map_t	new_map);
192
193static boolean_t	vm_map_fork_copy(
194	vm_map_t	old_map,
195	vm_map_entry_t	*old_entry_p,
196	vm_map_t	new_map);
197
198void		vm_map_region_top_walk(
199	vm_map_entry_t		   entry,
200	vm_region_top_info_t       top);
201
202void		vm_map_region_walk(
203	vm_map_t		   map,
204	vm_map_offset_t		   va,
205	vm_map_entry_t		   entry,
206	vm_object_offset_t	   offset,
207	vm_object_size_t	   range,
208	vm_region_extended_info_t  extended,
209	boolean_t		   look_for_pages,
210	mach_msg_type_number_t count);
211
212static kern_return_t	vm_map_wire_nested(
213	vm_map_t		   map,
214	vm_map_offset_t		   start,
215	vm_map_offset_t		   end,
216	vm_prot_t		   access_type,
217	boolean_t		   user_wire,
218	pmap_t			   map_pmap,
219	vm_map_offset_t		   pmap_addr);
220
221static kern_return_t	vm_map_unwire_nested(
222	vm_map_t		   map,
223	vm_map_offset_t		   start,
224	vm_map_offset_t		   end,
225	boolean_t		   user_wire,
226	pmap_t			   map_pmap,
227	vm_map_offset_t		   pmap_addr);
228
229static kern_return_t	vm_map_overwrite_submap_recurse(
230	vm_map_t		   dst_map,
231	vm_map_offset_t		   dst_addr,
232	vm_map_size_t		   dst_size);
233
234static kern_return_t	vm_map_copy_overwrite_nested(
235	vm_map_t		   dst_map,
236	vm_map_offset_t		   dst_addr,
237	vm_map_copy_t		   copy,
238	boolean_t		   interruptible,
239	pmap_t			   pmap,
240	boolean_t		   discard_on_success);
241
242static kern_return_t	vm_map_remap_extract(
243	vm_map_t		map,
244	vm_map_offset_t		addr,
245	vm_map_size_t		size,
246	boolean_t		copy,
247	struct vm_map_header 	*map_header,
248	vm_prot_t		*cur_protection,
249	vm_prot_t		*max_protection,
250	vm_inherit_t		inheritance,
251	boolean_t		pageable);
252
253static kern_return_t	vm_map_remap_range_allocate(
254	vm_map_t		map,
255	vm_map_address_t	*address,
256	vm_map_size_t		size,
257	vm_map_offset_t		mask,
258	int			flags,
259	vm_map_entry_t		*map_entry);
260
261static void		vm_map_region_look_for_page(
262	vm_map_t		   map,
263	vm_map_offset_t            va,
264	vm_object_t		   object,
265	vm_object_offset_t	   offset,
266	int                        max_refcnt,
267	int                        depth,
268	vm_region_extended_info_t  extended,
269	mach_msg_type_number_t count);
270
271static int		vm_map_region_count_obj_refs(
272	vm_map_entry_t    	   entry,
273	vm_object_t       	   object);
274
275
276static kern_return_t	vm_map_willneed(
277	vm_map_t	map,
278	vm_map_offset_t	start,
279	vm_map_offset_t	end);
280
281static kern_return_t	vm_map_reuse_pages(
282	vm_map_t	map,
283	vm_map_offset_t	start,
284	vm_map_offset_t	end);
285
286static kern_return_t	vm_map_reusable_pages(
287	vm_map_t	map,
288	vm_map_offset_t	start,
289	vm_map_offset_t	end);
290
291static kern_return_t	vm_map_can_reuse(
292	vm_map_t	map,
293	vm_map_offset_t	start,
294	vm_map_offset_t	end);
295
296
297/*
298 * Macros to copy a vm_map_entry. We must be careful to correctly
299 * manage the wired page count. vm_map_entry_copy() creates a new
300 * map entry that refers to the same memory - the wired count in the
301 * new entry must be set to zero. vm_map_entry_copy_full() creates a new
302 * entry that is identical to the old entry.  This preserves the
303 * wire count; it's used for map splitting and zone changing in
304 * vm_map_copyout.
305 */
306
307#define vm_map_entry_copy(NEW,OLD)	\
308MACRO_BEGIN				\
309boolean_t _vmec_reserved = (NEW)->from_reserved_zone;	\
310	*(NEW) = *(OLD);                \
311	(NEW)->is_shared = FALSE;	\
312	(NEW)->needs_wakeup = FALSE;    \
313	(NEW)->in_transition = FALSE;   \
314	(NEW)->wired_count = 0;         \
315	(NEW)->user_wired_count = 0;    \
316	(NEW)->permanent = FALSE;	\
317	(NEW)->used_for_jit = FALSE;	\
318	(NEW)->from_reserved_zone = _vmec_reserved;			\
319MACRO_END
320
321#define vm_map_entry_copy_full(NEW,OLD)			\
322MACRO_BEGIN						\
323boolean_t _vmecf_reserved = (NEW)->from_reserved_zone;	\
324(*(NEW) = *(OLD));					\
325(NEW)->from_reserved_zone = _vmecf_reserved;			\
326MACRO_END
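/*
 * Illustrative sketch (not from the original sources): the practical
 * difference between the two copy macros.  Assuming "src" is a wired
 * entry being duplicated and "a"/"b" were obtained from
 * vm_map_entry_create(), vm_map_entry_copy() must not inherit the wire
 * counts, while vm_map_entry_copy_full() keeps them:
 *
 *	vm_map_entry_t src;		// wired_count == 2, say
 *	vm_map_entry_t a, b;		// freshly created entries
 *
 *	vm_map_entry_copy(a, src);	// a->wired_count == 0
 *	vm_map_entry_copy_full(b, src);	// b->wired_count == 2 (preserved)
 */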
327
328/*
329 *	Decide if we want to allow processes to execute from their data or stack areas.
330 *	override_nx() returns true if we do.  Data/stack execution can be enabled independently
331 *	for 32 and 64 bit processes.  Set the VM_ABI_32 or VM_ABI_64 flags in allow_data_exec
332 *	or allow_stack_exec to enable data execution for that type of data area for that particular
333 *	ABI (or both by or'ing the flags together).  These are initialized in the architecture
334 *	specific pmap files since the default behavior varies according to architecture.  The
335 *	main reason it varies is because of the need to provide binary compatibility with old
336 *	applications that were written before these restrictions came into being.  In the old
337 *	days, an app could execute anything it could read, but this has slowly been tightened
338 *	up over time.  The default behavior is:
339 *
340 *	32-bit PPC apps		may execute from both stack and data areas
341 *	32-bit Intel apps	may execute from data areas but not stack
342 *	64-bit PPC/Intel apps	may not execute from either data or stack
343 *
344 *	An application on any architecture may override these defaults by explicitly
345 *	adding PROT_EXEC permission to the page in question with the mprotect(2)
346 *	system call.  This code here just determines what happens when an app tries to
347 * 	execute from a page that lacks execute permission.
348 *
349 *	Note that allow_data_exec or allow_stack_exec may also be modified by sysctl to change the
350 *	default behavior for both 32 and 64 bit apps on a system-wide basis. Furthermore,
351 *	a Mach-O header flag bit (MH_NO_HEAP_EXECUTION) can be used to forcibly disallow
352 *	execution from data areas for a particular binary even if the arch normally permits it. As
353 *	a final wrinkle, a posix_spawn attribute flag can be used to negate this opt-in header bit
354 *	to support some complicated use cases, notably browsers with out-of-process plugins that
355 *	are not all NX-safe.
356 */
357
358extern int allow_data_exec, allow_stack_exec;
359
360int
361override_nx(vm_map_t map, uint32_t user_tag) /* map unused on arm */
362{
363	int current_abi;
364
365	/*
366	 * Determine if the app is running in 32 or 64 bit mode.
367	 */
368
369	if (vm_map_is_64bit(map))
370		current_abi = VM_ABI_64;
371	else
372		current_abi = VM_ABI_32;
373
374	/*
375	 * Determine if we should allow the execution based on whether it's a
376	 * stack or data area and the current architecture.
377	 */
378
379	if (user_tag == VM_MEMORY_STACK)
380		return allow_stack_exec & current_abi;
381
382	return (allow_data_exec & current_abi) && (map->map_disallow_data_exec == FALSE);
383}
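/*
 * Worked example (hypothetical values, for illustration only; assumes the
 * map does not set map_disallow_data_exec): if the pmap layer initializes
 * allow_data_exec to VM_ABI_32 and allow_stack_exec to 0, then a 64-bit
 * process faulting on a data page evaluates (allow_data_exec & VM_ABI_64)
 * == 0 and the NX fault is enforced, while a 32-bit process gets a
 * non-zero result and execution is permitted:
 *
 *	allow_data_exec  = VM_ABI_32;
 *	allow_stack_exec = 0;
 *	override_nx(map32, VM_MEMORY_MALLOC);	// non-zero: data exec allowed
 *	override_nx(map64, VM_MEMORY_MALLOC);	// 0: data exec denied
 *	override_nx(map32, VM_MEMORY_STACK);	// 0: stack exec denied
 */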
384
385
386/*
387 *	Virtual memory maps provide for the mapping, protection,
388 *	and sharing of virtual memory objects.  In addition,
389 *	this module provides for an efficient virtual copy of
390 *	memory from one map to another.
391 *
392 *	Synchronization is required prior to most operations.
393 *
394 *	Maps consist of an ordered doubly-linked list of simple
395 *	entries; a single hint is used to speed up lookups.
396 *
397 *	Sharing maps have been deleted from this version of Mach.
398 *	All shared objects are now mapped directly into the respective
399 *	maps.  This requires a change in the copy on write strategy;
400 *	the asymmetric (delayed) strategy is used for shared temporary
401 *	objects instead of the symmetric (shadow) strategy.  All maps
402 *	are now "top level" maps (either task map, kernel map or submap
403 *	of the kernel map).
404 *
405 *	Since portions of maps are specified by start/end addresses,
406 *	which may not align with existing map entries, all
407 *	routines merely "clip" entries to these start/end values.
408 *	[That is, an entry is split into two, bordering at a
409 *	start or end value.]  Note that these clippings may not
410 *	always be necessary (as the two resulting entries are then
411 *	not changed); however, the clipping is done for convenience.
412 *	No attempt is currently made to "glue back together" two
413 *	abutting entries.
414 *
415 *	The symmetric (shadow) copy strategy implements virtual copy
416 *	by copying VM object references from one map to
417 *	another, and then marking both regions as copy-on-write.
418 *	It is important to note that only one writeable reference
419 *	to a VM object region exists in any map when this strategy
420 *	is used -- this means that shadow object creation can be
421 *	delayed until a write operation occurs.  The asymmetric (delayed)
422 *	strategy allows multiple maps to have writeable references to
423 *	the same region of a vm object, and hence cannot delay creating
424 *	its copy objects.  See vm_object_copy_quickly() in vm_object.c.
425 *	Copying of permanent objects is completely different; see
426 *	vm_object_copy_strategically() in vm_object.c.
427 */
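/*
 * Illustrative sketch (not part of the original comment): an operation on
 * the sub-range [B, C) of an entry spanning [A, D) first clips the entry
 * at both boundaries and then works only on the middle piece:
 *
 *	// entry covers [A, D)
 *	vm_map_clip_start(map, entry, B);	// -> [A, B) and [B, D)
 *	vm_map_clip_end(map, entry, C);		// -> [A, B), [B, C), [C, D)
 *	// operate on the [B, C) entry; the bordering entries are left
 *	// untouched and are not glued back together afterwards
 *
 * The underlying clip helpers (_vm_map_clip_start()/_vm_map_clip_end())
 * are declared above and defined later in this file.
 */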
428
429static zone_t	vm_map_zone;		/* zone for vm_map structures */
430static zone_t	vm_map_entry_zone;	/* zone for vm_map_entry structures */
431static zone_t	vm_map_entry_reserved_zone;	/* zone with reserve for non-blocking
432					 * allocations */
433static zone_t	vm_map_copy_zone;	/* zone for vm_map_copy structures */
434
435
436/*
437 *	Placeholder object for submap operations.  This object is dropped
438 *	into the range by a call to vm_map_find, and removed when
439 *	vm_map_submap creates the submap.
440 */
441
442vm_object_t	vm_submap_object;
443
444static void		*map_data;
445static vm_size_t	map_data_size;
446static void		*kentry_data;
447static vm_size_t	kentry_data_size;
448
449#define         NO_COALESCE_LIMIT  ((1024 * 128) - 1)
450
451/* Skip acquiring locks if we're in the midst of a kernel core dump */
452unsigned int not_in_kdp = 1;
453
454unsigned int vm_map_set_cache_attr_count = 0;
455
456kern_return_t
457vm_map_set_cache_attr(
458	vm_map_t	map,
459	vm_map_offset_t	va)
460{
461	vm_map_entry_t	map_entry;
462	vm_object_t	object;
463	kern_return_t	kr = KERN_SUCCESS;
464
465	vm_map_lock_read(map);
466
467	if (!vm_map_lookup_entry(map, va, &map_entry) ||
468	    map_entry->is_sub_map) {
469		/*
470		 * that memory is not properly mapped
471		 */
472		kr = KERN_INVALID_ARGUMENT;
473		goto done;
474	}
475	object = map_entry->object.vm_object;
476
477	if (object == VM_OBJECT_NULL) {
478		/*
479		 * there should be a VM object here at this point
480		 */
481		kr = KERN_INVALID_ARGUMENT;
482		goto done;
483	}
484	vm_object_lock(object);
485	object->set_cache_attr = TRUE;
486	vm_object_unlock(object);
487
488	vm_map_set_cache_attr_count++;
489done:
490	vm_map_unlock_read(map);
491
492	return kr;
493}
494
495
496#if CONFIG_CODE_DECRYPTION
497/*
498 * vm_map_apple_protected:
499 * This remaps the requested part of the object with an object backed by
500 * the decrypting pager.
501 * crypt_info contains entry points and session data for the crypt module.
502 * The crypt_info block will be copied by vm_map_apple_protected. The data structures
503 * referenced in crypt_info must remain valid until crypt_info->crypt_end() is called.
504 */
505kern_return_t
506vm_map_apple_protected(
507	vm_map_t	map,
508	vm_map_offset_t	start,
509	vm_map_offset_t	end,
510	struct pager_crypt_info *crypt_info)
511{
512	boolean_t	map_locked;
513	kern_return_t	kr;
514	vm_map_entry_t	map_entry;
515	memory_object_t	protected_mem_obj;
516	vm_object_t	protected_object;
517	vm_map_offset_t	map_addr;
518
519	vm_map_lock_read(map);
520	map_locked = TRUE;
521
522	/* lookup the protected VM object */
523	if (!vm_map_lookup_entry(map,
524				 start,
525				 &map_entry) ||
526	    map_entry->vme_end < end ||
527	    map_entry->is_sub_map) {
528		/* that memory is not properly mapped */
529		kr = KERN_INVALID_ARGUMENT;
530		goto done;
531	}
532	protected_object = map_entry->object.vm_object;
533	if (protected_object == VM_OBJECT_NULL) {
534		/* there should be a VM object here at this point */
535		kr = KERN_INVALID_ARGUMENT;
536		goto done;
537	}
538
539	/* make sure protected object stays alive while map is unlocked */
540	vm_object_reference(protected_object);
541
542	vm_map_unlock_read(map);
543	map_locked = FALSE;
544
545	/*
546	 * Lookup (and create if necessary) the protected memory object
547	 * matching that VM object.
548	 * If successful, this also grabs a reference on the memory object,
549	 * to guarantee that it doesn't go away before we get a chance to map
550	 * it.
551	 */
552	protected_mem_obj = apple_protect_pager_setup(protected_object, crypt_info);
553
554	/* release extra ref on protected object */
555	vm_object_deallocate(protected_object);
556
557	if (protected_mem_obj == NULL) {
558		kr = KERN_FAILURE;
559		goto done;
560	}
561
562	/* map this memory object in place of the current one */
563	map_addr = start;
564	kr = vm_map_enter_mem_object(map,
565				     &map_addr,
566				     end - start,
567				     (mach_vm_offset_t) 0,
568				     VM_FLAGS_FIXED | VM_FLAGS_OVERWRITE,
569				     (ipc_port_t) protected_mem_obj,
570				     (map_entry->offset +
571				      (start - map_entry->vme_start)),
572				     TRUE,
573				     map_entry->protection,
574				     map_entry->max_protection,
575				     map_entry->inheritance);
576	assert(map_addr == start);
577	/*
578	 * Release the reference obtained by apple_protect_pager_setup().
579	 * The mapping (if it succeeded) is now holding a reference on the
580	 * memory object.
581	 */
582	memory_object_deallocate(protected_mem_obj);
583
584done:
585	if (map_locked) {
586		vm_map_unlock_read(map);
587	}
588	return kr;
589}
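/*
 * Hypothetical caller sketch (not from the original sources), showing the
 * lifetime rule documented above: the pager_crypt_info structure itself is
 * copied, but whatever it references must remain valid until the pager
 * invokes crypt_end():
 *
 *	struct pager_crypt_info crypt_info;	// filled in by the crypt
 *						// module (field layout per
 *						// pager_crypt_info)
 *	kern_return_t kr;
 *
 *	kr = vm_map_apple_protected(map, start, end, &crypt_info);
 *	// on success, [start, end) is now backed by the decrypting pager;
 *	// crypt_info's referents must stay alive until crypt_end() runs
 */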
590#endif	/* CONFIG_CODE_DECRYPTION */
591
592
593lck_grp_t		vm_map_lck_grp;
594lck_grp_attr_t	vm_map_lck_grp_attr;
595lck_attr_t		vm_map_lck_attr;
596
597
598/*
599 *	vm_map_init:
600 *
601 *	Initialize the vm_map module.  Must be called before
602 *	any other vm_map routines.
603 *
604 *	Map and entry structures are allocated from zones -- we must
605 *	initialize those zones.
606 *
607 *	There are three zones of interest:
608 *
609 *	vm_map_zone:		used to allocate maps.
610 *	vm_map_entry_zone:	used to allocate map entries.
611 *	vm_map_entry_reserved_zone:	fallback zone for kernel map entries
612 *
613 *	The kernel allocates map entries from a special zone that is initially
614 *	"crammed" with memory.  It would be difficult (perhaps impossible) for
615 *	the kernel to allocate more memory to an entry zone when it became
616 *	empty since the very act of allocating memory implies the creation
617 *	of a new entry.
618 */
619void
620vm_map_init(
621	void)
622{
623	vm_size_t entry_zone_alloc_size;
624	const char *mez_name = "VM map entries";
625
626	vm_map_zone = zinit((vm_map_size_t) sizeof(struct _vm_map), 40*1024,
627			    PAGE_SIZE, "maps");
628	zone_change(vm_map_zone, Z_NOENCRYPT, TRUE);
629#if	defined(__LP64__)
630	entry_zone_alloc_size = PAGE_SIZE * 5;
631#else
632	entry_zone_alloc_size = PAGE_SIZE * 6;
633#endif
634	vm_map_entry_zone = zinit((vm_map_size_t) sizeof(struct vm_map_entry),
635				  1024*1024, entry_zone_alloc_size,
636				  mez_name);
637	zone_change(vm_map_entry_zone, Z_NOENCRYPT, TRUE);
638	zone_change(vm_map_entry_zone, Z_NOCALLOUT, TRUE);
639	zone_change(vm_map_entry_zone, Z_GZALLOC_EXEMPT, TRUE);
640
641	vm_map_entry_reserved_zone = zinit((vm_map_size_t) sizeof(struct vm_map_entry),
642				   kentry_data_size * 64, kentry_data_size,
643				   "Reserved VM map entries");
644	zone_change(vm_map_entry_reserved_zone, Z_NOENCRYPT, TRUE);
645
646	vm_map_copy_zone = zinit((vm_map_size_t) sizeof(struct vm_map_copy),
647				 16*1024, PAGE_SIZE, "VM map copies");
648	zone_change(vm_map_copy_zone, Z_NOENCRYPT, TRUE);
649
650	/*
651	 *	Cram the map and kentry zones with initial data.
652	 *	Set reserved_zone non-collectible to aid zone_gc().
653	 */
654	zone_change(vm_map_zone, Z_COLLECT, FALSE);
655
656	zone_change(vm_map_entry_reserved_zone, Z_COLLECT, FALSE);
657	zone_change(vm_map_entry_reserved_zone, Z_EXPAND, FALSE);
658	zone_change(vm_map_entry_reserved_zone, Z_FOREIGN, TRUE);
659	zone_change(vm_map_entry_reserved_zone, Z_NOCALLOUT, TRUE);
660	zone_change(vm_map_entry_reserved_zone, Z_CALLERACCT, FALSE); /* don't charge caller */
661	zone_change(vm_map_copy_zone, Z_CALLERACCT, FALSE); /* don't charge caller */
662	zone_change(vm_map_entry_reserved_zone, Z_GZALLOC_EXEMPT, TRUE);
663
664	zcram(vm_map_zone, (vm_offset_t)map_data, map_data_size);
665	zcram(vm_map_entry_reserved_zone, (vm_offset_t)kentry_data, kentry_data_size);
666
667	lck_grp_attr_setdefault(&vm_map_lck_grp_attr);
668	lck_grp_init(&vm_map_lck_grp, "vm_map", &vm_map_lck_grp_attr);
669	lck_attr_setdefault(&vm_map_lck_attr);
670
671#if CONFIG_FREEZE
672	default_freezer_init();
673#endif /* CONFIG_FREEZE */
674}
675
676void
677vm_map_steal_memory(
678	void)
679{
680	uint32_t kentry_initial_pages;
681
682	map_data_size = round_page(10 * sizeof(struct _vm_map));
683	map_data = pmap_steal_memory(map_data_size);
684
685	/*
686	 * kentry_initial_pages corresponds to the number of kernel map entries
687	 * required during bootstrap until the asynchronous replenishment
688	 * scheme is activated and/or entries are available from the general
689	 * map entry pool.
690	 */
691#if	defined(__LP64__)
692	kentry_initial_pages = 10;
693#else
694	kentry_initial_pages = 6;
695#endif
696
697#if CONFIG_GZALLOC
698	/* If using the guard allocator, reserve more memory for the kernel
699	 * reserved map entry pool.
700	*/
701	if (gzalloc_enabled())
702		kentry_initial_pages *= 1024;
703#endif
704
705	kentry_data_size = kentry_initial_pages * PAGE_SIZE;
706	kentry_data = pmap_steal_memory(kentry_data_size);
707}
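/*
 * Worked example (assuming 4 KiB pages; PAGE_SIZE is set per architecture):
 * on LP64 the bootstrap pool above is kentry_data_size = 10 * PAGE_SIZE =
 * 40 KiB of stolen memory.  With the guard allocator enabled the reserve
 * grows to 10 * 1024 pages = 40 MiB, since gzalloc's per-allocation guard
 * pages make each map entry far more expensive and would otherwise exhaust
 * the reserved pool during early boot.
 */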
708
709void vm_kernel_reserved_entry_init(void) {
710	zone_prio_refill_configure(vm_map_entry_reserved_zone, (6*PAGE_SIZE)/sizeof(struct vm_map_entry));
711}
712
713/*
714 *	vm_map_create:
715 *
716 *	Creates and returns a new empty VM map with
717 *	the given physical map structure, and having
718 *	the given lower and upper address bounds.
719 */
720vm_map_t
721vm_map_create(
722	pmap_t			pmap,
723	vm_map_offset_t	min,
724	vm_map_offset_t	max,
725	boolean_t		pageable)
726{
727	static int		color_seed = 0;
728	register vm_map_t	result;
729
730	result = (vm_map_t) zalloc(vm_map_zone);
731	if (result == VM_MAP_NULL)
732		panic("vm_map_create");
733
734	vm_map_first_entry(result) = vm_map_to_entry(result);
735	vm_map_last_entry(result)  = vm_map_to_entry(result);
736	result->hdr.nentries = 0;
737	result->hdr.entries_pageable = pageable;
738
739	vm_map_store_init( &(result->hdr) );
740
741	result->hdr.page_shift = PAGE_SHIFT;
742
743	result->size = 0;
744	result->user_wire_limit = MACH_VM_MAX_ADDRESS;	/* default limit is unlimited */
745	result->user_wire_size  = 0;
746	result->ref_count = 1;
747#if	TASK_SWAPPER
748	result->res_count = 1;
749	result->sw_state = MAP_SW_IN;
750#endif	/* TASK_SWAPPER */
751	result->pmap = pmap;
752	result->min_offset = min;
753	result->max_offset = max;
754	result->wiring_required = FALSE;
755	result->no_zero_fill = FALSE;
756	result->mapped_in_other_pmaps = FALSE;
757	result->wait_for_space = FALSE;
758	result->switch_protect = FALSE;
759	result->disable_vmentry_reuse = FALSE;
760	result->map_disallow_data_exec = FALSE;
761	result->highest_entry_end = 0;
762	result->first_free = vm_map_to_entry(result);
763	result->hint = vm_map_to_entry(result);
764	result->color_rr = (color_seed++) & vm_color_mask;
765 	result->jit_entry_exists = FALSE;
766#if CONFIG_FREEZE
767	result->default_freezer_handle = NULL;
768#endif
769	vm_map_lock_init(result);
770	lck_mtx_init_ext(&result->s_lock, &result->s_lock_ext, &vm_map_lck_grp, &vm_map_lck_attr);
771
772	return(result);
773}
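/*
 * Usage sketch (mirrors how this file builds its temporary "zap" maps
 * further below; shown here only as an illustration):
 *
 *	vm_map_t zap_map;
 *
 *	zap_map = vm_map_create(PMAP_NULL,
 *				start,
 *				start + size,
 *				map->hdr.entries_pageable);
 *	vm_map_set_page_shift(zap_map, VM_MAP_PAGE_SHIFT(map));
 *
 * A PMAP_NULL physical map is legal for such bookkeeping-only maps; a map
 * whose pages will actually be entered into hardware needs a real pmap.
 */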
774
775/*
776 *	vm_map_entry_create:	[ internal use only ]
777 *
778 *	Allocates a VM map entry for insertion in the
779 *	given map (or map copy).  No fields are filled.
780 */
781#define	vm_map_entry_create(map, map_locked)	_vm_map_entry_create(&(map)->hdr, map_locked)
782
783#define	vm_map_copy_entry_create(copy, map_locked)					\
784	_vm_map_entry_create(&(copy)->cpy_hdr, map_locked)
785unsigned reserved_zalloc_count, nonreserved_zalloc_count;
786
787static vm_map_entry_t
788_vm_map_entry_create(
789	struct vm_map_header	*map_header, boolean_t __unused map_locked)
790{
791	zone_t	zone;
792	vm_map_entry_t	entry;
793
794	zone = vm_map_entry_zone;
795
796	assert(map_header->entries_pageable ? !map_locked : TRUE);
797
798	if (map_header->entries_pageable) {
799		entry = (vm_map_entry_t) zalloc(zone);
800	}
801	else {
802		entry = (vm_map_entry_t) zalloc_canblock(zone, FALSE);
803
804		if (entry == VM_MAP_ENTRY_NULL) {
805			zone = vm_map_entry_reserved_zone;
806			entry = (vm_map_entry_t) zalloc(zone);
807			OSAddAtomic(1, &reserved_zalloc_count);
808		} else
809			OSAddAtomic(1, &nonreserved_zalloc_count);
810	}
811
812	if (entry == VM_MAP_ENTRY_NULL)
813		panic("vm_map_entry_create");
814	entry->from_reserved_zone = (zone == vm_map_entry_reserved_zone);
815
816	vm_map_store_update( (vm_map_t) NULL, entry, VM_MAP_ENTRY_CREATE);
817#if	MAP_ENTRY_CREATION_DEBUG
818	entry->vme_creation_maphdr = map_header;
819	fastbacktrace(&entry->vme_creation_bt[0],
820		      (sizeof(entry->vme_creation_bt)/sizeof(uintptr_t)));
821#endif
822	return(entry);
823}
824
825/*
826 *	vm_map_entry_dispose:	[ internal use only ]
827 *
828 *	Inverse of vm_map_entry_create.
829 *
830 * 	write map lock held so no need to
831 *	do anything special to ensure correctness
832 * 	of the stores
833 */
834#define	vm_map_entry_dispose(map, entry)			\
835	_vm_map_entry_dispose(&(map)->hdr, (entry))
836
837#define	vm_map_copy_entry_dispose(map, entry) \
838	_vm_map_entry_dispose(&(copy)->cpy_hdr, (entry))
839
840static void
841_vm_map_entry_dispose(
842	register struct vm_map_header	*map_header,
843	register vm_map_entry_t		entry)
844{
845	register zone_t		zone;
846
847	if (map_header->entries_pageable || !(entry->from_reserved_zone))
848		zone = vm_map_entry_zone;
849	else
850		zone = vm_map_entry_reserved_zone;
851
852	if (!map_header->entries_pageable) {
853		if (zone == vm_map_entry_zone)
854			OSAddAtomic(-1, &nonreserved_zalloc_count);
855		else
856			OSAddAtomic(-1, &reserved_zalloc_count);
857	}
858
859	zfree(zone, entry);
860}
861
862#if MACH_ASSERT
863static boolean_t first_free_check = FALSE;
864boolean_t
865first_free_is_valid(
866	vm_map_t	map)
867{
868	if (!first_free_check)
869		return TRUE;
870
871	return( first_free_is_valid_store( map ));
872}
873#endif /* MACH_ASSERT */
874
875
876#define vm_map_copy_entry_link(copy, after_where, entry)		\
877	_vm_map_store_entry_link(&(copy)->cpy_hdr, after_where, (entry))
878
879#define vm_map_copy_entry_unlink(copy, entry)				\
880	_vm_map_store_entry_unlink(&(copy)->cpy_hdr, (entry))
881
882#if	MACH_ASSERT && TASK_SWAPPER
883/*
884 *	vm_map_res_reference:
885 *
886 *	Adds another valid residence count to the given map.
887 *
888 *	Map is locked so this function can be called from
889 *	vm_map_swapin.
890 *
891 */
892void vm_map_res_reference(register vm_map_t map)
893{
894	/* assert map is locked */
895	assert(map->res_count >= 0);
896	assert(map->ref_count >= map->res_count);
897	if (map->res_count == 0) {
898		lck_mtx_unlock(&map->s_lock);
899		vm_map_lock(map);
900		vm_map_swapin(map);
901		lck_mtx_lock(&map->s_lock);
902		++map->res_count;
903		vm_map_unlock(map);
904	} else
905		++map->res_count;
906}
907
908/*
909 *	vm_map_reference_swap:
910 *
911 *	Adds valid reference and residence counts to the given map.
912 *
913 *	The map may not be in memory (i.e. zero residence count).
914 *
915 */
916void vm_map_reference_swap(register vm_map_t map)
917{
918	assert(map != VM_MAP_NULL);
919	lck_mtx_lock(&map->s_lock);
920	assert(map->res_count >= 0);
921	assert(map->ref_count >= map->res_count);
922	map->ref_count++;
923	vm_map_res_reference(map);
924	lck_mtx_unlock(&map->s_lock);
925}
926
927/*
928 *	vm_map_res_deallocate:
929 *
930 *	Decrement residence count on a map; possibly causing swapout.
931 *
932 *	The map must be in memory (i.e. non-zero residence count).
933 *
934 *	The map is locked, so this function is callable from vm_map_deallocate.
935 *
936 */
937void vm_map_res_deallocate(register vm_map_t map)
938{
939	assert(map->res_count > 0);
940	if (--map->res_count == 0) {
941		lck_mtx_unlock(&map->s_lock);
942		vm_map_lock(map);
943		vm_map_swapout(map);
944		vm_map_unlock(map);
945		lck_mtx_lock(&map->s_lock);
946	}
947	assert(map->ref_count >= map->res_count);
948}
949#endif	/* MACH_ASSERT && TASK_SWAPPER */
950
951/*
952 *	vm_map_destroy:
953 *
954 *	Actually destroy a map.
955 */
956void
957vm_map_destroy(
958	vm_map_t	map,
959	int		flags)
960{
961	vm_map_lock(map);
962
963	/* clean up regular map entries */
964	(void) vm_map_delete(map, map->min_offset, map->max_offset,
965			     flags, VM_MAP_NULL);
966	/* clean up leftover special mappings (commpage, etc...) */
967	(void) vm_map_delete(map, 0x0, 0xFFFFFFFFFFFFF000ULL,
968			     flags, VM_MAP_NULL);
969
970#if CONFIG_FREEZE
971	if (map->default_freezer_handle) {
972		default_freezer_handle_deallocate(map->default_freezer_handle);
973		map->default_freezer_handle = NULL;
974	}
975#endif
976	vm_map_unlock(map);
977
978	assert(map->hdr.nentries == 0);
979
980	if(map->pmap)
981		pmap_destroy(map->pmap);
982
983	zfree(vm_map_zone, map);
984}
985
986#if	TASK_SWAPPER
987/*
988 * vm_map_swapin/vm_map_swapout
989 *
990 * Swap a map in and out, either referencing or releasing its resources.
991 * These functions are internal use only; however, they must be exported
992 * because they may be called from macros, which are exported.
993 *
994 * In the case of swapout, there could be races on the residence count,
995 * so if the residence count is up, we return, assuming that a
996 * vm_map_deallocate() call in the near future will bring us back.
997 *
998 * Locking:
999 *	-- We use the map write lock for synchronization among races.
1000 *	-- The map write lock, and not the simple s_lock, protects the
1001 *	   swap state of the map.
1002 *	-- If a map entry is a share map, then we hold both locks, in
1003 *	   hierarchical order.
1004 *
1005 * Synchronization Notes:
1006 *	1) If a vm_map_swapin() call happens while swapout in progress, it
1007 *	will block on the map lock and proceed when swapout is through.
1008 *	2) A vm_map_reference() call at this time is illegal, and will
1009 *	cause a panic.  vm_map_reference() is only allowed on resident
1010 *	maps, since it refuses to block.
1011 *	3) A vm_map_swapin() call during a swapin will block, and
1012 *	proceed when the first swapin is done, turning into a nop.
1013 *	This is the reason the res_count is not incremented until
1014 *	after the swapin is complete.
1015 *	4) There is a timing hole after the checks of the res_count, before
1016 *	the map lock is taken, during which a swapin may get the lock
1017 *	before a swapout about to happen.  If this happens, the swapin
1018 *	will detect the state and increment the reference count, causing
1019 *	the swapout to be a nop, thereby delaying it until a later
1020 *	vm_map_deallocate.  If the swapout gets the lock first, then
1021 *	the swapin will simply block until the swapout is done, and
1022 *	then proceed.
1023 *
1024 * Because vm_map_swapin() is potentially an expensive operation, it
1025 * should be used with caution.
1026 *
1027 * Invariants:
1028 *	1) A map with a residence count of zero is either swapped, or
1029 *	   being swapped.
1030 *	2) A map with a non-zero residence count is either resident,
1031 *	   or being swapped in.
1032 */
1033
1034int vm_map_swap_enable = 1;
1035
1036void vm_map_swapin (vm_map_t map)
1037{
1038	register vm_map_entry_t entry;
1039
1040	if (!vm_map_swap_enable)	/* debug */
1041		return;
1042
1043	/*
1044	 * Map is locked
1045	 * First deal with various races.
1046	 */
1047	if (map->sw_state == MAP_SW_IN)
1048		/*
1049		 * we raced with swapout and won.  Returning will incr.
1050		 * the res_count, turning the swapout into a nop.
1051		 */
1052		return;
1053
1054	/*
1055	 * The residence count must be zero.  If we raced with another
1056	 * swapin, the state would have been IN; if we raced with a
1057	 * swapout (after another competing swapin), we must have lost
1058	 * the race to get here (see above comment), in which case
1059	 * res_count is still 0.
1060	 */
1061	assert(map->res_count == 0);
1062
1063	/*
1064	 * There are no intermediate states of a map going out or
1065	 * coming in, since the map is locked during the transition.
1066	 */
1067	assert(map->sw_state == MAP_SW_OUT);
1068
1069	/*
1070	 * We now operate upon each map entry.  If the entry is a sub-
1071	 * or share-map, we call vm_map_res_reference upon it.
1072	 * If the entry is an object, we call vm_object_res_reference
1073	 * (this may iterate through the shadow chain).
1074	 * Note that we hold the map locked the entire time,
1075	 * even if we get back here via a recursive call in
1076	 * vm_map_res_reference.
1077	 */
1078	entry = vm_map_first_entry(map);
1079
1080	while (entry != vm_map_to_entry(map)) {
1081		if (entry->object.vm_object != VM_OBJECT_NULL) {
1082			if (entry->is_sub_map) {
1083				vm_map_t lmap = entry->object.sub_map;
1084				lck_mtx_lock(&lmap->s_lock);
1085				vm_map_res_reference(lmap);
1086				lck_mtx_unlock(&lmap->s_lock);
1087			} else {
1088				vm_object_t object = entry->object.vm_object;
1089				vm_object_lock(object);
1090				/*
1091				 * This call may iterate through the
1092				 * shadow chain.
1093				 */
1094				vm_object_res_reference(object);
1095				vm_object_unlock(object);
1096			}
1097		}
1098		entry = entry->vme_next;
1099	}
1100	assert(map->sw_state == MAP_SW_OUT);
1101	map->sw_state = MAP_SW_IN;
1102}
1103
1104void vm_map_swapout(vm_map_t map)
1105{
1106	register vm_map_entry_t entry;
1107
1108	/*
1109	 * Map is locked
1110	 * First deal with various races.
1111	 * If we raced with a swapin and lost, the residence count
1112	 * will have been incremented to 1, and we simply return.
1113	 */
1114	lck_mtx_lock(&map->s_lock);
1115	if (map->res_count != 0) {
1116		lck_mtx_unlock(&map->s_lock);
1117		return;
1118	}
1119	lck_mtx_unlock(&map->s_lock);
1120
1121	/*
1122	 * There are no intermediate states of a map going out or
1123	 * coming in, since the map is locked during the transition.
1124	 */
1125	assert(map->sw_state == MAP_SW_IN);
1126
1127	if (!vm_map_swap_enable)
1128		return;
1129
1130	/*
1131	 * We now operate upon each map entry.  If the entry is a sub-
1132	 * or share-map, we call vm_map_res_deallocate upon it.
1133	 * If the entry is an object, we call vm_object_res_deallocate
1134	 * (this may iterate through the shadow chain).
1135	 * Note that we hold the map locked the entire time,
1136	 * even if we get back here via a recursive call in
1137	 * vm_map_res_deallocate.
1138	 */
1139	entry = vm_map_first_entry(map);
1140
1141	while (entry != vm_map_to_entry(map)) {
1142		if (entry->object.vm_object != VM_OBJECT_NULL) {
1143			if (entry->is_sub_map) {
1144				vm_map_t lmap = entry->object.sub_map;
1145				lck_mtx_lock(&lmap->s_lock);
1146				vm_map_res_deallocate(lmap);
1147				lck_mtx_unlock(&lmap->s_lock);
1148			} else {
1149				vm_object_t object = entry->object.vm_object;
1150				vm_object_lock(object);
1151				/*
1152				 * This call may take a long time,
1153				 * since it could actively push
1154				 * out pages (if we implement it
1155				 * that way).
1156				 */
1157				vm_object_res_deallocate(object);
1158				vm_object_unlock(object);
1159			}
1160		}
1161		entry = entry->vme_next;
1162	}
1163	assert(map->sw_state == MAP_SW_IN);
1164	map->sw_state = MAP_SW_OUT;
1165}
1166
1167#endif	/* TASK_SWAPPER */
1168
1169/*
1170 *	vm_map_lookup_entry:	[ internal use only ]
1171 *
1172 *	Calls into the vm map store layer to find the map
1173 *	entry containing (or immediately preceding) the
1174 *	specified address in the given map; the entry is returned
1175 *	in the "entry" parameter.  The boolean
1176 *	result indicates whether the address is
1177 *	actually contained in the map.
1178 */
1179boolean_t
1180vm_map_lookup_entry(
1181	register vm_map_t		map,
1182	register vm_map_offset_t	address,
1183	vm_map_entry_t		*entry)		/* OUT */
1184{
1185	return ( vm_map_store_lookup_entry( map, address, entry ));
1186}
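/*
 * Usage sketch (the same pattern vm_map_set_cache_attr() uses earlier in
 * this file): take the map lock, look the address up, and interpret the
 * boolean result:
 *
 *	vm_map_entry_t entry;
 *
 *	vm_map_lock_read(map);
 *	if (vm_map_lookup_entry(map, addr, &entry)) {
 *		// "addr" lies inside "entry"
 *	} else {
 *		// no entry contains "addr"; "entry" is the entry immediately
 *		// preceding the hole (or vm_map_to_entry(map) if none)
 *	}
 *	vm_map_unlock_read(map);
 */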
1187
1188/*
1189 *	Routine:	vm_map_find_space
1190 *	Purpose:
1191 *		Allocate a range in the specified virtual address map,
1192 *		returning the entry allocated for that range.
1193 *		Used by kmem_alloc, etc.
1194 *
1195 *		The map must NOT be locked on entry. It will be returned locked
1196 *		on KERN_SUCCESS, unlocked on failure.
1197 *
1198 *		If an entry is allocated, the object/offset fields
1199 *		are initialized to zero.
1200 */
1201kern_return_t
1202vm_map_find_space(
1203	register vm_map_t	map,
1204	vm_map_offset_t		*address,	/* OUT */
1205	vm_map_size_t		size,
1206	vm_map_offset_t		mask,
1207	int			flags,
1208	vm_map_entry_t		*o_entry)	/* OUT */
1209{
1210	register vm_map_entry_t	entry, new_entry;
1211	register vm_map_offset_t	start;
1212	register vm_map_offset_t	end;
1213
1214	if (size == 0) {
1215		*address = 0;
1216		return KERN_INVALID_ARGUMENT;
1217	}
1218
1219	if (flags & VM_FLAGS_GUARD_AFTER) {
1220		/* account for the back guard page in the size */
1221		size += VM_MAP_PAGE_SIZE(map);
1222	}
1223
1224	new_entry = vm_map_entry_create(map, FALSE);
1225
1226	/*
1227	 *	Look for the first possible address; if there's already
1228	 *	something at this address, we have to start after it.
1229	 */
1230
1231	vm_map_lock(map);
1232
1233	if( map->disable_vmentry_reuse == TRUE) {
1234		VM_MAP_HIGHEST_ENTRY(map, entry, start);
1235	} else {
1236		assert(first_free_is_valid(map));
1237		if ((entry = map->first_free) == vm_map_to_entry(map))
1238			start = map->min_offset;
1239		else
1240			start = entry->vme_end;
1241	}
1242
1243	/*
1244	 *	In any case, the "entry" always precedes
1245	 *	the proposed new region throughout the loop:
1246	 */
1247
1248	while (TRUE) {
1249		register vm_map_entry_t	next;
1250
1251		/*
1252		 *	Find the end of the proposed new region.
1253		 *	Be sure we didn't go beyond the end, or
1254		 *	wrap around the address.
1255		 */
1256
1257		if (flags & VM_FLAGS_GUARD_BEFORE) {
1258			/* reserve space for the front guard page */
1259			start += VM_MAP_PAGE_SIZE(map);
1260		}
1261		end = ((start + mask) & ~mask);
1262
1263		if (end < start) {
1264			vm_map_entry_dispose(map, new_entry);
1265			vm_map_unlock(map);
1266			return(KERN_NO_SPACE);
1267		}
1268		start = end;
1269		end += size;
1270
1271		if ((end > map->max_offset) || (end < start)) {
1272			vm_map_entry_dispose(map, new_entry);
1273			vm_map_unlock(map);
1274			return(KERN_NO_SPACE);
1275		}
1276
1277		/*
1278		 *	If there are no more entries, we must win.
1279		 */
1280
1281		next = entry->vme_next;
1282		if (next == vm_map_to_entry(map))
1283			break;
1284
1285		/*
1286		 *	If there is another entry, it must be
1287		 *	after the end of the potential new region.
1288		 */
1289
1290		if (next->vme_start >= end)
1291			break;
1292
1293		/*
1294		 *	Didn't fit -- move to the next entry.
1295		 */
1296
1297		entry = next;
1298		start = entry->vme_end;
1299	}
1300
1301	/*
1302	 *	At this point,
1303	 *		"start" and "end" should define the endpoints of the
1304	 *			available new range, and
1305	 *		"entry" should refer to the region before the new
1306	 *			range, and
1307	 *
1308	 *		the map should be locked.
1309	 */
1310
1311	if (flags & VM_FLAGS_GUARD_BEFORE) {
1312		/* go back for the front guard page */
1313		start -= VM_MAP_PAGE_SIZE(map);
1314	}
1315	*address = start;
1316
1317	assert(start < end);
1318	new_entry->vme_start = start;
1319	new_entry->vme_end = end;
1320	assert(page_aligned(new_entry->vme_start));
1321	assert(page_aligned(new_entry->vme_end));
1322	assert(VM_MAP_PAGE_ALIGNED(new_entry->vme_start,
1323				   VM_MAP_PAGE_MASK(map)));
1324	assert(VM_MAP_PAGE_ALIGNED(new_entry->vme_end,
1325				   VM_MAP_PAGE_MASK(map)));
1326
1327	new_entry->is_shared = FALSE;
1328	new_entry->is_sub_map = FALSE;
1329	new_entry->use_pmap = FALSE;
1330	new_entry->object.vm_object = VM_OBJECT_NULL;
1331	new_entry->offset = (vm_object_offset_t) 0;
1332
1333	new_entry->needs_copy = FALSE;
1334
1335	new_entry->inheritance = VM_INHERIT_DEFAULT;
1336	new_entry->protection = VM_PROT_DEFAULT;
1337	new_entry->max_protection = VM_PROT_ALL;
1338	new_entry->behavior = VM_BEHAVIOR_DEFAULT;
1339	new_entry->wired_count = 0;
1340	new_entry->user_wired_count = 0;
1341
1342	new_entry->in_transition = FALSE;
1343	new_entry->needs_wakeup = FALSE;
1344	new_entry->no_cache = FALSE;
1345	new_entry->permanent = FALSE;
1346	new_entry->superpage_size = FALSE;
1347	if (VM_MAP_PAGE_SHIFT(map) != PAGE_SHIFT) {
1348		new_entry->map_aligned = TRUE;
1349	} else {
1350		new_entry->map_aligned = FALSE;
1351	}
1352
1353	new_entry->used_for_jit = 0;
1354
1355	new_entry->alias = 0;
1356	new_entry->zero_wired_pages = FALSE;
1357
1358	VM_GET_FLAGS_ALIAS(flags, new_entry->alias);
1359
1360	/*
1361	 *	Insert the new entry into the list
1362	 */
1363
1364	vm_map_store_entry_link(map, entry, new_entry);
1365
1366	map->size += size;
1367
1368	/*
1369	 *	Update the lookup hint
1370	 */
1371	SAVE_HINT_MAP_WRITE(map, new_entry);
1372
1373	*o_entry = new_entry;
1374	return(KERN_SUCCESS);
1375}
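/*
 * Caller sketch (roughly what kmem_alloc-style callers do; illustrative
 * only, error handling elided): on KERN_SUCCESS the map comes back locked
 * and the new entry's object/offset fields are zero, so the caller fills
 * them in and then unlocks:
 *
 *	vm_map_offset_t	addr;
 *	vm_map_entry_t	entry;
 *	kern_return_t	kr;
 *
 *	kr = vm_map_find_space(kernel_map, &addr, size, 0, 0, &entry);
 *	if (kr == KERN_SUCCESS) {
 *		entry->object.vm_object = object;	// caller-provided object
 *		entry->offset = 0;
 *		vm_map_unlock(kernel_map);		// returned locked
 *	}
 */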
1376
1377int vm_map_pmap_enter_print = FALSE;
1378int vm_map_pmap_enter_enable = FALSE;
1379
1380/*
1381 *	Routine:	vm_map_pmap_enter [internal only]
1382 *
1383 *	Description:
1384 *		Force pages from the specified object to be entered into
1385 *		the pmap at the specified address if they are present.
1386 *		As soon as a page is not found in the object, the scan ends.
1387 *
1388 *	Returns:
1389 *		Nothing.
1390 *
1391 *	In/out conditions:
1392 *		The source map should not be locked on entry.
1393 */
1394static void
1395vm_map_pmap_enter(
1396	vm_map_t		map,
1397	register vm_map_offset_t 	addr,
1398	register vm_map_offset_t	end_addr,
1399	register vm_object_t 	object,
1400	vm_object_offset_t	offset,
1401	vm_prot_t		protection)
1402{
1403	int			type_of_fault;
1404	kern_return_t		kr;
1405
1406	if(map->pmap == 0)
1407		return;
1408
1409	while (addr < end_addr) {
1410		register vm_page_t	m;
1411
1412		vm_object_lock(object);
1413
1414		m = vm_page_lookup(object, offset);
1415		/*
1416		 * ENCRYPTED SWAP:
1417		 * The user should never see encrypted data, so do not
1418		 * enter an encrypted page in the page table.
1419		 */
1420		if (m == VM_PAGE_NULL || m->busy || m->encrypted ||
1421		    m->fictitious ||
1422		    (m->unusual && ( m->error || m->restart || m->absent))) {
1423			vm_object_unlock(object);
1424			return;
1425		}
1426
1427		if (vm_map_pmap_enter_print) {
1428			printf("vm_map_pmap_enter:");
1429			printf("map: %p, addr: %llx, object: %p, offset: %llx\n",
1430			       map, (unsigned long long)addr, object, (unsigned long long)offset);
1431		}
1432		type_of_fault = DBG_CACHE_HIT_FAULT;
1433		kr = vm_fault_enter(m, map->pmap, addr, protection, protection,
1434				    VM_PAGE_WIRED(m), FALSE, FALSE, FALSE, NULL,
1435				    &type_of_fault);
1436
1437		vm_object_unlock(object);
1438
1439		offset += PAGE_SIZE_64;
1440		addr += PAGE_SIZE;
1441	}
1442}
1443
1444boolean_t vm_map_pmap_is_empty(
1445	vm_map_t	map,
1446	vm_map_offset_t	start,
1447	vm_map_offset_t end);
1448boolean_t vm_map_pmap_is_empty(
1449	vm_map_t	map,
1450	vm_map_offset_t	start,
1451	vm_map_offset_t	end)
1452{
1453#ifdef MACHINE_PMAP_IS_EMPTY
1454	return pmap_is_empty(map->pmap, start, end);
1455#else 	/* MACHINE_PMAP_IS_EMPTY */
1456	vm_map_offset_t	offset;
1457	ppnum_t		phys_page;
1458
1459	if (map->pmap == NULL) {
1460		return TRUE;
1461	}
1462
1463	for (offset = start;
1464	     offset < end;
1465	     offset += PAGE_SIZE) {
1466		phys_page = pmap_find_phys(map->pmap, offset);
1467		if (phys_page) {
1468			kprintf("vm_map_pmap_is_empty(%p,0x%llx,0x%llx): "
1469				"page %d at 0x%llx\n",
1470				map, (long long)start, (long long)end,
1471				phys_page, (long long)offset);
1472			return FALSE;
1473		}
1474	}
1475	return TRUE;
1476#endif	/* MACHINE_PMAP_IS_EMPTY */
1477}
1478
1479#define MAX_TRIES_TO_GET_RANDOM_ADDRESS	1000
1480kern_return_t
1481vm_map_random_address_for_size(
1482	vm_map_t	map,
1483	vm_map_offset_t	*address,
1484	vm_map_size_t	size)
1485{
1486	kern_return_t	kr = KERN_SUCCESS;
1487	int		tries = 0;
1488	vm_map_offset_t	random_addr = 0;
1489	vm_map_offset_t hole_end;
1490
1491	vm_map_entry_t	next_entry = VM_MAP_ENTRY_NULL;
1492	vm_map_entry_t	prev_entry = VM_MAP_ENTRY_NULL;
1493	vm_map_size_t	vm_hole_size = 0;
1494	vm_map_size_t	addr_space_size;
1495
1496	addr_space_size = vm_map_max(map) - vm_map_min(map);
1497
1498	assert(page_aligned(size));
1499
1500	while (tries < MAX_TRIES_TO_GET_RANDOM_ADDRESS) {
1501		random_addr = ((vm_map_offset_t)random()) << PAGE_SHIFT;
1502		random_addr = vm_map_trunc_page(
1503			vm_map_min(map) +(random_addr % addr_space_size),
1504			VM_MAP_PAGE_MASK(map));
1505
1506		if (vm_map_lookup_entry(map, random_addr, &prev_entry) == FALSE) {
1507			if (prev_entry == vm_map_to_entry(map)) {
1508				next_entry = vm_map_first_entry(map);
1509			} else {
1510				next_entry = prev_entry->vme_next;
1511			}
1512			if (next_entry == vm_map_to_entry(map)) {
1513				hole_end = vm_map_max(map);
1514			} else {
1515				hole_end = next_entry->vme_start;
1516			}
1517			vm_hole_size = hole_end - random_addr;
1518			if (vm_hole_size >= size) {
1519				*address = random_addr;
1520				break;
1521			}
1522		}
1523		tries++;
1524	}
1525
1526	if (tries == MAX_TRIES_TO_GET_RANDOM_ADDRESS) {
1527		kr = KERN_NO_SPACE;
1528	}
1529	return kr;
1530}
1531
1532/*
1533 *	Routine:	vm_map_enter
1534 *
1535 *	Description:
1536 *		Allocate a range in the specified virtual address map.
1537 *		The resulting range will refer to memory defined by
1538 *		the given memory object and offset into that object.
1539 *
1540 *		Arguments are as defined in the vm_map call.
1541 */
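/*
 * Illustrative call (assumptions: anonymous zero-fill memory placed
 * anywhere in the map; not an excerpt from any particular caller):
 *
 *	vm_map_offset_t	addr = 0;
 *	kern_return_t	kr;
 *
 *	kr = vm_map_enter(map,
 *			  &addr,		// chosen address returned here
 *			  size,
 *			  (vm_map_offset_t)0,	// no alignment mask
 *			  VM_FLAGS_ANYWHERE,
 *			  VM_OBJECT_NULL,	// lazily-created zero-fill object
 *			  (vm_object_offset_t)0,
 *			  FALSE,		// needs_copy
 *			  VM_PROT_DEFAULT,
 *			  VM_PROT_ALL,
 *			  VM_INHERIT_DEFAULT);
 */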
1542int _map_enter_debug = 0;
1543static unsigned int vm_map_enter_restore_successes = 0;
1544static unsigned int vm_map_enter_restore_failures = 0;
1545kern_return_t
1546vm_map_enter(
1547	vm_map_t		map,
1548	vm_map_offset_t		*address,	/* IN/OUT */
1549	vm_map_size_t		size,
1550	vm_map_offset_t		mask,
1551	int			flags,
1552	vm_object_t		object,
1553	vm_object_offset_t	offset,
1554	boolean_t		needs_copy,
1555	vm_prot_t		cur_protection,
1556	vm_prot_t		max_protection,
1557	vm_inherit_t		inheritance)
1558{
1559	vm_map_entry_t		entry, new_entry;
1560	vm_map_offset_t		start, tmp_start, tmp_offset;
1561	vm_map_offset_t		end, tmp_end;
1562	vm_map_offset_t		tmp2_start, tmp2_end;
1563	vm_map_offset_t		step;
1564	kern_return_t		result = KERN_SUCCESS;
1565	vm_map_t		zap_old_map = VM_MAP_NULL;
1566	vm_map_t		zap_new_map = VM_MAP_NULL;
1567	boolean_t		map_locked = FALSE;
1568	boolean_t		pmap_empty = TRUE;
1569	boolean_t		new_mapping_established = FALSE;
1570	boolean_t		anywhere = ((flags & VM_FLAGS_ANYWHERE) != 0);
1571	boolean_t		purgable = ((flags & VM_FLAGS_PURGABLE) != 0);
1572	boolean_t		overwrite = ((flags & VM_FLAGS_OVERWRITE) != 0);
1573	boolean_t		no_cache = ((flags & VM_FLAGS_NO_CACHE) != 0);
1574	boolean_t		is_submap = ((flags & VM_FLAGS_SUBMAP) != 0);
1575	boolean_t		permanent = ((flags & VM_FLAGS_PERMANENT) != 0);
1576	boolean_t		entry_for_jit = ((flags & VM_FLAGS_MAP_JIT) != 0);
1577	unsigned int		superpage_size = ((flags & VM_FLAGS_SUPERPAGE_MASK) >> VM_FLAGS_SUPERPAGE_SHIFT);
1578	char			alias;
1579	vm_map_offset_t		effective_min_offset, effective_max_offset;
1580	kern_return_t		kr;
1581	boolean_t		clear_map_aligned = FALSE;
1582
1583	if (superpage_size) {
1584		switch (superpage_size) {
1585			/*
1586			 * Note that the current implementation only supports
1587			 * a single size for superpages, SUPERPAGE_SIZE, per
1588			 * architecture. As soon as more sizes need
1589			 * to be supported, SUPERPAGE_SIZE has to be replaced
1590			 * with a lookup of the size depending on superpage_size.
1591			 */
1592#ifdef __x86_64__
1593			case SUPERPAGE_SIZE_ANY:
1594				/* handle it like 2 MB and round up to page size */
1595				size = (size + 2*1024*1024 - 1) & ~(2*1024*1024 - 1);
1596			case SUPERPAGE_SIZE_2MB:
1597				break;
1598#endif
1599			default:
1600				return KERN_INVALID_ARGUMENT;
1601		}
1602		mask = SUPERPAGE_SIZE-1;
1603		if (size & (SUPERPAGE_SIZE-1))
1604			return KERN_INVALID_ARGUMENT;
1605		inheritance = VM_INHERIT_NONE;	/* fork() children won't inherit superpages */
1606	}
1607
1608
1609
1610	if (is_submap) {
1611		if (purgable) {
1612			/* submaps can not be purgeable */
1613			return KERN_INVALID_ARGUMENT;
1614		}
1615		if (object == VM_OBJECT_NULL) {
1616			/* submaps can not be created lazily */
1617			return KERN_INVALID_ARGUMENT;
1618		}
1619	}
1620	if (flags & VM_FLAGS_ALREADY) {
1621		/*
1622		 * VM_FLAGS_ALREADY says that it's OK if the same mapping
1623		 * is already present.  For it to be meaningful, the requested
1624		 * mapping has to be at a fixed address (!VM_FLAGS_ANYWHERE) and
1625		 * we shouldn't try to remove what was mapped there first
1626		 * (!VM_FLAGS_OVERWRITE).
1627		 */
1628		if ((flags & VM_FLAGS_ANYWHERE) ||
1629		    (flags & VM_FLAGS_OVERWRITE)) {
1630			return KERN_INVALID_ARGUMENT;
1631		}
1632	}
1633
1634	effective_min_offset = map->min_offset;
1635
1636	if (flags & VM_FLAGS_BEYOND_MAX) {
1637		/*
1638		 * Allow an insertion beyond the map's max offset.
1639		 */
1640		if (vm_map_is_64bit(map))
1641			effective_max_offset = 0xFFFFFFFFFFFFF000ULL;
1642		else
1643			effective_max_offset = 0x00000000FFFFF000ULL;
1644	} else {
1645		effective_max_offset = map->max_offset;
1646	}
1647
1648	if (size == 0 ||
1649	    (offset & PAGE_MASK_64) != 0) {
1650		*address = 0;
1651		return KERN_INVALID_ARGUMENT;
1652	}
1653
1654	VM_GET_FLAGS_ALIAS(flags, alias);
1655
1656#define	RETURN(value)	{ result = value; goto BailOut; }
1657
1658	assert(page_aligned(*address));
1659	assert(page_aligned(size));
1660
1661	if (!VM_MAP_PAGE_ALIGNED(size, VM_MAP_PAGE_MASK(map))) {
1662		/*
1663		 * In most cases, the caller rounds the size up to the
1664		 * map's page size.
1665		 * If we get a size that is explicitly not map-aligned here,
1666		 * we'll have to respect the caller's wish and mark the
1667		 * mapping as "not map-aligned" to avoid tripping the
1668		 * map alignment checks later.
1669		 */
1670		clear_map_aligned = TRUE;
1671	}
1672
1673	/*
1674	 * Only zero-fill objects are allowed to be purgable.
1675	 * LP64todo - limit purgable objects to 32-bits for now
1676	 */
1677	if (purgable &&
1678	    (offset != 0 ||
1679	     (object != VM_OBJECT_NULL &&
1680	      (object->vo_size != size ||
1681	       object->purgable == VM_PURGABLE_DENY))
1682	     || size > ANON_MAX_SIZE)) /* LP64todo: remove when dp capable */
1683		return KERN_INVALID_ARGUMENT;
1684
1685	if (!anywhere && overwrite) {
1686		/*
1687		 * Create a temporary VM map to hold the old mappings in the
1688		 * affected area while we create the new one.
1689		 * This avoids releasing the VM map lock in
1690		 * vm_map_entry_delete() and allows atomicity
1691		 * when we want to replace some mappings with a new one.
1692		 * It also allows us to restore the old VM mappings if the
1693		 * new mapping fails.
1694		 */
1695		zap_old_map = vm_map_create(PMAP_NULL,
1696					    *address,
1697					    *address + size,
1698					    map->hdr.entries_pageable);
1699		vm_map_set_page_shift(zap_old_map, VM_MAP_PAGE_SHIFT(map));
1700	}
1701
1702StartAgain: ;
1703
1704	start = *address;
1705
1706	if (anywhere) {
1707		vm_map_lock(map);
1708		map_locked = TRUE;
1709
1710		if (entry_for_jit) {
1711			if (map->jit_entry_exists) {
1712				result = KERN_INVALID_ARGUMENT;
1713				goto BailOut;
1714			}
1715			/*
1716			 * Get a random start address.
1717			 */
1718			result = vm_map_random_address_for_size(map, address, size);
1719			if (result != KERN_SUCCESS) {
1720				goto BailOut;
1721			}
1722			start = *address;
1723		}
1724
1725
1726		/*
1727		 *	Calculate the first possible address.
1728		 */
1729
1730		if (start < effective_min_offset)
1731			start = effective_min_offset;
1732		if (start > effective_max_offset)
1733			RETURN(KERN_NO_SPACE);
1734
1735		/*
1736		 *	Look for the first possible address;
1737		 *	if there's already something at this
1738		 *	address, we have to start after it.
1739		 */
1740
1741		if( map->disable_vmentry_reuse == TRUE) {
1742			VM_MAP_HIGHEST_ENTRY(map, entry, start);
1743		} else {
1744			assert(first_free_is_valid(map));
1745
1746			entry = map->first_free;
1747
1748			if (entry == vm_map_to_entry(map)) {
1749				entry = NULL;
1750			} else {
1751			       if (entry->vme_next == vm_map_to_entry(map)){
1752				       /*
1753					* Hole at the end of the map.
1754					*/
1755					entry = NULL;
1756			       } else {
1757					if (start < (entry->vme_next)->vme_start ) {
1758						start = entry->vme_end;
1759						start = vm_map_round_page(start,
1760									  VM_MAP_PAGE_MASK(map));
1761					} else {
1762						/*
1763						 * Need to do a lookup.
1764						 */
1765						entry = NULL;
1766					}
1767			       }
1768			}
1769
1770			if (entry == NULL) {
1771				vm_map_entry_t	tmp_entry;
1772				if (vm_map_lookup_entry(map, start, &tmp_entry)) {
1773					assert(!entry_for_jit);
1774					start = tmp_entry->vme_end;
1775					start = vm_map_round_page(start,
1776								  VM_MAP_PAGE_MASK(map));
1777				}
1778				entry = tmp_entry;
1779			}
1780		}
1781
1782		/*
1783		 *	In any case, the "entry" always precedes
1784		 *	the proposed new region throughout the
1785		 *	loop:
1786		 */
1787
1788		while (TRUE) {
1789			register vm_map_entry_t	next;
1790
1791			/*
1792			 *	Find the end of the proposed new region.
1793			 *	Be sure we didn't go beyond the end, or
1794			 *	wrap around the address.
1795			 */
1796
1797			end = ((start + mask) & ~mask);
1798			end = vm_map_round_page(end,
1799						VM_MAP_PAGE_MASK(map));
1800			if (end < start)
1801				RETURN(KERN_NO_SPACE);
1802			start = end;
1803			assert(VM_MAP_PAGE_ALIGNED(start,
1804						   VM_MAP_PAGE_MASK(map)));
1805			end += size;
1806
1807			if ((end > effective_max_offset) || (end < start)) {
1808				if (map->wait_for_space) {
1809					if (size <= (effective_max_offset -
1810						     effective_min_offset)) {
1811						assert_wait((event_t)map,
1812							    THREAD_ABORTSAFE);
1813						vm_map_unlock(map);
1814						map_locked = FALSE;
1815						thread_block(THREAD_CONTINUE_NULL);
1816						goto StartAgain;
1817					}
1818				}
1819				RETURN(KERN_NO_SPACE);
1820			}
1821
1822			/*
1823			 *	If there are no more entries, we must win.
1824			 */
1825
1826			next = entry->vme_next;
1827			if (next == vm_map_to_entry(map))
1828				break;
1829
1830			/*
1831			 *	If there is another entry, it must be
1832			 *	after the end of the potential new region.
1833			 */
1834
1835			if (next->vme_start >= end)
1836				break;
1837
1838			/*
1839			 *	Didn't fit -- move to the next entry.
1840			 */
1841
1842			entry = next;
1843			start = entry->vme_end;
1844			start = vm_map_round_page(start,
1845						  VM_MAP_PAGE_MASK(map));
1846		}
1847		*address = start;
1848		assert(VM_MAP_PAGE_ALIGNED(*address,
1849					   VM_MAP_PAGE_MASK(map)));
1850	} else {
1851		/*
1852		 *	Verify that:
1853		 *		the address doesn't itself violate
1854		 *		the mask requirement.
1855		 */
1856
1857		vm_map_lock(map);
1858		map_locked = TRUE;
1859		if ((start & mask) != 0)
1860			RETURN(KERN_NO_SPACE);
1861
1862		/*
1863		 *	...	the address is within bounds
1864		 */
1865
1866		end = start + size;
1867
1868		if ((start < effective_min_offset) ||
1869		    (end > effective_max_offset) ||
1870		    (start >= end)) {
1871			RETURN(KERN_INVALID_ADDRESS);
1872		}
1873
1874		if (overwrite && zap_old_map != VM_MAP_NULL) {
1875			/*
1876			 * Fixed mapping and "overwrite" flag: attempt to
1877			 * remove all existing mappings in the specified
1878			 * address range, saving them in our "zap_old_map".
1879			 */
1880			(void) vm_map_delete(map, start, end,
1881					     VM_MAP_REMOVE_SAVE_ENTRIES,
1882					     zap_old_map);
1883		}
1884
1885		/*
1886		 *	...	the starting address isn't allocated
1887		 */
1888
1889		if (vm_map_lookup_entry(map, start, &entry)) {
1890			if (! (flags & VM_FLAGS_ALREADY)) {
1891				RETURN(KERN_NO_SPACE);
1892			}
1893			/*
1894			 * Check if what's already there is what we want.
1895			 */
1896			tmp_start = start;
1897			tmp_offset = offset;
1898			if (entry->vme_start < start) {
1899				tmp_start -= start - entry->vme_start;
1900				tmp_offset -= start - entry->vme_start;
1901
1902			}
1903			for (; entry->vme_start < end;
1904			     entry = entry->vme_next) {
1905				/*
1906				 * Check if the mapping's attributes
1907				 * match the existing map entry.
1908				 */
1909				if (entry == vm_map_to_entry(map) ||
1910				    entry->vme_start != tmp_start ||
1911				    entry->is_sub_map != is_submap ||
1912				    entry->offset != tmp_offset ||
1913				    entry->needs_copy != needs_copy ||
1914				    entry->protection != cur_protection ||
1915				    entry->max_protection != max_protection ||
1916				    entry->inheritance != inheritance ||
1917				    entry->alias != alias) {
1918					/* not the same mapping ! */
1919					RETURN(KERN_NO_SPACE);
1920				}
1921				/*
1922				 * Check if the same object is being mapped.
1923				 */
1924				if (is_submap) {
1925					if (entry->object.sub_map !=
1926					    (vm_map_t) object) {
1927						/* not the same submap */
1928						RETURN(KERN_NO_SPACE);
1929					}
1930				} else {
1931					if (entry->object.vm_object != object) {
1932						/* not the same VM object... */
1933						vm_object_t obj2;
1934
1935						obj2 = entry->object.vm_object;
1936						if ((obj2 == VM_OBJECT_NULL ||
1937						     obj2->internal) &&
1938						    (object == VM_OBJECT_NULL ||
1939						     object->internal)) {
1940							/*
1941							 * ... but both are
1942							 * anonymous memory,
1943							 * so equivalent.
1944							 */
1945						} else {
1946							RETURN(KERN_NO_SPACE);
1947						}
1948					}
1949				}
1950
1951				tmp_offset += entry->vme_end - entry->vme_start;
1952				tmp_start += entry->vme_end - entry->vme_start;
1953				if (entry->vme_end >= end) {
1954					/* reached the end of our mapping */
1955					break;
1956				}
1957			}
1958			/* it all matches:  let's use what's already there ! */
1959			RETURN(KERN_MEMORY_PRESENT);
1960		}
1961
1962		/*
1963		 *	...	the next region doesn't overlap the
1964		 *		end point.
1965		 */
1966
1967		if ((entry->vme_next != vm_map_to_entry(map)) &&
1968		    (entry->vme_next->vme_start < end))
1969			RETURN(KERN_NO_SPACE);
1970	}
1971
1972	/*
1973	 *	At this point,
1974	 *		"start" and "end" should define the endpoints of the
1975	 *			available new range, and
1976	 *		"entry" should refer to the region before the new
1977	 *			range, and
1978	 *
1979	 *		the map should be locked.
1980	 */
1981
1982	/*
1983	 *	See whether we can avoid creating a new entry (and object) by
1984	 *	extending one of our neighbors.  [So far, we only attempt to
1985	 *	extend from below.]  Note that we can never extend/join
1986	 *	purgable objects because they need to remain distinct
1987	 *	entities in order to implement their "volatile object"
1988	 *	semantics.
1989	 */
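	/*
	 *	Illustrative sketch (addresses hypothetical): if the preceding
	 *	entry covers [A, start) and vm_object_coalesce() agrees to grow
	 *	its anonymous backing object, the new range [start, end) is
	 *	absorbed by simply advancing that entry's "vme_end" to "end";
	 *	no new map entry or VM object is created:
	 *
	 *		before:	[A ........ start)              [next entry ...
	 *		after:	[A .................... end)    [next entry ...
	 *
	 *	The eligibility conditions are exactly those tested below.
	 */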
1990
1991	if (purgable || entry_for_jit) {
1992		if (object == VM_OBJECT_NULL) {
1993			object = vm_object_allocate(size);
1994			object->copy_strategy = MEMORY_OBJECT_COPY_NONE;
1995			if (purgable) {
1996				object->purgable = VM_PURGABLE_NONVOLATILE;
1997			}
1998			offset = (vm_object_offset_t)0;
1999		}
2000	} else if ((is_submap == FALSE) &&
2001		   (object == VM_OBJECT_NULL) &&
2002		   (entry != vm_map_to_entry(map)) &&
2003		   (entry->vme_end == start) &&
2004		   (!entry->is_shared) &&
2005		   (!entry->is_sub_map) &&
2006		   ((alias == VM_MEMORY_REALLOC) || (entry->alias == alias)) &&
2007		   (entry->inheritance == inheritance) &&
2008		   (entry->protection == cur_protection) &&
2009		   (entry->max_protection == max_protection) &&
2010		   (entry->behavior == VM_BEHAVIOR_DEFAULT) &&
2011		   (entry->in_transition == 0) &&
2012		   (entry->no_cache == no_cache) &&
2013		   /*
2014		    * No coalescing if not map-aligned, to avoid propagating
2015		    * that condition any further than needed:
2016		    */
2017		   (!entry->map_aligned || !clear_map_aligned) &&
2018		   ((entry->vme_end - entry->vme_start) + size <=
2019		    (alias == VM_MEMORY_REALLOC ?
2020		     ANON_CHUNK_SIZE :
2021		     NO_COALESCE_LIMIT)) &&
2022		   (entry->wired_count == 0)) { /* implies user_wired_count == 0 */
2023		if (vm_object_coalesce(entry->object.vm_object,
2024				       VM_OBJECT_NULL,
2025				       entry->offset,
2026				       (vm_object_offset_t) 0,
2027				       (vm_map_size_t)(entry->vme_end - entry->vme_start),
2028				       (vm_map_size_t)(end - entry->vme_end))) {
2029
2030			/*
2031			 *	Coalesced the two objects - can extend
2032			 *	the previous map entry to include the
2033			 *	new range.
2034			 */
2035			map->size += (end - entry->vme_end);
2036			assert(entry->vme_start < end);
2037			assert(VM_MAP_PAGE_ALIGNED(end,
2038						   VM_MAP_PAGE_MASK(map)));
2039			entry->vme_end = end;
2040			vm_map_store_update_first_free(map, map->first_free);
2041			RETURN(KERN_SUCCESS);
2042		}
2043	}
2044
2045	step = superpage_size ? SUPERPAGE_SIZE : (end - start);
2046	new_entry = NULL;
2047
2048	for (tmp2_start = start; tmp2_start<end; tmp2_start += step) {
2049		tmp2_end = tmp2_start + step;
2050		/*
2051		 *	Create a new entry
2052		 *	LP64todo - for now, we can only allocate 4GB internal objects
2053		 *	because the default pager can't page bigger ones.  Remove this
2054		 *	when it can.
2055		 *
2056		 * XXX FBDP
2057		 * The reserved "page zero" in each process's address space can
2058		 * be arbitrarily large.  Splitting it into separate 4GB objects and
2059		 * therefore different VM map entries serves no purpose and just
2060		 * slows down operations on the VM map, so let's not split the
2061		 * allocation into 4GB chunks if the max protection is NONE.  That
2062		 * memory should never be accessible, so it will never get to the
2063		 * default pager.
2064		 */
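		/*
		 * Worked example (illustrative, assuming ANON_CHUNK_SIZE is the
		 * 4GB limit mentioned above): a 10GB anonymous mapping with a
		 * max protection other than VM_PROT_NONE is carved into three
		 * map entries of 4GB, 4GB and 2GB, each backed by its own
		 * internal object, while a 10GB "page zero" reservation with
		 * VM_PROT_NONE stays a single entry.
		 */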
2065		tmp_start = tmp2_start;
2066		if (object == VM_OBJECT_NULL &&
2067		    size > (vm_map_size_t)ANON_CHUNK_SIZE &&
2068		    max_protection != VM_PROT_NONE &&
2069		    superpage_size == 0)
2070			tmp_end = tmp_start + (vm_map_size_t)ANON_CHUNK_SIZE;
2071		else
2072			tmp_end = tmp2_end;
2073		do {
2074			new_entry = vm_map_entry_insert(map, entry, tmp_start, tmp_end,
2075							object,	offset, needs_copy,
2076							FALSE, FALSE,
2077							cur_protection, max_protection,
2078							VM_BEHAVIOR_DEFAULT,
2079							(entry_for_jit)? VM_INHERIT_NONE: inheritance,
2080							0, no_cache,
2081							permanent,
2082							superpage_size,
2083							clear_map_aligned);
2084			new_entry->alias = alias;
2085			if (entry_for_jit){
2086				if (!(map->jit_entry_exists)){
2087					new_entry->used_for_jit = TRUE;
2088					map->jit_entry_exists = TRUE;
2089				}
2090			}
2091
2092			if (is_submap) {
2093				vm_map_t	submap;
2094				boolean_t	submap_is_64bit;
2095				boolean_t	use_pmap;
2096
2097				new_entry->is_sub_map = TRUE;
2098				submap = (vm_map_t) object;
2099				submap_is_64bit = vm_map_is_64bit(submap);
2100				use_pmap = (alias == VM_MEMORY_SHARED_PMAP);
2101	#ifndef NO_NESTED_PMAP
2102				if (use_pmap && submap->pmap == NULL) {
2103					ledger_t ledger = map->pmap->ledger;
2104					/* we need a sub pmap to nest... */
2105					submap->pmap = pmap_create(ledger, 0,
2106					    submap_is_64bit);
2107					if (submap->pmap == NULL) {
2108						/* let's proceed without nesting... */
2109					}
2110				}
2111				if (use_pmap && submap->pmap != NULL) {
2112					kr = pmap_nest(map->pmap,
2113						       submap->pmap,
2114						       tmp_start,
2115						       tmp_start,
2116						       tmp_end - tmp_start);
2117					if (kr != KERN_SUCCESS) {
2118						printf("vm_map_enter: "
2119						       "pmap_nest(0x%llx,0x%llx) "
2120						       "error 0x%x\n",
2121						       (long long)tmp_start,
2122						       (long long)tmp_end,
2123						       kr);
2124					} else {
2125						/* we're now nested ! */
2126						new_entry->use_pmap = TRUE;
2127						pmap_empty = FALSE;
2128					}
2129				}
2130	#endif /* NO_NESTED_PMAP */
2131			}
2132			entry = new_entry;
2133
2134			if (superpage_size) {
2135				vm_page_t pages, m;
2136				vm_object_t sp_object;
2137
2138				entry->offset = 0;
2139
2140				/* allocate one superpage */
2141				kr = cpm_allocate(SUPERPAGE_SIZE, &pages, 0, SUPERPAGE_NBASEPAGES-1, TRUE, 0);
2142				if (kr != KERN_SUCCESS) {
2143					new_mapping_established = TRUE; /* will cause deallocation of whole range */
2144					RETURN(kr);
2145				}
2146
2147				/* create one vm_object per superpage */
2148				sp_object = vm_object_allocate((vm_map_size_t)(entry->vme_end - entry->vme_start));
2149				sp_object->phys_contiguous = TRUE;
2150				sp_object->vo_shadow_offset = (vm_object_offset_t)pages->phys_page*PAGE_SIZE;
2151				entry->object.vm_object = sp_object;
2152
2153				/* enter the base pages into the object */
2154				vm_object_lock(sp_object);
2155				for (offset = 0; offset < SUPERPAGE_SIZE; offset += PAGE_SIZE) {
2156					m = pages;
2157					pmap_zero_page(m->phys_page);
2158					pages = NEXT_PAGE(m);
2159					*(NEXT_PAGE_PTR(m)) = VM_PAGE_NULL;
2160					vm_page_insert(m, sp_object, offset);
2161				}
2162				vm_object_unlock(sp_object);
2163			}
2164		} while (tmp_end != tmp2_end &&
2165			 (tmp_start = tmp_end) &&
2166			 (tmp_end = (tmp2_end - tmp_end > (vm_map_size_t)ANON_CHUNK_SIZE) ?
2167			  tmp_end + (vm_map_size_t)ANON_CHUNK_SIZE : tmp2_end));
2168	}
2169
2170	vm_map_unlock(map);
2171	map_locked = FALSE;
2172
2173	new_mapping_established = TRUE;
2174
2175	/*	Wire down the new entry if the user
2176	 *	requested all new map entries be wired.
2177	 */
2178	if ((map->wiring_required)||(superpage_size)) {
2179		pmap_empty = FALSE; /* pmap won't be empty */
2180		kr = vm_map_wire(map, start, end,
2181				     new_entry->protection, TRUE);
2182		RETURN(kr);
2183	}
2184
2185	if ((object != VM_OBJECT_NULL) &&
2186	    (vm_map_pmap_enter_enable) &&
2187	    (!anywhere)	 &&
2188	    (!needs_copy) &&
2189	    (size < (128*1024))) {
2190		pmap_empty = FALSE; /* pmap won't be empty */
2191
2192		if (override_nx(map, alias) && cur_protection)
2193		        cur_protection |= VM_PROT_EXECUTE;
2194
2195		vm_map_pmap_enter(map, start, end,
2196				  object, offset, cur_protection);
2197	}
2198
2199BailOut: ;
2200	if (result == KERN_SUCCESS) {
2201		vm_prot_t pager_prot;
2202		memory_object_t pager;
2203
2204		if (pmap_empty &&
2205		    !(flags & VM_FLAGS_NO_PMAP_CHECK)) {
2206			assert(vm_map_pmap_is_empty(map,
2207						    *address,
2208						    *address+size));
2209		}
2210
2211		/*
2212		 * For "named" VM objects, let the pager know that the
2213		 * memory object is being mapped.  Some pagers need to keep
2214		 * track of this, to know when they can reclaim the memory
2215		 * object, for example.
2216		 * VM calls memory_object_map() for each mapping (specifying
2217		 * the protection of each mapping) and calls
2218		 * memory_object_last_unmap() when all the mappings are gone.
2219		 */
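		/*
		 * Sketch of the resulting pager interaction (illustrative):
		 *
		 *	mapping #1 established -> memory_object_map(pager, prot1)
		 *	mapping #2 established -> memory_object_map(pager, prot2)
		 *	last mapping removed   -> memory_object_last_unmap(pager)
		 *
		 * The last_unmap notification is issued elsewhere, when the
		 * object loses its last mapping.
		 */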
2220		pager_prot = max_protection;
2221		if (needs_copy) {
2222			/*
2223			 * Copy-On-Write mapping: won't modify
2224			 * the memory object.
2225			 */
2226			pager_prot &= ~VM_PROT_WRITE;
2227		}
2228		if (!is_submap &&
2229		    object != VM_OBJECT_NULL &&
2230		    object->named &&
2231		    object->pager != MEMORY_OBJECT_NULL) {
2232			vm_object_lock(object);
2233			pager = object->pager;
2234			if (object->named &&
2235			    pager != MEMORY_OBJECT_NULL) {
2236				assert(object->pager_ready);
2237				vm_object_mapping_wait(object, THREAD_UNINT);
2238				vm_object_mapping_begin(object);
2239				vm_object_unlock(object);
2240
2241				kr = memory_object_map(pager, pager_prot);
2242				assert(kr == KERN_SUCCESS);
2243
2244				vm_object_lock(object);
2245				vm_object_mapping_end(object);
2246			}
2247			vm_object_unlock(object);
2248		}
2249	} else {
2250		if (new_mapping_established) {
2251			/*
2252			 * We have to get rid of the new mappings since we
2253			 * won't make them available to the user.
2254			 * Try to do that atomically, to minimize the risk
2255			 * that someone else creates new mappings in that range.
2256			 */
2257			zap_new_map = vm_map_create(PMAP_NULL,
2258						    *address,
2259						    *address + size,
2260						    map->hdr.entries_pageable);
2261			vm_map_set_page_shift(zap_new_map,
2262					      VM_MAP_PAGE_SHIFT(map));
2263			if (!map_locked) {
2264				vm_map_lock(map);
2265				map_locked = TRUE;
2266			}
2267			(void) vm_map_delete(map, *address, *address+size,
2268					     VM_MAP_REMOVE_SAVE_ENTRIES,
2269					     zap_new_map);
2270		}
2271		if (zap_old_map != VM_MAP_NULL &&
2272		    zap_old_map->hdr.nentries != 0) {
2273			vm_map_entry_t	entry1, entry2;
2274
2275			/*
2276			 * The new mapping failed.  Attempt to restore
2277			 * the old mappings, saved in the "zap_old_map".
2278			 */
2279			if (!map_locked) {
2280				vm_map_lock(map);
2281				map_locked = TRUE;
2282			}
2283
2284			/* first check if the coast is still clear */
2285			start = vm_map_first_entry(zap_old_map)->vme_start;
2286			end = vm_map_last_entry(zap_old_map)->vme_end;
2287			if (vm_map_lookup_entry(map, start, &entry1) ||
2288			    vm_map_lookup_entry(map, end, &entry2) ||
2289			    entry1 != entry2) {
2290				/*
2291				 * Part of that range has already been
2292				 * re-mapped:  we can't restore the old
2293				 * mappings...
2294				 */
2295				vm_map_enter_restore_failures++;
2296			} else {
2297				/*
2298				 * Transfer the saved map entries from
2299				 * "zap_old_map" to the original "map",
2300				 * inserting them all after "entry1".
2301				 */
2302				for (entry2 = vm_map_first_entry(zap_old_map);
2303				     entry2 != vm_map_to_entry(zap_old_map);
2304				     entry2 = vm_map_first_entry(zap_old_map)) {
2305					vm_map_size_t entry_size;
2306
2307					entry_size = (entry2->vme_end -
2308						      entry2->vme_start);
2309					vm_map_store_entry_unlink(zap_old_map,
2310							    entry2);
2311					zap_old_map->size -= entry_size;
2312					vm_map_store_entry_link(map, entry1, entry2);
2313					map->size += entry_size;
2314					entry1 = entry2;
2315				}
2316				if (map->wiring_required) {
2317					/*
2318					 * XXX TODO: we should rewire the
2319					 * old pages here...
2320					 */
2321				}
2322				vm_map_enter_restore_successes++;
2323			}
2324		}
2325	}
2326
2327	if (map_locked) {
2328		vm_map_unlock(map);
2329	}
2330
2331	/*
2332	 * Get rid of the "zap_maps" and all the map entries that
2333	 * they may still contain.
2334	 */
2335	if (zap_old_map != VM_MAP_NULL) {
2336		vm_map_destroy(zap_old_map, VM_MAP_REMOVE_NO_PMAP_CLEANUP);
2337		zap_old_map = VM_MAP_NULL;
2338	}
2339	if (zap_new_map != VM_MAP_NULL) {
2340		vm_map_destroy(zap_new_map, VM_MAP_REMOVE_NO_PMAP_CLEANUP);
2341		zap_new_map = VM_MAP_NULL;
2342	}
2343
2344	return result;
2345
2346#undef	RETURN
2347}
2348
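/*
 *	vm_map_enter_mem_object:
 *
 *	Resolve the memory object named by "port" (a named entry or a raw
 *	memory object) and map it into "target_map" via vm_map_enter().
 *
 *	Hypothetical usage sketch (argument values are illustrative only):
 *
 *		vm_map_offset_t	addr = 0;
 *		kern_return_t	kr;
 *
 *		kr = vm_map_enter_mem_object(map, &addr, size,
 *					     (vm_map_offset_t) 0,
 *					     VM_FLAGS_ANYWHERE,
 *					     mem_entry_port,
 *					     0,
 *					     FALSE,
 *					     VM_PROT_DEFAULT,
 *					     VM_PROT_ALL,
 *					     VM_INHERIT_DEFAULT);
 *
 *	On success, "addr" holds the address chosen in "target_map".
 */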
2349kern_return_t
2350vm_map_enter_mem_object(
2351	vm_map_t		target_map,
2352	vm_map_offset_t		*address,
2353	vm_map_size_t		initial_size,
2354	vm_map_offset_t		mask,
2355	int			flags,
2356	ipc_port_t		port,
2357	vm_object_offset_t	offset,
2358	boolean_t		copy,
2359	vm_prot_t		cur_protection,
2360	vm_prot_t		max_protection,
2361	vm_inherit_t		inheritance)
2362{
2363	vm_map_address_t	map_addr;
2364	vm_map_size_t		map_size;
2365	vm_object_t		object;
2366	vm_object_size_t	size;
2367	kern_return_t		result;
2368	boolean_t		mask_cur_protection, mask_max_protection;
2369	vm_map_offset_t		offset_in_mapping;
2370
2371	mask_cur_protection = cur_protection & VM_PROT_IS_MASK;
2372	mask_max_protection = max_protection & VM_PROT_IS_MASK;
2373	cur_protection &= ~VM_PROT_IS_MASK;
2374	max_protection &= ~VM_PROT_IS_MASK;
2375
2376	/*
2377	 * Check arguments for validity
2378	 */
2379	if ((target_map == VM_MAP_NULL) ||
2380	    (cur_protection & ~VM_PROT_ALL) ||
2381	    (max_protection & ~VM_PROT_ALL) ||
2382	    (inheritance > VM_INHERIT_LAST_VALID) ||
2383	    initial_size == 0)
2384		return KERN_INVALID_ARGUMENT;
2385
2386	map_addr = vm_map_trunc_page(*address,
2387				     VM_MAP_PAGE_MASK(target_map));
2388	map_size = vm_map_round_page(initial_size,
2389				     VM_MAP_PAGE_MASK(target_map));
2390	size = vm_object_round_page(initial_size);
2391
2392	/*
2393	 * Find the vm object (if any) corresponding to this port.
2394	 */
2395	if (!IP_VALID(port)) {
2396		object = VM_OBJECT_NULL;
2397		offset = 0;
2398		copy = FALSE;
2399	} else if (ip_kotype(port) == IKOT_NAMED_ENTRY) {
2400		vm_named_entry_t	named_entry;
2401
2402		named_entry = (vm_named_entry_t) port->ip_kobject;
2403
2404		if ((flags & VM_FLAGS_RETURN_DATA_ADDR) != 0) {
2405			offset += named_entry->data_offset;
2406		}
2407
2408		/* a few checks to make sure the user is obeying the rules */
2409		if (size == 0) {
2410			if (offset >= named_entry->size)
2411				return KERN_INVALID_RIGHT;
2412			size = named_entry->size - offset;
2413		}
2414		if (mask_max_protection) {
2415			max_protection &= named_entry->protection;
2416		}
2417		if (mask_cur_protection) {
2418			cur_protection &= named_entry->protection;
2419		}
2420		if ((named_entry->protection & max_protection) !=
2421		    max_protection)
2422			return KERN_INVALID_RIGHT;
2423		if ((named_entry->protection & cur_protection) !=
2424		    cur_protection)
2425			return KERN_INVALID_RIGHT;
2426		if (offset + size < offset) {
2427			/* overflow */
2428			return KERN_INVALID_ARGUMENT;
2429		}
2430		if (named_entry->size < (offset + size))
2431			return KERN_INVALID_ARGUMENT;
2432
2433		if (named_entry->is_copy) {
2434			/* for a vm_map_copy, we can only map it whole */
2435			if ((size != named_entry->size) &&
2436			    (vm_map_round_page(size,
2437					       VM_MAP_PAGE_MASK(target_map)) ==
2438			     named_entry->size)) {
2439				/* XXX FBDP use the rounded size... */
2440				size = vm_map_round_page(
2441					size,
2442					VM_MAP_PAGE_MASK(target_map));
2443			}
2444
2445			if (offset != 0 ||
2446			    size != named_entry->size) {
2447				return KERN_INVALID_ARGUMENT;
2448			}
2449		}
2450
2451		/* the caller's "offset" parameter is relative to the start of */
2452		/* the named entry; convert it to an offset within the object. */
2453		offset = offset + named_entry->offset;
2454
2455		if (! VM_MAP_PAGE_ALIGNED(size,
2456					  VM_MAP_PAGE_MASK(target_map))) {
2457			/*
2458			 * Let's not map more than requested;
2459			 * vm_map_enter() will handle this "not map-aligned"
2460			 * case.
2461			 */
2462			map_size = size;
2463		}
2464
2465		named_entry_lock(named_entry);
2466		if (named_entry->is_sub_map) {
2467			vm_map_t		submap;
2468
2469			if ((flags & VM_FLAGS_RETURN_DATA_ADDR) != 0) {
2470				panic("VM_FLAGS_RETURN_DATA_ADDR not expected for submap.");
2471			}
2472
2473			submap = named_entry->backing.map;
2474			vm_map_lock(submap);
2475			vm_map_reference(submap);
2476			vm_map_unlock(submap);
2477			named_entry_unlock(named_entry);
2478
2479			result = vm_map_enter(target_map,
2480					      &map_addr,
2481					      map_size,
2482					      mask,
2483					      flags | VM_FLAGS_SUBMAP,
2484					      (vm_object_t) submap,
2485					      offset,
2486					      copy,
2487					      cur_protection,
2488					      max_protection,
2489					      inheritance);
2490			if (result != KERN_SUCCESS) {
2491				vm_map_deallocate(submap);
2492			} else {
2493				/*
2494				 * No need to lock "submap" just to check its
2495				 * "mapped" flag: that flag is never reset
2496				 * once it's been set and if we race, we'll
2497				 * just end up setting it twice, which is OK.
2498				 */
2499				if (submap->mapped_in_other_pmaps == FALSE &&
2500				    vm_map_pmap(submap) != PMAP_NULL &&
2501				    vm_map_pmap(submap) !=
2502				    vm_map_pmap(target_map)) {
2503					/*
2504					 * This submap is being mapped in a map
2505					 * that uses a different pmap.
2506					 * Set its "mapped_in_other_pmaps" flag
2507					 * to indicate that we now need to
2508					 * remove mappings from all pmaps rather
2509					 * than just the submap's pmap.
2510					 */
2511					vm_map_lock(submap);
2512					submap->mapped_in_other_pmaps = TRUE;
2513					vm_map_unlock(submap);
2514				}
2515				*address = map_addr;
2516			}
2517			return result;
2518
2519		} else if (named_entry->is_pager) {
2520			unsigned int	access;
2521			vm_prot_t	protections;
2522			unsigned int	wimg_mode;
2523
2524			protections = named_entry->protection & VM_PROT_ALL;
2525			access = GET_MAP_MEM(named_entry->protection);
2526
2527			if ((flags & VM_FLAGS_RETURN_DATA_ADDR) != 0) {
2528				panic("VM_FLAGS_RETURN_DATA_ADDR not expected for a pager-backed named entry.");
2529			}
2530
2531			object = vm_object_enter(named_entry->backing.pager,
2532						 named_entry->size,
2533						 named_entry->internal,
2534						 FALSE,
2535						 FALSE);
2536			if (object == VM_OBJECT_NULL) {
2537				named_entry_unlock(named_entry);
2538				return KERN_INVALID_OBJECT;
2539			}
2540
2541			/* JMM - drop reference on pager here */
2542
2543			/* create an extra ref for the named entry */
2544			vm_object_lock(object);
2545			vm_object_reference_locked(object);
2546			named_entry->backing.object = object;
2547			named_entry->is_pager = FALSE;
2548			named_entry_unlock(named_entry);
2549
2550			wimg_mode = object->wimg_bits;
2551
2552			if (access == MAP_MEM_IO) {
2553				wimg_mode = VM_WIMG_IO;
2554			} else if (access == MAP_MEM_COPYBACK) {
2555				wimg_mode = VM_WIMG_USE_DEFAULT;
2556			} else if (access == MAP_MEM_INNERWBACK) {
2557				wimg_mode = VM_WIMG_INNERWBACK;
2558			} else if (access == MAP_MEM_WTHRU) {
2559				wimg_mode = VM_WIMG_WTHRU;
2560			} else if (access == MAP_MEM_WCOMB) {
2561				wimg_mode = VM_WIMG_WCOMB;
2562			}
2563
2564			/* wait for object (if any) to be ready */
2565			if (!named_entry->internal) {
2566				while (!object->pager_ready) {
2567					vm_object_wait(
2568						object,
2569						VM_OBJECT_EVENT_PAGER_READY,
2570						THREAD_UNINT);
2571					vm_object_lock(object);
2572				}
2573			}
2574
2575			if (object->wimg_bits != wimg_mode)
2576				vm_object_change_wimg_mode(object, wimg_mode);
2577
2578			object->true_share = TRUE;
2579
2580			if (object->copy_strategy == MEMORY_OBJECT_COPY_SYMMETRIC)
2581				object->copy_strategy = MEMORY_OBJECT_COPY_DELAY;
2582			vm_object_unlock(object);
2583
2584		} else if (named_entry->is_copy) {
2585			kern_return_t	kr;
2586			vm_map_copy_t	copy_map;
2587			vm_map_entry_t	copy_entry;
2588			vm_map_offset_t	copy_addr;
2589
2590			if (flags & ~(VM_FLAGS_FIXED |
2591				      VM_FLAGS_ANYWHERE |
2592				      VM_FLAGS_OVERWRITE |
2593				      VM_FLAGS_RETURN_DATA_ADDR)) {
2594				named_entry_unlock(named_entry);
2595				return KERN_INVALID_ARGUMENT;
2596			}
2597
2598			if ((flags & VM_FLAGS_RETURN_DATA_ADDR) != 0) {
2599				offset_in_mapping = offset - vm_object_trunc_page(offset);
2600				offset = vm_object_trunc_page(offset);
2601				map_size = vm_object_round_page(offset + offset_in_mapping + initial_size) - offset;
2602			}
2603
2604			copy_map = named_entry->backing.copy;
2605			assert(copy_map->type == VM_MAP_COPY_ENTRY_LIST);
2606			if (copy_map->type != VM_MAP_COPY_ENTRY_LIST) {
2607				/* unsupported type; should not happen */
2608				printf("vm_map_enter_mem_object: "
2609				       "memory_entry->backing.copy "
2610				       "unsupported type 0x%x\n",
2611				       copy_map->type);
2612				named_entry_unlock(named_entry);
2613				return KERN_INVALID_ARGUMENT;
2614			}
2615
2616			/* reserve a contiguous range */
2617			kr = vm_map_enter(target_map,
2618					  &map_addr,
2619					  map_size,
2620					  mask,
2621					  flags & (VM_FLAGS_ANYWHERE |
2622						   VM_FLAGS_OVERWRITE |
2623						   VM_FLAGS_RETURN_DATA_ADDR),
2624					  VM_OBJECT_NULL,
2625					  0,
2626					  FALSE, /* copy */
2627					  cur_protection,
2628					  max_protection,
2629					  inheritance);
2630			if (kr != KERN_SUCCESS) {
2631				named_entry_unlock(named_entry);
2632				return kr;
2633			}
2634
2635			copy_addr = map_addr;
2636
2637			for (copy_entry = vm_map_copy_first_entry(copy_map);
2638			     copy_entry != vm_map_copy_to_entry(copy_map);
2639			     copy_entry = copy_entry->vme_next) {
2640				int			remap_flags = 0;
2641				vm_map_t		copy_submap;
2642				vm_object_t		copy_object;
2643				vm_map_size_t		copy_size;
2644				vm_object_offset_t	copy_offset;
2645
2646				copy_offset = copy_entry->offset;
2647				copy_size = (copy_entry->vme_end -
2648					     copy_entry->vme_start);
2649
2650				/* sanity check */
2651				if (copy_addr + copy_size >
2652				    map_addr + map_size) {
2653					/* over-mapping too much !? */
2654					kr = KERN_INVALID_ARGUMENT;
2655					/* abort */
2656					break;
2657				}
2658
2659				/* take a reference on the object */
2660				if (copy_entry->is_sub_map) {
2661					remap_flags |= VM_FLAGS_SUBMAP;
2662					copy_submap =
2663						copy_entry->object.sub_map;
2664					vm_map_lock(copy_submap);
2665					vm_map_reference(copy_submap);
2666					vm_map_unlock(copy_submap);
2667					copy_object = (vm_object_t) copy_submap;
2668				} else {
2669					copy_object =
2670						copy_entry->object.vm_object;
2671					vm_object_reference(copy_object);
2672				}
2673
2674				/* over-map the object into destination */
2675				remap_flags |= flags;
2676				remap_flags |= VM_FLAGS_FIXED;
2677				remap_flags |= VM_FLAGS_OVERWRITE;
2678				remap_flags &= ~VM_FLAGS_ANYWHERE;
2679				kr = vm_map_enter(target_map,
2680						  &copy_addr,
2681						  copy_size,
2682						  (vm_map_offset_t) 0,
2683						  remap_flags,
2684						  copy_object,
2685						  copy_offset,
2686						  copy,
2687						  cur_protection,
2688						  max_protection,
2689						  inheritance);
2690				if (kr != KERN_SUCCESS) {
2691					if (copy_entry->is_sub_map) {
2692						vm_map_deallocate(copy_submap);
2693					} else {
2694						vm_object_deallocate(copy_object);
2695					}
2696					/* abort */
2697					break;
2698				}
2699
2700				/* next mapping */
2701				copy_addr += copy_size;
2702			}
2703
2704			if (kr == KERN_SUCCESS) {
2705				if ((flags & VM_FLAGS_RETURN_DATA_ADDR) != 0) {
2706					*address = map_addr + offset_in_mapping;
2707				} else {
2708					*address = map_addr;
2709				}
2710			}
2711			named_entry_unlock(named_entry);
2712
2713			if (kr != KERN_SUCCESS) {
2714				if (! (flags & VM_FLAGS_OVERWRITE)) {
2715					/* deallocate the contiguous range */
2716					(void) vm_deallocate(target_map,
2717							     map_addr,
2718							     map_size);
2719				}
2720			}
2721
2722			return kr;
2723
2724		} else {
2725			/* This is the case where we are going to map */
2726			/* an already-mapped object.  If the object is */
2727			/* not ready, it is internal.  An external     */
2728			/* object cannot be mapped until it is ready,  */
2729			/* so we can avoid the ready check             */
2730			/* in this case.  */
2731			if ((flags & VM_FLAGS_RETURN_DATA_ADDR) != 0) {
2732				offset_in_mapping = offset - vm_object_trunc_page(offset);
2733				offset = vm_object_trunc_page(offset);
2734				map_size = vm_object_round_page(offset + offset_in_mapping + initial_size) - offset;
2735			}
2736
2737			object = named_entry->backing.object;
2738			assert(object != VM_OBJECT_NULL);
2739			named_entry_unlock(named_entry);
2740			vm_object_reference(object);
2741		}
2742	} else if (ip_kotype(port) == IKOT_MEMORY_OBJECT) {
2743		/*
2744		 * JMM - This is temporary until we unify named entries
2745		 * and raw memory objects.
2746		 *
2747		 * Detected fake ip_kotype for a memory object.  In
2748		 * this case, the port isn't really a port at all, but
2749		 * instead is just a raw memory object.
2750		 */
2751		if ((flags & VM_FLAGS_RETURN_DATA_ADDR) != 0) {
2752			panic("VM_FLAGS_RETURN_DATA_ADDR not expected for raw memory object.");
2753		}
2754
2755		object = vm_object_enter((memory_object_t)port,
2756					 size, FALSE, FALSE, FALSE);
2757		if (object == VM_OBJECT_NULL)
2758			return KERN_INVALID_OBJECT;
2759
2760		/* wait for object (if any) to be ready */
2761		if (object != VM_OBJECT_NULL) {
2762			if (object == kernel_object) {
2763				printf("Warning: Attempt to map kernel object"
2764					" by a non-private kernel entity\n");
2765				return KERN_INVALID_OBJECT;
2766			}
2767			if (!object->pager_ready) {
2768				vm_object_lock(object);
2769
2770				while (!object->pager_ready) {
2771					vm_object_wait(object,
2772						       VM_OBJECT_EVENT_PAGER_READY,
2773						       THREAD_UNINT);
2774					vm_object_lock(object);
2775				}
2776				vm_object_unlock(object);
2777			}
2778		}
2779	} else {
2780		return KERN_INVALID_OBJECT;
2781	}
2782
2783	if (object != VM_OBJECT_NULL &&
2784	    object->named &&
2785	    object->pager != MEMORY_OBJECT_NULL &&
2786	    object->copy_strategy != MEMORY_OBJECT_COPY_NONE) {
2787		memory_object_t pager;
2788		vm_prot_t	pager_prot;
2789		kern_return_t	kr;
2790
2791		/*
2792		 * For "named" VM objects, let the pager know that the
2793		 * memory object is being mapped.  Some pagers need to keep
2794		 * track of this, to know when they can reclaim the memory
2795		 * object, for example.
2796		 * VM calls memory_object_map() for each mapping (specifying
2797		 * the protection of each mapping) and calls
2798		 * memory_object_last_unmap() when all the mappings are gone.
2799		 */
2800		pager_prot = max_protection;
2801		if (copy) {
2802			/*
2803			 * Copy-On-Write mapping: won't modify the
2804			 * memory object.
2805			 */
2806			pager_prot &= ~VM_PROT_WRITE;
2807		}
2808		vm_object_lock(object);
2809		pager = object->pager;
2810		if (object->named &&
2811		    pager != MEMORY_OBJECT_NULL &&
2812		    object->copy_strategy != MEMORY_OBJECT_COPY_NONE) {
2813			assert(object->pager_ready);
2814			vm_object_mapping_wait(object, THREAD_UNINT);
2815			vm_object_mapping_begin(object);
2816			vm_object_unlock(object);
2817
2818			kr = memory_object_map(pager, pager_prot);
2819			assert(kr == KERN_SUCCESS);
2820
2821			vm_object_lock(object);
2822			vm_object_mapping_end(object);
2823		}
2824		vm_object_unlock(object);
2825	}
2826
2827	/*
2828	 *	Perform the copy if requested
2829	 */
2830
2831	if (copy) {
2832		vm_object_t		new_object;
2833		vm_object_offset_t	new_offset;
2834
2835		result = vm_object_copy_strategically(object, offset, size,
2836						      &new_object, &new_offset,
2837						      &copy);
2838
2839
2840		if (result == KERN_MEMORY_RESTART_COPY) {
2841			boolean_t success;
2842			boolean_t src_needs_copy;
2843
2844			/*
2845			 * XXX
2846			 * We currently ignore src_needs_copy.
2847			 * This really is the issue of how to make
2848			 * MEMORY_OBJECT_COPY_SYMMETRIC safe for
2849			 * non-kernel users to use. Solution forthcoming.
2850			 * In the meantime, since we don't allow non-kernel
2851			 * memory managers to specify symmetric copy,
2852			 * we won't run into problems here.
2853			 */
2854			new_object = object;
2855			new_offset = offset;
2856			success = vm_object_copy_quickly(&new_object,
2857							 new_offset, size,
2858							 &src_needs_copy,
2859							 &copy);
2860			assert(success);
2861			result = KERN_SUCCESS;
2862		}
2863		/*
2864		 *	Throw away the reference to the
2865		 *	original object, as it won't be mapped.
2866		 */
2867
2868		vm_object_deallocate(object);
2869
2870		if (result != KERN_SUCCESS)
2871			return result;
2872
2873		object = new_object;
2874		offset = new_offset;
2875	}
2876
2877	result = vm_map_enter(target_map,
2878			      &map_addr, map_size,
2879			      (vm_map_offset_t)mask,
2880			      flags,
2881			      object, offset,
2882			      copy,
2883			      cur_protection, max_protection, inheritance);
2884	if (result != KERN_SUCCESS)
2885		vm_object_deallocate(object);
2886
2887	if ((flags & VM_FLAGS_RETURN_DATA_ADDR) != 0) {
2888		*address = map_addr + offset_in_mapping;
2889	} else {
2890		*address = map_addr;
2891	}
2892	return result;
2893}
2894
2895
2896
2897
2898kern_return_t
2899vm_map_enter_mem_object_control(
2900	vm_map_t		target_map,
2901	vm_map_offset_t		*address,
2902	vm_map_size_t		initial_size,
2903	vm_map_offset_t		mask,
2904	int			flags,
2905	memory_object_control_t	control,
2906	vm_object_offset_t	offset,
2907	boolean_t		copy,
2908	vm_prot_t		cur_protection,
2909	vm_prot_t		max_protection,
2910	vm_inherit_t		inheritance)
2911{
2912	vm_map_address_t	map_addr;
2913	vm_map_size_t		map_size;
2914	vm_object_t		object;
2915	vm_object_size_t	size;
2916	kern_return_t		result;
2917	memory_object_t		pager;
2918	vm_prot_t		pager_prot;
2919	kern_return_t		kr;
2920
2921	/*
2922	 * Check arguments for validity
2923	 */
2924	if ((target_map == VM_MAP_NULL) ||
2925	    (cur_protection & ~VM_PROT_ALL) ||
2926	    (max_protection & ~VM_PROT_ALL) ||
2927	    (inheritance > VM_INHERIT_LAST_VALID) ||
2928	    initial_size == 0)
2929		return KERN_INVALID_ARGUMENT;
2930
2931	map_addr = vm_map_trunc_page(*address,
2932				     VM_MAP_PAGE_MASK(target_map));
2933	map_size = vm_map_round_page(initial_size,
2934				     VM_MAP_PAGE_MASK(target_map));
2935	size = vm_object_round_page(initial_size);
2936
2937	object = memory_object_control_to_vm_object(control);
2938
2939	if (object == VM_OBJECT_NULL)
2940		return KERN_INVALID_OBJECT;
2941
2942	if (object == kernel_object) {
2943		printf("Warning: Attempt to map kernel object"
2944		       " by a non-private kernel entity\n");
2945		return KERN_INVALID_OBJECT;
2946	}
2947
2948	vm_object_lock(object);
2949	object->ref_count++;
2950	vm_object_res_reference(object);
2951
2952	/*
2953	 * For "named" VM objects, let the pager know that the
2954	 * memory object is being mapped.  Some pagers need to keep
2955	 * track of this, to know when they can reclaim the memory
2956	 * object, for example.
2957	 * VM calls memory_object_map() for each mapping (specifying
2958	 * the protection of each mapping) and calls
2959	 * memory_object_last_unmap() when all the mappings are gone.
2960	 */
2961	pager_prot = max_protection;
2962	if (copy) {
2963		pager_prot &= ~VM_PROT_WRITE;
2964	}
2965	pager = object->pager;
2966	if (object->named &&
2967	    pager != MEMORY_OBJECT_NULL &&
2968	    object->copy_strategy != MEMORY_OBJECT_COPY_NONE) {
2969		assert(object->pager_ready);
2970		vm_object_mapping_wait(object, THREAD_UNINT);
2971		vm_object_mapping_begin(object);
2972		vm_object_unlock(object);
2973
2974		kr = memory_object_map(pager, pager_prot);
2975		assert(kr == KERN_SUCCESS);
2976
2977		vm_object_lock(object);
2978		vm_object_mapping_end(object);
2979	}
2980	vm_object_unlock(object);
2981
2982	/*
2983	 *	Perform the copy if requested
2984	 */
2985
2986	if (copy) {
2987		vm_object_t		new_object;
2988		vm_object_offset_t	new_offset;
2989
2990		result = vm_object_copy_strategically(object, offset, size,
2991						      &new_object, &new_offset,
2992						      &copy);
2993
2994
2995		if (result == KERN_MEMORY_RESTART_COPY) {
2996			boolean_t success;
2997			boolean_t src_needs_copy;
2998
2999			/*
3000			 * XXX
3001			 * We currently ignore src_needs_copy.
3002			 * This really is the issue of how to make
3003			 * MEMORY_OBJECT_COPY_SYMMETRIC safe for
3004			 * non-kernel users to use. Solution forthcoming.
3005			 * In the meantime, since we don't allow non-kernel
3006			 * memory managers to specify symmetric copy,
3007			 * we won't run into problems here.
3008			 */
3009			new_object = object;
3010			new_offset = offset;
3011			success = vm_object_copy_quickly(&new_object,
3012							 new_offset, size,
3013							 &src_needs_copy,
3014							 &copy);
3015			assert(success);
3016			result = KERN_SUCCESS;
3017		}
3018		/*
3019		 *	Throw away the reference to the
3020		 *	original object, as it won't be mapped.
3021		 */
3022
3023		vm_object_deallocate(object);
3024
3025		if (result != KERN_SUCCESS)
3026			return result;
3027
3028		object = new_object;
3029		offset = new_offset;
3030	}
3031
3032	result = vm_map_enter(target_map,
3033			      &map_addr, map_size,
3034			      (vm_map_offset_t)mask,
3035			      flags,
3036			      object, offset,
3037			      copy,
3038			      cur_protection, max_protection, inheritance);
3039	if (result != KERN_SUCCESS)
3040		vm_object_deallocate(object);
3041	*address = map_addr;
3042
3043	return result;
3044}
3045
3046
3047#if	VM_CPM
3048
3049#ifdef MACH_ASSERT
3050extern pmap_paddr_t	avail_start, avail_end;
3051#endif
3052
3053/*
3054 *	Allocate memory in the specified map, with the caveat that
3055 *	the memory is physically contiguous.  This call may fail
3056 *	if the system can't find sufficient contiguous memory.
3057 *	This call may cause or lead to heart-stopping amounts of
3058 *	paging activity.
3059 *
3060 *	Memory obtained from this call should be freed in the
3061 *	normal way, viz., via vm_deallocate.
3062 */
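/*
 *	Illustrative call (names hypothetical): to obtain "len" bytes of
 *	physically contiguous, wired memory at a kernel-chosen address:
 *
 *		vm_map_offset_t	addr = 0;
 *
 *		kr = vm_map_enter_cpm(map, &addr, len, VM_FLAGS_ANYWHERE);
 *
 *	and later release it with vm_deallocate(map, addr, len), as noted
 *	above.
 */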
3063kern_return_t
3064vm_map_enter_cpm(
3065	vm_map_t		map,
3066	vm_map_offset_t	*addr,
3067	vm_map_size_t		size,
3068	int			flags)
3069{
3070	vm_object_t		cpm_obj;
3071	pmap_t			pmap;
3072	vm_page_t		m, pages;
3073	kern_return_t		kr;
3074	vm_map_offset_t		va, start, end, offset;
3075#if	MACH_ASSERT
3076	vm_map_offset_t		prev_addr = 0;
3077#endif	/* MACH_ASSERT */
3078
3079	boolean_t		anywhere = ((VM_FLAGS_ANYWHERE & flags) != 0);
3080
3081	if (size == 0) {
3082		*addr = 0;
3083		return KERN_SUCCESS;
3084	}
3085	if (anywhere)
3086		*addr = vm_map_min(map);
3087	else
3088		*addr = vm_map_trunc_page(*addr,
3089					  VM_MAP_PAGE_MASK(map));
3090	size = vm_map_round_page(size,
3091				 VM_MAP_PAGE_MASK(map));
3092
3093	/*
3094	 * LP64todo - cpm_allocate should probably allow
3095	 * allocations of >4GB, but not with the current
3096	 * algorithm, so just cast down the size for now.
3097	 */
3098	if (size > VM_MAX_ADDRESS)
3099		return KERN_RESOURCE_SHORTAGE;
3100	if ((kr = cpm_allocate(CAST_DOWN(vm_size_t, size),
3101			       &pages, 0, 0, TRUE, flags)) != KERN_SUCCESS)
3102		return kr;
3103
3104	cpm_obj = vm_object_allocate((vm_object_size_t)size);
3105	assert(cpm_obj != VM_OBJECT_NULL);
3106	assert(cpm_obj->internal);
3107	assert(cpm_obj->vo_size == (vm_object_size_t)size);
3108	assert(cpm_obj->can_persist == FALSE);
3109	assert(cpm_obj->pager_created == FALSE);
3110	assert(cpm_obj->pageout == FALSE);
3111	assert(cpm_obj->shadow == VM_OBJECT_NULL);
3112
3113	/*
3114	 *	Insert pages into object.
3115	 */
3116
3117	vm_object_lock(cpm_obj);
3118	for (offset = 0; offset < size; offset += PAGE_SIZE) {
3119		m = pages;
3120		pages = NEXT_PAGE(m);
3121		*(NEXT_PAGE_PTR(m)) = VM_PAGE_NULL;
3122
3123		assert(!m->gobbled);
3124		assert(!m->wanted);
3125		assert(!m->pageout);
3126		assert(!m->tabled);
3127		assert(VM_PAGE_WIRED(m));
3128		/*
3129		 * ENCRYPTED SWAP:
3130		 * "m" is not supposed to be pageable, so it
3131		 * should not be encrypted.  It wouldn't be safe
3132		 * to enter it in a new VM object while encrypted.
3133		 */
3134		ASSERT_PAGE_DECRYPTED(m);
3135		assert(m->busy);
3136		assert(m->phys_page>=(avail_start>>PAGE_SHIFT) && m->phys_page<=(avail_end>>PAGE_SHIFT));
3137
3138		m->busy = FALSE;
3139		vm_page_insert(m, cpm_obj, offset);
3140	}
3141	assert(cpm_obj->resident_page_count == size / PAGE_SIZE);
3142	vm_object_unlock(cpm_obj);
3143
3144	/*
3145	 *	Hang onto a reference on the object in case a
3146	 *	multi-threaded application for some reason decides
3147	 *	to deallocate the portion of the address space into
3148	 *	which we will insert this object.
3149	 *
3150	 *	Unfortunately, we must insert the object now before
3151	 *	we can talk to the pmap module about which addresses
3152	 *	must be wired down.  Hence, the race with a multi-
3153	 *	threaded app.
3154	 */
3155	vm_object_reference(cpm_obj);
3156
3157	/*
3158	 *	Insert object into map.
3159	 */
3160
3161	kr = vm_map_enter(
3162		map,
3163		addr,
3164		size,
3165		(vm_map_offset_t)0,
3166		flags,
3167		cpm_obj,
3168		(vm_object_offset_t)0,
3169		FALSE,
3170		VM_PROT_ALL,
3171		VM_PROT_ALL,
3172		VM_INHERIT_DEFAULT);
3173
3174	if (kr != KERN_SUCCESS) {
3175		/*
3176		 *	A CPM object doesn't have can_persist set,
3177		 *	so all we have to do is deallocate it to
3178		 *	free up these pages.
3179		 */
3180		assert(cpm_obj->pager_created == FALSE);
3181		assert(cpm_obj->can_persist == FALSE);
3182		assert(cpm_obj->pageout == FALSE);
3183		assert(cpm_obj->shadow == VM_OBJECT_NULL);
3184		vm_object_deallocate(cpm_obj); /* kill acquired ref */
3185		vm_object_deallocate(cpm_obj); /* kill creation ref */
3186	}
3187
3188	/*
3189	 *	Inform the physical mapping system that the
3190	 *	range of addresses may not fault, so that
3191	 *	page tables and such can be locked down as well.
3192	 */
3193	start = *addr;
3194	end = start + size;
3195	pmap = vm_map_pmap(map);
3196	pmap_pageable(pmap, start, end, FALSE);
3197
3198	/*
3199	 *	Enter each page into the pmap, to avoid faults.
3200	 *	Note that this loop could be coded more efficiently,
3201	 *	if the need arose, rather than looking up each page
3202	 *	again.
3203	 */
3204	for (offset = 0, va = start; offset < size;
3205	     va += PAGE_SIZE, offset += PAGE_SIZE) {
3206	        int type_of_fault;
3207
3208		vm_object_lock(cpm_obj);
3209		m = vm_page_lookup(cpm_obj, (vm_object_offset_t)offset);
3210		assert(m != VM_PAGE_NULL);
3211
3212		vm_page_zero_fill(m);
3213
3214		type_of_fault = DBG_ZERO_FILL_FAULT;
3215
3216		vm_fault_enter(m, pmap, va, VM_PROT_ALL, VM_PROT_WRITE,
3217			       VM_PAGE_WIRED(m), FALSE, FALSE, FALSE, NULL,
3218			       &type_of_fault);
3219
3220		vm_object_unlock(cpm_obj);
3221	}
3222
3223#if	MACH_ASSERT
3224	/*
3225	 *	Verify ordering in address space.
3226	 */
3227	for (offset = 0; offset < size; offset += PAGE_SIZE) {
3228		vm_object_lock(cpm_obj);
3229		m = vm_page_lookup(cpm_obj, (vm_object_offset_t)offset);
3230		vm_object_unlock(cpm_obj);
3231		if (m == VM_PAGE_NULL)
3232			panic("vm_allocate_cpm:  obj %p off 0x%llx no page",
3233			      cpm_obj, (uint64_t)offset);
3234		assert(m->tabled);
3235		assert(!m->busy);
3236		assert(!m->wanted);
3237		assert(!m->fictitious);
3238		assert(!m->private);
3239		assert(!m->absent);
3240		assert(!m->error);
3241		assert(!m->cleaning);
3242		assert(!m->laundry);
3243		assert(!m->precious);
3244		assert(!m->clustered);
3245		if (offset != 0) {
3246			if (m->phys_page != prev_addr + 1) {
3247				printf("start 0x%llx end 0x%llx va 0x%llx\n",
3248				       (uint64_t)start, (uint64_t)end, (uint64_t)va);
3249				printf("obj %p off 0x%llx\n", cpm_obj, (uint64_t)offset);
3250				printf("m %p prev_address 0x%llx\n", m, (uint64_t)prev_addr);
3251				panic("vm_allocate_cpm:  pages not contig!");
3252			}
3253		}
3254		prev_addr = m->phys_page;
3255	}
3256#endif	/* MACH_ASSERT */
3257
3258	vm_object_deallocate(cpm_obj); /* kill extra ref */
3259
3260	return kr;
3261}
3262
3263
3264#else	/* VM_CPM */
3265
3266/*
3267 *	Interface is defined in all cases, but unless the kernel
3268 *	is built explicitly for this option, the interface does
3269 *	nothing.
3270 */
3271
3272kern_return_t
3273vm_map_enter_cpm(
3274	__unused vm_map_t	map,
3275	__unused vm_map_offset_t	*addr,
3276	__unused vm_map_size_t	size,
3277	__unused int		flags)
3278{
3279	return KERN_FAILURE;
3280}
3281#endif /* VM_CPM */
3282
3283/* Not used without nested pmaps */
3284#ifndef NO_NESTED_PMAP
3285/*
3286 * Clip and unnest a portion of a nested submap mapping.
3287 */
3288
3289
3290static void
3291vm_map_clip_unnest(
3292	vm_map_t	map,
3293	vm_map_entry_t	entry,
3294	vm_map_offset_t	start_unnest,
3295	vm_map_offset_t	end_unnest)
3296{
3297	vm_map_offset_t old_start_unnest = start_unnest;
3298	vm_map_offset_t old_end_unnest = end_unnest;
3299
3300	assert(entry->is_sub_map);
3301	assert(entry->object.sub_map != NULL);
3302
3303	/*
3304	 * Query the platform for the optimal unnest range.
3305	 * DRK: There's some duplication of effort here, since
3306	 * callers may have adjusted the range to some extent. This
3307	 * routine was introduced to support 1GiB subtree nesting
3308	 * for x86 platforms, which can also nest on 2MiB boundaries
3309	 * depending on size/alignment.
3310	 */
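	/*
	 * For instance (sizes hypothetical), on a platform that nests on
	 * 2MiB boundaries, a request to unnest a single 4KiB page may be
	 * widened here to the enclosing 2MiB region; the original bounds
	 * are kept in old_start_unnest/old_end_unnest so that the
	 * unadjusted request can be reported via log_unnest_badness().
	 */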
3311	if (pmap_adjust_unnest_parameters(map->pmap, &start_unnest, &end_unnest)) {
3312		log_unnest_badness(map, old_start_unnest, old_end_unnest);
3313	}
3314
3315	if (entry->vme_start > start_unnest ||
3316	    entry->vme_end < end_unnest) {
3317		panic("vm_map_clip_unnest(0x%llx,0x%llx): "
3318		      "bad nested entry: start=0x%llx end=0x%llx\n",
3319		      (long long)start_unnest, (long long)end_unnest,
3320		      (long long)entry->vme_start, (long long)entry->vme_end);
3321	}
3322
3323	if (start_unnest > entry->vme_start) {
3324		_vm_map_clip_start(&map->hdr,
3325				   entry,
3326				   start_unnest);
3327		vm_map_store_update_first_free(map, map->first_free);
3328	}
3329	if (entry->vme_end > end_unnest) {
3330		_vm_map_clip_end(&map->hdr,
3331				 entry,
3332				 end_unnest);
3333		vm_map_store_update_first_free(map, map->first_free);
3334	}
3335
3336	pmap_unnest(map->pmap,
3337		    entry->vme_start,
3338		    entry->vme_end - entry->vme_start);
3339	if ((map->mapped_in_other_pmaps) && (map->ref_count)) {
3340		/* clean up parent map/maps */
3341		vm_map_submap_pmap_clean(
3342			map, entry->vme_start,
3343			entry->vme_end,
3344			entry->object.sub_map,
3345			entry->offset);
3346	}
3347	entry->use_pmap = FALSE;
3348	if (entry->alias == VM_MEMORY_SHARED_PMAP) {
3349		entry->alias = VM_MEMORY_UNSHARED_PMAP;
3350	}
3351}
3352#endif	/* NO_NESTED_PMAP */
3353
3354/*
3355 *	vm_map_clip_start:	[ internal use only ]
3356 *
3357 *	Asserts that the given entry begins at or after
3358 *	the specified address; if necessary,
3359 *	it splits the entry into two.
3360 */
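/*
 *	Illustrative example (addresses hypothetical): clipping an entry
 *	covering [0x1000, 0x5000) at startaddr 0x3000 leaves the original
 *	entry describing [0x3000, 0x5000), with its object offset advanced
 *	by 0x2000, and inserts a new entry for the front portion
 *	[0x1000, 0x3000) just before it.
 */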
3361void
3362vm_map_clip_start(
3363	vm_map_t	map,
3364	vm_map_entry_t	entry,
3365	vm_map_offset_t	startaddr)
3366{
3367#ifndef NO_NESTED_PMAP
3368	if (entry->use_pmap &&
3369	    startaddr >= entry->vme_start) {
3370		vm_map_offset_t	start_unnest, end_unnest;
3371
3372		/*
3373		 * Make sure "startaddr" is no longer in a nested range
3374		 * before we clip.  Unnest only the minimum range the platform
3375		 * can handle.
3376		 * vm_map_clip_unnest may perform additional adjustments to
3377		 * the unnest range.
3378		 */
3379		start_unnest = startaddr & ~(pmap_nesting_size_min - 1);
3380		end_unnest = start_unnest + pmap_nesting_size_min;
3381		vm_map_clip_unnest(map, entry, start_unnest, end_unnest);
3382	}
3383#endif /* NO_NESTED_PMAP */
3384	if (startaddr > entry->vme_start) {
3385		if (entry->object.vm_object &&
3386		    !entry->is_sub_map &&
3387		    entry->object.vm_object->phys_contiguous) {
3388			pmap_remove(map->pmap,
3389				    (addr64_t)(entry->vme_start),
3390				    (addr64_t)(entry->vme_end));
3391		}
3392		_vm_map_clip_start(&map->hdr, entry, startaddr);
3393		vm_map_store_update_first_free(map, map->first_free);
3394	}
3395}
3396
3397
3398#define vm_map_copy_clip_start(copy, entry, startaddr) \
3399	MACRO_BEGIN \
3400	if ((startaddr) > (entry)->vme_start) \
3401		_vm_map_clip_start(&(copy)->cpy_hdr,(entry),(startaddr)); \
3402	MACRO_END
3403
3404/*
3405 *	This routine is called only when it is known that
3406 *	the entry must be split.
3407 */
3408static void
3409_vm_map_clip_start(
3410	register struct vm_map_header	*map_header,
3411	register vm_map_entry_t		entry,
3412	register vm_map_offset_t		start)
3413{
3414	register vm_map_entry_t	new_entry;
3415
3416	/*
3417	 *	Split off the front portion --
3418	 *	note that we must insert the new
3419	 *	entry BEFORE this one, so that
3420	 *	this entry has the specified starting
3421	 *	address.
3422	 */
3423
3424	new_entry = _vm_map_entry_create(map_header, !map_header->entries_pageable);
3425	vm_map_entry_copy_full(new_entry, entry);
3426
3427	assert(VM_MAP_PAGE_ALIGNED(start,
3428				   VM_MAP_HDR_PAGE_MASK(map_header)));
3429	new_entry->vme_end = start;
3430	assert(new_entry->vme_start < new_entry->vme_end);
3431	entry->offset += (start - entry->vme_start);
3432	assert(start < entry->vme_end);
3433	assert(VM_MAP_PAGE_ALIGNED(start,
3434				   VM_MAP_HDR_PAGE_MASK(map_header)));
3435	entry->vme_start = start;
3436
3437	_vm_map_store_entry_link(map_header, entry->vme_prev, new_entry);
3438
3439	if (entry->is_sub_map)
3440		vm_map_reference(new_entry->object.sub_map);
3441	else
3442		vm_object_reference(new_entry->object.vm_object);
3443}
3444
3445
3446/*
3447 *	vm_map_clip_end:	[ internal use only ]
3448 *
3449 *	Asserts that the given entry ends at or before
3450 *	the specified address; if necessary,
3451 *	it splits the entry into two.
3452 */
3453void
3454vm_map_clip_end(
3455	vm_map_t	map,
3456	vm_map_entry_t	entry,
3457	vm_map_offset_t	endaddr)
3458{
3459	if (endaddr > entry->vme_end) {
3460		/*
3461		 * Within the scope of this clipping, limit "endaddr" to
3462		 * the end of this map entry...
3463		 */
3464		endaddr = entry->vme_end;
3465	}
3466#ifndef NO_NESTED_PMAP
3467	if (entry->use_pmap) {
3468		vm_map_offset_t	start_unnest, end_unnest;
3469
3470		/*
3471		 * Make sure the range between the start of this entry and
3472		 * the new "endaddr" is no longer nested before we clip.
3473		 * Unnest only the minimum range the platform can handle.
3474		 * vm_map_clip_unnest may perform additional adjustments to
3475		 * the unnest range.
3476		 */
3477		start_unnest = entry->vme_start;
3478		end_unnest =
3479			(endaddr + pmap_nesting_size_min - 1) &
3480			~(pmap_nesting_size_min - 1);
3481		vm_map_clip_unnest(map, entry, start_unnest, end_unnest);
3482	}
3483#endif /* NO_NESTED_PMAP */
3484	if (endaddr < entry->vme_end) {
3485		if (entry->object.vm_object &&
3486		    !entry->is_sub_map &&
3487		    entry->object.vm_object->phys_contiguous) {
3488			pmap_remove(map->pmap,
3489				    (addr64_t)(entry->vme_start),
3490				    (addr64_t)(entry->vme_end));
3491		}
3492		_vm_map_clip_end(&map->hdr, entry, endaddr);
3493		vm_map_store_update_first_free(map, map->first_free);
3494	}
3495}
3496
3497
3498#define vm_map_copy_clip_end(copy, entry, endaddr) \
3499	MACRO_BEGIN \
3500	if ((endaddr) < (entry)->vme_end) \
3501		_vm_map_clip_end(&(copy)->cpy_hdr,(entry),(endaddr)); \
3502	MACRO_END
3503
3504/*
3505 *	This routine is called only when it is known that
3506 *	the entry must be split.
3507 */
3508static void
3509_vm_map_clip_end(
3510	register struct vm_map_header	*map_header,
3511	register vm_map_entry_t		entry,
3512	register vm_map_offset_t	end)
3513{
3514	register vm_map_entry_t	new_entry;
3515
3516	/*
3517	 *	Create a new entry and insert it
3518	 *	AFTER the specified entry
3519	 */
3520
3521	new_entry = _vm_map_entry_create(map_header, !map_header->entries_pageable);
3522	vm_map_entry_copy_full(new_entry, entry);
3523
3524	assert(entry->vme_start < end);
3525	assert(VM_MAP_PAGE_ALIGNED(end,
3526				   VM_MAP_HDR_PAGE_MASK(map_header)));
3527	new_entry->vme_start = entry->vme_end = end;
3528	new_entry->offset += (end - entry->vme_start);
3529	assert(new_entry->vme_start < new_entry->vme_end);
3530
3531	_vm_map_store_entry_link(map_header, entry, new_entry);
3532
3533	if (entry->is_sub_map)
3534		vm_map_reference(new_entry->object.sub_map);
3535	else
3536		vm_object_reference(new_entry->object.vm_object);
3537}
3538
3539
3540/*
3541 *	VM_MAP_RANGE_CHECK:	[ internal use only ]
3542 *
3543 *	Clamps the starting and ending region
3544 *	addresses to the valid range of the map.
3545 */
3546#define	VM_MAP_RANGE_CHECK(map, start, end)	\
3547	MACRO_BEGIN				\
3548	if (start < vm_map_min(map))		\
3549		start = vm_map_min(map);	\
3550	if (end > vm_map_max(map))		\
3551		end = vm_map_max(map);		\
3552	if (start > end)			\
3553		start = end;			\
3554	MACRO_END
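/*
 *	For example (values hypothetical): with vm_map_min(map) == 0x1000
 *	and vm_map_max(map) == 0x8000, a request for [0x0, 0x9000) is
 *	clamped to [0x1000, 0x8000), and a request lying entirely outside
 *	the map collapses to an empty range (start == end).
 */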
3555
3556/*
3557 *	vm_map_range_check:	[ internal use only ]
3558 *
3559 *	Check that the region defined by the specified start and
3560 *	end addresses is wholly contained within a single map
3561 *	entry or set of adjacent map entries of the specified map,
3562 *	i.e. the specified region contains no unmapped space.
3563 *	If any or all of the region is unmapped, FALSE is returned.
3564 *	Otherwise, TRUE is returned and if the output argument 'entry'
3565 *	is not NULL it points to the map entry containing the start
3566 *	of the region.
3567 *
3568 *	The map is locked for reading on entry and is left locked.
3569 */
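/*
 *	For example (addresses hypothetical): with adjacent entries
 *	[0x1000, 0x3000) and [0x3000, 0x6000), checking [0x2000, 0x5000)
 *	returns TRUE with *entry pointing at the first entry, while
 *	checking [0x2000, 0x7000) returns FALSE because [0x6000, 0x7000)
 *	is unmapped.
 */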
3570static boolean_t
3571vm_map_range_check(
3572	register vm_map_t	map,
3573	register vm_map_offset_t	start,
3574	register vm_map_offset_t	end,
3575	vm_map_entry_t		*entry)
3576{
3577	vm_map_entry_t		cur;
3578	register vm_map_offset_t	prev;
3579
3580	/*
3581	 * 	Basic sanity checks first
3582	 */
3583	if (start < vm_map_min(map) || end > vm_map_max(map) || start > end)
3584		return (FALSE);
3585
3586	/*
3587	 * 	Check first if the region starts within a valid
3588	 *	mapping for the map.
3589	 */
3590	if (!vm_map_lookup_entry(map, start, &cur))
3591		return (FALSE);
3592
3593	/*
3594	 *	Optimize for the case that the region is contained
3595	 *	in a single map entry.
3596	 */
3597	if (entry != (vm_map_entry_t *) NULL)
3598		*entry = cur;
3599	if (end <= cur->vme_end)
3600		return (TRUE);
3601
3602	/*
3603	 * 	If the region is not wholly contained within a
3604	 * 	single entry, walk the entries looking for holes.
3605	 */
3606	prev = cur->vme_end;
3607	cur = cur->vme_next;
3608	while ((cur != vm_map_to_entry(map)) && (prev == cur->vme_start)) {
3609		if (end <= cur->vme_end)
3610			return (TRUE);
3611		prev = cur->vme_end;
3612		cur = cur->vme_next;
3613	}
3614	return (FALSE);
3615}
3616
3617/*
3618 *	vm_map_submap:		[ kernel use only ]
3619 *
3620 *	Mark the given range as handled by a subordinate map.
3621 *
3622 *	This range must have been created with vm_map_find using
3623 *	the vm_submap_object, and no other operations may have been
3624 *	performed on this range prior to calling vm_map_submap.
3625 *
3626 *	Only a limited number of operations can be performed
3627 *	within this range after calling vm_map_submap:
3628 *		vm_fault
3629 *	[Don't try vm_map_copyin!]
3630 *
3631 *	To remove a submapping, one must first remove the
3632 *	range from the superior map, and then destroy the
3633 *	submap (if desired).  [Better yet, don't try it.]
3634 */
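/*
 *	Hypothetical usage sketch (names illustrative): once the range has
 *	been reserved against vm_submap_object as described above, the
 *	subordinate map is installed with:
 *
 *		kr = vm_map_submap(parent_map, start, end,
 *				   submap, (vm_map_offset_t) 0,
 *				   TRUE);
 *
 *	Passing TRUE for "use_pmap" asks the platform to nest the submap's
 *	pmap directly into the parent map's pmap, when supported.
 */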
3635kern_return_t
3636vm_map_submap(
3637	vm_map_t		map,
3638	vm_map_offset_t	start,
3639	vm_map_offset_t	end,
3640	vm_map_t		submap,
3641	vm_map_offset_t	offset,
3642#ifdef NO_NESTED_PMAP
3643	__unused
3644#endif	/* NO_NESTED_PMAP */
3645	boolean_t		use_pmap)
3646{
3647	vm_map_entry_t		entry;
3648	register kern_return_t	result = KERN_INVALID_ARGUMENT;
3649	register vm_object_t	object;
3650
3651	vm_map_lock(map);
3652
3653	if (! vm_map_lookup_entry(map, start, &entry)) {
3654		entry = entry->vme_next;
3655	}
3656
3657	if (entry == vm_map_to_entry(map) ||
3658	    entry->is_sub_map) {
3659		vm_map_unlock(map);
3660		return KERN_INVALID_ARGUMENT;
3661	}
3662
3663	assert(!entry->use_pmap); /* we don't want to unnest anything here */
3664	vm_map_clip_start(map, entry, start);
3665	vm_map_clip_end(map, entry, end);
3666
3667	if ((entry->vme_start == start) && (entry->vme_end == end) &&
3668	    (!entry->is_sub_map) &&
3669	    ((object = entry->object.vm_object) == vm_submap_object) &&
3670	    (object->resident_page_count == 0) &&
3671	    (object->copy == VM_OBJECT_NULL) &&
3672	    (object->shadow == VM_OBJECT_NULL) &&
3673	    (!object->pager_created)) {
3674		entry->offset = (vm_object_offset_t)offset;
3675		entry->object.vm_object = VM_OBJECT_NULL;
3676		vm_object_deallocate(object);
3677		entry->is_sub_map = TRUE;
3678		entry->object.sub_map = submap;
3679		vm_map_reference(submap);
3680		if (submap->mapped_in_other_pmaps == FALSE &&
3681		    vm_map_pmap(submap) != PMAP_NULL &&
3682		    vm_map_pmap(submap) != vm_map_pmap(map)) {
3683			/*
3684			 * This submap is being mapped in a map
3685			 * that uses a different pmap.
3686			 * Set its "mapped_in_other_pmaps" flag
3687			 * to indicate that we now need to
3688			 * remove mappings from all pmaps rather
3689			 * than just the submap's pmap.
3690			 */
3691			submap->mapped_in_other_pmaps = TRUE;
3692		}
3693
3694#ifndef NO_NESTED_PMAP
3695		if (use_pmap) {
3696			/* nest if platform code will allow */
3697			if(submap->pmap == NULL) {
3698				ledger_t ledger = map->pmap->ledger;
3699				submap->pmap = pmap_create(ledger,
3700						(vm_map_size_t) 0, FALSE);
3701				if(submap->pmap == PMAP_NULL) {
3702					vm_map_unlock(map);
3703					return(KERN_NO_SPACE);
3704				}
3705			}
3706			result = pmap_nest(map->pmap,
3707					   (entry->object.sub_map)->pmap,
3708					   (addr64_t)start,
3709					   (addr64_t)start,
3710					   (uint64_t)(end - start));
3711			if(result)
3712				panic("vm_map_submap: pmap_nest failed, rc = %08X\n", result);
3713			entry->use_pmap = TRUE;
3714		}
3715#else	/* NO_NESTED_PMAP */
3716		pmap_remove(map->pmap, (addr64_t)start, (addr64_t)end);
3717#endif	/* NO_NESTED_PMAP */
3718		result = KERN_SUCCESS;
3719	}
3720	vm_map_unlock(map);
3721
3722	return(result);
3723}
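
/*
 * Illustrative sketch (not part of the original source): how a kernel
 * client might nest an already-prepared range under a submap.  The
 * "parent_map", "shared_map" and "start"/"end" values are hypothetical,
 * and the prior setup of the range with vm_submap_object (see the
 * header comment above) is assumed rather than shown.
 */
#if 0	/* example only -- not compiled */
static kern_return_t
example_nest_submap(
	vm_map_t	parent_map,
	vm_map_t	shared_map,
	vm_map_offset_t	start,
	vm_map_offset_t	end)
{
	/*
	 * Mark [start, end) in the parent as handled by "shared_map",
	 * starting at offset 0 in the submap, and allow pmap nesting.
	 */
	return vm_map_submap(parent_map, start, end,
			     shared_map, (vm_map_offset_t) 0, TRUE);
}
#endif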
3724
3725/*
3726 *	vm_map_protect:
3727 *
3728 *	Sets the protection of the specified address
3729 *	region in the target map.  If "set_max" is
3730 *	specified, the maximum protection is to be set;
3731 *	otherwise, only the current protection is affected.
3732 */
3733kern_return_t
3734vm_map_protect(
3735	register vm_map_t	map,
3736	register vm_map_offset_t	start,
3737	register vm_map_offset_t	end,
3738	register vm_prot_t	new_prot,
3739	register boolean_t	set_max)
3740{
3741	register vm_map_entry_t		current;
3742	register vm_map_offset_t	prev;
3743	vm_map_entry_t			entry;
3744	vm_prot_t			new_max;
3745
3746	XPR(XPR_VM_MAP,
3747	    "vm_map_protect, 0x%X start 0x%X end 0x%X, new 0x%X %d",
3748	    map, start, end, new_prot, set_max);
3749
3750	vm_map_lock(map);
3751
3752	/* LP64todo - remove this check when vm_map_commpage64()
3753	 * no longer has to stuff in a map_entry for the commpage
3754	 * above the map's max_offset.
3755	 */
3756	if (start >= map->max_offset) {
3757		vm_map_unlock(map);
3758		return(KERN_INVALID_ADDRESS);
3759	}
3760
3761	while(1) {
3762		/*
3763		 * 	Lookup the entry.  If it doesn't start in a valid
3764		 *	entry, return an error.
3765		 */
3766		if (! vm_map_lookup_entry(map, start, &entry)) {
3767			vm_map_unlock(map);
3768			return(KERN_INVALID_ADDRESS);
3769		}
3770
3771		if (entry->superpage_size && (start & (SUPERPAGE_SIZE-1))) { /* extend request to whole entry */
3772			start = SUPERPAGE_ROUND_DOWN(start);
3773			continue;
3774		}
3775		break;
3776 	}
3777	if (entry->superpage_size)
3778 		end = SUPERPAGE_ROUND_UP(end);
3779
3780	/*
3781	 *	Make a first pass to check for protection and address
3782	 *	violations.
3783	 */
3784
3785	current = entry;
3786	prev = current->vme_start;
3787	while ((current != vm_map_to_entry(map)) &&
3788	       (current->vme_start < end)) {
3789
3790		/*
3791		 * If there is a hole, return an error.
3792		 */
3793		if (current->vme_start != prev) {
3794			vm_map_unlock(map);
3795			return(KERN_INVALID_ADDRESS);
3796		}
3797
3798		new_max = current->max_protection;
3799		if(new_prot & VM_PROT_COPY) {
3800			new_max |= VM_PROT_WRITE;
3801			if ((new_prot & (new_max | VM_PROT_COPY)) != new_prot) {
3802				vm_map_unlock(map);
3803				return(KERN_PROTECTION_FAILURE);
3804			}
3805		} else {
3806			if ((new_prot & new_max) != new_prot) {
3807				vm_map_unlock(map);
3808				return(KERN_PROTECTION_FAILURE);
3809			}
3810		}
3811
3812
3813		prev = current->vme_end;
3814		current = current->vme_next;
3815	}
3816	if (end > prev) {
3817		vm_map_unlock(map);
3818		return(KERN_INVALID_ADDRESS);
3819	}
3820
3821	/*
3822	 *	Go back and fix up protections.
3823	 *	Clip to start here if the range starts within
3824	 *	the entry.
3825	 */
3826
3827	current = entry;
3828	if (current != vm_map_to_entry(map)) {
3829		/* clip and unnest if necessary */
3830		vm_map_clip_start(map, current, start);
3831	}
3832
3833	while ((current != vm_map_to_entry(map)) &&
3834	       (current->vme_start < end)) {
3835
3836		vm_prot_t	old_prot;
3837
3838		vm_map_clip_end(map, current, end);
3839
3840		assert(!current->use_pmap); /* clipping did unnest if needed */
3841
3842		old_prot = current->protection;
3843
3844		if(new_prot & VM_PROT_COPY) {
3845			/* The caller is asking specifically to copy the   */
3846			/* mapped data; this implies that max protection   */
3847			/* will include write.  The caller must be prepared */
3848			/* for loss of shared memory communication in the  */
3849			/* target area after taking this step.             */
3850
3851			if (current->is_sub_map == FALSE && current->object.vm_object == VM_OBJECT_NULL){
3852				current->object.vm_object = vm_object_allocate((vm_map_size_t)(current->vme_end - current->vme_start));
3853				current->offset = 0;
3854			}
3855			current->needs_copy = TRUE;
3856			current->max_protection |= VM_PROT_WRITE;
3857		}
3858
3859		if (set_max)
3860			current->protection =
3861				(current->max_protection =
3862				 new_prot & ~VM_PROT_COPY) &
3863				old_prot;
3864		else
3865			current->protection = new_prot & ~VM_PROT_COPY;
3866
3867		/*
3868		 *	Update physical map if necessary.
3869		 *	If the request is to turn off write protection,
3870		 *	we won't do it for real (in pmap). This is because
3871		 *	it would cause copy-on-write to fail.  We've already
3872		 *	set the new protection in the map, so if a
3873		 *	write-protect fault occurs, it will be fixed up
3874		 *	properly, COW or not.
3875		 */
3876		if (current->protection != old_prot) {
3877			/* Look one level in if we support nested pmaps */
3878			/* from mapped submaps which are direct entries */
3879			/* in our map */
3880
3881			vm_prot_t prot;
3882
3883			prot = current->protection & ~VM_PROT_WRITE;
3884
3885			if (override_nx(map, current->alias) && prot)
3886			        prot |= VM_PROT_EXECUTE;
3887
3888			if (current->is_sub_map && current->use_pmap) {
3889				pmap_protect(current->object.sub_map->pmap,
3890					     current->vme_start,
3891					     current->vme_end,
3892					     prot);
3893			} else {
3894				pmap_protect(map->pmap,
3895					     current->vme_start,
3896					     current->vme_end,
3897					     prot);
3898			}
3899		}
3900		current = current->vme_next;
3901	}
3902
3903	current = entry;
3904	while ((current != vm_map_to_entry(map)) &&
3905	       (current->vme_start <= end)) {
3906		vm_map_simplify_entry(map, current);
3907		current = current->vme_next;
3908	}
3909
3910	vm_map_unlock(map);
3911	return(KERN_SUCCESS);
3912}
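
/*
 * Illustrative sketch (not part of the original source): dropping a
 * range to read-only.  With set_max == FALSE only the current
 * protection changes; the maximum protection is left alone, so the
 * range could later be restored to read/write.  The map and bounds
 * are hypothetical.
 */
#if 0	/* example only -- not compiled */
static kern_return_t
example_make_read_only(
	vm_map_t	map,
	vm_map_offset_t	start,
	vm_map_offset_t	end)
{
	return vm_map_protect(map, start, end, VM_PROT_READ, FALSE);
}
#endif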
3913
3914/*
3915 *	vm_map_inherit:
3916 *
3917 *	Sets the inheritance of the specified address
3918 *	range in the target map.  Inheritance
3919 *	affects how the map will be shared with
3920 *	child maps at the time of vm_map_fork.
3921 */
3922kern_return_t
3923vm_map_inherit(
3924	register vm_map_t	map,
3925	register vm_map_offset_t	start,
3926	register vm_map_offset_t	end,
3927	register vm_inherit_t	new_inheritance)
3928{
3929	register vm_map_entry_t	entry;
3930	vm_map_entry_t	temp_entry;
3931
3932	vm_map_lock(map);
3933
3934	VM_MAP_RANGE_CHECK(map, start, end);
3935
3936	if (vm_map_lookup_entry(map, start, &temp_entry)) {
3937		entry = temp_entry;
3938	}
3939	else {
3940		temp_entry = temp_entry->vme_next;
3941		entry = temp_entry;
3942	}
3943
3944	/* first check entire range for submaps which can't support the */
3945	/* given inheritance. */
3946	while ((entry != vm_map_to_entry(map)) && (entry->vme_start < end)) {
3947		if(entry->is_sub_map) {
3948			if(new_inheritance == VM_INHERIT_COPY) {
3949				vm_map_unlock(map);
3950				return(KERN_INVALID_ARGUMENT);
3951			}
3952		}
3953
3954		entry = entry->vme_next;
3955	}
3956
3957	entry = temp_entry;
3958	if (entry != vm_map_to_entry(map)) {
3959		/* clip and unnest if necessary */
3960		vm_map_clip_start(map, entry, start);
3961	}
3962
3963	while ((entry != vm_map_to_entry(map)) && (entry->vme_start < end)) {
3964		vm_map_clip_end(map, entry, end);
3965		assert(!entry->use_pmap); /* clip did unnest if needed */
3966
3967		entry->inheritance = new_inheritance;
3968
3969		entry = entry->vme_next;
3970	}
3971
3972	vm_map_unlock(map);
3973	return(KERN_SUCCESS);
3974}
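
/*
 * Illustrative sketch (not part of the original source): marking a
 * range so that a child map created at vm_map_fork time shares it
 * with the parent rather than receiving a copy.  VM_INHERIT_SHARE is
 * the standard Mach inheritance value; the map and bounds are
 * hypothetical.
 */
#if 0	/* example only -- not compiled */
static kern_return_t
example_share_with_children(
	vm_map_t	map,
	vm_map_offset_t	start,
	vm_map_offset_t	end)
{
	return vm_map_inherit(map, start, end, VM_INHERIT_SHARE);
}
#endif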
3975
3976/*
3977 * Update the accounting for the amount of wired memory in this map.  If the user has
3978 * exceeded the defined limits, then we fail.  Wiring on behalf of the kernel never fails.
3979 */
3980
3981static kern_return_t
3982add_wire_counts(
3983	vm_map_t	map,
3984	vm_map_entry_t	entry,
3985	boolean_t	user_wire)
3986{
3987	vm_map_size_t	size;
3988
3989	if (user_wire) {
3990		unsigned int total_wire_count =  vm_page_wire_count + vm_lopage_free_count;
3991
3992		/*
3993		 * We're wiring memory at the request of the user.  Check if this is the first time the user is wiring
3994		 * this map entry.
3995		 */
3996
3997		if (entry->user_wired_count == 0) {
3998			size = entry->vme_end - entry->vme_start;
3999
4000			/*
4001			 * Since this is the first time the user is wiring this map entry, check to see if we're
4002			 * exceeding the user wire limits.  There is a per-map limit, which is the smaller of the
4003			 * process's rlimit and the global vm_user_wire_limit, which caps that value.  There is also
4004			 * a system-wide limit on the amount of memory all users can wire.  If the user is over either
4005			 * limit, then we fail.
4006			 */
4007
4008			if(size + map->user_wire_size > MIN(map->user_wire_limit, vm_user_wire_limit) ||
4009			   size + ptoa_64(total_wire_count) > vm_global_user_wire_limit ||
4010		    	   size + ptoa_64(total_wire_count) > max_mem - vm_global_no_user_wire_amount)
4011				return KERN_RESOURCE_SHORTAGE;
4012
4013			/*
4014			 * The first time the user wires an entry, we also increment the wired_count and add this to
4015			 * the total that has been wired in the map.
4016			 */
4017
4018			if (entry->wired_count >= MAX_WIRE_COUNT)
4019				return KERN_FAILURE;
4020
4021			entry->wired_count++;
4022			map->user_wire_size += size;
4023		}
4024
4025		if (entry->user_wired_count >= MAX_WIRE_COUNT)
4026			return KERN_FAILURE;
4027
4028		entry->user_wired_count++;
4029
4030	} else {
4031
4032		/*
4033		 * The kernel's wiring the memory.  Just bump the count and continue.
4034		 */
4035
4036		if (entry->wired_count >= MAX_WIRE_COUNT)
4037			panic("vm_map_wire: too many wirings");
4038
4039		entry->wired_count++;
4040	}
4041
4042	return KERN_SUCCESS;
4043}
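
/*
 * Worked example for the checks above (hypothetical numbers): if the
 * effective per-map cap MIN(map->user_wire_limit, vm_user_wire_limit)
 * is 64 MB and map->user_wire_size is already 60 MB, a request to wire
 * an 8 MB entry fails with KERN_RESOURCE_SHORTAGE.  The same request
 * would also be rejected if it pushed the system-wide wired total past
 * vm_global_user_wire_limit, or past max_mem minus
 * vm_global_no_user_wire_amount.
 */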
4044
4045/*
4046 * Update the memory wiring accounting now that the given map entry is being unwired.
4047 */
4048
4049static void
4050subtract_wire_counts(
4051	vm_map_t	map,
4052	vm_map_entry_t	entry,
4053	boolean_t	user_wire)
4054{
4055
4056	if (user_wire) {
4057
4058		/*
4059		 * We're unwiring memory at the request of the user.  See if we're removing the last user wire reference.
4060		 */
4061
4062		if (entry->user_wired_count == 1) {
4063
4064			/*
4065			 * We're removing the last user wire reference.  Decrement the wired_count and the total
4066			 * user wired memory for this map.
4067			 */
4068
4069			assert(entry->wired_count >= 1);
4070			entry->wired_count--;
4071			map->user_wire_size -= entry->vme_end - entry->vme_start;
4072		}
4073
4074		assert(entry->user_wired_count >= 1);
4075		entry->user_wired_count--;
4076
4077	} else {
4078
4079		/*
4080		 * The kernel is unwiring the memory.   Just update the count.
4081		 */
4082
4083		assert(entry->wired_count >= 1);
4084		entry->wired_count--;
4085	}
4086}
4087
4088/*
4089 *	vm_map_wire:
4090 *
4091 *	Sets the pageability of the specified address range in the
4092 *	target map as wired.  Regions specified as not pageable require
4093 *	locked-down physical memory and physical page maps.  The
4094 *	access_type variable indicates types of accesses that must not
4095 *	generate page faults.  This is checked against the protection of
4096 *	the memory being locked down.
4097 *
4098 *	The map must not be locked, but a reference must remain to the
4099 *	map throughout the call.
4100 */
4101static kern_return_t
4102vm_map_wire_nested(
4103	register vm_map_t	map,
4104	register vm_map_offset_t	start,
4105	register vm_map_offset_t	end,
4106	register vm_prot_t	access_type,
4107	boolean_t		user_wire,
4108	pmap_t			map_pmap,
4109	vm_map_offset_t		pmap_addr)
4110{
4111	register vm_map_entry_t	entry;
4112	struct vm_map_entry	*first_entry, tmp_entry;
4113	vm_map_t		real_map;
4114	register vm_map_offset_t	s,e;
4115	kern_return_t		rc;
4116	boolean_t		need_wakeup;
4117	boolean_t		main_map = FALSE;
4118	wait_interrupt_t	interruptible_state;
4119	thread_t		cur_thread;
4120	unsigned int		last_timestamp;
4121	vm_map_size_t		size;
4122
4123	vm_map_lock(map);
4124	if(map_pmap == NULL)
4125		main_map = TRUE;
4126	last_timestamp = map->timestamp;
4127
4128	VM_MAP_RANGE_CHECK(map, start, end);
4129	assert(page_aligned(start));
4130	assert(page_aligned(end));
4131	assert(VM_MAP_PAGE_ALIGNED(start, VM_MAP_PAGE_MASK(map)));
4132	assert(VM_MAP_PAGE_ALIGNED(end, VM_MAP_PAGE_MASK(map)));
4133	if (start == end) {
4134		/* We wired what the caller asked for, zero pages */
4135		vm_map_unlock(map);
4136		return KERN_SUCCESS;
4137	}
4138
4139	need_wakeup = FALSE;
4140	cur_thread = current_thread();
4141
4142	s = start;
4143	rc = KERN_SUCCESS;
4144
4145	if (vm_map_lookup_entry(map, s, &first_entry)) {
4146		entry = first_entry;
4147		/*
4148		 * vm_map_clip_start will be done later.
4149		 * We don't want to unnest any nested submaps here !
4150		 */
4151	} else {
4152		/* Start address is not in map */
4153		rc = KERN_INVALID_ADDRESS;
4154		goto done;
4155	}
4156
4157	while ((entry != vm_map_to_entry(map)) && (s < end)) {
4158		/*
4159		 * At this point, we have wired from "start" to "s".
4160		 * We still need to wire from "s" to "end".
4161		 *
4162		 * "entry" hasn't been clipped, so it could start before "s"
4163		 * and/or end after "end".
4164		 */
4165
4166		/* "e" is how far we want to wire in this entry */
4167		e = entry->vme_end;
4168		if (e > end)
4169			e = end;
4170
4171		/*
4172		 * If another thread is wiring/unwiring this entry, then
4173		 * block after asking the other thread to wake us up.
4174		 */
4175		if (entry->in_transition) {
4176			wait_result_t wait_result;
4177
4178			/*
4179			 * We have not clipped the entry.  Make sure that
4180			 * the start address is in range so that the lookup
4181			 * below will succeed.
4182			 * "s" is the current starting point: we've already
4183			 * wired from "start" to "s" and we still have
4184			 * to wire from "s" to "end".
4185			 */
4186
4187			entry->needs_wakeup = TRUE;
4188
4189			/*
4190			 * wake up anybody waiting on entries that we have
4191			 * already wired.
4192			 */
4193			if (need_wakeup) {
4194				vm_map_entry_wakeup(map);
4195				need_wakeup = FALSE;
4196			}
4197			/*
4198			 * User wiring is interruptible
4199			 */
4200			wait_result = vm_map_entry_wait(map,
4201							(user_wire) ? THREAD_ABORTSAFE :
4202							THREAD_UNINT);
4203			if (user_wire && wait_result ==	THREAD_INTERRUPTED) {
4204				/*
4205				 * undo the wirings we have done so far
4206				 * We do not clear the needs_wakeup flag,
4207				 * because we cannot tell if we were the
4208				 * only one waiting.
4209				 */
4210				rc = KERN_FAILURE;
4211				goto done;
4212			}
4213
4214			/*
4215			 * Cannot avoid a lookup here.  Reset the timestamp.
4216			 */
4217			last_timestamp = map->timestamp;
4218
4219			/*
4220			 * The entry could have been clipped, so look it up again.
4221			 * The worst that can happen is that it no longer exists.
4222			 */
4223			if (!vm_map_lookup_entry(map, s, &first_entry)) {
4224				/*
4225				 * User: undo everything up to the previous
4226				 * entry.  Let vm_map_unwire worry about
4227				 * checking the validity of the range.
4228				 */
4229				rc = KERN_FAILURE;
4230				goto done;
4231			}
4232			entry = first_entry;
4233			continue;
4234		}
4235
4236		if (entry->is_sub_map) {
4237			vm_map_offset_t	sub_start;
4238			vm_map_offset_t	sub_end;
4239			vm_map_offset_t	local_start;
4240			vm_map_offset_t	local_end;
4241			pmap_t		pmap;
4242
4243			vm_map_clip_start(map, entry, s);
4244			vm_map_clip_end(map, entry, end);
4245
4246			sub_start = entry->offset;
4247			sub_end = entry->vme_end;
4248			sub_end += entry->offset - entry->vme_start;
4249
4250			local_end = entry->vme_end;
4251			if(map_pmap == NULL) {
4252				vm_object_t		object;
4253				vm_object_offset_t	offset;
4254				vm_prot_t		prot;
4255				boolean_t		wired;
4256				vm_map_entry_t		local_entry;
4257				vm_map_version_t	 version;
4258				vm_map_t		lookup_map;
4259
4260				if(entry->use_pmap) {
4261					pmap = entry->object.sub_map->pmap;
4262					/* The ppc implementation requires that */
4263					/* a submap's pmap address ranges line  */
4264					/* up with the parent map's.            */
4265#ifdef notdef
4266					pmap_addr = sub_start;
4267#endif
4268					pmap_addr = s;
4269				} else {
4270					pmap = map->pmap;
4271					pmap_addr = s;
4272				}
4273
4274				if (entry->wired_count) {
4275					if ((rc = add_wire_counts(map, entry, user_wire)) != KERN_SUCCESS)
4276						goto done;
4277
4278					/*
4279					 * The map was not unlocked:
4280					 * no need to goto re-lookup.
4281					 * Just go directly to next entry.
4282					 */
4283					entry = entry->vme_next;
4284					s = entry->vme_start;
4285					continue;
4286
4287				}
4288
4289				/* call vm_map_lookup_locked to */
4290				/* cause any needs_copy to be   */
4291				/* evaluated */
4292				local_start = entry->vme_start;
4293				lookup_map = map;
4294				vm_map_lock_write_to_read(map);
4295				if(vm_map_lookup_locked(
4296					   &lookup_map, local_start,
4297					   access_type,
4298					   OBJECT_LOCK_EXCLUSIVE,
4299					   &version, &object,
4300					   &offset, &prot, &wired,
4301					   NULL,
4302					   &real_map)) {
4303
4304					vm_map_unlock_read(lookup_map);
4305					vm_map_unwire(map, start,
4306						      s, user_wire);
4307					return(KERN_FAILURE);
4308				}
4309				vm_object_unlock(object);
4310				if(real_map != lookup_map)
4311					vm_map_unlock(real_map);
4312				vm_map_unlock_read(lookup_map);
4313				vm_map_lock(map);
4314
4315				/* we unlocked, so must re-lookup */
4316				if (!vm_map_lookup_entry(map,
4317							 local_start,
4318							 &local_entry)) {
4319					rc = KERN_FAILURE;
4320					goto done;
4321				}
4322
4323				/*
4324				 * entry could have been "simplified",
4325				 * so re-clip
4326				 */
4327				entry = local_entry;
4328				assert(s == local_start);
4329				vm_map_clip_start(map, entry, s);
4330				vm_map_clip_end(map, entry, end);
4331				/* re-compute "e" */
4332				e = entry->vme_end;
4333				if (e > end)
4334					e = end;
4335
4336				/* did we have a change of type? */
4337				if (!entry->is_sub_map) {
4338					last_timestamp = map->timestamp;
4339					continue;
4340				}
4341			} else {
4342				local_start = entry->vme_start;
4343				pmap = map_pmap;
4344			}
4345
4346			if ((rc = add_wire_counts(map, entry, user_wire)) != KERN_SUCCESS)
4347				goto done;
4348
4349			entry->in_transition = TRUE;
4350
4351			vm_map_unlock(map);
4352			rc = vm_map_wire_nested(entry->object.sub_map,
4353						sub_start, sub_end,
4354						access_type,
4355						user_wire, pmap, pmap_addr);
4356			vm_map_lock(map);
4357
4358			/*
4359			 * Find the entry again.  It could have been clipped
4360			 * after we unlocked the map.
4361			 */
4362			if (!vm_map_lookup_entry(map, local_start,
4363						 &first_entry))
4364				panic("vm_map_wire: re-lookup failed");
4365			entry = first_entry;
4366
4367			assert(local_start == s);
4368			/* re-compute "e" */
4369			e = entry->vme_end;
4370			if (e > end)
4371				e = end;
4372
4373			last_timestamp = map->timestamp;
4374			while ((entry != vm_map_to_entry(map)) &&
4375			       (entry->vme_start < e)) {
4376				assert(entry->in_transition);
4377				entry->in_transition = FALSE;
4378				if (entry->needs_wakeup) {
4379					entry->needs_wakeup = FALSE;
4380					need_wakeup = TRUE;
4381				}
4382				if (rc != KERN_SUCCESS) {/* from vm_*_wire */
4383					subtract_wire_counts(map, entry, user_wire);
4384				}
4385				entry = entry->vme_next;
4386			}
4387			if (rc != KERN_SUCCESS) {	/* from vm_*_wire */
4388				goto done;
4389			}
4390
4391			/* no need to relookup again */
4392			s = entry->vme_start;
4393			continue;
4394		}
4395
4396		/*
4397		 * If this entry is already wired then increment
4398		 * the appropriate wire reference count.
4399		 */
4400		if (entry->wired_count) {
4401			/*
4402			 * entry is already wired down, get our reference
4403			 * after clipping to our range.
4404			 */
4405			vm_map_clip_start(map, entry, s);
4406			vm_map_clip_end(map, entry, end);
4407
4408			if ((rc = add_wire_counts(map, entry, user_wire)) != KERN_SUCCESS)
4409				goto done;
4410
4411			/* map was not unlocked: no need to relookup */
4412			entry = entry->vme_next;
4413			s = entry->vme_start;
4414			continue;
4415		}
4416
4417		/*
4418		 * Unwired entry or wire request transmitted via submap
4419		 */
4420
4421
4422		/*
4423		 * Perform actions of vm_map_lookup that need the write
4424		 * lock on the map: create a shadow object for a
4425		 * copy-on-write region, or an object for a zero-fill
4426		 * region.
4427		 */
4428		size = entry->vme_end - entry->vme_start;
4429		/*
4430		 * If wiring a copy-on-write page, we need to copy it now
4431		 * even if we're only (currently) requesting read access.
4432		 * This is aggressive, but once it's wired we can't move it.
4433		 */
4434		if (entry->needs_copy) {
4435			vm_object_shadow(&entry->object.vm_object,
4436					 &entry->offset, size);
4437			entry->needs_copy = FALSE;
4438		} else if (entry->object.vm_object == VM_OBJECT_NULL) {
4439			entry->object.vm_object = vm_object_allocate(size);
4440			entry->offset = (vm_object_offset_t)0;
4441		}
4442
4443		vm_map_clip_start(map, entry, s);
4444		vm_map_clip_end(map, entry, end);
4445
4446		/* re-compute "e" */
4447		e = entry->vme_end;
4448		if (e > end)
4449			e = end;
4450
4451		/*
4452		 * Check for holes and protection mismatch.
4453		 * Holes: Next entry should be contiguous unless this
4454		 *	  is the end of the region.
4455		 * Protection: Access requested must be allowed, unless
4456		 *	wiring is by protection class
4457		 */
4458		if ((entry->vme_end < end) &&
4459		    ((entry->vme_next == vm_map_to_entry(map)) ||
4460		     (entry->vme_next->vme_start > entry->vme_end))) {
4461			/* found a hole */
4462			rc = KERN_INVALID_ADDRESS;
4463			goto done;
4464		}
4465		if ((entry->protection & access_type) != access_type) {
4466			/* found a protection problem */
4467			rc = KERN_PROTECTION_FAILURE;
4468			goto done;
4469		}
4470
4471		assert(entry->wired_count == 0 && entry->user_wired_count == 0);
4472
4473		if ((rc = add_wire_counts(map, entry, user_wire)) != KERN_SUCCESS)
4474			goto done;
4475
4476		entry->in_transition = TRUE;
4477
4478		/*
4479		 * This entry might get split once we unlock the map.
4480		 * In vm_fault_wire(), we need the current range as
4481		 * defined by this entry.  In order for this to work
4482		 * along with a simultaneous clip operation, we make a
4483		 * temporary copy of this entry and use that for the
4484		 * wiring.  Note that the underlying objects do not
4485		 * change during a clip.
4486		 */
4487		tmp_entry = *entry;
4488
4489		/*
4490		 * The in_transition state guarantees that the entry
4491		 * (or entries for this range, if a split occurred) will be
4492		 * there when the map lock is acquired for the second time.
4493		 */
4494		vm_map_unlock(map);
4495
4496		if (!user_wire && cur_thread != THREAD_NULL)
4497			interruptible_state = thread_interrupt_level(THREAD_UNINT);
4498		else
4499			interruptible_state = THREAD_UNINT;
4500
4501		if(map_pmap)
4502			rc = vm_fault_wire(map,
4503					   &tmp_entry, map_pmap, pmap_addr);
4504		else
4505			rc = vm_fault_wire(map,
4506					   &tmp_entry, map->pmap,
4507					   tmp_entry.vme_start);
4508
4509		if (!user_wire && cur_thread != THREAD_NULL)
4510			thread_interrupt_level(interruptible_state);
4511
4512		vm_map_lock(map);
4513
4514		if (last_timestamp+1 != map->timestamp) {
4515			/*
4516			 * Find the entry again.  It could have been clipped
4517			 * after we unlocked the map.
4518			 */
4519			if (!vm_map_lookup_entry(map, tmp_entry.vme_start,
4520						 &first_entry))
4521				panic("vm_map_wire: re-lookup failed");
4522
4523			entry = first_entry;
4524		}
4525
4526		last_timestamp = map->timestamp;
4527
4528		while ((entry != vm_map_to_entry(map)) &&
4529		       (entry->vme_start < tmp_entry.vme_end)) {
4530			assert(entry->in_transition);
4531			entry->in_transition = FALSE;
4532			if (entry->needs_wakeup) {
4533				entry->needs_wakeup = FALSE;
4534				need_wakeup = TRUE;
4535			}
4536			if (rc != KERN_SUCCESS) {	/* from vm_*_wire */
4537				subtract_wire_counts(map, entry, user_wire);
4538			}
4539			entry = entry->vme_next;
4540		}
4541
4542		if (rc != KERN_SUCCESS) {		/* from vm_*_wire */
4543			goto done;
4544		}
4545
4546		s = entry->vme_start;
4547	} /* end while loop through map entries */
4548
4549done:
4550	if (rc == KERN_SUCCESS) {
4551		/* repair any damage we may have made to the VM map */
4552		vm_map_simplify_range(map, start, end);
4553	}
4554
4555	vm_map_unlock(map);
4556
4557	/*
4558	 * wake up anybody waiting on entries we wired.
4559	 */
4560	if (need_wakeup)
4561		vm_map_entry_wakeup(map);
4562
4563	if (rc != KERN_SUCCESS) {
4564		/* undo what has been wired so far */
4565		vm_map_unwire(map, start, s, user_wire);
4566	}
4567
4568	return rc;
4569
4570}
4571
4572kern_return_t
4573vm_map_wire(
4574	register vm_map_t	map,
4575	register vm_map_offset_t	start,
4576	register vm_map_offset_t	end,
4577	register vm_prot_t	access_type,
4578	boolean_t		user_wire)
4579{
4580
4581	kern_return_t	kret;
4582
4583	kret = vm_map_wire_nested(map, start, end, access_type,
4584				  user_wire, (pmap_t)NULL, 0);
4585	return kret;
4586}
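
/*
 * Illustrative sketch (not part of the original source): wiring a user
 * range for read/write access on behalf of a user request, then
 * unwiring it when done.  With user_wire == TRUE the pages are charged
 * against the per-map and global user wire limits (see
 * add_wire_counts()).  The map, address and length are hypothetical.
 */
#if 0	/* example only -- not compiled */
static kern_return_t
example_wire_user_range(
	vm_map_t	user_map,
	vm_map_offset_t	addr,
	vm_map_size_t	len)
{
	kern_return_t	kr;

	kr = vm_map_wire(user_map, addr, addr + len,
			 VM_PROT_READ | VM_PROT_WRITE, TRUE);
	if (kr != KERN_SUCCESS)
		return kr;

	/* ... access the wired memory without taking page faults ... */

	/* drop the user wiring when finished */
	return vm_map_unwire(user_map, addr, addr + len, TRUE);
}
#endif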
4587
4588/*
4589 *	vm_map_unwire:
4590 *
4591 *	Sets the pageability of the specified address range in the target
4592 *	as pageable.  Regions specified must have been wired previously.
4593 *
4594 *	The map must not be locked, but a reference must remain to the map
4595 *	throughout the call.
4596 *
4597 *	The kernel will panic on failures.  User unwire ignores holes and
4598 *	unwired and in-transition entries to avoid losing memory by leaving
4599 *	it wired.
4600 */
4601static kern_return_t
4602vm_map_unwire_nested(
4603	register vm_map_t	map,
4604	register vm_map_offset_t	start,
4605	register vm_map_offset_t	end,
4606	boolean_t		user_wire,
4607	pmap_t			map_pmap,
4608	vm_map_offset_t		pmap_addr)
4609{
4610	register vm_map_entry_t	entry;
4611	struct vm_map_entry	*first_entry, tmp_entry;
4612	boolean_t		need_wakeup;
4613	boolean_t		main_map = FALSE;
4614	unsigned int		last_timestamp;
4615
4616	vm_map_lock(map);
4617	if(map_pmap == NULL)
4618		main_map = TRUE;
4619	last_timestamp = map->timestamp;
4620
4621	VM_MAP_RANGE_CHECK(map, start, end);
4622	assert(page_aligned(start));
4623	assert(page_aligned(end));
4624	assert(VM_MAP_PAGE_ALIGNED(start, VM_MAP_PAGE_MASK(map)));
4625	assert(VM_MAP_PAGE_ALIGNED(end, VM_MAP_PAGE_MASK(map)));
4626
4627	if (start == end) {
4628		/* We unwired what the caller asked for: zero pages */
4629		vm_map_unlock(map);
4630		return KERN_SUCCESS;
4631	}
4632
4633	if (vm_map_lookup_entry(map, start, &first_entry)) {
4634		entry = first_entry;
4635		/*
4636		 * vm_map_clip_start will be done later.
4637		 * We don't want to unnest any nested sub maps here !
4638		 */
4639	}
4640	else {
4641		if (!user_wire) {
4642			panic("vm_map_unwire: start not found");
4643		}
4644		/*	Start address is not in map. */
4645		vm_map_unlock(map);
4646		return(KERN_INVALID_ADDRESS);
4647	}
4648
4649	if (entry->superpage_size) {
4650		/* superpages are always wired */
4651		vm_map_unlock(map);
4652		return KERN_INVALID_ADDRESS;
4653	}
4654
4655	need_wakeup = FALSE;
4656	while ((entry != vm_map_to_entry(map)) && (entry->vme_start < end)) {
4657		if (entry->in_transition) {
4658			/*
4659			 * 1)
4660			 * Another thread is wiring down this entry. Note
4661			 * that if it were not for the other thread we would
4662			 * be unwiring an unwired entry.  This is not
4663			 * permitted.  If we wait, we will be unwiring memory
4664			 * we did not wire.
4665			 *
4666			 * 2)
4667			 * Another thread is unwiring this entry.  We did not
4668			 * have a reference to it, because if we did, this
4669			 * entry would not be getting unwired now.
4670			 */
4671			if (!user_wire) {
4672				/*
4673				 * XXX FBDP
4674				 * This could happen:  there could be some
4675				 * overlapping vslock/vsunlock operations
4676				 * going on.
4677				 * We should probably just wait and retry,
4678				 * but then we have to be careful that this
4679				 * entry could get "simplified" after
4680				 * "in_transition" gets unset and before
4681				 * we re-lookup the entry, so we would
4682				 * have to re-clip the entry to avoid
4683				 * re-unwiring what we have already unwired...
4684				 * See vm_map_wire_nested().
4685				 *
4686				 * Or we could just ignore "in_transition"
4687				 * here and proceed to decrement the wired
4688				 * count(s) on this entry.  That should be fine
4689				 * as long as "wired_count" doesn't drop all
4690				 * the way to 0 (and we should panic if THAT
4691				 * happens).
4692				 */
4693				panic("vm_map_unwire: in_transition entry");
4694			}
4695
4696			entry = entry->vme_next;
4697			continue;
4698		}
4699
4700		if (entry->is_sub_map) {
4701			vm_map_offset_t	sub_start;
4702			vm_map_offset_t	sub_end;
4703			vm_map_offset_t	local_end;
4704			pmap_t		pmap;
4705
4706			vm_map_clip_start(map, entry, start);
4707			vm_map_clip_end(map, entry, end);
4708
4709			sub_start = entry->offset;
4710			sub_end = entry->vme_end - entry->vme_start;
4711			sub_end += entry->offset;
4712			local_end = entry->vme_end;
4713			if(map_pmap == NULL) {
4714				if(entry->use_pmap) {
4715					pmap = entry->object.sub_map->pmap;
4716					pmap_addr = sub_start;
4717				} else {
4718					pmap = map->pmap;
4719					pmap_addr = start;
4720				}
4721				if (entry->wired_count == 0 ||
4722				    (user_wire && entry->user_wired_count == 0)) {
4723					if (!user_wire)
4724						panic("vm_map_unwire: entry is unwired");
4725					entry = entry->vme_next;
4726					continue;
4727				}
4728
4729				/*
4730				 * Check for holes
4731				 * Holes: Next entry should be contiguous unless
4732				 * this is the end of the region.
4733				 */
4734				if (((entry->vme_end < end) &&
4735				     ((entry->vme_next == vm_map_to_entry(map)) ||
4736				      (entry->vme_next->vme_start
4737				       > entry->vme_end)))) {
4738					if (!user_wire)
4739						panic("vm_map_unwire: non-contiguous region");
4740/*
4741					entry = entry->vme_next;
4742					continue;
4743*/
4744				}
4745
4746				subtract_wire_counts(map, entry, user_wire);
4747
4748				if (entry->wired_count != 0) {
4749					entry = entry->vme_next;
4750					continue;
4751				}
4752
4753				entry->in_transition = TRUE;
4754				tmp_entry = *entry;/* see comment in vm_map_wire() */
4755
4756				/*
4757				 * We can unlock the map now. The in_transition state
4758				 * guarantees the existence of the entry.
4759				 */
4760				vm_map_unlock(map);
4761				vm_map_unwire_nested(entry->object.sub_map,
4762						     sub_start, sub_end, user_wire, pmap, pmap_addr);
4763				vm_map_lock(map);
4764
4765				if (last_timestamp+1 != map->timestamp) {
4766					/*
4767					 * Find the entry again.  It could have been
4768					 * clipped or deleted after we unlocked the map.
4769					 */
4770					if (!vm_map_lookup_entry(map,
4771								 tmp_entry.vme_start,
4772								 &first_entry)) {
4773						if (!user_wire)
4774							panic("vm_map_unwire: re-lookup failed");
4775						entry = first_entry->vme_next;
4776					} else
4777						entry = first_entry;
4778				}
4779				last_timestamp = map->timestamp;
4780
4781				/*
4782				 * clear transition bit for all constituent entries
4783				 * that were in the original entry (saved in
4784				 * tmp_entry).  Also check for waiters.
4785				 */
4786				while ((entry != vm_map_to_entry(map)) &&
4787				       (entry->vme_start < tmp_entry.vme_end)) {
4788					assert(entry->in_transition);
4789					entry->in_transition = FALSE;
4790					if (entry->needs_wakeup) {
4791						entry->needs_wakeup = FALSE;
4792						need_wakeup = TRUE;
4793					}
4794					entry = entry->vme_next;
4795				}
4796				continue;
4797			} else {
4798				vm_map_unlock(map);
4799				vm_map_unwire_nested(entry->object.sub_map,
4800						     sub_start, sub_end, user_wire, map_pmap,
4801						     pmap_addr);
4802				vm_map_lock(map);
4803
4804				if (last_timestamp+1 != map->timestamp) {
4805					/*
4806					 * Find the entry again.  It could have been
4807					 * clipped or deleted after we unlocked the map.
4808					 */
4809					if (!vm_map_lookup_entry(map,
4810								 tmp_entry.vme_start,
4811								 &first_entry)) {
4812						if (!user_wire)
4813							panic("vm_map_unwire: re-lookup failed");
4814						entry = first_entry->vme_next;
4815					} else
4816						entry = first_entry;
4817				}
4818				last_timestamp = map->timestamp;
4819			}
4820		}
4821
4822
4823		if ((entry->wired_count == 0) ||
4824		    (user_wire && entry->user_wired_count == 0)) {
4825			if (!user_wire)
4826				panic("vm_map_unwire: entry is unwired");
4827
4828			entry = entry->vme_next;
4829			continue;
4830		}
4831
4832		assert(entry->wired_count > 0 &&
4833		       (!user_wire || entry->user_wired_count > 0));
4834
4835		vm_map_clip_start(map, entry, start);
4836		vm_map_clip_end(map, entry, end);
4837
4838		/*
4839		 * Check for holes
4840		 * Holes: Next entry should be contiguous unless
4841		 *	  this is the end of the region.
4842		 */
4843		if (((entry->vme_end < end) &&
4844		     ((entry->vme_next == vm_map_to_entry(map)) ||
4845		      (entry->vme_next->vme_start > entry->vme_end)))) {
4846
4847			if (!user_wire)
4848				panic("vm_map_unwire: non-contiguous region");
4849			entry = entry->vme_next;
4850			continue;
4851		}
4852
4853		subtract_wire_counts(map, entry, user_wire);
4854
4855		if (entry->wired_count != 0) {
4856			entry = entry->vme_next;
4857			continue;
4858		}
4859
4860		if(entry->zero_wired_pages) {
4861			entry->zero_wired_pages = FALSE;
4862		}
4863
4864		entry->in_transition = TRUE;
4865		tmp_entry = *entry;	/* see comment in vm_map_wire() */
4866
4867		/*
4868		 * We can unlock the map now. The in_transition state
4869		 * guarantees the existence of the entry.
4870		 */
4871		vm_map_unlock(map);
4872		if(map_pmap) {
4873			vm_fault_unwire(map,
4874					&tmp_entry, FALSE, map_pmap, pmap_addr);
4875		} else {
4876			vm_fault_unwire(map,
4877					&tmp_entry, FALSE, map->pmap,
4878					tmp_entry.vme_start);
4879		}
4880		vm_map_lock(map);
4881
4882		if (last_timestamp+1 != map->timestamp) {
4883			/*
4884			 * Find the entry again.  It could have been clipped
4885			 * or deleted after we unlocked the map.
4886			 */
4887			if (!vm_map_lookup_entry(map, tmp_entry.vme_start,
4888						 &first_entry)) {
4889				if (!user_wire)
4890					panic("vm_map_unwire: re-lookup failed");
4891				entry = first_entry->vme_next;
4892			} else
4893				entry = first_entry;
4894		}
4895		last_timestamp = map->timestamp;
4896
4897		/*
4898		 * clear transition bit for all constituent entries that
4899		 * were in the original entry (saved in tmp_entry).  Also
4900		 * check for waiters.
4901		 */
4902		while ((entry != vm_map_to_entry(map)) &&
4903		       (entry->vme_start < tmp_entry.vme_end)) {
4904			assert(entry->in_transition);
4905			entry->in_transition = FALSE;
4906			if (entry->needs_wakeup) {
4907				entry->needs_wakeup = FALSE;
4908				need_wakeup = TRUE;
4909			}
4910			entry = entry->vme_next;
4911		}
4912	}
4913
4914	/*
4915	 * We might have fragmented the address space when we wired this
4916	 * range of addresses.  Attempt to re-coalesce these VM map entries
4917	 * with their neighbors now that they're no longer wired.
4918	 * Under some circumstances, address space fragmentation can
4919	 * prevent VM object shadow chain collapsing, which can cause
4920	 * swap space leaks.
4921	 */
4922	vm_map_simplify_range(map, start, end);
4923
4924	vm_map_unlock(map);
4925	/*
4926	 * wake up anybody waiting on entries that we have unwired.
4927	 */
4928	if (need_wakeup)
4929		vm_map_entry_wakeup(map);
4930	return(KERN_SUCCESS);
4931
4932}
4933
4934kern_return_t
4935vm_map_unwire(
4936	register vm_map_t	map,
4937	register vm_map_offset_t	start,
4938	register vm_map_offset_t	end,
4939	boolean_t		user_wire)
4940{
4941	return vm_map_unwire_nested(map, start, end,
4942				    user_wire, (pmap_t)NULL, 0);
4943}
4944
4945
4946/*
4947 *	vm_map_entry_delete:	[ internal use only ]
4948 *
4949 *	Deallocate the given entry from the target map.
4950 */
4951static void
4952vm_map_entry_delete(
4953	register vm_map_t	map,
4954	register vm_map_entry_t	entry)
4955{
4956	register vm_map_offset_t	s, e;
4957	register vm_object_t	object;
4958	register vm_map_t	submap;
4959
4960	s = entry->vme_start;
4961	e = entry->vme_end;
4962	assert(page_aligned(s));
4963	assert(page_aligned(e));
4964	if (entry->map_aligned == TRUE) {
4965		assert(VM_MAP_PAGE_ALIGNED(s, VM_MAP_PAGE_MASK(map)));
4966		assert(VM_MAP_PAGE_ALIGNED(e, VM_MAP_PAGE_MASK(map)));
4967	}
4968	assert(entry->wired_count == 0);
4969	assert(entry->user_wired_count == 0);
4970	assert(!entry->permanent);
4971
4972	if (entry->is_sub_map) {
4973		object = NULL;
4974		submap = entry->object.sub_map;
4975	} else {
4976		submap = NULL;
4977		object = entry->object.vm_object;
4978	}
4979
4980	vm_map_store_entry_unlink(map, entry);
4981	map->size -= e - s;
4982
4983	vm_map_entry_dispose(map, entry);
4984
4985	vm_map_unlock(map);
4986	/*
4987	 *	Deallocate the object only after removing all
4988	 *	pmap entries pointing to its pages.
4989	 */
4990	if (submap)
4991		vm_map_deallocate(submap);
4992	else
4993		vm_object_deallocate(object);
4994
4995}
4996
4997void
4998vm_map_submap_pmap_clean(
4999	vm_map_t	map,
5000	vm_map_offset_t	start,
5001	vm_map_offset_t	end,
5002	vm_map_t	sub_map,
5003	vm_map_offset_t	offset)
5004{
5005	vm_map_offset_t	submap_start;
5006	vm_map_offset_t	submap_end;
5007	vm_map_size_t	remove_size;
5008	vm_map_entry_t	entry;
5009
5010	submap_end = offset + (end - start);
5011	submap_start = offset;
5012
5013	vm_map_lock_read(sub_map);
5014	if(vm_map_lookup_entry(sub_map, offset, &entry)) {
5015
5016		remove_size = (entry->vme_end - entry->vme_start);
5017		if(offset > entry->vme_start)
5018			remove_size -= offset - entry->vme_start;
5019
5020
5021		if(submap_end < entry->vme_end) {
5022			remove_size -=
5023				entry->vme_end - submap_end;
5024		}
5025		if(entry->is_sub_map) {
5026			vm_map_submap_pmap_clean(
5027				sub_map,
5028				start,
5029				start + remove_size,
5030				entry->object.sub_map,
5031				entry->offset);
5032		} else {
5033
5034			if((map->mapped_in_other_pmaps) && (map->ref_count)
5035			   && (entry->object.vm_object != NULL)) {
5036				vm_object_pmap_protect(
5037					entry->object.vm_object,
5038					entry->offset+(offset-entry->vme_start),
5039					remove_size,
5040					PMAP_NULL,
5041					entry->vme_start,
5042					VM_PROT_NONE);
5043			} else {
5044				pmap_remove(map->pmap,
5045					    (addr64_t)start,
5046					    (addr64_t)(start + remove_size));
5047			}
5048		}
5049	}
5050
5051	entry = entry->vme_next;
5052
5053	while((entry != vm_map_to_entry(sub_map))
5054	      && (entry->vme_start < submap_end)) {
5055		remove_size = (entry->vme_end - entry->vme_start);
5056		if(submap_end < entry->vme_end) {
5057			remove_size -= entry->vme_end - submap_end;
5058		}
5059		if(entry->is_sub_map) {
5060			vm_map_submap_pmap_clean(
5061				sub_map,
5062				(start + entry->vme_start) - offset,
5063				((start + entry->vme_start) - offset) + remove_size,
5064				entry->object.sub_map,
5065				entry->offset);
5066		} else {
5067			if((map->mapped_in_other_pmaps) && (map->ref_count)
5068			   && (entry->object.vm_object != NULL)) {
5069				vm_object_pmap_protect(
5070					entry->object.vm_object,
5071					entry->offset,
5072					remove_size,
5073					PMAP_NULL,
5074					entry->vme_start,
5075					VM_PROT_NONE);
5076			} else {
5077				pmap_remove(map->pmap,
5078					    (addr64_t)((start + entry->vme_start)
5079						       - offset),
5080					    (addr64_t)(((start + entry->vme_start)
5081							- offset) + remove_size));
5082			}
5083		}
5084		entry = entry->vme_next;
5085	}
5086	vm_map_unlock_read(sub_map);
5087	return;
5088}
5089
5090/*
5091 *	vm_map_delete:	[ internal use only ]
5092 *
5093 *	Deallocates the given address range from the target map.
5094 *	Removes all user wirings. Unwires one kernel wiring if
5095 *	VM_MAP_REMOVE_KUNWIRE is set.  Waits for kernel wirings to go
5096 *	away if VM_MAP_REMOVE_WAIT_FOR_KWIRE is set.  Sleeps
5097 *	interruptibly if VM_MAP_REMOVE_INTERRUPTIBLE is set.
5098 *
5099 *	This routine is called with map locked and leaves map locked.
5100 */
5101static kern_return_t
5102vm_map_delete(
5103	vm_map_t		map,
5104	vm_map_offset_t		start,
5105	vm_map_offset_t		end,
5106	int			flags,
5107	vm_map_t		zap_map)
5108{
5109	vm_map_entry_t		entry, next;
5110	struct	 vm_map_entry	*first_entry, tmp_entry;
5111	register vm_map_offset_t s;
5112	register vm_object_t	object;
5113	boolean_t		need_wakeup;
5114	unsigned int		last_timestamp = ~0; /* unlikely value */
5115	int			interruptible;
5116
5117	interruptible = (flags & VM_MAP_REMOVE_INTERRUPTIBLE) ?
5118		THREAD_ABORTSAFE : THREAD_UNINT;
5119
5120	/*
5121	 * All our DMA I/O operations in IOKit are currently done by
5122	 * wiring through the map entries of the task requesting the I/O.
5123	 * Because of this, we must always wait for kernel wirings
5124	 * to go away on the entries before deleting them.
5125	 *
5126	 * Any caller who wants to actually remove a kernel wiring
5127	 * should explicitly set the VM_MAP_REMOVE_KUNWIRE flag to
5128	 * properly remove one wiring instead of blasting through
5129	 * them all.
5130	 */
5131	flags |= VM_MAP_REMOVE_WAIT_FOR_KWIRE;
5132
5133	while(1) {
5134		/*
5135		 *	Find the start of the region, and clip it
5136		 */
5137		if (vm_map_lookup_entry(map, start, &first_entry)) {
5138			entry = first_entry;
5139			if (entry->superpage_size && (start & ~SUPERPAGE_MASK)) { /* extend request to whole entry */
5140				start = SUPERPAGE_ROUND_DOWN(start);
5141				continue;
5142			}
5143			if (start == entry->vme_start) {
5144				/*
5145				 * No need to clip.  We don't want to cause
5146				 * any unnecessary unnesting in this case...
5147				 */
5148			} else {
5149				vm_map_clip_start(map, entry, start);
5150			}
5151
5152			/*
5153			 *	Fix the lookup hint now, rather than each
5154			 *	time through the loop.
5155			 */
5156			SAVE_HINT_MAP_WRITE(map, entry->vme_prev);
5157		} else {
5158			entry = first_entry->vme_next;
5159		}
5160		break;
5161	}
5162	if (entry->superpage_size)
5163		end = SUPERPAGE_ROUND_UP(end);
5164
5165	need_wakeup = FALSE;
5166	/*
5167	 *	Step through all entries in this region
5168	 */
5169	s = entry->vme_start;
5170	while ((entry != vm_map_to_entry(map)) && (s < end)) {
5171		/*
5172		 * At this point, we have deleted all the memory entries
5173		 * between "start" and "s".  We still need to delete
5174		 * all memory entries between "s" and "end".
5175		 * While we were blocked and the map was unlocked, some
5176		 * new memory entries could have been re-allocated between
5177		 * "start" and "s" and we don't want to mess with those.
5178		 * Some of those entries could even have been re-assembled
5179		 * with an entry after "s" (in vm_map_simplify_entry()), so
5180		 * we may have to vm_map_clip_start() again.
5181		 */
5182
5183		if (entry->vme_start >= s) {
5184			/*
5185			 * This entry starts on or after "s"
5186			 * so no need to clip its start.
5187			 */
5188		} else {
5189			/*
5190			 * This entry has been re-assembled by a
5191			 * vm_map_simplify_entry().  We need to
5192			 * re-clip its start.
5193			 */
5194			vm_map_clip_start(map, entry, s);
5195		}
5196		if (entry->vme_end <= end) {
5197			/*
5198			 * This entry is going away completely, so no need
5199			 * to clip and possibly cause an unnecessary unnesting.
5200			 */
5201		} else {
5202			vm_map_clip_end(map, entry, end);
5203		}
5204
5205		if (entry->permanent) {
5206			panic("attempt to remove permanent VM map entry "
5207			      "%p [0x%llx:0x%llx]\n",
5208			      entry, (uint64_t) s, (uint64_t) end);
5209		}
5210
5211
5212		if (entry->in_transition) {
5213			wait_result_t wait_result;
5214
5215			/*
5216			 * Another thread is wiring/unwiring this entry.
5217			 * Let the other thread know we are waiting.
5218			 */
5219			assert(s == entry->vme_start);
5220			entry->needs_wakeup = TRUE;
5221
5222			/*
5223			 * wake up anybody waiting on entries that we have
5224			 * already unwired/deleted.
5225			 */
5226			if (need_wakeup) {
5227				vm_map_entry_wakeup(map);
5228				need_wakeup = FALSE;
5229			}
5230
5231			wait_result = vm_map_entry_wait(map, interruptible);
5232
5233			if (interruptible &&
5234			    wait_result == THREAD_INTERRUPTED) {
5235				/*
5236				 * We do not clear the needs_wakeup flag,
5237				 * since we cannot tell if we were the only one.
5238				 */
5239				vm_map_unlock(map);
5240				return KERN_ABORTED;
5241			}
5242
5243			/*
5244			 * The entry could have been clipped or it
5245			 * may not exist anymore.  Look it up again.
5246			 */
5247			if (!vm_map_lookup_entry(map, s, &first_entry)) {
5248				assert((map != kernel_map) &&
5249				       (!entry->is_sub_map));
5250				/*
5251				 * User: use the next entry
5252				 */
5253				entry = first_entry->vme_next;
5254				s = entry->vme_start;
5255			} else {
5256				entry = first_entry;
5257				SAVE_HINT_MAP_WRITE(map, entry->vme_prev);
5258			}
5259			last_timestamp = map->timestamp;
5260			continue;
5261		} /* end in_transition */
5262
5263		if (entry->wired_count) {
5264			boolean_t	user_wire;
5265
5266			user_wire = entry->user_wired_count > 0;
5267
5268			/*
5269			 * 	Remove a kernel wiring if requested
5270			 */
5271			if (flags & VM_MAP_REMOVE_KUNWIRE) {
5272				entry->wired_count--;
5273			}
5274
5275			/*
5276			 *	Remove all user wirings for proper accounting
5277			 */
5278			if (entry->user_wired_count > 0) {
5279				while (entry->user_wired_count)
5280					subtract_wire_counts(map, entry, user_wire);
5281			}
5282
5283			if (entry->wired_count != 0) {
5284				assert(map != kernel_map);
5285				/*
5286				 * Cannot continue.  The typical case is when
5287				 * a user thread has physical I/O pending
5288				 * on this page.  Either wait for the
5289				 * kernel wiring to go away or return an
5290				 * error.
5291				 */
5292				if (flags & VM_MAP_REMOVE_WAIT_FOR_KWIRE) {
5293					wait_result_t wait_result;
5294
5295					assert(s == entry->vme_start);
5296					entry->needs_wakeup = TRUE;
5297					wait_result = vm_map_entry_wait(map,
5298									interruptible);
5299
5300					if (interruptible &&
5301					    wait_result == THREAD_INTERRUPTED) {
5302						/*
5303						 * We do not clear the
5304						 * needs_wakeup flag, since we
5305						 * cannot tell if we were the
5306						 * only one.
5307						 */
5308						vm_map_unlock(map);
5309						return KERN_ABORTED;
5310					}
5311
5312					/*
5313					 * The entry could have been clipped or
5314					 * it may not exist anymore.  Look it
5315					 * up again.
5316					 */
5317					if (!vm_map_lookup_entry(map, s,
5318								 &first_entry)) {
5319						assert(map != kernel_map);
5320						/*
5321						 * User: use the next entry
5322						 */
5323						entry = first_entry->vme_next;
5324						s = entry->vme_start;
5325					} else {
5326						entry = first_entry;
5327						SAVE_HINT_MAP_WRITE(map, entry->vme_prev);
5328					}
5329					last_timestamp = map->timestamp;
5330					continue;
5331				}
5332				else {
5333					return KERN_FAILURE;
5334				}
5335			}
5336
5337			entry->in_transition = TRUE;
5338			/*
5339			 * copy current entry.  see comment in vm_map_wire()
5340			 */
5341			tmp_entry = *entry;
5342			assert(s == entry->vme_start);
5343
5344			/*
5345			 * We can unlock the map now. The in_transition
5346			 * state guarantees the existence of the entry.
5347			 */
5348			vm_map_unlock(map);
5349
5350			if (tmp_entry.is_sub_map) {
5351				vm_map_t sub_map;
5352				vm_map_offset_t sub_start, sub_end;
5353				pmap_t pmap;
5354				vm_map_offset_t pmap_addr;
5355
5356
5357				sub_map = tmp_entry.object.sub_map;
5358				sub_start = tmp_entry.offset;
5359				sub_end = sub_start + (tmp_entry.vme_end -
5360						       tmp_entry.vme_start);
5361				if (tmp_entry.use_pmap) {
5362					pmap = sub_map->pmap;
5363					pmap_addr = tmp_entry.vme_start;
5364				} else {
5365					pmap = map->pmap;
5366					pmap_addr = tmp_entry.vme_start;
5367				}
5368				(void) vm_map_unwire_nested(sub_map,
5369							    sub_start, sub_end,
5370							    user_wire,
5371							    pmap, pmap_addr);
5372			} else {
5373
5374				if (tmp_entry.object.vm_object == kernel_object) {
5375					pmap_protect_options(
5376						map->pmap,
5377						tmp_entry.vme_start,
5378						tmp_entry.vme_end,
5379						VM_PROT_NONE,
5380						PMAP_OPTIONS_REMOVE,
5381						NULL);
5382				}
5383				vm_fault_unwire(map, &tmp_entry,
5384						tmp_entry.object.vm_object == kernel_object,
5385						map->pmap, tmp_entry.vme_start);
5386			}
5387
5388			vm_map_lock(map);
5389
5390			if (last_timestamp+1 != map->timestamp) {
5391				/*
5392				 * Find the entry again.  It could have
5393				 * been clipped after we unlocked the map.
5394				 */
5395				if (!vm_map_lookup_entry(map, s, &first_entry)){
5396					assert((map != kernel_map) &&
5397					       (!entry->is_sub_map));
5398					first_entry = first_entry->vme_next;
5399					s = first_entry->vme_start;
5400				} else {
5401					SAVE_HINT_MAP_WRITE(map, entry->vme_prev);
5402				}
5403			} else {
5404				SAVE_HINT_MAP_WRITE(map, entry->vme_prev);
5405				first_entry = entry;
5406			}
5407
5408			last_timestamp = map->timestamp;
5409
5410			entry = first_entry;
5411			while ((entry != vm_map_to_entry(map)) &&
5412			       (entry->vme_start < tmp_entry.vme_end)) {
5413				assert(entry->in_transition);
5414				entry->in_transition = FALSE;
5415				if (entry->needs_wakeup) {
5416					entry->needs_wakeup = FALSE;
5417					need_wakeup = TRUE;
5418				}
5419				entry = entry->vme_next;
5420			}
5421			/*
5422			 * We have unwired the entry(s).  Go back and
5423			 * delete them.
5424			 */
5425			entry = first_entry;
5426			continue;
5427		}
5428
5429		/* entry is unwired */
5430		assert(entry->wired_count == 0);
5431		assert(entry->user_wired_count == 0);
5432
5433		assert(s == entry->vme_start);
5434
5435		if (flags & VM_MAP_REMOVE_NO_PMAP_CLEANUP) {
5436			/*
5437			 * XXX with the VM_MAP_REMOVE_SAVE_ENTRIES flag to
5438			 * vm_map_delete(), some map entries might have been
5439			 * transferred to a "zap_map", which doesn't have a
5440			 * pmap.  The original pmap has already been flushed
5441			 * in the vm_map_delete() call targeting the original
5442			 * map, but when we get to destroying the "zap_map",
5443			 * we don't have any pmap to flush, so let's just skip
5444			 * all this.
5445			 */
5446		} else if (entry->is_sub_map) {
5447			if (entry->use_pmap) {
5448#ifndef NO_NESTED_PMAP
5449				pmap_unnest(map->pmap,
5450					    (addr64_t)entry->vme_start,
5451					    entry->vme_end - entry->vme_start);
5452#endif	/* NO_NESTED_PMAP */
5453				if ((map->mapped_in_other_pmaps) && (map->ref_count)) {
5454					/* clean up parent map/maps */
5455					vm_map_submap_pmap_clean(
5456						map, entry->vme_start,
5457						entry->vme_end,
5458						entry->object.sub_map,
5459						entry->offset);
5460				}
5461			} else {
5462				vm_map_submap_pmap_clean(
5463					map, entry->vme_start, entry->vme_end,
5464					entry->object.sub_map,
5465					entry->offset);
5466			}
5467		} else if (entry->object.vm_object != kernel_object &&
5468			   entry->object.vm_object != compressor_object) {
5469			object = entry->object.vm_object;
5470			if ((map->mapped_in_other_pmaps) && (map->ref_count)) {
5471				vm_object_pmap_protect_options(
5472					object, entry->offset,
5473					entry->vme_end - entry->vme_start,
5474					PMAP_NULL,
5475					entry->vme_start,
5476					VM_PROT_NONE,
5477					PMAP_OPTIONS_REMOVE);
5478			} else if ((entry->object.vm_object !=
5479				    VM_OBJECT_NULL) ||
5480				   (map->pmap == kernel_pmap)) {
5481				/* Remove the translations associated
5482				 * with this range, unless the entry
5483				 * has no object and this is neither
5484				 * the kernel map nor a descendant,
5485				 * since the platform could potentially
5486				 * create "backdoor" mappings invisible
5487				 * to the VM in the kernel's pmap.
5488				 * Objectless, non-kernel ranges are
5489				 * expected not to have such VM-invisible
5490				 * translations.
5491				 */
5492				pmap_remove_options(map->pmap,
5493						    (addr64_t)entry->vme_start,
5494						    (addr64_t)entry->vme_end,
5495						    PMAP_OPTIONS_REMOVE);
5496			}
5497		}
5498
5499		/*
5500		 * All pmap mappings for this map entry must have been
5501		 * cleared by now.
5502		 */
5503		assert(vm_map_pmap_is_empty(map,
5504					    entry->vme_start,
5505					    entry->vme_end));
5506
5507		next = entry->vme_next;
5508		s = next->vme_start;
5509		last_timestamp = map->timestamp;
5510
5511		if ((flags & VM_MAP_REMOVE_SAVE_ENTRIES) &&
5512		    zap_map != VM_MAP_NULL) {
5513			vm_map_size_t entry_size;
5514			/*
5515			 * The caller wants to save the affected VM map entries
5516			 * into the "zap_map".  The caller will take care of
5517			 * these entries.
5518			 */
5519			/* unlink the entry from "map" ... */
5520			vm_map_store_entry_unlink(map, entry);
5521			/* ... and add it to the end of the "zap_map" */
5522			vm_map_store_entry_link(zap_map,
5523					  vm_map_last_entry(zap_map),
5524					  entry);
5525			entry_size = entry->vme_end - entry->vme_start;
5526			map->size -= entry_size;
5527			zap_map->size += entry_size;
5528			/* we didn't unlock the map, so no timestamp increase */
5529			last_timestamp--;
5530		} else {
5531			vm_map_entry_delete(map, entry);
5532			/* vm_map_entry_delete unlocks the map */
5533			vm_map_lock(map);
5534		}
5535
5536		entry = next;
5537
5538		if(entry == vm_map_to_entry(map)) {
5539			break;
5540		}
5541		if (last_timestamp+1 != map->timestamp) {
5542			/*
5543			 * We are responsible for deleting everything
5544			 * from the given space.  If someone has interfered,
5545			 * we pick up where we left off.  Back-fills should
5546			 * be all right for anyone except vm_map_delete, and
5547			 * we have to assume that the task has been fully
5548			 * disabled before we get here.
5549			 */
5550        		if (!vm_map_lookup_entry(map, s, &entry)){
5551	               		entry = entry->vme_next;
5552				s = entry->vme_start;
5553        		} else {
5554				SAVE_HINT_MAP_WRITE(map, entry->vme_prev);
5555       		 	}
5556			/*
5557			 * Others can not only allocate behind us; we can
5558			 * also see entries coalesce while we don't hold the map lock.
5559			 */
5560			if(entry == vm_map_to_entry(map)) {
5561				break;
5562			}
5563		}
5564		last_timestamp = map->timestamp;
5565	}
5566
5567	if (map->wait_for_space)
5568		thread_wakeup((event_t) map);
5569	/*
5570	 * wake up anybody waiting on entries that we have already deleted.
5571	 */
5572	if (need_wakeup)
5573		vm_map_entry_wakeup(map);
5574
5575	return KERN_SUCCESS;
5576}
5577
5578/*
5579 *	vm_map_remove:
5580 *
5581 *	Remove the given address range from the target map.
5582 *	This is the exported form of vm_map_delete.
5583 */
5584kern_return_t
5585vm_map_remove(
5586	register vm_map_t	map,
5587	register vm_map_offset_t	start,
5588	register vm_map_offset_t	end,
5589	register boolean_t	flags)
5590{
5591	register kern_return_t	result;
5592
5593	vm_map_lock(map);
5594	VM_MAP_RANGE_CHECK(map, start, end);
5595	/*
5596	 * For the zone_map, the kernel controls the allocation/freeing of memory.
5597	 * Any free to the zone_map should be within the bounds of the map and
5598	 * should free up memory. If the VM_MAP_RANGE_CHECK() silently converts a
5599	 * free to the zone_map into a no-op, there is a problem and we should
5600	 * panic.
5601	 */
5602	if ((map == zone_map) && (start == end))
5603		panic("Nothing being freed to the zone_map. start = end = %p\n", (void *)start);
5604	result = vm_map_delete(map, start, end, flags, VM_MAP_NULL);
5605	vm_map_unlock(map);
5606
5607	return(result);
5608}
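
/*
 * Illustrative sketch (not part of the original source): removing a
 * previously mapped range from a task's map.  VM_MAP_NO_FLAGS is
 * assumed to be the "no special handling" flag value; a caller that
 * holds a kernel wiring on the range would pass VM_MAP_REMOVE_KUNWIRE
 * instead, so that one kernel wiring is dropped as part of the removal.
 * The map, start and size are hypothetical.
 */
#if 0	/* example only -- not compiled */
static kern_return_t
example_remove_range(
	vm_map_t	map,
	vm_map_offset_t	start,
	vm_map_size_t	size)
{
	return vm_map_remove(map, start, start + size, VM_MAP_NO_FLAGS);
}
#endif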
5609
5610
5611/*
5612 *	Routine:	vm_map_copy_discard
5613 *
5614 *	Description:
5615 *		Dispose of a map copy object (returned by
5616 *		vm_map_copyin).
5617 */
5618void
5619vm_map_copy_discard(
5620	vm_map_copy_t	copy)
5621{
5622	if (copy == VM_MAP_COPY_NULL)
5623		return;
5624
5625	switch (copy->type) {
5626	case VM_MAP_COPY_ENTRY_LIST:
5627		while (vm_map_copy_first_entry(copy) !=
5628		       vm_map_copy_to_entry(copy)) {
5629			vm_map_entry_t	entry = vm_map_copy_first_entry(copy);
5630
5631			vm_map_copy_entry_unlink(copy, entry);
5632			if (entry->is_sub_map) {
5633				vm_map_deallocate(entry->object.sub_map);
5634			} else {
5635				vm_object_deallocate(entry->object.vm_object);
5636			}
5637			vm_map_copy_entry_dispose(copy, entry);
5638		}
5639		break;
5640        case VM_MAP_COPY_OBJECT:
5641		vm_object_deallocate(copy->cpy_object);
5642		break;
5643	case VM_MAP_COPY_KERNEL_BUFFER:
5644
5645		/*
5646		 * The vm_map_copy_t and possibly the data buffer were
5647		 * allocated by a single call to kalloc(), i.e. the
5648		 * vm_map_copy_t was not allocated out of the zone.
5649		 */
5650		kfree(copy, copy->cpy_kalloc_size);
5651		return;
5652	}
5653	zfree(vm_map_copy_zone, copy);
5654}
5655
5656/*
5657 *	Routine:	vm_map_copy_copy
5658 *
5659 *	Description:
5660 *			Move the information in a map copy object to
5661 *			a new map copy object, leaving the old one
5662 *			empty.
5663 *
5664 *			This is used by kernel routines that need
5665 *			to look at out-of-line data (in copyin form)
5666 *			before deciding whether to return SUCCESS.
5667 *			If the routine returns FAILURE, the original
5668 *			copy object will be deallocated; therefore,
5669 *			these routines must make a copy of the copy
5670 *			object and leave the original empty so that
5671 *			deallocation will not fail.
5672 */
5673vm_map_copy_t
5674vm_map_copy_copy(
5675	vm_map_copy_t	copy)
5676{
5677	vm_map_copy_t	new_copy;
5678
5679	if (copy == VM_MAP_COPY_NULL)
5680		return VM_MAP_COPY_NULL;
5681
5682	/*
5683	 * Allocate a new copy object, and copy the information
5684	 * from the old one into it.
5685	 */
5686
5687	new_copy = (vm_map_copy_t) zalloc(vm_map_copy_zone);
5688	*new_copy = *copy;
5689
5690	if (copy->type == VM_MAP_COPY_ENTRY_LIST) {
5691		/*
5692		 * The links in the entry chain must be
5693		 * changed to point to the new copy object.
5694		 */
5695		vm_map_copy_first_entry(copy)->vme_prev
5696			= vm_map_copy_to_entry(new_copy);
5697		vm_map_copy_last_entry(copy)->vme_next
5698			= vm_map_copy_to_entry(new_copy);
5699	}
5700
5701	/*
5702	 * Change the old copy object into one that contains
5703	 * nothing to be deallocated.
5704	 */
5705	copy->type = VM_MAP_COPY_OBJECT;
5706	copy->cpy_object = VM_OBJECT_NULL;
5707
5708	/*
5709	 * Return the new object.
5710	 */
5711	return new_copy;
5712}
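
/*
 *	Illustrative sketch of the pattern described above (hypothetical
 *	kernel routine, not part of this file): a routine handed a copy
 *	object that it must hold on to before deciding whether to succeed
 *	first transfers the contents into its own copy object, so that the
 *	caller's deallocation of the now-empty original on failure cannot
 *	destroy the data:
 *
 *		vm_map_copy_t my_copy;
 *
 *		my_copy = vm_map_copy_copy(caller_copy);
 *		queue_for_later_use(my_copy);	-- hypothetical helper
 *		if (some_later_check_fails())	-- hypothetical helper
 *			return KERN_FAILURE;	-- caller's discard of the
 *						   now-empty original is harmless
 */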
5713
5714static kern_return_t
5715vm_map_overwrite_submap_recurse(
5716	vm_map_t	dst_map,
5717	vm_map_offset_t	dst_addr,
5718	vm_map_size_t	dst_size)
5719{
5720	vm_map_offset_t	dst_end;
5721	vm_map_entry_t	tmp_entry;
5722	vm_map_entry_t	entry;
5723	kern_return_t	result;
5724	boolean_t	encountered_sub_map = FALSE;
5725
5726
5727
5728	/*
5729	 *	Verify that the destination is all writeable
5730	 *	initially.  We have to trunc the destination
5731	 *	address and round the copy size or we'll end up
5732	 *	splitting entries in strange ways.
5733	 */
5734
5735	dst_end = vm_map_round_page(dst_addr + dst_size,
5736				    VM_MAP_PAGE_MASK(dst_map));
5737	vm_map_lock(dst_map);
5738
5739start_pass_1:
5740	if (!vm_map_lookup_entry(dst_map, dst_addr, &tmp_entry)) {
5741		vm_map_unlock(dst_map);
5742		return(KERN_INVALID_ADDRESS);
5743	}
5744
5745	vm_map_clip_start(dst_map,
5746			  tmp_entry,
5747			  vm_map_trunc_page(dst_addr,
5748					    VM_MAP_PAGE_MASK(dst_map)));
5749	assert(!tmp_entry->use_pmap); /* clipping did unnest if needed */
5750
5751	for (entry = tmp_entry;;) {
5752		vm_map_entry_t	next;
5753
5754		next = entry->vme_next;
5755		while(entry->is_sub_map) {
5756			vm_map_offset_t	sub_start;
5757			vm_map_offset_t	sub_end;
5758			vm_map_offset_t	local_end;
5759
5760			if (entry->in_transition) {
5761				/*
5762				 * Say that we are waiting, and wait for entry.
5763				 */
5764                        	entry->needs_wakeup = TRUE;
5765                        	vm_map_entry_wait(dst_map, THREAD_UNINT);
5766
5767				goto start_pass_1;
5768			}
5769
5770			encountered_sub_map = TRUE;
5771			sub_start = entry->offset;
5772
5773			if(entry->vme_end < dst_end)
5774				sub_end = entry->vme_end;
5775			else
5776				sub_end = dst_end;
5777			sub_end -= entry->vme_start;
5778			sub_end += entry->offset;
5779			local_end = entry->vme_end;
5780			vm_map_unlock(dst_map);
5781
5782			result = vm_map_overwrite_submap_recurse(
5783				entry->object.sub_map,
5784				sub_start,
5785				sub_end - sub_start);
5786
5787			if(result != KERN_SUCCESS)
5788				return result;
5789			if (dst_end <= entry->vme_end)
5790				return KERN_SUCCESS;
5791			vm_map_lock(dst_map);
5792			if(!vm_map_lookup_entry(dst_map, local_end,
5793						&tmp_entry)) {
5794				vm_map_unlock(dst_map);
5795				return(KERN_INVALID_ADDRESS);
5796			}
5797			entry = tmp_entry;
5798			next = entry->vme_next;
5799		}
5800
5801		if ( ! (entry->protection & VM_PROT_WRITE)) {
5802			vm_map_unlock(dst_map);
5803			return(KERN_PROTECTION_FAILURE);
5804		}
5805
5806		/*
5807		 *	If the entry is in transition, we must wait
5808		 *	for it to exit that state.  Anything could happen
5809		 *	when we unlock the map, so start over.
5810		 */
5811                if (entry->in_transition) {
5812
5813                        /*
5814                         * Say that we are waiting, and wait for entry.
5815                         */
5816                        entry->needs_wakeup = TRUE;
5817                        vm_map_entry_wait(dst_map, THREAD_UNINT);
5818
5819			goto start_pass_1;
5820		}
5821
5822/*
5823 *		our range is contained completely within this map entry
5824 */
5825		if (dst_end <= entry->vme_end) {
5826			vm_map_unlock(dst_map);
5827			return KERN_SUCCESS;
5828		}
5829/*
5830 *		check that range specified is contiguous region
5831 */
5832		if ((next == vm_map_to_entry(dst_map)) ||
5833		    (next->vme_start != entry->vme_end)) {
5834			vm_map_unlock(dst_map);
5835			return(KERN_INVALID_ADDRESS);
5836		}
5837
5838		/*
5839		 *	Check for permanent objects in the destination.
5840		 */
5841		if ((entry->object.vm_object != VM_OBJECT_NULL) &&
5842		    ((!entry->object.vm_object->internal) ||
5843		     (entry->object.vm_object->true_share))) {
5844			if(encountered_sub_map) {
5845				vm_map_unlock(dst_map);
5846				return(KERN_FAILURE);
5847			}
5848		}
5849
5850
5851		entry = next;
5852	}/* for */
5853	vm_map_unlock(dst_map);
5854	return(KERN_SUCCESS);
5855}
5856
5857/*
5858 *	Routine:	vm_map_copy_overwrite
5859 *
5860 *	Description:
5861 *		Copy the memory described by the map copy
5862 *		object (copy; returned by vm_map_copyin) onto
5863 *		the specified destination region (dst_map, dst_addr).
5864 *		The destination must be writeable.
5865 *
5866 *		Unlike vm_map_copyout, this routine actually
5867 *		writes over previously-mapped memory.  If the
5868 *		previous mapping was to a permanent (user-supplied)
5869 *		memory object, it is preserved.
5870 *
5871 *		The attributes (protection and inheritance) of the
5872 *		destination region are preserved.
5873 *
5874 *		If successful, consumes the copy object.
5875 *		Otherwise, the caller is responsible for it.
5876 *
5877 *	Implementation notes:
5878 *		To overwrite aligned temporary virtual memory, it is
5879 *		sufficient to remove the previous mapping and insert
5880 *		the new copy.  This replacement is done either on
5881 *		the whole region (if no permanent virtual memory
5882 *		objects are embedded in the destination region) or
5883 *		in individual map entries.
5884 *
 *		To overwrite permanent virtual memory, it is necessary
 *		to copy each page, as the external memory management
 *		interface currently does not provide any optimizations.
 *
 *		Unaligned memory also has to be copied.  It is possible
 *		to use 'vm_trickery' to copy the aligned data.  This is
 *		not done, but would not be hard to implement.
5892 *
5893 *		Once a page of permanent memory has been overwritten,
5894 *		it is impossible to interrupt this function; otherwise,
5895 *		the call would be neither atomic nor location-independent.
5896 *		The kernel-state portion of a user thread must be
5897 *		interruptible.
5898 *
5899 *		It may be expensive to forward all requests that might
5900 *		overwrite permanent memory (vm_write, vm_copy) to
5901 *		uninterruptible kernel threads.  This routine may be
5902 *		called by interruptible threads; however, success is
5903 *		not guaranteed -- if the request cannot be performed
5904 *		atomically and interruptibly, an error indication is
5905 *		returned.
5906 */
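
/*
 *	Implementation outline (descriptive note): the first pass below walks
 *	the destination to verify that it is writeable and contiguous, waits
 *	out any in-transition entries, recurses into non-COW submaps via
 *	vm_map_overwrite_submap_recurse(), and notes whether any permanent
 *	objects are present.  The second pass then overwrites the data,
 *	chunk by chunk when submaps force the "copy" object to be split:
 *	page-aligned chunks go through vm_map_copy_overwrite_aligned(),
 *	everything else through vm_map_copy_overwrite_unaligned().
 */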
5907
5908static kern_return_t
5909vm_map_copy_overwrite_nested(
5910	vm_map_t		dst_map,
5911	vm_map_address_t	dst_addr,
5912	vm_map_copy_t		copy,
5913	boolean_t		interruptible,
5914	pmap_t			pmap,
5915	boolean_t		discard_on_success)
5916{
5917	vm_map_offset_t		dst_end;
5918	vm_map_entry_t		tmp_entry;
5919	vm_map_entry_t		entry;
5920	kern_return_t		kr;
5921	boolean_t		aligned = TRUE;
5922	boolean_t		contains_permanent_objects = FALSE;
5923	boolean_t		encountered_sub_map = FALSE;
5924	vm_map_offset_t		base_addr;
5925	vm_map_size_t		copy_size;
5926	vm_map_size_t		total_size;
5927
5928
5929	/*
5930	 *	Check for null copy object.
5931	 */
5932
5933	if (copy == VM_MAP_COPY_NULL)
5934		return(KERN_SUCCESS);
5935
5936	/*
5937	 *	Check for special kernel buffer allocated
5938	 *	by new_ipc_kmsg_copyin.
5939	 */
5940
5941	if (copy->type == VM_MAP_COPY_KERNEL_BUFFER) {
5942		return(vm_map_copyout_kernel_buffer(
5943			       dst_map, &dst_addr,
5944			       copy, TRUE, discard_on_success));
5945	}
5946
5947	/*
5948	 *      Only works for entry lists at the moment.  Will
5949	 *	support page lists later.
5950	 */
5951
5952	assert(copy->type == VM_MAP_COPY_ENTRY_LIST);
5953
5954	if (copy->size == 0) {
5955		if (discard_on_success)
5956			vm_map_copy_discard(copy);
5957		return(KERN_SUCCESS);
5958	}
5959
5960	/*
5961	 *	Verify that the destination is all writeable
5962	 *	initially.  We have to trunc the destination
5963	 *	address and round the copy size or we'll end up
5964	 *	splitting entries in strange ways.
5965	 */
5966
5967	if (!VM_MAP_PAGE_ALIGNED(copy->size,
5968				 VM_MAP_PAGE_MASK(dst_map)) ||
5969	    !VM_MAP_PAGE_ALIGNED(copy->offset,
5970				 VM_MAP_PAGE_MASK(dst_map)) ||
5971	    !VM_MAP_PAGE_ALIGNED(dst_addr,
5972				 VM_MAP_PAGE_MASK(dst_map)) ||
5973	    dst_map->hdr.page_shift != copy->cpy_hdr.page_shift)
5974	{
5975		aligned = FALSE;
5976		dst_end = vm_map_round_page(dst_addr + copy->size,
5977					    VM_MAP_PAGE_MASK(dst_map));
5978	} else {
5979		dst_end = dst_addr + copy->size;
5980	}
5981
5982	vm_map_lock(dst_map);
5983
5984	/* LP64todo - remove this check when vm_map_commpage64()
5985	 * no longer has to stuff in a map_entry for the commpage
5986	 * above the map's max_offset.
5987	 */
5988	if (dst_addr >= dst_map->max_offset) {
5989		vm_map_unlock(dst_map);
5990		return(KERN_INVALID_ADDRESS);
5991	}
5992
5993start_pass_1:
5994	if (!vm_map_lookup_entry(dst_map, dst_addr, &tmp_entry)) {
5995		vm_map_unlock(dst_map);
5996		return(KERN_INVALID_ADDRESS);
5997	}
5998	vm_map_clip_start(dst_map,
5999			  tmp_entry,
6000			  vm_map_trunc_page(dst_addr,
6001					    VM_MAP_PAGE_MASK(dst_map)));
6002	for (entry = tmp_entry;;) {
6003		vm_map_entry_t	next = entry->vme_next;
6004
6005		while(entry->is_sub_map) {
6006			vm_map_offset_t	sub_start;
6007			vm_map_offset_t	sub_end;
6008			vm_map_offset_t	local_end;
6009
6010                	if (entry->in_transition) {
6011
6012				/*
6013				 * Say that we are waiting, and wait for entry.
6014				 */
6015                        	entry->needs_wakeup = TRUE;
6016                        	vm_map_entry_wait(dst_map, THREAD_UNINT);
6017
6018				goto start_pass_1;
6019			}
6020
6021			local_end = entry->vme_end;
			if (!(entry->needs_copy)) {
				/*
				 * if needs_copy is set we are a COW submap;
				 * in that case we just replace, so there is
				 * no need for the following check.
				 */
6027				encountered_sub_map = TRUE;
6028				sub_start = entry->offset;
6029
6030				if(entry->vme_end < dst_end)
6031					sub_end = entry->vme_end;
6032				else
6033					sub_end = dst_end;
6034				sub_end -= entry->vme_start;
6035				sub_end += entry->offset;
6036				vm_map_unlock(dst_map);
6037
6038				kr = vm_map_overwrite_submap_recurse(
6039					entry->object.sub_map,
6040					sub_start,
6041					sub_end - sub_start);
6042				if(kr != KERN_SUCCESS)
6043					return kr;
6044				vm_map_lock(dst_map);
6045			}
6046
6047			if (dst_end <= entry->vme_end)
6048				goto start_overwrite;
6049			if(!vm_map_lookup_entry(dst_map, local_end,
6050						&entry)) {
6051				vm_map_unlock(dst_map);
6052				return(KERN_INVALID_ADDRESS);
6053			}
6054			next = entry->vme_next;
6055		}
6056
6057		if ( ! (entry->protection & VM_PROT_WRITE)) {
6058			vm_map_unlock(dst_map);
6059			return(KERN_PROTECTION_FAILURE);
6060		}
6061
6062		/*
6063		 *	If the entry is in transition, we must wait
6064		 *	for it to exit that state.  Anything could happen
6065		 *	when we unlock the map, so start over.
6066		 */
6067                if (entry->in_transition) {
6068
6069                        /*
6070                         * Say that we are waiting, and wait for entry.
6071                         */
6072                        entry->needs_wakeup = TRUE;
6073                        vm_map_entry_wait(dst_map, THREAD_UNINT);
6074
6075			goto start_pass_1;
6076		}
6077
6078/*
6079 *		our range is contained completely within this map entry
6080 */
6081		if (dst_end <= entry->vme_end)
6082			break;
6083/*
6084 *		check that range specified is contiguous region
6085 */
6086		if ((next == vm_map_to_entry(dst_map)) ||
6087		    (next->vme_start != entry->vme_end)) {
6088			vm_map_unlock(dst_map);
6089			return(KERN_INVALID_ADDRESS);
6090		}
6091
6092
6093		/*
6094		 *	Check for permanent objects in the destination.
6095		 */
6096		if ((entry->object.vm_object != VM_OBJECT_NULL) &&
6097		    ((!entry->object.vm_object->internal) ||
6098		     (entry->object.vm_object->true_share))) {
6099			contains_permanent_objects = TRUE;
6100		}
6101
6102		entry = next;
6103	}/* for */
6104
6105start_overwrite:
6106	/*
6107	 *	If there are permanent objects in the destination, then
6108	 *	the copy cannot be interrupted.
6109	 */
6110
6111	if (interruptible && contains_permanent_objects) {
6112		vm_map_unlock(dst_map);
6113		return(KERN_FAILURE);	/* XXX */
6114	}
6115
	/*
	 *	Make a second pass, overwriting the data.
	 *	At the beginning of each loop iteration,
	 *	the next entry to be overwritten is "tmp_entry"
	 *	(initially, the value returned from the lookup above),
	 *	and the starting address expected in that entry
	 *	is "start".
	 */
6125
6126	total_size = copy->size;
6127	if(encountered_sub_map) {
6128		copy_size = 0;
6129		/* re-calculate tmp_entry since we've had the map */
6130		/* unlocked */
6131		if (!vm_map_lookup_entry( dst_map, dst_addr, &tmp_entry)) {
6132			vm_map_unlock(dst_map);
6133			return(KERN_INVALID_ADDRESS);
6134		}
6135	} else {
6136		copy_size = copy->size;
6137	}
6138
6139	base_addr = dst_addr;
6140	while(TRUE) {
		/* deconstruct the copy object and process it in parts, */
		/* but only in the sub_map, interruptible case */
6143		vm_map_entry_t	copy_entry;
6144		vm_map_entry_t	previous_prev = VM_MAP_ENTRY_NULL;
6145		vm_map_entry_t	next_copy = VM_MAP_ENTRY_NULL;
6146		int		nentries;
6147		int		remaining_entries = 0;
6148		vm_map_offset_t	new_offset = 0;
6149
6150		for (entry = tmp_entry; copy_size == 0;) {
6151			vm_map_entry_t	next;
6152
6153			next = entry->vme_next;
6154
			/*
			 * tmp_entry and the base address are moved along
			 * each time we encounter a sub-map.  Otherwise,
			 * entry can outpace tmp_entry, and copy_size
			 * may reflect the distance between them.
			 * If the current entry is found to be in transition,
			 * we will start over at the beginning, or at the last
			 * encounter of a submap as dictated by base_addr,
			 * and we will zero copy_size accordingly.
			 */
6163			if (entry->in_transition) {
6164                       		/*
6165                       		 * Say that we are waiting, and wait for entry.
6166                       		 */
6167                       		entry->needs_wakeup = TRUE;
6168                       		vm_map_entry_wait(dst_map, THREAD_UNINT);
6169
6170				if(!vm_map_lookup_entry(dst_map, base_addr,
6171							&tmp_entry)) {
6172					vm_map_unlock(dst_map);
6173					return(KERN_INVALID_ADDRESS);
6174				}
6175				copy_size = 0;
6176				entry = tmp_entry;
6177				continue;
6178			}
6179			if(entry->is_sub_map) {
6180				vm_map_offset_t	sub_start;
6181				vm_map_offset_t	sub_end;
6182				vm_map_offset_t	local_end;
6183
				if (entry->needs_copy) {
					/*
					 * if this is a COW submap, just back
					 * the range with an anonymous entry
					 */
6188					if(entry->vme_end < dst_end)
6189						sub_end = entry->vme_end;
6190					else
6191						sub_end = dst_end;
6192					if(entry->vme_start < base_addr)
6193						sub_start = base_addr;
6194					else
6195						sub_start = entry->vme_start;
6196					vm_map_clip_end(
6197						dst_map, entry, sub_end);
6198					vm_map_clip_start(
6199						dst_map, entry, sub_start);
6200					assert(!entry->use_pmap);
6201					entry->is_sub_map = FALSE;
6202					vm_map_deallocate(
6203						entry->object.sub_map);
6204					entry->object.sub_map = NULL;
6205					entry->is_shared = FALSE;
6206					entry->needs_copy = FALSE;
6207					entry->offset = 0;
6208					/*
6209					 * XXX FBDP
6210					 * We should propagate the protections
6211					 * of the submap entry here instead
6212					 * of forcing them to VM_PROT_ALL...
6213					 * Or better yet, we should inherit
6214					 * the protection of the copy_entry.
6215					 */
6216					entry->protection = VM_PROT_ALL;
6217					entry->max_protection = VM_PROT_ALL;
6218					entry->wired_count = 0;
6219					entry->user_wired_count = 0;
6220					if(entry->inheritance
6221					   == VM_INHERIT_SHARE)
6222						entry->inheritance = VM_INHERIT_COPY;
6223					continue;
6224				}
6225				/* first take care of any non-sub_map */
6226				/* entries to send */
6227				if(base_addr < entry->vme_start) {
6228					/* stuff to send */
6229					copy_size =
6230						entry->vme_start - base_addr;
6231					break;
6232				}
6233				sub_start = entry->offset;
6234
6235				if(entry->vme_end < dst_end)
6236					sub_end = entry->vme_end;
6237				else
6238					sub_end = dst_end;
6239				sub_end -= entry->vme_start;
6240				sub_end += entry->offset;
6241				local_end = entry->vme_end;
6242				vm_map_unlock(dst_map);
6243				copy_size = sub_end - sub_start;
6244
6245				/* adjust the copy object */
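				/*
				 * Descriptive note: if the copy object covers
				 * more than this chunk ("copy_size" bytes),
				 * clip its entry list at copy_size, detach the
				 * remainder through "next_copy"/"previous_prev"
				 * and shrink the header counts; the detached
				 * tail is re-attached once this chunk has been
				 * processed (or the recursion fails).
				 */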
6246				if (total_size > copy_size) {
6247					vm_map_size_t	local_size = 0;
6248					vm_map_size_t	entry_size;
6249
6250					nentries = 1;
6251					new_offset = copy->offset;
6252					copy_entry = vm_map_copy_first_entry(copy);
6253					while(copy_entry !=
6254					      vm_map_copy_to_entry(copy)){
6255						entry_size = copy_entry->vme_end -
6256							copy_entry->vme_start;
6257						if((local_size < copy_size) &&
6258						   ((local_size + entry_size)
6259						    >= copy_size)) {
6260							vm_map_copy_clip_end(copy,
6261									     copy_entry,
6262									     copy_entry->vme_start +
6263									     (copy_size - local_size));
6264							entry_size = copy_entry->vme_end -
6265								copy_entry->vme_start;
6266							local_size += entry_size;
6267							new_offset += entry_size;
6268						}
6269						if(local_size >= copy_size) {
6270							next_copy = copy_entry->vme_next;
6271							copy_entry->vme_next =
6272								vm_map_copy_to_entry(copy);
6273							previous_prev =
6274								copy->cpy_hdr.links.prev;
6275							copy->cpy_hdr.links.prev = copy_entry;
6276							copy->size = copy_size;
6277							remaining_entries =
6278								copy->cpy_hdr.nentries;
6279							remaining_entries -= nentries;
6280							copy->cpy_hdr.nentries = nentries;
6281							break;
6282						} else {
6283							local_size += entry_size;
6284							new_offset += entry_size;
6285							nentries++;
6286						}
6287						copy_entry = copy_entry->vme_next;
6288					}
6289				}
6290
6291				if((entry->use_pmap) && (pmap == NULL)) {
6292					kr = vm_map_copy_overwrite_nested(
6293						entry->object.sub_map,
6294						sub_start,
6295						copy,
6296						interruptible,
6297						entry->object.sub_map->pmap,
6298						TRUE);
6299				} else if (pmap != NULL) {
6300					kr = vm_map_copy_overwrite_nested(
6301						entry->object.sub_map,
6302						sub_start,
6303						copy,
6304						interruptible, pmap,
6305						TRUE);
6306				} else {
6307					kr = vm_map_copy_overwrite_nested(
6308						entry->object.sub_map,
6309						sub_start,
6310						copy,
6311						interruptible,
6312						dst_map->pmap,
6313						TRUE);
6314				}
6315				if(kr != KERN_SUCCESS) {
6316					if(next_copy != NULL) {
6317						copy->cpy_hdr.nentries +=
6318							remaining_entries;
6319						copy->cpy_hdr.links.prev->vme_next =
6320							next_copy;
6321						copy->cpy_hdr.links.prev
6322							= previous_prev;
6323						copy->size = total_size;
6324					}
6325					return kr;
6326				}
6327				if (dst_end <= local_end) {
6328					return(KERN_SUCCESS);
6329				}
6330				/* otherwise copy no longer exists, it was */
6331				/* destroyed after successful copy_overwrite */
6332			        copy = (vm_map_copy_t)
6333					zalloc(vm_map_copy_zone);
6334				vm_map_copy_first_entry(copy) =
6335					vm_map_copy_last_entry(copy) =
6336					vm_map_copy_to_entry(copy);
6337				copy->type = VM_MAP_COPY_ENTRY_LIST;
6338				copy->offset = new_offset;
6339
6340				/*
6341				 * XXX FBDP
6342				 * this does not seem to deal with
6343				 * the VM map store (R&B tree)
6344				 */
6345
6346				total_size -= copy_size;
6347				copy_size = 0;
6348				/* put back remainder of copy in container */
6349				if(next_copy != NULL) {
6350					copy->cpy_hdr.nentries = remaining_entries;
6351					copy->cpy_hdr.links.next = next_copy;
6352					copy->cpy_hdr.links.prev = previous_prev;
6353					copy->size = total_size;
6354					next_copy->vme_prev =
6355						vm_map_copy_to_entry(copy);
6356					next_copy = NULL;
6357				}
6358				base_addr = local_end;
6359				vm_map_lock(dst_map);
6360				if(!vm_map_lookup_entry(dst_map,
6361							local_end, &tmp_entry)) {
6362					vm_map_unlock(dst_map);
6363					return(KERN_INVALID_ADDRESS);
6364				}
6365				entry = tmp_entry;
6366				continue;
6367			}
6368			if (dst_end <= entry->vme_end) {
6369				copy_size = dst_end - base_addr;
6370				break;
6371			}
6372
6373			if ((next == vm_map_to_entry(dst_map)) ||
6374			    (next->vme_start != entry->vme_end)) {
6375				vm_map_unlock(dst_map);
6376				return(KERN_INVALID_ADDRESS);
6377			}
6378
6379			entry = next;
6380		}/* for */
6381
6382		next_copy = NULL;
6383		nentries = 1;
6384
6385		/* adjust the copy object */
6386		if (total_size > copy_size) {
6387			vm_map_size_t	local_size = 0;
6388			vm_map_size_t	entry_size;
6389
6390			new_offset = copy->offset;
6391			copy_entry = vm_map_copy_first_entry(copy);
6392			while(copy_entry != vm_map_copy_to_entry(copy)) {
6393				entry_size = copy_entry->vme_end -
6394					copy_entry->vme_start;
6395				if((local_size < copy_size) &&
6396				   ((local_size + entry_size)
6397				    >= copy_size)) {
6398					vm_map_copy_clip_end(copy, copy_entry,
6399							     copy_entry->vme_start +
6400							     (copy_size - local_size));
6401					entry_size = copy_entry->vme_end -
6402						copy_entry->vme_start;
6403					local_size += entry_size;
6404					new_offset += entry_size;
6405				}
6406				if(local_size >= copy_size) {
6407					next_copy = copy_entry->vme_next;
6408					copy_entry->vme_next =
6409						vm_map_copy_to_entry(copy);
6410					previous_prev =
6411						copy->cpy_hdr.links.prev;
6412					copy->cpy_hdr.links.prev = copy_entry;
6413					copy->size = copy_size;
6414					remaining_entries =
6415						copy->cpy_hdr.nentries;
6416					remaining_entries -= nentries;
6417					copy->cpy_hdr.nentries = nentries;
6418					break;
6419				} else {
6420					local_size += entry_size;
6421					new_offset += entry_size;
6422					nentries++;
6423				}
6424				copy_entry = copy_entry->vme_next;
6425			}
6426		}
6427
6428		if (aligned) {
6429			pmap_t	local_pmap;
6430
6431			if(pmap)
6432				local_pmap = pmap;
6433			else
6434				local_pmap = dst_map->pmap;
6435
6436			if ((kr =  vm_map_copy_overwrite_aligned(
6437				     dst_map, tmp_entry, copy,
6438				     base_addr, local_pmap)) != KERN_SUCCESS) {
6439				if(next_copy != NULL) {
6440					copy->cpy_hdr.nentries +=
6441						remaining_entries;
6442				        copy->cpy_hdr.links.prev->vme_next =
6443						next_copy;
6444			       		copy->cpy_hdr.links.prev =
6445						previous_prev;
6446					copy->size += copy_size;
6447				}
6448				return kr;
6449			}
6450			vm_map_unlock(dst_map);
6451		} else {
			/*
			 * Performance gain:
			 *
			 * if the copy and dst addresses are misaligned, but share
			 * the same offset within the page, we can copy the misaligned
			 * parts unaligned and copy the rest aligned.  If they are
			 * aligned but the length is unaligned, we simply need to copy
			 * the end bit unaligned.  We'd need to split the misaligned
			 * bits of the region in this case!
			 */
6462			/* ALWAYS UNLOCKS THE dst_map MAP */
6463			kr = vm_map_copy_overwrite_unaligned(
6464				dst_map,
6465				tmp_entry,
6466				copy,
6467				base_addr,
6468				discard_on_success);
6469			if (kr != KERN_SUCCESS) {
6470				if(next_copy != NULL) {
6471					copy->cpy_hdr.nentries +=
6472						remaining_entries;
6473			       		copy->cpy_hdr.links.prev->vme_next =
6474						next_copy;
6475			       		copy->cpy_hdr.links.prev =
6476						previous_prev;
6477					copy->size += copy_size;
6478				}
6479				return kr;
6480			}
6481		}
6482		total_size -= copy_size;
6483		if(total_size == 0)
6484			break;
6485		base_addr += copy_size;
6486		copy_size = 0;
6487		copy->offset = new_offset;
6488		if(next_copy != NULL) {
6489			copy->cpy_hdr.nentries = remaining_entries;
6490			copy->cpy_hdr.links.next = next_copy;
6491			copy->cpy_hdr.links.prev = previous_prev;
6492			next_copy->vme_prev = vm_map_copy_to_entry(copy);
6493			copy->size = total_size;
6494		}
6495		vm_map_lock(dst_map);
6496		while(TRUE) {
6497			if (!vm_map_lookup_entry(dst_map,
6498						 base_addr, &tmp_entry)) {
6499				vm_map_unlock(dst_map);
6500				return(KERN_INVALID_ADDRESS);
6501			}
6502                	if (tmp_entry->in_transition) {
6503                       		entry->needs_wakeup = TRUE;
6504                       		vm_map_entry_wait(dst_map, THREAD_UNINT);
6505			} else {
6506				break;
6507			}
6508		}
6509		vm_map_clip_start(dst_map,
6510				  tmp_entry,
6511				  vm_map_trunc_page(base_addr,
6512						    VM_MAP_PAGE_MASK(dst_map)));
6513
6514		entry = tmp_entry;
6515	} /* while */
6516
6517	/*
6518	 *	Throw away the vm_map_copy object
6519	 */
6520	if (discard_on_success)
6521		vm_map_copy_discard(copy);
6522
6523	return(KERN_SUCCESS);
6524}/* vm_map_copy_overwrite */
6525
6526kern_return_t
6527vm_map_copy_overwrite(
6528	vm_map_t	dst_map,
6529	vm_map_offset_t	dst_addr,
6530	vm_map_copy_t	copy,
6531	boolean_t	interruptible)
6532{
6533	vm_map_size_t	head_size, tail_size;
6534	vm_map_copy_t	head_copy, tail_copy;
6535	vm_map_offset_t	head_addr, tail_addr;
6536	vm_map_entry_t	entry;
6537	kern_return_t	kr;
6538
6539	head_size = 0;
6540	tail_size = 0;
6541	head_copy = NULL;
6542	tail_copy = NULL;
6543	head_addr = 0;
6544	tail_addr = 0;
6545
6546	if (interruptible ||
6547	    copy == VM_MAP_COPY_NULL ||
6548	    copy->type != VM_MAP_COPY_ENTRY_LIST) {
6549		/*
6550		 * We can't split the "copy" map if we're interruptible
6551		 * or if we don't have a "copy" map...
6552		 */
6553	blunt_copy:
6554		return vm_map_copy_overwrite_nested(dst_map,
6555						    dst_addr,
6556						    copy,
6557						    interruptible,
6558						    (pmap_t) NULL,
6559						    TRUE);
6560	}
6561
6562	if (copy->size < 3 * PAGE_SIZE) {
6563		/*
6564		 * Too small to bother with optimizing...
6565		 */
6566		goto blunt_copy;
6567	}
6568
6569	if ((dst_addr & VM_MAP_PAGE_MASK(dst_map)) !=
6570	    (copy->offset & VM_MAP_PAGE_MASK(dst_map))) {
6571		/*
6572		 * Incompatible mis-alignment of source and destination...
6573		 */
6574		goto blunt_copy;
6575	}
6576
6577	/*
6578	 * Proper alignment or identical mis-alignment at the beginning.
6579	 * Let's try and do a small unaligned copy first (if needed)
6580	 * and then an aligned copy for the rest.
6581	 */
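	/*
	 * Worked example (illustrative, assuming 4KB map pages): with
	 * dst_addr = 0x5200, copy->offset ending in 0x200 and
	 * copy->size = 0x3A00, the head copy covers [0x5200, 0x6000)
	 * (head_size = 0xE00), the tail copy covers the last 0xC00 bytes
	 * starting at tail_addr = 0x8000, and the two aligned pages in
	 * between, [0x6000, 0x8000), go through the aligned path.
	 */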
6582	if (!page_aligned(dst_addr)) {
6583		head_addr = dst_addr;
6584		head_size = (VM_MAP_PAGE_SIZE(dst_map) -
6585			     (copy->offset & VM_MAP_PAGE_MASK(dst_map)));
6586	}
6587	if (!page_aligned(copy->offset + copy->size)) {
6588		/*
6589		 * Mis-alignment at the end.
6590		 * Do an aligned copy up to the last page and
6591		 * then an unaligned copy for the remaining bytes.
6592		 */
6593		tail_size = ((copy->offset + copy->size) &
6594			     VM_MAP_PAGE_MASK(dst_map));
6595		tail_addr = dst_addr + copy->size - tail_size;
6596	}
6597
6598	if (head_size + tail_size == copy->size) {
6599		/*
6600		 * It's all unaligned, no optimization possible...
6601		 */
6602		goto blunt_copy;
6603	}
6604
6605	/*
6606	 * Can't optimize if there are any submaps in the
6607	 * destination due to the way we free the "copy" map
6608	 * progressively in vm_map_copy_overwrite_nested()
6609	 * in that case.
6610	 */
6611	vm_map_lock_read(dst_map);
6612	if (! vm_map_lookup_entry(dst_map, dst_addr, &entry)) {
6613		vm_map_unlock_read(dst_map);
6614		goto blunt_copy;
6615	}
6616	for (;
	     (entry != vm_map_to_entry(dst_map) &&
6618	      entry->vme_start < dst_addr + copy->size);
6619	     entry = entry->vme_next) {
6620		if (entry->is_sub_map) {
6621			vm_map_unlock_read(dst_map);
6622			goto blunt_copy;
6623		}
6624	}
6625	vm_map_unlock_read(dst_map);
6626
6627	if (head_size) {
6628		/*
6629		 * Unaligned copy of the first "head_size" bytes, to reach
6630		 * a page boundary.
6631		 */
6632
6633		/*
6634		 * Extract "head_copy" out of "copy".
6635		 */
6636		head_copy = (vm_map_copy_t) zalloc(vm_map_copy_zone);
6637		vm_map_copy_first_entry(head_copy) =
6638			vm_map_copy_to_entry(head_copy);
6639		vm_map_copy_last_entry(head_copy) =
6640			vm_map_copy_to_entry(head_copy);
6641		head_copy->type = VM_MAP_COPY_ENTRY_LIST;
6642		head_copy->cpy_hdr.nentries = 0;
6643		head_copy->cpy_hdr.entries_pageable =
6644			copy->cpy_hdr.entries_pageable;
6645		vm_map_store_init(&head_copy->cpy_hdr);
6646
6647		head_copy->offset = copy->offset;
6648		head_copy->size = head_size;
6649
6650		copy->offset += head_size;
6651		copy->size -= head_size;
6652
6653		entry = vm_map_copy_first_entry(copy);
6654		vm_map_copy_clip_end(copy, entry, copy->offset);
6655		vm_map_copy_entry_unlink(copy, entry);
6656		vm_map_copy_entry_link(head_copy,
6657				       vm_map_copy_to_entry(head_copy),
6658				       entry);
6659
6660		/*
6661		 * Do the unaligned copy.
6662		 */
6663		kr = vm_map_copy_overwrite_nested(dst_map,
6664						  head_addr,
6665						  head_copy,
6666						  interruptible,
6667						  (pmap_t) NULL,
6668						  FALSE);
6669		if (kr != KERN_SUCCESS)
6670			goto done;
6671	}
6672
6673	if (tail_size) {
6674		/*
6675		 * Extract "tail_copy" out of "copy".
6676		 */
6677		tail_copy = (vm_map_copy_t) zalloc(vm_map_copy_zone);
6678		vm_map_copy_first_entry(tail_copy) =
6679			vm_map_copy_to_entry(tail_copy);
6680		vm_map_copy_last_entry(tail_copy) =
6681			vm_map_copy_to_entry(tail_copy);
6682		tail_copy->type = VM_MAP_COPY_ENTRY_LIST;
6683		tail_copy->cpy_hdr.nentries = 0;
6684		tail_copy->cpy_hdr.entries_pageable =
6685			copy->cpy_hdr.entries_pageable;
6686		vm_map_store_init(&tail_copy->cpy_hdr);
6687
6688		tail_copy->offset = copy->offset + copy->size - tail_size;
6689		tail_copy->size = tail_size;
6690
6691		copy->size -= tail_size;
6692
6693		entry = vm_map_copy_last_entry(copy);
6694		vm_map_copy_clip_start(copy, entry, tail_copy->offset);
6695		entry = vm_map_copy_last_entry(copy);
6696		vm_map_copy_entry_unlink(copy, entry);
6697		vm_map_copy_entry_link(tail_copy,
6698				       vm_map_copy_last_entry(tail_copy),
6699				       entry);
6700	}
6701
6702	/*
6703	 * Copy most (or possibly all) of the data.
6704	 */
6705	kr = vm_map_copy_overwrite_nested(dst_map,
6706					  dst_addr + head_size,
6707					  copy,
6708					  interruptible,
6709					  (pmap_t) NULL,
6710					  FALSE);
6711	if (kr != KERN_SUCCESS) {
6712		goto done;
6713	}
6714
6715	if (tail_size) {
6716		kr = vm_map_copy_overwrite_nested(dst_map,
6717						  tail_addr,
6718						  tail_copy,
6719						  interruptible,
6720						  (pmap_t) NULL,
6721						  FALSE);
6722	}
6723
6724done:
6725	assert(copy->type == VM_MAP_COPY_ENTRY_LIST);
6726	if (kr == KERN_SUCCESS) {
6727		/*
6728		 * Discard all the copy maps.
6729		 */
6730		if (head_copy) {
6731			vm_map_copy_discard(head_copy);
6732			head_copy = NULL;
6733		}
6734		vm_map_copy_discard(copy);
6735		if (tail_copy) {
6736			vm_map_copy_discard(tail_copy);
6737			tail_copy = NULL;
6738		}
6739	} else {
6740		/*
6741		 * Re-assemble the original copy map.
6742		 */
6743		if (head_copy) {
6744			entry = vm_map_copy_first_entry(head_copy);
6745			vm_map_copy_entry_unlink(head_copy, entry);
6746			vm_map_copy_entry_link(copy,
6747					       vm_map_copy_to_entry(copy),
6748					       entry);
6749			copy->offset -= head_size;
6750			copy->size += head_size;
6751			vm_map_copy_discard(head_copy);
6752			head_copy = NULL;
6753		}
6754		if (tail_copy) {
6755			entry = vm_map_copy_last_entry(tail_copy);
6756			vm_map_copy_entry_unlink(tail_copy, entry);
6757			vm_map_copy_entry_link(copy,
6758					       vm_map_copy_last_entry(copy),
6759					       entry);
6760			copy->size += tail_size;
6761			vm_map_copy_discard(tail_copy);
6762			tail_copy = NULL;
6763		}
6764	}
6765	return kr;
6766}
6767
6768
6769/*
6770 *	Routine: vm_map_copy_overwrite_unaligned	[internal use only]
6771 *
 *	Description:
 *	Physically copy unaligned data
 *
 *	Implementation:
 *	Unaligned parts of pages have to be physically copied.  We use
 *	a modified form of vm_fault_copy (which understands non-aligned
 *	page offsets and sizes) to do the copy.  We attempt to copy as
 *	much memory in one go as possible; however, vm_fault_copy copies
 *	within one memory object, so we have to find the smallest of
 *	"amount left", "source object data size" and "target object data
 *	size".  With unaligned data we don't need to split regions, therefore
 *	the source (copy) object should be one map entry; the target range
 *	may be split over multiple map entries, however.  In any event we
 *	are pessimistic about these assumptions.
 *
 *	Assumptions:
 *	dst_map is locked on entry and is returned locked on success,
 *	unlocked on error.
 */
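
/*
 *	Descriptive note: each iteration of the loop below copies
 *	min(dst_size, src_size, amount_left) bytes with vm_fault_copy(),
 *	then advances "start"/"src_offset" and steps to the next destination
 *	entry or the next copy entry as each one is exhausted.
 */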
6791
6792static kern_return_t
6793vm_map_copy_overwrite_unaligned(
6794	vm_map_t	dst_map,
6795	vm_map_entry_t	entry,
6796	vm_map_copy_t	copy,
6797	vm_map_offset_t	start,
6798	boolean_t	discard_on_success)
6799{
6800	vm_map_entry_t		copy_entry;
6801	vm_map_entry_t		copy_entry_next;
6802	vm_map_version_t	version;
6803	vm_object_t		dst_object;
6804	vm_object_offset_t	dst_offset;
6805	vm_object_offset_t	src_offset;
6806	vm_object_offset_t	entry_offset;
6807	vm_map_offset_t		entry_end;
6808	vm_map_size_t		src_size,
6809				dst_size,
6810				copy_size,
6811				amount_left;
6812	kern_return_t		kr = KERN_SUCCESS;
6813
6814
6815	copy_entry = vm_map_copy_first_entry(copy);
6816
6817	vm_map_lock_write_to_read(dst_map);
6818
6819	src_offset = copy->offset - vm_object_trunc_page(copy->offset);
6820	amount_left = copy->size;
/*
 *	unaligned, so we never clipped this entry; we need the offset into
 *	the vm_object, not just the data.
 */
6825	while (amount_left > 0) {
6826
6827		if (entry == vm_map_to_entry(dst_map)) {
6828			vm_map_unlock_read(dst_map);
6829			return KERN_INVALID_ADDRESS;
6830		}
6831
6832		/* "start" must be within the current map entry */
6833		assert ((start>=entry->vme_start) && (start<entry->vme_end));
6834
6835		dst_offset = start - entry->vme_start;
6836
6837		dst_size = entry->vme_end - start;
6838
6839		src_size = copy_entry->vme_end -
6840			(copy_entry->vme_start + src_offset);
6841
6842		if (dst_size < src_size) {
6843/*
6844 *			we can only copy dst_size bytes before
6845 *			we have to get the next destination entry
6846 */
6847			copy_size = dst_size;
6848		} else {
6849/*
6850 *			we can only copy src_size bytes before
6851 *			we have to get the next source copy entry
6852 */
6853			copy_size = src_size;
6854		}
6855
6856		if (copy_size > amount_left) {
6857			copy_size = amount_left;
6858		}
/*
 *		Entry needs copy: create a shadow object for the
 *		copy-on-write region.
 */
6863		if (entry->needs_copy &&
6864		    ((entry->protection & VM_PROT_WRITE) != 0))
6865		{
6866			if (vm_map_lock_read_to_write(dst_map)) {
6867				vm_map_lock_read(dst_map);
6868				goto RetryLookup;
6869			}
6870			vm_object_shadow(&entry->object.vm_object,
6871					 &entry->offset,
6872					 (vm_map_size_t)(entry->vme_end
6873							 - entry->vme_start));
6874			entry->needs_copy = FALSE;
6875			vm_map_lock_write_to_read(dst_map);
6876		}
6877		dst_object = entry->object.vm_object;
6878/*
6879 *		unlike with the virtual (aligned) copy we're going
6880 *		to fault on it therefore we need a target object.
6881 */
6882                if (dst_object == VM_OBJECT_NULL) {
6883			if (vm_map_lock_read_to_write(dst_map)) {
6884				vm_map_lock_read(dst_map);
6885				goto RetryLookup;
6886			}
6887			dst_object = vm_object_allocate((vm_map_size_t)
6888							entry->vme_end - entry->vme_start);
6889			entry->object.vm_object = dst_object;
6890			entry->offset = 0;
6891			vm_map_lock_write_to_read(dst_map);
6892		}
6893/*
6894 *		Take an object reference and unlock map. The "entry" may
6895 *		disappear or change when the map is unlocked.
6896 */
6897		vm_object_reference(dst_object);
6898		version.main_timestamp = dst_map->timestamp;
6899		entry_offset = entry->offset;
6900		entry_end = entry->vme_end;
6901		vm_map_unlock_read(dst_map);
6902/*
6903 *		Copy as much as possible in one pass
6904 */
6905		kr = vm_fault_copy(
6906			copy_entry->object.vm_object,
6907			copy_entry->offset + src_offset,
6908			&copy_size,
6909			dst_object,
6910			entry_offset + dst_offset,
6911			dst_map,
6912			&version,
6913			THREAD_UNINT );
6914
6915		start += copy_size;
6916		src_offset += copy_size;
6917		amount_left -= copy_size;
6918/*
6919 *		Release the object reference
6920 */
6921		vm_object_deallocate(dst_object);
6922/*
6923 *		If a hard error occurred, return it now
6924 */
6925		if (kr != KERN_SUCCESS)
6926			return kr;
6927
6928		if ((copy_entry->vme_start + src_offset) == copy_entry->vme_end
6929		    || amount_left == 0)
6930		{
6931/*
6932 *			all done with this copy entry, dispose.
6933 */
6934			copy_entry_next = copy_entry->vme_next;
6935
6936			if (discard_on_success) {
6937				vm_map_copy_entry_unlink(copy, copy_entry);
6938				assert(!copy_entry->is_sub_map);
6939				vm_object_deallocate(
6940					copy_entry->object.vm_object);
6941				vm_map_copy_entry_dispose(copy, copy_entry);
6942			}
6943
6944			if (copy_entry_next == vm_map_copy_to_entry(copy) &&
6945			    amount_left) {
/*
 *				not finished copying, but we ran out of source
 */
6949				return KERN_INVALID_ADDRESS;
6950			}
6951
6952			copy_entry = copy_entry_next;
6953
6954			src_offset = 0;
6955		}
6956
6957		if (amount_left == 0)
6958			return KERN_SUCCESS;
6959
6960		vm_map_lock_read(dst_map);
6961		if (version.main_timestamp == dst_map->timestamp) {
6962			if (start == entry_end) {
6963/*
6964 *				destination region is split.  Use the version
6965 *				information to avoid a lookup in the normal
6966 *				case.
6967 */
6968				entry = entry->vme_next;
6969/*
6970 *				should be contiguous. Fail if we encounter
6971 *				a hole in the destination.
6972 */
6973				if (start != entry->vme_start) {
6974					vm_map_unlock_read(dst_map);
6975					return KERN_INVALID_ADDRESS ;
6976				}
6977			}
6978		} else {
6979/*
6980 *			Map version check failed.
 *			We must look up the entry because somebody
 *			might have changed the map behind our backs.
6983 */
6984		RetryLookup:
6985			if (!vm_map_lookup_entry(dst_map, start, &entry))
6986			{
6987				vm_map_unlock_read(dst_map);
6988				return KERN_INVALID_ADDRESS ;
6989			}
6990		}
6991	}/* while */
6992
6993	return KERN_SUCCESS;
6994}/* vm_map_copy_overwrite_unaligned */
6995
6996/*
6997 *	Routine: vm_map_copy_overwrite_aligned	[internal use only]
6998 *
6999 *	Description:
7000 *	Does all the vm_trickery possible for whole pages.
7001 *
7002 *	Implementation:
7003 *
7004 *	If there are no permanent objects in the destination,
7005 *	and the source and destination map entry zones match,
7006 *	and the destination map entry is not shared,
7007 *	then the map entries can be deleted and replaced
7008 *	with those from the copy.  The following code is the
7009 *	basic idea of what to do, but there are lots of annoying
7010 *	little details about getting protection and inheritance
7011 *	right.  Should add protection, inheritance, and sharing checks
7012 *	to the above pass and make sure that no wiring is involved.
7013 */
7014
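/*
 * Descriptive note: these counters record how often the optimized
 * entry-replacement path below is abandoned in favor of a physical copy
 * ("slow_copy"), broken down by why the source object was rejected.
 */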
7015int vm_map_copy_overwrite_aligned_src_not_internal = 0;
7016int vm_map_copy_overwrite_aligned_src_not_symmetric = 0;
7017int vm_map_copy_overwrite_aligned_src_large = 0;
7018
7019static kern_return_t
7020vm_map_copy_overwrite_aligned(
7021	vm_map_t	dst_map,
7022	vm_map_entry_t	tmp_entry,
7023	vm_map_copy_t	copy,
7024	vm_map_offset_t	start,
7025	__unused pmap_t	pmap)
7026{
7027	vm_object_t	object;
7028	vm_map_entry_t	copy_entry;
7029	vm_map_size_t	copy_size;
7030	vm_map_size_t	size;
7031	vm_map_entry_t	entry;
7032
7033	while ((copy_entry = vm_map_copy_first_entry(copy))
7034	       != vm_map_copy_to_entry(copy))
7035	{
7036		copy_size = (copy_entry->vme_end - copy_entry->vme_start);
7037
7038		entry = tmp_entry;
7039		assert(!entry->use_pmap); /* unnested when clipped earlier */
7040		if (entry == vm_map_to_entry(dst_map)) {
7041			vm_map_unlock(dst_map);
7042			return KERN_INVALID_ADDRESS;
7043		}
7044		size = (entry->vme_end - entry->vme_start);
7045		/*
7046		 *	Make sure that no holes popped up in the
7047		 *	address map, and that the protection is
7048		 *	still valid, in case the map was unlocked
7049		 *	earlier.
7050		 */
7051
7052		if ((entry->vme_start != start) || ((entry->is_sub_map)
7053						    && !entry->needs_copy)) {
7054			vm_map_unlock(dst_map);
7055			return(KERN_INVALID_ADDRESS);
7056		}
7057		assert(entry != vm_map_to_entry(dst_map));
7058
7059		/*
7060		 *	Check protection again
7061		 */
7062
7063		if ( ! (entry->protection & VM_PROT_WRITE)) {
7064			vm_map_unlock(dst_map);
7065			return(KERN_PROTECTION_FAILURE);
7066		}
7067
7068		/*
7069		 *	Adjust to source size first
7070		 */
7071
7072		if (copy_size < size) {
7073			vm_map_clip_end(dst_map, entry, entry->vme_start + copy_size);
7074			size = copy_size;
7075		}
7076
7077		/*
7078		 *	Adjust to destination size
7079		 */
7080
7081		if (size < copy_size) {
7082			vm_map_copy_clip_end(copy, copy_entry,
7083					     copy_entry->vme_start + size);
7084			copy_size = size;
7085		}
7086
7087		assert((entry->vme_end - entry->vme_start) == size);
7088		assert((tmp_entry->vme_end - tmp_entry->vme_start) == size);
7089		assert((copy_entry->vme_end - copy_entry->vme_start) == size);
7090
7091		/*
7092		 *	If the destination contains temporary unshared memory,
7093		 *	we can perform the copy by throwing it away and
7094		 *	installing the source data.
7095		 */
7096
7097		object = entry->object.vm_object;
7098		if ((!entry->is_shared &&
7099		     ((object == VM_OBJECT_NULL) ||
7100		      (object->internal && !object->true_share))) ||
7101		    entry->needs_copy) {
7102			vm_object_t	old_object = entry->object.vm_object;
7103			vm_object_offset_t	old_offset = entry->offset;
7104			vm_object_offset_t	offset;
7105
7106			/*
7107			 * Ensure that the source and destination aren't
7108			 * identical
7109			 */
7110			if (old_object == copy_entry->object.vm_object &&
7111			    old_offset == copy_entry->offset) {
7112				vm_map_copy_entry_unlink(copy, copy_entry);
7113				vm_map_copy_entry_dispose(copy, copy_entry);
7114
7115				if (old_object != VM_OBJECT_NULL)
7116					vm_object_deallocate(old_object);
7117
7118				start = tmp_entry->vme_end;
7119				tmp_entry = tmp_entry->vme_next;
7120				continue;
7121			}
7122
7123#define __TRADEOFF1_OBJ_SIZE (64 * 1024 * 1024)	/* 64 MB */
7124#define __TRADEOFF1_COPY_SIZE (128 * 1024)	/* 128 KB */
7125			if (copy_entry->object.vm_object != VM_OBJECT_NULL &&
7126			    copy_entry->object.vm_object->vo_size >= __TRADEOFF1_OBJ_SIZE &&
7127			    copy_size <= __TRADEOFF1_COPY_SIZE) {
7128				/*
7129				 * Virtual vs. Physical copy tradeoff #1.
7130				 *
7131				 * Copying only a few pages out of a large
7132				 * object:  do a physical copy instead of
7133				 * a virtual copy, to avoid possibly keeping
7134				 * the entire large object alive because of
7135				 * those few copy-on-write pages.
7136				 */
7137				vm_map_copy_overwrite_aligned_src_large++;
7138				goto slow_copy;
7139			}
7140
7141			if (entry->alias >= VM_MEMORY_MALLOC &&
7142			    entry->alias <= VM_MEMORY_MALLOC_LARGE_REUSED) {
7143				vm_object_t new_object, new_shadow;
7144
7145				/*
7146				 * We're about to map something over a mapping
7147				 * established by malloc()...
7148				 */
7149				new_object = copy_entry->object.vm_object;
7150				if (new_object != VM_OBJECT_NULL) {
7151					vm_object_lock_shared(new_object);
7152				}
7153				while (new_object != VM_OBJECT_NULL &&
7154				       !new_object->true_share &&
7155				       new_object->copy_strategy == MEMORY_OBJECT_COPY_SYMMETRIC &&
7156				       new_object->internal) {
7157					new_shadow = new_object->shadow;
7158					if (new_shadow == VM_OBJECT_NULL) {
7159						break;
7160					}
7161					vm_object_lock_shared(new_shadow);
7162					vm_object_unlock(new_object);
7163					new_object = new_shadow;
7164				}
7165				if (new_object != VM_OBJECT_NULL) {
7166					if (!new_object->internal) {
7167						/*
7168						 * The new mapping is backed
7169						 * by an external object.  We
7170						 * don't want malloc'ed memory
7171						 * to be replaced with such a
7172						 * non-anonymous mapping, so
7173						 * let's go off the optimized
7174						 * path...
7175						 */
7176						vm_map_copy_overwrite_aligned_src_not_internal++;
7177						vm_object_unlock(new_object);
7178						goto slow_copy;
7179					}
7180					if (new_object->true_share ||
7181					    new_object->copy_strategy != MEMORY_OBJECT_COPY_SYMMETRIC) {
7182						/*
7183						 * Same if there's a "true_share"
7184						 * object in the shadow chain, or
7185						 * an object with a non-default
7186						 * (SYMMETRIC) copy strategy.
7187						 */
7188						vm_map_copy_overwrite_aligned_src_not_symmetric++;
7189						vm_object_unlock(new_object);
7190						goto slow_copy;
7191					}
7192					vm_object_unlock(new_object);
7193				}
7194				/*
7195				 * The new mapping is still backed by
7196				 * anonymous (internal) memory, so it's
7197				 * OK to substitute it for the original
7198				 * malloc() mapping.
7199				 */
7200			}
7201
7202			if (old_object != VM_OBJECT_NULL) {
7203				if(entry->is_sub_map) {
7204					if(entry->use_pmap) {
7205#ifndef NO_NESTED_PMAP
7206						pmap_unnest(dst_map->pmap,
7207							    (addr64_t)entry->vme_start,
7208							    entry->vme_end - entry->vme_start);
7209#endif	/* NO_NESTED_PMAP */
7210						if(dst_map->mapped_in_other_pmaps) {
7211							/* clean up parent */
7212							/* map/maps */
7213							vm_map_submap_pmap_clean(
7214								dst_map, entry->vme_start,
7215								entry->vme_end,
7216								entry->object.sub_map,
7217								entry->offset);
7218						}
7219					} else {
7220						vm_map_submap_pmap_clean(
7221							dst_map, entry->vme_start,
7222							entry->vme_end,
7223							entry->object.sub_map,
7224							entry->offset);
7225					}
7226				   	vm_map_deallocate(
7227						entry->object.sub_map);
7228			   	} else {
7229					if(dst_map->mapped_in_other_pmaps) {
7230						vm_object_pmap_protect_options(
7231							entry->object.vm_object,
7232							entry->offset,
7233							entry->vme_end
7234							- entry->vme_start,
7235							PMAP_NULL,
7236							entry->vme_start,
7237							VM_PROT_NONE,
7238							PMAP_OPTIONS_REMOVE);
7239					} else {
7240						pmap_remove_options(
7241							dst_map->pmap,
7242							(addr64_t)(entry->vme_start),
7243							(addr64_t)(entry->vme_end),
7244							PMAP_OPTIONS_REMOVE);
7245					}
7246					vm_object_deallocate(old_object);
7247			   	}
7248			}
7249
7250			entry->is_sub_map = FALSE;
7251			entry->object = copy_entry->object;
7252			object = entry->object.vm_object;
7253			entry->needs_copy = copy_entry->needs_copy;
7254			entry->wired_count = 0;
7255			entry->user_wired_count = 0;
7256			offset = entry->offset = copy_entry->offset;
7257
7258			vm_map_copy_entry_unlink(copy, copy_entry);
7259			vm_map_copy_entry_dispose(copy, copy_entry);
7260
			/*
			 * We could try to push pages into the pmap at this point, BUT
			 * this optimization only saved on average 2 us per page if ALL
			 * the pages in the source were currently mapped and ALL the
			 * pages in the dest were touched.  If fewer than 2/3 of the
			 * pages were touched, this optimization actually cost more
			 * cycles.  It also puts a lot of pressure on the pmap layer
			 * with respect to mapping structures.
			 */
7269
7270			/*
7271			 *	Set up for the next iteration.  The map
7272			 *	has not been unlocked, so the next
7273			 *	address should be at the end of this
7274			 *	entry, and the next map entry should be
7275			 *	the one following it.
7276			 */
7277
7278			start = tmp_entry->vme_end;
7279			tmp_entry = tmp_entry->vme_next;
7280		} else {
7281			vm_map_version_t	version;
7282			vm_object_t		dst_object;
7283			vm_object_offset_t	dst_offset;
7284			kern_return_t		r;
7285
7286		slow_copy:
7287			if (entry->needs_copy) {
7288				vm_object_shadow(&entry->object.vm_object,
7289						 &entry->offset,
7290						 (entry->vme_end -
7291						  entry->vme_start));
7292				entry->needs_copy = FALSE;
7293			}
7294
7295			dst_object = entry->object.vm_object;
7296			dst_offset = entry->offset;
7297
7298			/*
7299			 *	Take an object reference, and record
7300			 *	the map version information so that the
7301			 *	map can be safely unlocked.
7302			 */
7303
7304			if (dst_object == VM_OBJECT_NULL) {
7305				/*
7306				 * We would usually have just taken the
7307				 * optimized path above if the destination
7308				 * object has not been allocated yet.  But we
7309				 * now disable that optimization if the copy
7310				 * entry's object is not backed by anonymous
7311				 * memory to avoid replacing malloc'ed
7312				 * (i.e. re-usable) anonymous memory with a
7313				 * not-so-anonymous mapping.
7314				 * So we have to handle this case here and
7315				 * allocate a new VM object for this map entry.
7316				 */
7317				dst_object = vm_object_allocate(
7318					entry->vme_end - entry->vme_start);
7319				dst_offset = 0;
7320				entry->object.vm_object = dst_object;
7321				entry->offset = dst_offset;
7322
7323			}
7324
7325			vm_object_reference(dst_object);
7326
7327			/* account for unlock bumping up timestamp */
7328			version.main_timestamp = dst_map->timestamp + 1;
7329
7330			vm_map_unlock(dst_map);
7331
7332			/*
7333			 *	Copy as much as possible in one pass
7334			 */
7335
7336			copy_size = size;
7337			r = vm_fault_copy(
7338				copy_entry->object.vm_object,
7339				copy_entry->offset,
7340				&copy_size,
7341				dst_object,
7342				dst_offset,
7343				dst_map,
7344				&version,
7345				THREAD_UNINT );
7346
7347			/*
7348			 *	Release the object reference
7349			 */
7350
7351			vm_object_deallocate(dst_object);
7352
7353			/*
7354			 *	If a hard error occurred, return it now
7355			 */
7356
7357			if (r != KERN_SUCCESS)
7358				return(r);
7359
7360			if (copy_size != 0) {
7361				/*
7362				 *	Dispose of the copied region
7363				 */
7364
7365				vm_map_copy_clip_end(copy, copy_entry,
7366						     copy_entry->vme_start + copy_size);
7367				vm_map_copy_entry_unlink(copy, copy_entry);
7368				vm_object_deallocate(copy_entry->object.vm_object);
7369				vm_map_copy_entry_dispose(copy, copy_entry);
7370			}
7371
7372			/*
7373			 *	Pick up in the destination map where we left off.
7374			 *
7375			 *	Use the version information to avoid a lookup
7376			 *	in the normal case.
7377			 */
7378
7379			start += copy_size;
7380			vm_map_lock(dst_map);
7381			if (version.main_timestamp == dst_map->timestamp &&
7382			    copy_size != 0) {
7383				/* We can safely use saved tmp_entry value */
7384
7385				vm_map_clip_end(dst_map, tmp_entry, start);
7386				tmp_entry = tmp_entry->vme_next;
7387			} else {
7388				/* Must do lookup of tmp_entry */
7389
7390				if (!vm_map_lookup_entry(dst_map, start, &tmp_entry)) {
7391					vm_map_unlock(dst_map);
7392					return(KERN_INVALID_ADDRESS);
7393				}
7394				vm_map_clip_start(dst_map, tmp_entry, start);
7395			}
7396		}
7397	}/* while */
7398
7399	return(KERN_SUCCESS);
7400}/* vm_map_copy_overwrite_aligned */
7401
7402/*
7403 *	Routine: vm_map_copyin_kernel_buffer [internal use only]
7404 *
7405 *	Description:
7406 *		Copy in data to a kernel buffer from space in the
7407 *		source map. The original space may be optionally
7408 *		deallocated.
7409 *
7410 *		If successful, returns a new copy object.
7411 */
7412static kern_return_t
7413vm_map_copyin_kernel_buffer(
7414	vm_map_t	src_map,
7415	vm_map_offset_t	src_addr,
7416	vm_map_size_t	len,
7417	boolean_t	src_destroy,
7418	vm_map_copy_t	*copy_result)
7419{
7420	kern_return_t kr;
7421	vm_map_copy_t copy;
7422	vm_size_t kalloc_size;
7423
7424	if ((vm_size_t) len != len) {
7425		/* "len" is too big and doesn't fit in a "vm_size_t" */
7426		return KERN_RESOURCE_SHORTAGE;
7427	}
7428	kalloc_size = (vm_size_t) (sizeof(struct vm_map_copy) + len);
7429	assert((vm_map_size_t) kalloc_size == sizeof (struct vm_map_copy) + len);
7430
7431	copy = (vm_map_copy_t) kalloc(kalloc_size);
7432	if (copy == VM_MAP_COPY_NULL) {
7433		return KERN_RESOURCE_SHORTAGE;
7434	}
7435	copy->type = VM_MAP_COPY_KERNEL_BUFFER;
7436	copy->size = len;
7437	copy->offset = 0;
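	/*
	 * the data buffer lives inline, immediately after the vm_map_copy
	 * header, in the single kalloc'ed block allocated above
	 */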
7438	copy->cpy_kdata = (void *) (copy + 1);
7439	copy->cpy_kalloc_size = kalloc_size;
7440
7441	kr = copyinmap(src_map, src_addr, copy->cpy_kdata, (vm_size_t) len);
7442	if (kr != KERN_SUCCESS) {
7443		kfree(copy, kalloc_size);
7444		return kr;
7445	}
7446	if (src_destroy) {
7447		(void) vm_map_remove(
7448			src_map,
7449			vm_map_trunc_page(src_addr,
7450					  VM_MAP_PAGE_MASK(src_map)),
7451			vm_map_round_page(src_addr + len,
7452					  VM_MAP_PAGE_MASK(src_map)),
			(VM_MAP_REMOVE_INTERRUPTIBLE |
			 VM_MAP_REMOVE_WAIT_FOR_KWIRE |
			 ((src_map == kernel_map) ? VM_MAP_REMOVE_KUNWIRE : 0)));
7456	}
7457	*copy_result = copy;
7458	return KERN_SUCCESS;
7459}
7460
7461/*
7462 *	Routine: vm_map_copyout_kernel_buffer	[internal use only]
7463 *
7464 *	Description:
7465 *		Copy out data from a kernel buffer into space in the
 *		destination map. The space may optionally be dynamically
 *		allocated.
7468 *
7469 *		If successful, consumes the copy object.
7470 *		Otherwise, the caller is responsible for it.
7471 */
7472static int vm_map_copyout_kernel_buffer_failures = 0;
7473static kern_return_t
7474vm_map_copyout_kernel_buffer(
7475	vm_map_t		map,
7476	vm_map_address_t	*addr,	/* IN/OUT */
7477	vm_map_copy_t		copy,
7478	boolean_t		overwrite,
7479	boolean_t		consume_on_success)
7480{
7481	kern_return_t kr = KERN_SUCCESS;
7482	thread_t thread = current_thread();
7483
7484	if (!overwrite) {
7485
7486		/*
7487		 * Allocate space in the target map for the data
7488		 */
7489		*addr = 0;
7490		kr = vm_map_enter(map,
7491				  addr,
7492				  vm_map_round_page(copy->size,
7493						    VM_MAP_PAGE_MASK(map)),
7494				  (vm_map_offset_t) 0,
7495				  VM_FLAGS_ANYWHERE,
7496				  VM_OBJECT_NULL,
7497				  (vm_object_offset_t) 0,
7498				  FALSE,
7499				  VM_PROT_DEFAULT,
7500				  VM_PROT_ALL,
7501				  VM_INHERIT_DEFAULT);
7502		if (kr != KERN_SUCCESS)
7503			return kr;
7504	}
7505
7506	/*
7507	 * Copyout the data from the kernel buffer to the target map.
7508	 */
7509	if (thread->map == map) {
7510
7511		/*
7512		 * If the target map is the current map, just do
7513		 * the copy.
7514		 */
7515		assert((vm_size_t) copy->size == copy->size);
7516		if (copyout(copy->cpy_kdata, *addr, (vm_size_t) copy->size)) {
7517			kr = KERN_INVALID_ADDRESS;
7518		}
7519	}
7520	else {
7521		vm_map_t oldmap;
7522
7523		/*
7524		 * If the target map is another map, assume the
7525		 * target's address space identity for the duration
7526		 * of the copy.
7527		 */
7528		vm_map_reference(map);
7529		oldmap = vm_map_switch(map);
7530
7531		assert((vm_size_t) copy->size == copy->size);
7532		if (copyout(copy->cpy_kdata, *addr, (vm_size_t) copy->size)) {
7533			vm_map_copyout_kernel_buffer_failures++;
7534			kr = KERN_INVALID_ADDRESS;
7535		}
7536
7537		(void) vm_map_switch(oldmap);
7538		vm_map_deallocate(map);
7539	}
7540
7541	if (kr != KERN_SUCCESS) {
7542		/* the copy failed, clean up */
7543		if (!overwrite) {
7544			/*
7545			 * Deallocate the space we allocated in the target map.
7546			 */
7547			(void) vm_map_remove(
7548				map,
7549				vm_map_trunc_page(*addr,
7550						  VM_MAP_PAGE_MASK(map)),
7551				vm_map_round_page((*addr +
7552						   vm_map_round_page(copy->size,
7553								     VM_MAP_PAGE_MASK(map))),
7554						  VM_MAP_PAGE_MASK(map)),
7555				VM_MAP_NO_FLAGS);
7556			*addr = 0;
7557		}
7558	} else {
		/* copy was successful, discard the copy structure */
7560		if (consume_on_success) {
7561			kfree(copy, copy->cpy_kalloc_size);
7562		}
7563	}
7564
7565	return kr;
7566}
7567
7568/*
7569 *	Macro:		vm_map_copy_insert
7570 *
7571 *	Description:
7572 *		Link a copy chain ("copy") into a map at the
7573 *		specified location (after "where").
7574 *	Side effects:
7575 *		The copy chain is destroyed.
7576 *	Warning:
7577 *		The arguments are evaluated multiple times.
7578 */
7579#define	vm_map_copy_insert(map, where, copy)				\
7580MACRO_BEGIN								\
7581	vm_map_store_copy_insert(map, where, copy);	  \
7582	zfree(vm_map_copy_zone, copy);		\
7583MACRO_END
7584
7585void
7586vm_map_copy_remap(
7587	vm_map_t	map,
7588	vm_map_entry_t	where,
7589	vm_map_copy_t	copy,
7590	vm_map_offset_t	adjustment,
7591	vm_prot_t	cur_prot,
7592	vm_prot_t	max_prot,
7593	vm_inherit_t	inheritance)
7594{
7595	vm_map_entry_t	copy_entry, new_entry;
7596
7597	for (copy_entry = vm_map_copy_first_entry(copy);
7598	     copy_entry != vm_map_copy_to_entry(copy);
7599	     copy_entry = copy_entry->vme_next) {
7600		/* get a new VM map entry for the map */
7601		new_entry = vm_map_entry_create(map,
7602						!map->hdr.entries_pageable);
7603		/* copy the "copy entry" to the new entry */
7604		vm_map_entry_copy(new_entry, copy_entry);
7605		/* adjust "start" and "end" */
7606		new_entry->vme_start += adjustment;
7607		new_entry->vme_end += adjustment;
7608		/* clear some attributes */
7609		new_entry->inheritance = inheritance;
7610		new_entry->protection = cur_prot;
7611		new_entry->max_protection = max_prot;
7612		new_entry->behavior = VM_BEHAVIOR_DEFAULT;
7613		/* take an extra reference on the entry's "object" */
7614		if (new_entry->is_sub_map) {
7615			vm_map_lock(new_entry->object.sub_map);
7616			vm_map_reference(new_entry->object.sub_map);
7617			vm_map_unlock(new_entry->object.sub_map);
7618		} else {
7619			vm_object_reference(new_entry->object.vm_object);
7620		}
7621		/* insert the new entry in the map */
7622		vm_map_store_entry_link(map, where, new_entry);
7623		/* continue inserting the "copy entries" after the new entry */
7624		where = new_entry;
7625	}
7626}
7627
7628/*
7629 *	Routine:	vm_map_copyout
7630 *
7631 *	Description:
7632 *		Copy out a copy chain ("copy") into newly-allocated
7633 *		space in the destination map.
7634 *
7635 *		If successful, consumes the copy object.
7636 *		Otherwise, the caller is responsible for it.
7637 */
7638
7639kern_return_t
7640vm_map_copyout(
7641	vm_map_t		dst_map,
7642	vm_map_address_t	*dst_addr,	/* OUT */
7643	vm_map_copy_t		copy)
7644{
7645	return vm_map_copyout_internal(dst_map, dst_addr, copy,
7646				       TRUE, /* consume_on_success */
7647				       VM_PROT_DEFAULT,
7648				       VM_PROT_ALL,
7649				       VM_INHERIT_DEFAULT);
7650}
7651
7652kern_return_t
7653vm_map_copyout_internal(
7654	vm_map_t		dst_map,
7655	vm_map_address_t	*dst_addr,	/* OUT */
7656	vm_map_copy_t		copy,
7657	boolean_t		consume_on_success,
7658	vm_prot_t		cur_protection,
7659	vm_prot_t		max_protection,
7660	vm_inherit_t		inheritance)
7661{
7662	vm_map_size_t		size;
7663	vm_map_size_t		adjustment;
7664	vm_map_offset_t		start;
7665	vm_object_offset_t	vm_copy_start;
7666	vm_map_entry_t		last;
7667	vm_map_entry_t		entry;
7668
7669	/*
7670	 *	Check for null copy object.
7671	 */
7672
7673	if (copy == VM_MAP_COPY_NULL) {
7674		*dst_addr = 0;
7675		return(KERN_SUCCESS);
7676	}
7677
7678	/*
7679	 *	Check for special copy object, created
7680	 *	by vm_map_copyin_object.
7681	 */
7682
7683	if (copy->type == VM_MAP_COPY_OBJECT) {
7684		vm_object_t 		object = copy->cpy_object;
7685		kern_return_t 		kr;
7686		vm_object_offset_t	offset;
7687
7688		offset = vm_object_trunc_page(copy->offset);
7689		size = vm_map_round_page((copy->size +
7690					  (vm_map_size_t)(copy->offset -
7691							  offset)),
7692					 VM_MAP_PAGE_MASK(dst_map));
7693		*dst_addr = 0;
7694		kr = vm_map_enter(dst_map, dst_addr, size,
7695				  (vm_map_offset_t) 0, VM_FLAGS_ANYWHERE,
7696				  object, offset, FALSE,
7697				  VM_PROT_DEFAULT, VM_PROT_ALL,
7698				  VM_INHERIT_DEFAULT);
7699		if (kr != KERN_SUCCESS)
7700			return(kr);
7701		/* Account for non-pagealigned copy object */
7702		*dst_addr += (vm_map_offset_t)(copy->offset - offset);
7703		if (consume_on_success)
7704			zfree(vm_map_copy_zone, copy);
7705		return(KERN_SUCCESS);
7706	}
7707
7708	/*
7709	 *	Check for special kernel buffer allocated
7710	 *	by new_ipc_kmsg_copyin.
7711	 */
7712
7713	if (copy->type == VM_MAP_COPY_KERNEL_BUFFER) {
7714		return vm_map_copyout_kernel_buffer(dst_map, dst_addr,
7715						    copy, FALSE,
7716						    consume_on_success);
7717	}
7718
7719
7720	/*
7721	 *	Find space for the data
7722	 */
7723
7724	vm_copy_start = vm_map_trunc_page((vm_map_size_t)copy->offset,
7725					  VM_MAP_COPY_PAGE_MASK(copy));
7726	size = vm_map_round_page((vm_map_size_t)copy->offset + copy->size,
7727				 VM_MAP_COPY_PAGE_MASK(copy))
7728		- vm_copy_start;
7729
7730
7731StartAgain: ;
7732
7733	vm_map_lock(dst_map);
7734	if( dst_map->disable_vmentry_reuse == TRUE) {
7735		VM_MAP_HIGHEST_ENTRY(dst_map, entry, start);
7736		last = entry;
7737	} else {
7738		assert(first_free_is_valid(dst_map));
7739		start = ((last = dst_map->first_free) == vm_map_to_entry(dst_map)) ?
7740		vm_map_min(dst_map) : last->vme_end;
7741		start = vm_map_round_page(start,
7742					  VM_MAP_PAGE_MASK(dst_map));
7743	}
7744
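	/*
	 * First-fit search: walk forward from the hint until we find a
	 * hole at least "size" bytes long, or run off the end of the map.
	 */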
7745	while (TRUE) {
7746		vm_map_entry_t	next = last->vme_next;
7747		vm_map_offset_t	end = start + size;
7748
7749		if ((end > dst_map->max_offset) || (end < start)) {
7750			if (dst_map->wait_for_space) {
7751				if (size <= (dst_map->max_offset - dst_map->min_offset)) {
7752					assert_wait((event_t) dst_map,
7753						    THREAD_INTERRUPTIBLE);
7754					vm_map_unlock(dst_map);
7755					thread_block(THREAD_CONTINUE_NULL);
7756					goto StartAgain;
7757				}
7758			}
7759			vm_map_unlock(dst_map);
7760			return(KERN_NO_SPACE);
7761		}
7762
7763		if ((next == vm_map_to_entry(dst_map)) ||
7764		    (next->vme_start >= end))
7765			break;
7766
7767		last = next;
7768		start = last->vme_end;
7769		start = vm_map_round_page(start,
7770					  VM_MAP_PAGE_MASK(dst_map));
7771	}
7772
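	/*
	 * "adjustment" is the delta that moves each copy entry from its
	 * address within the copy to its new address in the destination map.
	 */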
7773	adjustment = start - vm_copy_start;
7774	if (! consume_on_success) {
7775		/*
7776		 * We're not allowed to consume "copy", so we'll have to
7777		 * copy its map entries into the destination map below.
7778		 * No need to re-allocate map entries from the correct
7779		 * (pageable or not) zone, since we'll get new map entries
7780		 * during the transfer.
		 * We'll also adjust the map entries' "start" and "end"
7782		 * during the transfer, to keep "copy"'s entries consistent
7783		 * with its "offset".
7784		 */
7785		goto after_adjustments;
7786	}
7787
7788	/*
7789	 *	Since we're going to just drop the map
7790	 *	entries from the copy into the destination
7791	 *	map, they must come from the same pool.
7792	 */
7793
7794	if (copy->cpy_hdr.entries_pageable != dst_map->hdr.entries_pageable) {
7795		/*
7796		 * Mismatches occur when dealing with the default
7797		 * pager.
7798		 */
7799		zone_t		old_zone;
7800		vm_map_entry_t	next, new;
7801
7802		/*
7803		 * Find the zone that the copies were allocated from
7804		 */
7805
7806		entry = vm_map_copy_first_entry(copy);
7807
7808		/*
7809		 * Reinitialize the copy so that vm_map_copy_entry_link
7810		 * will work.
7811		 */
7812		vm_map_store_copy_reset(copy, entry);
7813		copy->cpy_hdr.entries_pageable = dst_map->hdr.entries_pageable;
7814
7815		/*
7816		 * Copy each entry.
7817		 */
7818		while (entry != vm_map_copy_to_entry(copy)) {
7819			new = vm_map_copy_entry_create(copy, !copy->cpy_hdr.entries_pageable);
7820			vm_map_entry_copy_full(new, entry);
7821			new->use_pmap = FALSE;	/* clr address space specifics */
7822			vm_map_copy_entry_link(copy,
7823					       vm_map_copy_last_entry(copy),
7824					       new);
7825			next = entry->vme_next;
7826			old_zone = entry->from_reserved_zone ? vm_map_entry_reserved_zone : vm_map_entry_zone;
7827			zfree(old_zone, entry);
7828			entry = next;
7829		}
7830	}
7831
7832	/*
7833	 *	Adjust the addresses in the copy chain, and
7834	 *	reset the region attributes.
7835	 */
7836
7837	for (entry = vm_map_copy_first_entry(copy);
7838	     entry != vm_map_copy_to_entry(copy);
7839	     entry = entry->vme_next) {
7840		if (VM_MAP_PAGE_SHIFT(dst_map) == PAGE_SHIFT) {
7841			/*
7842			 * We're injecting this copy entry into a map that
7843			 * has the standard page alignment, so clear
7844			 * "map_aligned" (which might have been inherited
7845			 * from the original map entry).
7846			 */
7847			entry->map_aligned = FALSE;
7848		}
7849
7850		entry->vme_start += adjustment;
7851		entry->vme_end += adjustment;
7852
7853		if (entry->map_aligned) {
7854			assert(VM_MAP_PAGE_ALIGNED(entry->vme_start,
7855						   VM_MAP_PAGE_MASK(dst_map)));
7856			assert(VM_MAP_PAGE_ALIGNED(entry->vme_end,
7857						   VM_MAP_PAGE_MASK(dst_map)));
7858		}
7859
7860		entry->inheritance = VM_INHERIT_DEFAULT;
7861		entry->protection = VM_PROT_DEFAULT;
7862		entry->max_protection = VM_PROT_ALL;
7863		entry->behavior = VM_BEHAVIOR_DEFAULT;
7864
7865		/*
7866		 * If the entry is now wired,
7867		 * map the pages into the destination map.
7868		 */
7869		if (entry->wired_count != 0) {
7870			register vm_map_offset_t va;
7871			vm_object_offset_t	 offset;
7872			register vm_object_t object;
7873			vm_prot_t prot;
7874			int	type_of_fault;
7875
7876			object = entry->object.vm_object;
7877			offset = entry->offset;
7878			va = entry->vme_start;
7879
7880			pmap_pageable(dst_map->pmap,
7881				      entry->vme_start,
7882				      entry->vme_end,
7883				      TRUE);
7884
7885			while (va < entry->vme_end) {
7886				register vm_page_t	m;
7887
7888				/*
7889				 * Look up the page in the object.
7890				 * Assert that the page will be found in the
7891				 * top object:
7892				 * either
7893				 *	the object was newly created by
7894				 *	vm_object_copy_slowly, and has
7895				 *	copies of all of the pages from
7896				 *	the source object
7897				 * or
7898				 *	the object was moved from the old
7899				 *	map entry; because the old map
7900				 *	entry was wired, all of the pages
7901				 *	were in the top-level object.
7902				 *	(XXX not true if we wire pages for
7903				 *	 reading)
7904				 */
7905				vm_object_lock(object);
7906
7907				m = vm_page_lookup(object, offset);
7908				if (m == VM_PAGE_NULL || !VM_PAGE_WIRED(m) ||
7909				    m->absent)
7910					panic("vm_map_copyout: wiring %p", m);
7911
7912				/*
7913				 * ENCRYPTED SWAP:
7914				 * The page is assumed to be wired here, so it
7915				 * shouldn't be encrypted.  Otherwise, we
7916				 * couldn't enter it in the page table, since
7917				 * we don't want the user to see the encrypted
7918				 * data.
7919				 */
7920				ASSERT_PAGE_DECRYPTED(m);
7921
7922				prot = entry->protection;
7923
7924				if (override_nx(dst_map, entry->alias) && prot)
7925				        prot |= VM_PROT_EXECUTE;
7926
7927				type_of_fault = DBG_CACHE_HIT_FAULT;
7928
7929				vm_fault_enter(m, dst_map->pmap, va, prot, prot,
7930					       VM_PAGE_WIRED(m), FALSE, FALSE, FALSE, NULL,
7931					       &type_of_fault);
7932
7933				vm_object_unlock(object);
7934
7935				offset += PAGE_SIZE_64;
7936				va += PAGE_SIZE;
7937			}
7938		}
7939	}
7940
7941after_adjustments:
7942
7943	/*
7944	 *	Correct the page alignment for the result
7945	 */
7946
7947	*dst_addr = start + (copy->offset - vm_copy_start);
7948
7949	/*
7950	 *	Update the hints and the map size
7951	 */
7952
7953	if (consume_on_success) {
7954		SAVE_HINT_MAP_WRITE(dst_map, vm_map_copy_last_entry(copy));
7955	} else {
7956		SAVE_HINT_MAP_WRITE(dst_map, last);
7957	}
7958
7959	dst_map->size += size;
7960
7961	/*
7962	 *	Link in the copy
7963	 */
7964
7965	if (consume_on_success) {
7966		vm_map_copy_insert(dst_map, last, copy);
7967	} else {
7968		vm_map_copy_remap(dst_map, last, copy, adjustment,
7969				  cur_protection, max_protection,
7970				  inheritance);
7971	}
7972
7973	vm_map_unlock(dst_map);
7974
7975	/*
7976	 * XXX	If wiring_required, call vm_map_pageable
7977	 */
7978
7979	return(KERN_SUCCESS);
7980}
7981
7982/*
7983 *	Routine:	vm_map_copyin
7984 *
7985 *	Description:
7986 *		see vm_map_copyin_common.  Exported via Unsupported.exports.
7987 *
7988 */
7989
7990#undef vm_map_copyin
7991
7992kern_return_t
7993vm_map_copyin(
7994	vm_map_t			src_map,
7995	vm_map_address_t	src_addr,
7996	vm_map_size_t		len,
7997	boolean_t			src_destroy,
7998	vm_map_copy_t		*copy_result)	/* OUT */
7999{
8000	return(vm_map_copyin_common(src_map, src_addr, len, src_destroy,
8001					FALSE, copy_result, FALSE));
8002}
8003
8004/*
8005 *	Routine:	vm_map_copyin_common
8006 *
8007 *	Description:
8008 *		Copy the specified region (src_addr, len) from the
8009 *		source address space (src_map), possibly removing
8010 *		the region from the source address space (src_destroy).
8011 *
8012 *	Returns:
8013 *		A vm_map_copy_t object (copy_result), suitable for
8014 *		insertion into another address space (using vm_map_copyout),
8015 *		copying over another address space region (using
8016 *		vm_map_copy_overwrite).  If the copy is unused, it
8017 *		should be destroyed (using vm_map_copy_discard).
8018 *
8019 *	In/out conditions:
8020 *		The source map should not be locked on entry.
8021 */
8022
8023typedef struct submap_map {
8024	vm_map_t	parent_map;
8025	vm_map_offset_t	base_start;
8026	vm_map_offset_t	base_end;
8027	vm_map_size_t	base_len;
8028	struct submap_map *next;
8029} submap_map_t;
8030
8031kern_return_t
8032vm_map_copyin_common(
8033	vm_map_t	src_map,
8034	vm_map_address_t src_addr,
8035	vm_map_size_t	len,
8036	boolean_t	src_destroy,
8037	__unused boolean_t	src_volatile,
8038	vm_map_copy_t	*copy_result,	/* OUT */
8039	boolean_t	use_maxprot)
8040{
8041	vm_map_entry_t	tmp_entry;	/* Result of last map lookup --
8042					 * in multi-level lookup, this
8043					 * entry contains the actual
8044					 * vm_object/offset.
8045					 */
8046	register
8047	vm_map_entry_t	new_entry = VM_MAP_ENTRY_NULL;	/* Map entry for copy */
8048
8049	vm_map_offset_t	src_start;	/* Start of current entry --
8050					 * where copy is taking place now
8051					 */
8052	vm_map_offset_t	src_end;	/* End of entire region to be
8053					 * copied */
8054	vm_map_offset_t src_base;
8055	vm_map_t	base_map = src_map;
8056	boolean_t	map_share=FALSE;
8057	submap_map_t	*parent_maps = NULL;
8058
8059	register
8060	vm_map_copy_t	copy;		/* Resulting copy */
8061	vm_map_address_t	copy_addr;
8062
8063	/*
8064	 *	Check for copies of zero bytes.
8065	 */
8066
8067	if (len == 0) {
8068		*copy_result = VM_MAP_COPY_NULL;
8069		return(KERN_SUCCESS);
8070	}
8071
8072	/*
8073	 *	Check that the end address doesn't overflow
8074	 */
8075	src_end = src_addr + len;
8076	if (src_end < src_addr)
8077		return KERN_INVALID_ADDRESS;
8078
8079	/*
8080	 * If the copy is sufficiently small, use a kernel buffer instead
8081	 * of making a virtual copy.  The theory being that the cost of
8082	 * setting up VM (and taking C-O-W faults) dominates the copy costs
8083	 * for small regions.
8084	 */
8085	if ((len < msg_ool_size_small) && !use_maxprot)
8086		return vm_map_copyin_kernel_buffer(src_map, src_addr, len,
8087						   src_destroy, copy_result);
8088
8089	/*
8090	 *	Compute (page aligned) start and end of region
8091	 */
8092	src_start = vm_map_trunc_page(src_addr,
8093				      VM_MAP_PAGE_MASK(src_map));
8094	src_end = vm_map_round_page(src_end,
8095				    VM_MAP_PAGE_MASK(src_map));
8096
8097	XPR(XPR_VM_MAP, "vm_map_copyin_common map 0x%x addr 0x%x len 0x%x dest %d\n", src_map, src_addr, len, src_destroy, 0);
8098
8099	/*
8100	 *	Allocate a header element for the list.
8101	 *
8102	 *	Use the start and end in the header to
8103	 *	remember the endpoints prior to rounding.
8104	 */
8105
8106	copy = (vm_map_copy_t) zalloc(vm_map_copy_zone);
8107	vm_map_copy_first_entry(copy) =
8108		vm_map_copy_last_entry(copy) = vm_map_copy_to_entry(copy);
8109	copy->type = VM_MAP_COPY_ENTRY_LIST;
8110	copy->cpy_hdr.nentries = 0;
8111	copy->cpy_hdr.entries_pageable = TRUE;
8112#if 00
8113	copy->cpy_hdr.page_shift = src_map->hdr.page_shift;
8114#else
8115	/*
8116	 * The copy entries can be broken down for a variety of reasons,
8117	 * so we can't guarantee that they will remain map-aligned...
8118	 * Will need to adjust the first copy_entry's "vme_start" and
8119	 * the last copy_entry's "vme_end" to be rounded to PAGE_MASK
8120	 * rather than the original map's alignment.
8121	 */
8122	copy->cpy_hdr.page_shift = PAGE_SHIFT;
8123#endif
8124
8125	vm_map_store_init( &(copy->cpy_hdr) );
8126
8127	copy->offset = src_addr;
8128	copy->size = len;
8129
8130	new_entry = vm_map_copy_entry_create(copy, !copy->cpy_hdr.entries_pageable);
8131
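	/*
	 * Common error exit: unlock the source map, drop any submap
	 * references and partially-built copy state, then return "x".
	 */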
8132#define	RETURN(x)						\
8133	MACRO_BEGIN						\
8134	vm_map_unlock(src_map);					\
8135	if(src_map != base_map)					\
8136		vm_map_deallocate(src_map);			\
8137	if (new_entry != VM_MAP_ENTRY_NULL)			\
8138		vm_map_copy_entry_dispose(copy,new_entry);	\
8139	vm_map_copy_discard(copy);				\
8140	{							\
8141		submap_map_t	*_ptr;				\
8142								\
8143		for(_ptr = parent_maps; _ptr != NULL; _ptr = parent_maps) { \
8144			parent_maps=parent_maps->next;		\
8145			if (_ptr->parent_map != base_map)	\
8146				vm_map_deallocate(_ptr->parent_map);	\
8147			kfree(_ptr, sizeof(submap_map_t));	\
8148		}						\
8149	}							\
8150	MACRO_RETURN(x);					\
8151	MACRO_END
8152
8153	/*
8154	 *	Find the beginning of the region.
8155	 */
8156
8157 	vm_map_lock(src_map);
8158
8159	if (!vm_map_lookup_entry(src_map, src_start, &tmp_entry))
8160		RETURN(KERN_INVALID_ADDRESS);
8161	if(!tmp_entry->is_sub_map) {
8162		vm_map_clip_start(src_map, tmp_entry, src_start);
8163	}
8164	/* set for later submap fix-up */
8165	copy_addr = src_start;
8166
8167	/*
8168	 *	Go through entries until we get to the end.
8169	 */
8170
8171	while (TRUE) {
8172		register
8173		vm_map_entry_t	src_entry = tmp_entry;	/* Top-level entry */
8174		vm_map_size_t	src_size;		/* Size of source
8175							 * map entry (in both
8176							 * maps)
8177							 */
8178
8179		register
8180		vm_object_t		src_object;	/* Object to copy */
8181		vm_object_offset_t	src_offset;
8182
8183		boolean_t	src_needs_copy;		/* Should source map
8184							 * be made read-only
8185							 * for copy-on-write?
8186							 */
8187
8188		boolean_t	new_entry_needs_copy;	/* Will new entry be COW? */
8189
8190		boolean_t	was_wired;		/* Was source wired? */
8191		vm_map_version_t version;		/* Version before locks
8192							 * dropped to make copy
8193							 */
8194		kern_return_t	result;			/* Return value from
8195							 * copy_strategically.
8196							 */
8197		while(tmp_entry->is_sub_map) {
8198			vm_map_size_t submap_len;
8199			submap_map_t *ptr;
8200
8201			ptr = (submap_map_t *)kalloc(sizeof(submap_map_t));
8202			ptr->next = parent_maps;
8203			parent_maps = ptr;
8204			ptr->parent_map = src_map;
8205			ptr->base_start = src_start;
8206			ptr->base_end = src_end;
8207			submap_len = tmp_entry->vme_end - src_start;
8208			if(submap_len > (src_end-src_start))
8209				submap_len = src_end-src_start;
8210			ptr->base_len = submap_len;
8211
8212			src_start -= tmp_entry->vme_start;
8213			src_start += tmp_entry->offset;
8214			src_end = src_start + submap_len;
8215			src_map = tmp_entry->object.sub_map;
8216			vm_map_lock(src_map);
8217			/* keep an outstanding reference for all maps in */
			/* the parent tree, except the base map */
8219			vm_map_reference(src_map);
8220			vm_map_unlock(ptr->parent_map);
8221			if (!vm_map_lookup_entry(
8222				    src_map, src_start, &tmp_entry))
8223				RETURN(KERN_INVALID_ADDRESS);
8224			map_share = TRUE;
8225			if(!tmp_entry->is_sub_map)
8226				vm_map_clip_start(src_map, tmp_entry, src_start);
8227			src_entry = tmp_entry;
8228		}
8229		/* we are now in the lowest level submap... */
8230
8231		if ((tmp_entry->object.vm_object != VM_OBJECT_NULL) &&
8232		    (tmp_entry->object.vm_object->phys_contiguous)) {
			/* This is not supported for now.  In the future */
			/* we will need to detect the phys_contiguous   */
			/* condition and then upgrade copy_slowly       */
			/* to do a physical copy from the device-memory */
			/* based object.  We can piggy-back off of      */
			/* the was_wired boolean to set up the          */
			/* proper handling. */
8240			RETURN(KERN_PROTECTION_FAILURE);
8241		}
8242		/*
8243		 *	Create a new address map entry to hold the result.
8244		 *	Fill in the fields from the appropriate source entries.
8245		 *	We must unlock the source map to do this if we need
8246		 *	to allocate a map entry.
8247		 */
8248		if (new_entry == VM_MAP_ENTRY_NULL) {
8249			version.main_timestamp = src_map->timestamp;
8250			vm_map_unlock(src_map);
8251
8252			new_entry = vm_map_copy_entry_create(copy, !copy->cpy_hdr.entries_pageable);
8253
8254			vm_map_lock(src_map);
8255			if ((version.main_timestamp + 1) != src_map->timestamp) {
8256				if (!vm_map_lookup_entry(src_map, src_start,
8257							 &tmp_entry)) {
8258					RETURN(KERN_INVALID_ADDRESS);
8259				}
8260				if (!tmp_entry->is_sub_map)
8261					vm_map_clip_start(src_map, tmp_entry, src_start);
8262				continue; /* restart w/ new tmp_entry */
8263			}
8264		}
8265
8266		/*
8267		 *	Verify that the region can be read.
8268		 */
8269		if (((src_entry->protection & VM_PROT_READ) == VM_PROT_NONE &&
8270		     !use_maxprot) ||
8271		    (src_entry->max_protection & VM_PROT_READ) == 0)
8272			RETURN(KERN_PROTECTION_FAILURE);
8273
8274		/*
8275		 *	Clip against the endpoints of the entire region.
8276		 */
8277
8278		vm_map_clip_end(src_map, src_entry, src_end);
8279
8280		src_size = src_entry->vme_end - src_start;
8281		src_object = src_entry->object.vm_object;
8282		src_offset = src_entry->offset;
8283		was_wired = (src_entry->wired_count != 0);
8284
8285		vm_map_entry_copy(new_entry, src_entry);
8286		new_entry->use_pmap = FALSE; /* clr address space specifics */
8287
8288		/*
8289		 *	Attempt non-blocking copy-on-write optimizations.
8290		 */
8291
8292		if (src_destroy &&
8293		    (src_object == VM_OBJECT_NULL ||
8294		     (src_object->internal && !src_object->true_share
8295		      && !map_share))) {
8296			/*
8297			 * If we are destroying the source, and the object
8298			 * is internal, we can move the object reference
8299			 * from the source to the copy.  The copy is
8300			 * copy-on-write only if the source is.
8301			 * We make another reference to the object, because
8302			 * destroying the source entry will deallocate it.
8303			 */
8304			vm_object_reference(src_object);
8305
8306			/*
			 * The copy is always unwired; vm_map_entry_copy
			 * has already set its wired count to zero.
8309			 */
8310
8311			goto CopySuccessful;
8312		}
8313
8314
8315	RestartCopy:
8316		XPR(XPR_VM_MAP, "vm_map_copyin_common src_obj 0x%x ent 0x%x obj 0x%x was_wired %d\n",
8317		    src_object, new_entry, new_entry->object.vm_object,
8318		    was_wired, 0);
8319		if ((src_object == VM_OBJECT_NULL ||
8320		     (!was_wired && !map_share && !tmp_entry->is_shared)) &&
8321		    vm_object_copy_quickly(
8322			    &new_entry->object.vm_object,
8323			    src_offset,
8324			    src_size,
8325			    &src_needs_copy,
8326			    &new_entry_needs_copy)) {
8327
8328			new_entry->needs_copy = new_entry_needs_copy;
8329
8330			/*
8331			 *	Handle copy-on-write obligations
8332			 */
8333
8334			if (src_needs_copy && !tmp_entry->needs_copy) {
8335			        vm_prot_t prot;
8336
8337				prot = src_entry->protection & ~VM_PROT_WRITE;
8338
8339				if (override_nx(src_map, src_entry->alias) && prot)
8340				        prot |= VM_PROT_EXECUTE;
8341
8342				vm_object_pmap_protect(
8343					src_object,
8344					src_offset,
8345					src_size,
8346			      		(src_entry->is_shared ?
8347					 PMAP_NULL
8348					 : src_map->pmap),
8349					src_entry->vme_start,
8350					prot);
8351
8352				tmp_entry->needs_copy = TRUE;
8353			}
8354
8355			/*
8356			 *	The map has never been unlocked, so it's safe
8357			 *	to move to the next entry rather than doing
8358			 *	another lookup.
8359			 */
8360
8361			goto CopySuccessful;
8362		}
8363
8364		/*
8365		 *	Take an object reference, so that we may
8366		 *	release the map lock(s).
8367		 */
8368
8369		assert(src_object != VM_OBJECT_NULL);
8370		vm_object_reference(src_object);
8371
8372		/*
8373		 *	Record the timestamp for later verification.
8374		 *	Unlock the map.
8375		 */
8376
8377		version.main_timestamp = src_map->timestamp;
8378		vm_map_unlock(src_map);	/* Increments timestamp once! */
8379
8380		/*
8381		 *	Perform the copy
8382		 */
8383
8384		if (was_wired) {
8385		CopySlowly:
8386			vm_object_lock(src_object);
8387			result = vm_object_copy_slowly(
8388				src_object,
8389				src_offset,
8390				src_size,
8391				THREAD_UNINT,
8392				&new_entry->object.vm_object);
8393			new_entry->offset = 0;
8394			new_entry->needs_copy = FALSE;
8395
8396		}
8397		else if (src_object->copy_strategy == MEMORY_OBJECT_COPY_SYMMETRIC &&
8398			 (tmp_entry->is_shared  || map_share)) {
8399		  	vm_object_t new_object;
8400
8401			vm_object_lock_shared(src_object);
8402			new_object = vm_object_copy_delayed(
8403				src_object,
8404				src_offset,
8405				src_size,
8406				TRUE);
8407			if (new_object == VM_OBJECT_NULL)
8408			  	goto CopySlowly;
8409
8410			new_entry->object.vm_object = new_object;
8411			new_entry->needs_copy = TRUE;
8412			result = KERN_SUCCESS;
8413
8414		} else {
8415			result = vm_object_copy_strategically(src_object,
8416							      src_offset,
8417							      src_size,
8418							      &new_entry->object.vm_object,
8419							      &new_entry->offset,
8420							      &new_entry_needs_copy);
8421
8422			new_entry->needs_copy = new_entry_needs_copy;
8423		}
8424
8425		if (result != KERN_SUCCESS &&
8426		    result != KERN_MEMORY_RESTART_COPY) {
8427			vm_map_lock(src_map);
8428			RETURN(result);
8429		}
8430
8431		/*
8432		 *	Throw away the extra reference
8433		 */
8434
8435		vm_object_deallocate(src_object);
8436
8437		/*
8438		 *	Verify that the map has not substantially
8439		 *	changed while the copy was being made.
8440		 */
8441
8442		vm_map_lock(src_map);
8443
8444		if ((version.main_timestamp + 1) == src_map->timestamp)
8445			goto VerificationSuccessful;
8446
8447		/*
8448		 *	Simple version comparison failed.
8449		 *
8450		 *	Retry the lookup and verify that the
8451		 *	same object/offset are still present.
8452		 *
8453		 *	[Note: a memory manager that colludes with
8454		 *	the calling task can detect that we have
8455		 *	cheated.  While the map was unlocked, the
8456		 *	mapping could have been changed and restored.]
8457		 */
8458
8459		if (!vm_map_lookup_entry(src_map, src_start, &tmp_entry)) {
8460			RETURN(KERN_INVALID_ADDRESS);
8461		}
8462
8463		src_entry = tmp_entry;
8464		vm_map_clip_start(src_map, src_entry, src_start);
8465
8466		if ((((src_entry->protection & VM_PROT_READ) == VM_PROT_NONE) &&
8467		     !use_maxprot) ||
8468		    ((src_entry->max_protection & VM_PROT_READ) == 0))
8469			goto VerificationFailed;
8470
8471		if (src_entry->vme_end < new_entry->vme_end) {
8472			assert(VM_MAP_PAGE_ALIGNED(src_entry->vme_end,
8473						   VM_MAP_COPY_PAGE_MASK(copy)));
8474			new_entry->vme_end = src_entry->vme_end;
8475			src_size = new_entry->vme_end - src_start;
8476		}
8477
8478		if ((src_entry->object.vm_object != src_object) ||
8479		    (src_entry->offset != src_offset) ) {
8480
8481			/*
8482			 *	Verification failed.
8483			 *
8484			 *	Start over with this top-level entry.
8485			 */
8486
8487		VerificationFailed: ;
8488
8489			vm_object_deallocate(new_entry->object.vm_object);
8490			tmp_entry = src_entry;
8491			continue;
8492		}
8493
8494		/*
8495		 *	Verification succeeded.
8496		 */
8497
8498	VerificationSuccessful: ;
8499
8500		if (result == KERN_MEMORY_RESTART_COPY)
8501			goto RestartCopy;
8502
8503		/*
8504		 *	Copy succeeded.
8505		 */
8506
8507	CopySuccessful: ;
8508
8509		/*
8510		 *	Link in the new copy entry.
8511		 */
8512
8513		vm_map_copy_entry_link(copy, vm_map_copy_last_entry(copy),
8514				       new_entry);
8515
8516		/*
8517		 *	Determine whether the entire region
8518		 *	has been copied.
8519		 */
8520		src_base = src_start;
8521		src_start = new_entry->vme_end;
8522		new_entry = VM_MAP_ENTRY_NULL;
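		/*
		 * If we've consumed the current (sub)map's range, pop back up
		 * to the parent map(s), restoring the parent-relative bounds
		 * saved on the way down.
		 */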
8523		while ((src_start >= src_end) && (src_end != 0)) {
8524			if (src_map != base_map) {
8525				submap_map_t	*ptr;
8526
8527				ptr = parent_maps;
8528				assert(ptr != NULL);
8529				parent_maps = parent_maps->next;
8530
8531				/* fix up the damage we did in that submap */
8532				vm_map_simplify_range(src_map,
8533						      src_base,
8534						      src_end);
8535
8536				vm_map_unlock(src_map);
8537				vm_map_deallocate(src_map);
8538				vm_map_lock(ptr->parent_map);
8539				src_map = ptr->parent_map;
8540				src_base = ptr->base_start;
8541				src_start = ptr->base_start + ptr->base_len;
8542				src_end = ptr->base_end;
8543				if ((src_end > src_start) &&
8544				    !vm_map_lookup_entry(
8545					    src_map, src_start, &tmp_entry))
8546					RETURN(KERN_INVALID_ADDRESS);
8547				kfree(ptr, sizeof(submap_map_t));
8548				if(parent_maps == NULL)
8549					map_share = FALSE;
8550				src_entry = tmp_entry->vme_prev;
8551			} else
8552				break;
8553		}
8554		if ((src_start >= src_end) && (src_end != 0))
8555			break;
8556
8557		/*
8558		 *	Verify that there are no gaps in the region
8559		 */
8560
8561		tmp_entry = src_entry->vme_next;
8562		if ((tmp_entry->vme_start != src_start) ||
8563		    (tmp_entry == vm_map_to_entry(src_map))) {
8564
8565			if (VM_MAP_PAGE_SHIFT(src_map) != PAGE_SHIFT &&
8566			    (vm_map_round_page(src_entry->vme_end,
8567					       VM_MAP_PAGE_MASK(src_map)) ==
8568			     src_end)) {
8569				vm_map_entry_t last_copy_entry;
8570				vm_map_offset_t adjustment;
8571
8572				/*
8573				 * This is the last entry in the range we
8574				 * want and it happens to miss a few pages
8575				 * because it is not map-aligned (must have
8576				 * been imported from a differently-aligned
8577				 * map).
8578				 * Let's say we're done, but first we have
8579				 * to compensate for the alignment adjustment
8580				 * we're about to do before returning.
8581				 */
8582
8583				last_copy_entry = vm_map_copy_last_entry(copy);
8584				assert(last_copy_entry !=
8585				       vm_map_copy_to_entry(copy));
8586				adjustment =
8587					(vm_map_round_page((copy->offset +
8588							    copy->size),
8589							   VM_MAP_PAGE_MASK(src_map)) -
8590					 vm_map_round_page((copy->offset +
8591							    copy->size),
8592							   PAGE_MASK));
8593				last_copy_entry->vme_end += adjustment;
8594				last_copy_entry->map_aligned = FALSE;
8595				/* ... and we're done */
8596				break;
8597			}
8598
8599			RETURN(KERN_INVALID_ADDRESS);
8600		}
8601	}
8602
8603	/*
8604	 * If the source should be destroyed, do it now, since the
8605	 * copy was successful.
8606	 */
8607	if (src_destroy) {
8608		(void) vm_map_delete(
8609			src_map,
8610			vm_map_trunc_page(src_addr,
8611					  VM_MAP_PAGE_MASK(src_map)),
8612			src_end,
8613			((src_map == kernel_map) ?
8614			 VM_MAP_REMOVE_KUNWIRE :
8615			 VM_MAP_NO_FLAGS),
8616			VM_MAP_NULL);
8617	} else {
8618		/* fix up the damage we did in the base map */
8619		vm_map_simplify_range(
8620			src_map,
8621			vm_map_trunc_page(src_addr,
8622					  VM_MAP_PAGE_MASK(src_map)),
8623			vm_map_round_page(src_end,
8624					  VM_MAP_PAGE_MASK(src_map)));
8625	}
8626
8627	vm_map_unlock(src_map);
8628
8629	if (VM_MAP_PAGE_SHIFT(src_map) != PAGE_SHIFT) {
8630		assert(VM_MAP_COPY_PAGE_MASK(copy) == PAGE_MASK);
8631
8632		/* adjust alignment of first copy_entry's "vme_start" */
8633		tmp_entry = vm_map_copy_first_entry(copy);
8634		if (tmp_entry != vm_map_copy_to_entry(copy)) {
8635			vm_map_offset_t adjustment;
8636			adjustment =
8637				(vm_map_trunc_page(copy->offset,
8638						   PAGE_MASK) -
8639				 vm_map_trunc_page(copy->offset,
8640						   VM_MAP_PAGE_MASK(src_map)));
8641			if (adjustment) {
8642				assert(page_aligned(adjustment));
8643				assert(adjustment < VM_MAP_PAGE_SIZE(src_map));
8644				tmp_entry->vme_start += adjustment;
8645				tmp_entry->offset += adjustment;
8646				copy_addr += adjustment;
8647				assert(tmp_entry->vme_start < tmp_entry->vme_end);
8648			}
8649		}
8650
8651		/* adjust alignment of last copy_entry's "vme_end" */
8652		tmp_entry = vm_map_copy_last_entry(copy);
8653		if (tmp_entry != vm_map_copy_to_entry(copy)) {
8654			vm_map_offset_t adjustment;
8655			adjustment =
8656				(vm_map_round_page((copy->offset +
8657						    copy->size),
8658						   VM_MAP_PAGE_MASK(src_map)) -
8659				 vm_map_round_page((copy->offset +
8660						    copy->size),
8661						   PAGE_MASK));
8662			if (adjustment) {
8663				assert(page_aligned(adjustment));
8664				assert(adjustment < VM_MAP_PAGE_SIZE(src_map));
8665				tmp_entry->vme_end -= adjustment;
8666				assert(tmp_entry->vme_start < tmp_entry->vme_end);
8667			}
8668		}
8669	}
8670
8671	/* Fix-up start and end points in copy.  This is necessary */
8672	/* when the various entries in the copy object were picked */
8673	/* up from different sub-maps */
8674
8675	tmp_entry = vm_map_copy_first_entry(copy);
8676	while (tmp_entry != vm_map_copy_to_entry(copy)) {
8677		assert(VM_MAP_PAGE_ALIGNED(
8678			       copy_addr + (tmp_entry->vme_end -
8679					    tmp_entry->vme_start),
8680			       VM_MAP_COPY_PAGE_MASK(copy)));
8681		assert(VM_MAP_PAGE_ALIGNED(
8682			       copy_addr,
8683			       VM_MAP_COPY_PAGE_MASK(copy)));
8684
8685		/*
8686		 * The copy_entries will be injected directly into the
8687		 * destination map and might not be "map aligned" there...
8688		 */
8689		tmp_entry->map_aligned = FALSE;
8690
8691		tmp_entry->vme_end = copy_addr +
8692			(tmp_entry->vme_end - tmp_entry->vme_start);
8693		tmp_entry->vme_start = copy_addr;
8694		assert(tmp_entry->vme_start < tmp_entry->vme_end);
8695		copy_addr += tmp_entry->vme_end - tmp_entry->vme_start;
8696		tmp_entry = (struct vm_map_entry *)tmp_entry->vme_next;
8697	}
8698
8699	*copy_result = copy;
8700	return(KERN_SUCCESS);
8701
8702#undef	RETURN
8703}
8704
8705kern_return_t
8706vm_map_copy_extract(
8707	vm_map_t		src_map,
8708	vm_map_address_t	src_addr,
8709	vm_map_size_t		len,
8710	vm_map_copy_t		*copy_result,	/* OUT */
8711	vm_prot_t		*cur_prot,	/* OUT */
8712	vm_prot_t		*max_prot)
8713{
8714	vm_map_offset_t	src_start, src_end;
8715	vm_map_copy_t	copy;
8716	kern_return_t	kr;
8717
8718	/*
8719	 *	Check for copies of zero bytes.
8720	 */
8721
8722	if (len == 0) {
8723		*copy_result = VM_MAP_COPY_NULL;
8724		return(KERN_SUCCESS);
8725	}
8726
8727	/*
8728	 *	Check that the end address doesn't overflow
8729	 */
8730	src_end = src_addr + len;
8731	if (src_end < src_addr)
8732		return KERN_INVALID_ADDRESS;
8733
8734	/*
8735	 *	Compute (page aligned) start and end of region
8736	 */
8737	src_start = vm_map_trunc_page(src_addr, PAGE_MASK);
8738	src_end = vm_map_round_page(src_end, PAGE_MASK);
8739
8740	/*
8741	 *	Allocate a header element for the list.
8742	 *
8743	 *	Use the start and end in the header to
8744	 *	remember the endpoints prior to rounding.
8745	 */
8746
8747	copy = (vm_map_copy_t) zalloc(vm_map_copy_zone);
8748	vm_map_copy_first_entry(copy) =
8749		vm_map_copy_last_entry(copy) = vm_map_copy_to_entry(copy);
8750	copy->type = VM_MAP_COPY_ENTRY_LIST;
8751	copy->cpy_hdr.nentries = 0;
8752	copy->cpy_hdr.entries_pageable = TRUE;
8753
8754	vm_map_store_init(&copy->cpy_hdr);
8755
8756	copy->offset = 0;
8757	copy->size = len;
8758
8759	kr = vm_map_remap_extract(src_map,
8760				  src_addr,
8761				  len,
8762				  FALSE, /* copy */
8763				  &copy->cpy_hdr,
8764				  cur_prot,
8765				  max_prot,
8766				  VM_INHERIT_SHARE,
8767				  TRUE); /* pageable */
8768	if (kr != KERN_SUCCESS) {
8769		vm_map_copy_discard(copy);
8770		return kr;
8771	}
8772
8773	*copy_result = copy;
8774	return KERN_SUCCESS;
8775}
8776
8777/*
8778 *	vm_map_copyin_object:
8779 *
8780 *	Create a copy object from an object.
8781 *	Our caller donates an object reference.
8782 */
8783
8784kern_return_t
8785vm_map_copyin_object(
8786	vm_object_t		object,
8787	vm_object_offset_t	offset,	/* offset of region in object */
8788	vm_object_size_t	size,	/* size of region in object */
8789	vm_map_copy_t	*copy_result)	/* OUT */
8790{
8791	vm_map_copy_t	copy;		/* Resulting copy */
8792
8793	/*
8794	 *	We drop the object into a special copy object
8795	 *	that contains the object directly.
8796	 */
8797
8798	copy = (vm_map_copy_t) zalloc(vm_map_copy_zone);
8799	copy->type = VM_MAP_COPY_OBJECT;
8800	copy->cpy_object = object;
8801	copy->offset = offset;
8802	copy->size = size;
8803
8804	*copy_result = copy;
8805	return(KERN_SUCCESS);
8806}
8807
8808static void
8809vm_map_fork_share(
8810	vm_map_t	old_map,
8811	vm_map_entry_t	old_entry,
8812	vm_map_t	new_map)
8813{
8814	vm_object_t 	object;
8815	vm_map_entry_t 	new_entry;
8816
8817	/*
8818	 *	New sharing code.  New map entry
8819	 *	references original object.  Internal
8820	 *	objects use asynchronous copy algorithm for
8821	 *	future copies.  First make sure we have
8822	 *	the right object.  If we need a shadow,
8823	 *	or someone else already has one, then
8824	 *	make a new shadow and share it.
8825	 */
8826
8827	object = old_entry->object.vm_object;
8828	if (old_entry->is_sub_map) {
8829		assert(old_entry->wired_count == 0);
8830#ifndef NO_NESTED_PMAP
8831		if(old_entry->use_pmap) {
8832			kern_return_t	result;
8833
8834			result = pmap_nest(new_map->pmap,
8835					   (old_entry->object.sub_map)->pmap,
8836					   (addr64_t)old_entry->vme_start,
8837					   (addr64_t)old_entry->vme_start,
8838					   (uint64_t)(old_entry->vme_end - old_entry->vme_start));
8839			if(result)
8840				panic("vm_map_fork_share: pmap_nest failed!");
8841		}
8842#endif	/* NO_NESTED_PMAP */
8843	} else if (object == VM_OBJECT_NULL) {
8844		object = vm_object_allocate((vm_map_size_t)(old_entry->vme_end -
8845							    old_entry->vme_start));
8846		old_entry->offset = 0;
8847		old_entry->object.vm_object = object;
8848		assert(!old_entry->needs_copy);
8849	} else if (object->copy_strategy !=
8850		   MEMORY_OBJECT_COPY_SYMMETRIC) {
8851
8852		/*
8853		 *	We are already using an asymmetric
8854		 *	copy, and therefore we already have
8855		 *	the right object.
8856		 */
8857
8858		assert(! old_entry->needs_copy);
8859	}
8860	else if (old_entry->needs_copy ||	/* case 1 */
8861		 object->shadowed ||		/* case 2 */
8862		 (!object->true_share && 	/* case 3 */
8863		  !old_entry->is_shared &&
8864		  (object->vo_size >
8865		   (vm_map_size_t)(old_entry->vme_end -
8866				   old_entry->vme_start)))) {
8867
8868		/*
8869		 *	We need to create a shadow.
8870		 *	There are three cases here.
8871		 *	In the first case, we need to
8872		 *	complete a deferred symmetrical
8873		 *	copy that we participated in.
8874		 *	In the second and third cases,
8875		 *	we need to create the shadow so
8876		 *	that changes that we make to the
8877		 *	object do not interfere with
8878		 *	any symmetrical copies which
8879		 *	have occured (case 2) or which
8880		 *	might occur (case 3).
8881		 *
8882		 *	The first case is when we had
8883		 *	deferred shadow object creation
8884		 *	via the entry->needs_copy mechanism.
8885		 *	This mechanism only works when
8886		 *	only one entry points to the source
8887		 *	object, and we are about to create
8888		 *	a second entry pointing to the
8889		 *	same object. The problem is that
8890		 *	there is no way of mapping from
8891		 *	an object to the entries pointing
8892		 *	to it. (Deferred shadow creation
8893		 *	works with one entry because occurs
8894		 *	at fault time, and we walk from the
8895		 *	entry to the object when handling
8896		 *	the fault.)
8897		 *
8898		 *	The second case is when the object
8899		 *	to be shared has already been copied
8900		 *	with a symmetric copy, but we point
8901		 *	directly to the object without
8902		 *	needs_copy set in our entry. (This
8903		 *	can happen because different ranges
8904		 *	of an object can be pointed to by
8905		 *	different entries. In particular,
8906		 *	a single entry pointing to an object
8907		 *	can be split by a call to vm_inherit,
8908		 *	which, combined with task_create, can
8909		 *	result in the different entries
8910		 *	having different needs_copy values.)
8911		 *	The shadowed flag in the object allows
8912		 *	us to detect this case. The problem
8913		 *	with this case is that if this object
8914		 *	has or will have shadows, then we
8915		 *	must not perform an asymmetric copy
8916		 *	of this object, since such a copy
8917		 *	allows the object to be changed, which
8918		 *	will break the previous symmetrical
8919		 *	copies (which rely upon the object
8920		 *	not changing). In a sense, the shadowed
8921		 *	flag says "don't change this object".
8922		 *	We fix this by creating a shadow
8923		 *	object for this object, and sharing
8924		 *	that. This works because we are free
8925		 *	to change the shadow object (and thus
8926		 *	to use an asymmetric copy strategy);
8927		 *	this is also semantically correct,
8928		 *	since this object is temporary, and
8929		 *	therefore a copy of the object is
8930		 *	as good as the object itself. (This
8931		 *	is not true for permanent objects,
8932		 *	since the pager needs to see changes,
8933		 *	which won't happen if the changes
8934		 *	are made to a copy.)
8935		 *
8936		 *	The third case is when the object
8937		 *	to be shared has parts sticking
8938		 *	outside of the entry we're working
8939		 *	with, and thus may in the future
8940		 *	be subject to a symmetrical copy.
8941		 *	(This is a preemptive version of
8942		 *	case 2.)
8943		 */
8944		vm_object_shadow(&old_entry->object.vm_object,
8945				 &old_entry->offset,
8946				 (vm_map_size_t) (old_entry->vme_end -
8947						  old_entry->vme_start));
8948
8949		/*
8950		 *	If we're making a shadow for other than
8951		 *	copy on write reasons, then we have
8952		 *	to remove write permission.
8953		 */
8954
8955		if (!old_entry->needs_copy &&
8956		    (old_entry->protection & VM_PROT_WRITE)) {
8957		        vm_prot_t prot;
8958
8959			prot = old_entry->protection & ~VM_PROT_WRITE;
8960
8961			if (override_nx(old_map, old_entry->alias) && prot)
8962			        prot |= VM_PROT_EXECUTE;
8963
8964			if (old_map->mapped_in_other_pmaps) {
8965				vm_object_pmap_protect(
8966					old_entry->object.vm_object,
8967					old_entry->offset,
8968					(old_entry->vme_end -
8969					 old_entry->vme_start),
8970					PMAP_NULL,
8971					old_entry->vme_start,
8972					prot);
8973			} else {
8974				pmap_protect(old_map->pmap,
8975					     old_entry->vme_start,
8976					     old_entry->vme_end,
8977					     prot);
8978			}
8979		}
8980
8981		old_entry->needs_copy = FALSE;
8982		object = old_entry->object.vm_object;
8983	}
8984
8985
8986	/*
8987	 *	If object was using a symmetric copy strategy,
8988	 *	change its copy strategy to the default
8989	 *	asymmetric copy strategy, which is copy_delay
8990	 *	in the non-norma case and copy_call in the
8991	 *	norma case. Bump the reference count for the
8992	 *	new entry.
8993	 */
8994
8995	if(old_entry->is_sub_map) {
8996		vm_map_lock(old_entry->object.sub_map);
8997		vm_map_reference(old_entry->object.sub_map);
8998		vm_map_unlock(old_entry->object.sub_map);
8999	} else {
9000		vm_object_lock(object);
9001		vm_object_reference_locked(object);
9002		if (object->copy_strategy == MEMORY_OBJECT_COPY_SYMMETRIC) {
9003			object->copy_strategy = MEMORY_OBJECT_COPY_DELAY;
9004		}
9005		vm_object_unlock(object);
9006	}
9007
9008	/*
9009	 *	Clone the entry, using object ref from above.
9010	 *	Mark both entries as shared.
9011	 */
9012
9013	new_entry = vm_map_entry_create(new_map, FALSE); /* Never the kernel
9014							  * map or descendants */
9015	vm_map_entry_copy(new_entry, old_entry);
9016	old_entry->is_shared = TRUE;
9017	new_entry->is_shared = TRUE;
9018
9019	/*
9020	 *	Insert the entry into the new map -- we
9021	 *	know we're inserting at the end of the new
9022	 *	map.
9023	 */
9024
9025	vm_map_store_entry_link(new_map, vm_map_last_entry(new_map), new_entry);
9026
9027	/*
9028	 *	Update the physical map
9029	 */
9030
9031	if (old_entry->is_sub_map) {
9032		/* Bill Angell pmap support goes here */
9033	} else {
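		/*
		 * Give the pmap layer a chance to copy any existing
		 * translations for the shared range into the child's pmap
		 * (this may be a no-op, depending on the architecture).
		 */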
9034		pmap_copy(new_map->pmap, old_map->pmap, new_entry->vme_start,
9035			  old_entry->vme_end - old_entry->vme_start,
9036			  old_entry->vme_start);
9037	}
9038}
9039
9040static boolean_t
9041vm_map_fork_copy(
9042	vm_map_t	old_map,
9043	vm_map_entry_t	*old_entry_p,
9044	vm_map_t	new_map)
9045{
9046	vm_map_entry_t old_entry = *old_entry_p;
9047	vm_map_size_t entry_size = old_entry->vme_end - old_entry->vme_start;
9048	vm_map_offset_t start = old_entry->vme_start;
9049	vm_map_copy_t copy;
9050	vm_map_entry_t last = vm_map_last_entry(new_map);
9051
9052	vm_map_unlock(old_map);
9053	/*
9054	 *	Use maxprot version of copyin because we
9055	 *	care about whether this memory can ever
9056	 *	be accessed, not just whether it's accessible
9057	 *	right now.
9058	 */
9059	if (vm_map_copyin_maxprot(old_map, start, entry_size, FALSE, &copy)
9060	    != KERN_SUCCESS) {
9061		/*
9062		 *	The map might have changed while it
9063		 *	was unlocked, check it again.  Skip
9064		 *	any blank space or permanently
9065		 *	unreadable region.
9066		 */
9067		vm_map_lock(old_map);
9068		if (!vm_map_lookup_entry(old_map, start, &last) ||
9069		    (last->max_protection & VM_PROT_READ) == VM_PROT_NONE) {
9070			last = last->vme_next;
9071		}
9072		*old_entry_p = last;
9073
9074		/*
9075		 * XXX	For some error returns, want to
9076		 * XXX	skip to the next element.  Note
9077		 *	that INVALID_ADDRESS and
9078		 *	PROTECTION_FAILURE are handled above.
9079		 */
9080
9081		return FALSE;
9082	}
9083
9084	/*
9085	 *	Insert the copy into the new map
9086	 */
9087
9088	vm_map_copy_insert(new_map, last, copy);
9089
9090	/*
9091	 *	Pick up the traversal at the end of
9092	 *	the copied region.
9093	 */
9094
9095	vm_map_lock(old_map);
9096	start += entry_size;
9097	if (! vm_map_lookup_entry(old_map, start, &last)) {
9098		last = last->vme_next;
9099	} else {
9100		if (last->vme_start == start) {
9101			/*
9102			 * No need to clip here and we don't
9103			 * want to cause any unnecessary
9104			 * unnesting...
9105			 */
9106		} else {
9107			vm_map_clip_start(old_map, last, start);
9108		}
9109	}
9110	*old_entry_p = last;
9111
9112	return TRUE;
9113}
9114
9115/*
9116 *	vm_map_fork:
9117 *
9118 *	Create and return a new map based on the old
9119 *	map, according to the inheritance values on the
9120 *	regions in that map.
9121 *
9122 *	The source map must not be locked.
9123 */
9124vm_map_t
9125vm_map_fork(
9126	ledger_t	ledger,
9127	vm_map_t	old_map)
9128{
9129	pmap_t		new_pmap;
9130	vm_map_t	new_map;
9131	vm_map_entry_t	old_entry;
9132	vm_map_size_t	new_size = 0, entry_size;
9133	vm_map_entry_t	new_entry;
9134	boolean_t	src_needs_copy;
9135	boolean_t	new_entry_needs_copy;
9136
9137	new_pmap = pmap_create(ledger, (vm_map_size_t) 0,
9138#if defined(__i386__) || defined(__x86_64__)
9139			       old_map->pmap->pm_task_map != TASK_MAP_32BIT
9140#else
9141#error Unknown architecture.
9142#endif
9143			       );
9144
9145	vm_map_reference_swap(old_map);
9146	vm_map_lock(old_map);
9147
9148	new_map = vm_map_create(new_pmap,
9149				old_map->min_offset,
9150				old_map->max_offset,
9151				old_map->hdr.entries_pageable);
9152	/* inherit the parent map's page size */
9153	vm_map_set_page_shift(new_map, VM_MAP_PAGE_SHIFT(old_map));
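	/*
	 * Walk the parent's entries and handle each one according to its
	 * inheritance attribute: none (skip), share, or copy.
	 */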
9154	for (
9155		old_entry = vm_map_first_entry(old_map);
9156		old_entry != vm_map_to_entry(old_map);
9157		) {
9158
9159		entry_size = old_entry->vme_end - old_entry->vme_start;
9160
9161		switch (old_entry->inheritance) {
9162		case VM_INHERIT_NONE:
9163			break;
9164
9165		case VM_INHERIT_SHARE:
9166			vm_map_fork_share(old_map, old_entry, new_map);
9167			new_size += entry_size;
9168			break;
9169
9170		case VM_INHERIT_COPY:
9171
9172			/*
9173			 *	Inline the copy_quickly case;
9174			 *	upon failure, fall back on call
9175			 *	to vm_map_fork_copy.
9176			 */
9177
9178			if(old_entry->is_sub_map)
9179				break;
9180			if ((old_entry->wired_count != 0) ||
9181			    ((old_entry->object.vm_object != NULL) &&
9182			     (old_entry->object.vm_object->true_share))) {
9183				goto slow_vm_map_fork_copy;
9184			}
9185
9186			new_entry = vm_map_entry_create(new_map, FALSE); /* never the kernel map or descendants */
9187			vm_map_entry_copy(new_entry, old_entry);
9188			/* clear address space specifics */
9189			new_entry->use_pmap = FALSE;
9190
9191			if (! vm_object_copy_quickly(
9192				    &new_entry->object.vm_object,
9193				    old_entry->offset,
9194				    (old_entry->vme_end -
9195				     old_entry->vme_start),
9196				    &src_needs_copy,
9197				    &new_entry_needs_copy)) {
9198				vm_map_entry_dispose(new_map, new_entry);
9199				goto slow_vm_map_fork_copy;
9200			}
9201
9202			/*
9203			 *	Handle copy-on-write obligations
9204			 */
9205
9206			if (src_needs_copy && !old_entry->needs_copy) {
9207			        vm_prot_t prot;
9208
9209				prot = old_entry->protection & ~VM_PROT_WRITE;
9210
9211				if (override_nx(old_map, old_entry->alias) && prot)
9212				        prot |= VM_PROT_EXECUTE;
9213
9214				vm_object_pmap_protect(
9215					old_entry->object.vm_object,
9216					old_entry->offset,
9217					(old_entry->vme_end -
9218					 old_entry->vme_start),
9219					((old_entry->is_shared
9220					  || old_map->mapped_in_other_pmaps)
9221					 ? PMAP_NULL :
9222					 old_map->pmap),
9223					old_entry->vme_start,
9224					prot);
9225
9226				old_entry->needs_copy = TRUE;
9227			}
9228			new_entry->needs_copy = new_entry_needs_copy;
9229
9230			/*
9231			 *	Insert the entry at the end
9232			 *	of the map.
9233			 */
9234
9235			vm_map_store_entry_link(new_map, vm_map_last_entry(new_map),
9236					  new_entry);
9237			new_size += entry_size;
9238			break;
9239
9240		slow_vm_map_fork_copy:
9241			if (vm_map_fork_copy(old_map, &old_entry, new_map)) {
9242				new_size += entry_size;
9243			}
9244			continue;
9245		}
9246		old_entry = old_entry->vme_next;
9247	}
9248
9249	new_map->size = new_size;
9250	vm_map_unlock(old_map);
9251	vm_map_deallocate(old_map);
9252
9253	return(new_map);
9254}
9255
9256/*
9257 * vm_map_exec:
9258 *
9259 * 	Setup the "new_map" with the proper execution environment according
9260 *	to the type of executable (platform, 64bit, chroot environment).
9261 *	Map the comm page and shared region, etc...
9262 */
9263kern_return_t
9264vm_map_exec(
9265	vm_map_t	new_map,
9266	task_t		task,
9267	void		*fsroot,
9268	cpu_type_t	cpu)
9269{
9270	SHARED_REGION_TRACE_DEBUG(
9271		("shared_region: task %p: vm_map_exec(%p,%p,%p,0x%x): ->\n",
9272		 current_task(), new_map, task, fsroot, cpu));
9273	(void) vm_commpage_enter(new_map, task);
9274	(void) vm_shared_region_enter(new_map, task, fsroot, cpu);
9275	SHARED_REGION_TRACE_DEBUG(
9276		("shared_region: task %p: vm_map_exec(%p,%p,%p,0x%x): <-\n",
9277		 current_task(), new_map, task, fsroot, cpu));
9278	return KERN_SUCCESS;
9279}
9280
9281/*
9282 *	vm_map_lookup_locked:
9283 *
9284 *	Finds the VM object, offset, and
9285 *	protection for a given virtual address in the
9286 *	specified map, assuming a page fault of the
9287 *	type specified.
9288 *
9289 *	Returns the (object, offset, protection) for
9290 *	this address, whether it is wired down, and whether
9291 *	this map has the only reference to the data in question.
9292 *	In order to later verify this lookup, a "version"
9293 *	is returned.
9294 *
9295 *	The map MUST be locked by the caller and WILL be
9296 *	locked on exit.  In order to guarantee the
9297 *	existence of the returned object, it is returned
9298 *	locked.
9299 *
9300 *	If a lookup is requested with "write protection"
9301 *	specified, the map may be changed to perform virtual
9302 *	copying operations, although the data referenced will
9303 *	remain the same.
9304 */
9305kern_return_t
9306vm_map_lookup_locked(
9307	vm_map_t		*var_map,	/* IN/OUT */
9308	vm_map_offset_t		vaddr,
9309	vm_prot_t		fault_type,
9310	int			object_lock_type,
9311	vm_map_version_t	*out_version,	/* OUT */
9312	vm_object_t		*object,	/* OUT */
9313	vm_object_offset_t	*offset,	/* OUT */
9314	vm_prot_t		*out_prot,	/* OUT */
9315	boolean_t		*wired,		/* OUT */
9316	vm_object_fault_info_t	fault_info,	/* OUT */
9317	vm_map_t		*real_map)
9318{
9319	vm_map_entry_t			entry;
9320	register vm_map_t		map = *var_map;
9321	vm_map_t			old_map = *var_map;
9322	vm_map_t			cow_sub_map_parent = VM_MAP_NULL;
9323	vm_map_offset_t			cow_parent_vaddr = 0;
9324	vm_map_offset_t			old_start = 0;
9325	vm_map_offset_t			old_end = 0;
9326	register vm_prot_t		prot;
9327	boolean_t			mask_protections;
9328	vm_prot_t			original_fault_type;
9329
9330	/*
	 * VM_PROT_IS_MASK means that the caller wants us to use "fault_type"
9332	 * as a mask against the mapping's actual protections, not as an
9333	 * absolute value.
9334	 */
9335	mask_protections = (fault_type & VM_PROT_IS_MASK) ? TRUE : FALSE;
9336	fault_type &= ~VM_PROT_IS_MASK;
9337	original_fault_type = fault_type;
9338
9339	*real_map = map;
9340
9341RetryLookup:
9342	fault_type = original_fault_type;
9343
9344	/*
9345	 *	If the map has an interesting hint, try it before calling
9346	 *	full blown lookup routine.
9347	 */
9348	entry = map->hint;
9349
9350	if ((entry == vm_map_to_entry(map)) ||
9351	    (vaddr < entry->vme_start) || (vaddr >= entry->vme_end)) {
9352		vm_map_entry_t	tmp_entry;
9353
9354		/*
9355		 *	Entry was either not a valid hint, or the vaddr
9356		 *	was not contained in the entry, so do a full lookup.
9357		 */
9358		if (!vm_map_lookup_entry(map, vaddr, &tmp_entry)) {
9359			if((cow_sub_map_parent) && (cow_sub_map_parent != map))
9360				vm_map_unlock(cow_sub_map_parent);
9361			if((*real_map != map)
9362			   && (*real_map != cow_sub_map_parent))
9363				vm_map_unlock(*real_map);
9364			return KERN_INVALID_ADDRESS;
9365		}
9366
9367		entry = tmp_entry;
9368	}
9369	if(map == old_map) {
9370		old_start = entry->vme_start;
9371		old_end = entry->vme_end;
9372	}
9373
9374	/*
9375	 *	Handle submaps.  Drop lock on upper map, submap is
9376	 *	returned locked.
9377	 */
9378
9379submap_recurse:
9380	if (entry->is_sub_map) {
9381		vm_map_offset_t		local_vaddr;
9382		vm_map_offset_t		end_delta;
9383		vm_map_offset_t		start_delta;
9384		vm_map_entry_t		submap_entry;
9385		boolean_t		mapped_needs_copy=FALSE;
9386
9387		local_vaddr = vaddr;
9388
9389		if ((entry->use_pmap && !(fault_type & VM_PROT_WRITE))) {
9390			/* if real_map equals map we unlock below */
9391			if ((*real_map != map) &&
9392			    (*real_map != cow_sub_map_parent))
9393				vm_map_unlock(*real_map);
9394			*real_map = entry->object.sub_map;
9395		}
9396
9397		if(entry->needs_copy && (fault_type & VM_PROT_WRITE)) {
9398			if (!mapped_needs_copy) {
9399				if (vm_map_lock_read_to_write(map)) {
9400					vm_map_lock_read(map);
9401					*real_map = map;
9402					goto RetryLookup;
9403				}
9404				vm_map_lock_read(entry->object.sub_map);
9405				*var_map = entry->object.sub_map;
9406				cow_sub_map_parent = map;
9407				/* reset base to map before cow object */
9408				/* this is the map which will accept   */
9409				/* the new cow object */
9410				old_start = entry->vme_start;
9411				old_end = entry->vme_end;
9412				cow_parent_vaddr = vaddr;
9413				mapped_needs_copy = TRUE;
9414			} else {
9415				vm_map_lock_read(entry->object.sub_map);
9416				*var_map = entry->object.sub_map;
9417				if((cow_sub_map_parent != map) &&
9418				   (*real_map != map))
9419					vm_map_unlock(map);
9420			}
9421		} else {
9422			vm_map_lock_read(entry->object.sub_map);
9423			*var_map = entry->object.sub_map;
9424			/* leave the map locked if it is the  */
9425			/* target cow sub_map above; otherwise */
9426			/* just follow the maps down to the   */
9427			/* object.  Here we unlock knowing we */
9428			/* are not revisiting the map.        */
9429			if((*real_map != map) && (map != cow_sub_map_parent))
9430				vm_map_unlock_read(map);
9431		}
9432
9433		map = *var_map;
9434
9435		/* calculate the offset in the submap for vaddr */
9436		local_vaddr = (local_vaddr - entry->vme_start) + entry->offset;
9437
9438	RetrySubMap:
9439		if(!vm_map_lookup_entry(map, local_vaddr, &submap_entry)) {
9440			if((cow_sub_map_parent) && (cow_sub_map_parent != map)){
9441				vm_map_unlock(cow_sub_map_parent);
9442			}
9443			if((*real_map != map)
9444			   && (*real_map != cow_sub_map_parent)) {
9445				vm_map_unlock(*real_map);
9446			}
9447			*real_map = map;
9448			return KERN_INVALID_ADDRESS;
9449		}
9450
9451		/* find the attenuated shadow of the underlying object */
9452		/* on our target map */
9453
9454		/* In English: the submap object may extend beyond the    */
9455		/* region mapped by the entry, or may fill only a portion */
9456		/* of it.  For our purposes, we only care whether the     */
9457		/* object fills the entry.  If it doesn't, the area that  */
9458		/* will ultimately be clipped in the top map only needs   */
9459		/* to be as big as the portion of the underlying entry    */
9460		/* which is actually mapped.                               */
9461		start_delta = submap_entry->vme_start > entry->offset ?
9462			submap_entry->vme_start - entry->offset : 0;
9463
9464		end_delta =
9465			(entry->offset + start_delta + (old_end - old_start)) <=
9466			submap_entry->vme_end ?
9467			0 : (entry->offset +
9468			     (old_end - old_start))
9469			- submap_entry->vme_end;
9470
9471		old_start += start_delta;
9472		old_end -= end_delta;
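		/*
		 * Worked example (illustrative, hypothetical numbers): suppose
		 * the parent entry maps [old_start, old_end) of size 0x4000
		 * with entry->offset == 0x1000, and the submap entry covers
		 * only [0x2000, 0x4000) in the submap.  Then:
		 *	start_delta = 0x2000 - 0x1000 = 0x1000
		 *	end_delta   = (0x1000 + 0x4000) - 0x4000 = 0x1000
		 * so old_start/old_end are narrowed by 0x1000 on each side,
		 * leaving exactly the 0x2000 bytes of the parent range that
		 * are actually backed by this submap entry.
		 */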
9473
9474		if(submap_entry->is_sub_map) {
9475			entry = submap_entry;
9476			vaddr = local_vaddr;
9477			goto submap_recurse;
9478		}
9479
9480		if(((fault_type & VM_PROT_WRITE) && cow_sub_map_parent)) {
9481
9482			vm_object_t	sub_object, copy_object;
9483			vm_object_offset_t copy_offset;
9484			vm_map_offset_t	local_start;
9485			vm_map_offset_t	local_end;
9486			boolean_t		copied_slowly = FALSE;
9487
9488			if (vm_map_lock_read_to_write(map)) {
9489				vm_map_lock_read(map);
9490				old_start -= start_delta;
9491				old_end += end_delta;
9492				goto RetrySubMap;
9493			}
9494
9495
9496			sub_object = submap_entry->object.vm_object;
9497			if (sub_object == VM_OBJECT_NULL) {
9498				sub_object =
9499					vm_object_allocate(
9500						(vm_map_size_t)
9501						(submap_entry->vme_end -
9502						 submap_entry->vme_start));
9503				submap_entry->object.vm_object = sub_object;
9504				submap_entry->offset = 0;
9505			}
9506			local_start =  local_vaddr -
9507				(cow_parent_vaddr - old_start);
9508			local_end = local_vaddr +
9509				(old_end - cow_parent_vaddr);
9510			vm_map_clip_start(map, submap_entry, local_start);
9511			vm_map_clip_end(map, submap_entry, local_end);
9512			/* unnesting was done in vm_map_clip_start/end() */
9513			assert(!submap_entry->use_pmap);
9514
9515			/* This is the COW case; let's connect */
9516			/* an entry in our space to the underlying */
9517			/* object in the submap, bypassing the  */
9518			/* submap. */
9519
9520
9521			if(submap_entry->wired_count != 0 ||
9522			   (sub_object->copy_strategy ==
9523			    MEMORY_OBJECT_COPY_NONE)) {
9524				vm_object_lock(sub_object);
9525				vm_object_copy_slowly(sub_object,
9526						      submap_entry->offset,
9527						      (submap_entry->vme_end -
9528						       submap_entry->vme_start),
9529						      FALSE,
9530						      &copy_object);
9531				copied_slowly = TRUE;
9532			} else {
9533
9534				/* set up shadow object */
9535				copy_object = sub_object;
9536				vm_object_reference(copy_object);
9537				sub_object->shadowed = TRUE;
9538				submap_entry->needs_copy = TRUE;
9539
9540				prot = submap_entry->protection & ~VM_PROT_WRITE;
9541
9542				if (override_nx(old_map, submap_entry->alias) && prot)
9543				        prot |= VM_PROT_EXECUTE;
9544
9545				vm_object_pmap_protect(
9546					sub_object,
9547					submap_entry->offset,
9548					submap_entry->vme_end -
9549					submap_entry->vme_start,
9550					(submap_entry->is_shared
9551					 || map->mapped_in_other_pmaps) ?
9552					PMAP_NULL : map->pmap,
9553					submap_entry->vme_start,
9554					prot);
9555			}
9556
9557			/*
9558			 * Adjust the fault offset to the submap entry.
9559			 */
9560			copy_offset = (local_vaddr -
9561				       submap_entry->vme_start +
9562				       submap_entry->offset);
9563
9564			/* This works differently from the */
9565			/* normal submap case.  We go back */
9566			/* to the parent of the cow map and */
9567			/* clip out the target portion of  */
9568			/* the sub_map, substituting the   */
9569			/* new copy object.                */
9570
9571			vm_map_unlock(map);
9572			local_start = old_start;
9573			local_end = old_end;
9574			map = cow_sub_map_parent;
9575			*var_map = cow_sub_map_parent;
9576			vaddr = cow_parent_vaddr;
9577			cow_sub_map_parent = NULL;
9578
9579			if(!vm_map_lookup_entry(map,
9580						vaddr, &entry)) {
9581				vm_object_deallocate(
9582					copy_object);
9583				vm_map_lock_write_to_read(map);
9584				return KERN_INVALID_ADDRESS;
9585			}
9586
9587			/* clip out the portion of space */
9588			/* mapped by the sub map which   */
9589			/* corresponds to the underlying */
9590			/* object */
9591
9592			/*
9593			 * Clip (and unnest) the smallest nested chunk
9594			 * possible around the faulting address...
9595			 */
9596			local_start = vaddr & ~(pmap_nesting_size_min - 1);
9597			local_end = local_start + pmap_nesting_size_min;
9598			/*
9599			 * ... but don't go beyond the "old_start" to "old_end"
9600			 * range, to avoid spanning over another VM region
9601			 * with a possibly different VM object and/or offset.
9602			 */
9603			if (local_start < old_start) {
9604				local_start = old_start;
9605			}
9606			if (local_end > old_end) {
9607				local_end = old_end;
9608			}
9609			/*
9610			 * Adjust copy_offset to the start of the range.
9611			 */
9612			copy_offset -= (vaddr - local_start);
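			/*
			 * Illustrative arithmetic (assuming, purely for the
			 * example, a 2MB nesting granule, i.e.
			 * pmap_nesting_size_min == 0x200000): for
			 * vaddr == 0x12345000,
			 *	local_start = 0x12345000 & ~0x1FFFFF = 0x12200000
			 *	local_end   = 0x12400000
			 * and if old_start were 0x12300000, local_start would
			 * be pulled up to 0x12300000 and copy_offset reduced by
			 * (0x12345000 - 0x12300000) = 0x45000, so copy_offset
			 * still corresponds to local_start.
			 */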
9613
9614			vm_map_clip_start(map, entry, local_start);
9615			vm_map_clip_end(map, entry, local_end);
9616			/* unnesting was done in vm_map_clip_start/end() */
9617			assert(!entry->use_pmap);
9618
9619			/* substitute copy object for */
9620			/* shared map entry           */
9621			vm_map_deallocate(entry->object.sub_map);
9622			entry->is_sub_map = FALSE;
9623			entry->object.vm_object = copy_object;
9624
9625			/* propagate the submap entry's protections */
9626			entry->protection |= submap_entry->protection;
9627			entry->max_protection |= submap_entry->max_protection;
9628
9629			if(copied_slowly) {
9630				entry->offset = local_start - old_start;
9631				entry->needs_copy = FALSE;
9632				entry->is_shared = FALSE;
9633			} else {
9634				entry->offset = copy_offset;
9635				entry->needs_copy = TRUE;
9636				if(entry->inheritance == VM_INHERIT_SHARE)
9637					entry->inheritance = VM_INHERIT_COPY;
9638				if (map != old_map)
9639					entry->is_shared = TRUE;
9640			}
9641			if(entry->inheritance == VM_INHERIT_SHARE)
9642				entry->inheritance = VM_INHERIT_COPY;
9643
9644			vm_map_lock_write_to_read(map);
9645		} else {
9646			if((cow_sub_map_parent)
9647			   && (cow_sub_map_parent != *real_map)
9648			   && (cow_sub_map_parent != map)) {
9649				vm_map_unlock(cow_sub_map_parent);
9650			}
9651			entry = submap_entry;
9652			vaddr = local_vaddr;
9653		}
9654	}
9655
9656	/*
9657	 *	Check whether this task is allowed to have
9658	 *	this page.
9659	 */
9660
9661	prot = entry->protection;
9662
9663	if (override_nx(old_map, entry->alias) && prot) {
9664	        /*
9665		 * HACK -- if not a stack, then allow execution
9666		 */
9667	        prot |= VM_PROT_EXECUTE;
9668	}
9669
9670	if (mask_protections) {
9671		fault_type &= prot;
9672		if (fault_type == VM_PROT_NONE) {
9673			goto protection_failure;
9674		}
9675	}
9676	if ((fault_type & (prot)) != fault_type) {
9677	protection_failure:
9678		if (*real_map != map) {
9679			vm_map_unlock(*real_map);
9680		}
9681		*real_map = map;
9682
9683		if ((fault_type & VM_PROT_EXECUTE) && prot)
9684		        log_stack_execution_failure((addr64_t)vaddr, prot);
9685
9686		DTRACE_VM2(prot_fault, int, 1, (uint64_t *), NULL);
9687		return KERN_PROTECTION_FAILURE;
9688	}
9689
9690	/*
9691	 *	If this page is not pageable, we have to get
9692	 *	it for all possible accesses.
9693	 */
9694
9695	*wired = (entry->wired_count != 0);
9696	if (*wired)
9697	        fault_type = prot;
9698
9699	/*
9700	 *	If the entry was copy-on-write, we either shadow it now (for a write) or demote the protection (for a read).
9701	 */
9702
9703	if (entry->needs_copy) {
9704	    	/*
9705		 *	If we want to write the page, we may as well
9706		 *	handle that now since we've got the map locked.
9707		 *
9708		 *	If we don't need to write the page, we just
9709		 *	demote the permissions allowed.
9710		 */
9711
9712		if ((fault_type & VM_PROT_WRITE) || *wired) {
9713			/*
9714			 *	Make a new object, and place it in the
9715			 *	object chain.  Note that no new references
9716			 *	have appeared -- one just moved from the
9717			 *	map to the new object.
9718			 */
9719
9720			if (vm_map_lock_read_to_write(map)) {
9721				vm_map_lock_read(map);
9722				goto RetryLookup;
9723			}
9724			vm_object_shadow(&entry->object.vm_object,
9725					 &entry->offset,
9726					 (vm_map_size_t) (entry->vme_end -
9727							  entry->vme_start));
9728
9729			entry->object.vm_object->shadowed = TRUE;
9730			entry->needs_copy = FALSE;
9731			vm_map_lock_write_to_read(map);
9732		}
9733		else {
9734			/*
9735			 *	We're attempting to read a copy-on-write
9736			 *	page -- don't allow writes.
9737			 */
9738
9739			prot &= (~VM_PROT_WRITE);
9740		}
9741	}
9742
9743	/*
9744	 *	Create an object if necessary.
9745	 */
9746	if (entry->object.vm_object == VM_OBJECT_NULL) {
9747
9748		if (vm_map_lock_read_to_write(map)) {
9749			vm_map_lock_read(map);
9750			goto RetryLookup;
9751		}
9752
9753		entry->object.vm_object = vm_object_allocate(
9754			(vm_map_size_t)(entry->vme_end - entry->vme_start));
9755		entry->offset = 0;
9756		vm_map_lock_write_to_read(map);
9757	}
9758
9759	/*
9760	 *	Return the object/offset from this entry.  If the entry
9761	 *	was copy-on-write or empty, it has been fixed up.  Also
9762	 *	return the protection.
9763	 */
9764
9765        *offset = (vaddr - entry->vme_start) + entry->offset;
9766        *object = entry->object.vm_object;
9767	*out_prot = prot;
9768
9769	if (fault_info) {
9770		fault_info->interruptible = THREAD_UNINT; /* for now... */
9771		/* ... the caller will change "interruptible" if needed */
9772	        fault_info->cluster_size = 0;
9773		fault_info->user_tag = entry->alias;
9774	        fault_info->behavior = entry->behavior;
9775		fault_info->lo_offset = entry->offset;
9776		fault_info->hi_offset = (entry->vme_end - entry->vme_start) + entry->offset;
9777		fault_info->no_cache  = entry->no_cache;
9778		fault_info->stealth = FALSE;
9779		fault_info->io_sync = FALSE;
9780		fault_info->cs_bypass = (entry->used_for_jit)? TRUE : FALSE;
9781		fault_info->mark_zf_absent = FALSE;
9782		fault_info->batch_pmap_op = FALSE;
9783	}
9784
9785	/*
9786	 *	Lock the object to prevent it from disappearing
9787	 */
9788	if (object_lock_type == OBJECT_LOCK_EXCLUSIVE)
9789	        vm_object_lock(*object);
9790	else
9791	        vm_object_lock_shared(*object);
9792
9793	/*
9794	 *	Save the version number
9795	 */
9796
9797	out_version->main_timestamp = map->timestamp;
9798
9799	return KERN_SUCCESS;
9800}
9801
9802
9803/*
9804 *	vm_map_verify:
9805 *
9806 *	Verifies that the map in question has not changed
9807 *	since the given version.  If successful, the map
9808 *	will not change until vm_map_verify_done() is called.
9809 */
9810boolean_t
9811vm_map_verify(
9812	register vm_map_t		map,
9813	register vm_map_version_t	*version)	/* REF */
9814{
9815	boolean_t	result;
9816
9817	vm_map_lock_read(map);
9818	result = (map->timestamp == version->main_timestamp);
9819
9820	if (!result)
9821		vm_map_unlock_read(map);
9822
9823	return(result);
9824}
9825
9826/*
9827 *	vm_map_verify_done:
9828 *
9829 *	Releases locks acquired by a vm_map_verify.
9830 *
9831 *	This is now a macro in vm/vm_map.h.  It does a
9832 *	vm_map_unlock_read on the map.
9833 */
9834
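/*
 * Illustrative sketch (not part of this file) of how a fault handler is
 * expected to use vm_map_lookup_locked() together with vm_map_verify()
 * and vm_map_verify_done().  The local names are hypothetical; the real
 * consumer of this protocol is vm_fault() in vm/vm_fault.c.
 *
 *	kr = vm_map_lookup_locked(&map, vaddr, fault_type,
 *				  OBJECT_LOCK_EXCLUSIVE, &version,
 *				  &object, &offset, &prot, &wired,
 *				  &fault_info, &real_map);
 *	vm_map_unlock_read(map);	// drop the map lock ...
 *	// ... do the (possibly blocking) page-in against "object" ...
 *	if (!vm_map_verify(map, &version)) {
 *		// the map changed underneath us: redo the lookup
 *	} else {
 *		// the map is read-locked again; enter the page, then
 *		// release the lock via vm_map_verify_done().
 *	}
 */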
9835
9836/*
9837 *	TEMPORARYTEMPORARYTEMPORARYTEMPORARYTEMPORARYTEMPORARY
9838 *	Goes away after regular vm_region_recurse function migrates to
9839 *	64 bits
9840 *	vm_region_recurse: A form of vm_region which follows the
9841 *	submaps in a target map
9842 *
9843 */
9844
9845kern_return_t
9846vm_map_region_recurse_64(
9847	vm_map_t		 map,
9848	vm_map_offset_t	*address,		/* IN/OUT */
9849	vm_map_size_t		*size,			/* OUT */
9850	natural_t	 	*nesting_depth,	/* IN/OUT */
9851	vm_region_submap_info_64_t	submap_info,	/* IN/OUT */
9852	mach_msg_type_number_t	*count)	/* IN/OUT */
9853{
9854	mach_msg_type_number_t	original_count;
9855	vm_region_extended_info_data_t	extended;
9856	vm_map_entry_t			tmp_entry;
9857	vm_map_offset_t			user_address;
9858	unsigned int			user_max_depth;
9859
9860	/*
9861	 * "curr_entry" is the VM map entry preceding or including the
9862	 * address we're looking for.
9863	 * "curr_map" is the map or sub-map containing "curr_entry".
9864	 * "curr_address" is the equivalent of the top map's "user_address"
9865	 * in the current map.
9866	 * "curr_offset" is the cumulated offset of "curr_map" in the
9867	 * target task's address space.
9868	 * "curr_depth" is the depth of "curr_map" in the chain of
9869	 * sub-maps.
9870	 *
9871	 * "curr_max_below" and "curr_max_above" limit the range (around
9872	 * "curr_address") we should take into account in the current (sub)map.
9873	 * They limit the range to what's visible through the map entries
9874	 * we've traversed from the top map to the current map.
9875	 *
9876	 */
9877	vm_map_entry_t			curr_entry;
9878	vm_map_address_t		curr_address;
9879	vm_map_offset_t			curr_offset;
9880	vm_map_t			curr_map;
9881	unsigned int			curr_depth;
9882	vm_map_offset_t			curr_max_below, curr_max_above;
9883	vm_map_offset_t			curr_skip;
9884
9885	/*
9886	 * "next_" is the same as "curr_" but for the VM region immediately
9887	 * after the address we're looking for.  We need to keep track of this
9888	 * too because we want to return info about that region if the
9889	 * address we're looking for is not mapped.
9890	 */
9891	vm_map_entry_t			next_entry;
9892	vm_map_offset_t			next_offset;
9893	vm_map_offset_t			next_address;
9894	vm_map_t			next_map;
9895	unsigned int			next_depth;
9896	vm_map_offset_t			next_max_below, next_max_above;
9897	vm_map_offset_t			next_skip;
9898
9899	boolean_t			look_for_pages;
9900	vm_region_submap_short_info_64_t short_info;
9901
9902	if (map == VM_MAP_NULL) {
9903		/* no address space to work on */
9904		return KERN_INVALID_ARGUMENT;
9905	}
9906
9907
9908	if (*count < VM_REGION_SUBMAP_SHORT_INFO_COUNT_64) {
9909		/*
9910		 * "info" structure is not big enough and
9911		 * would overflow
9912		 */
9913		return KERN_INVALID_ARGUMENT;
9914	}
9915
9916	original_count = *count;
9917
9918	if (original_count < VM_REGION_SUBMAP_INFO_V0_COUNT_64) {
9919		*count = VM_REGION_SUBMAP_SHORT_INFO_COUNT_64;
9920		look_for_pages = FALSE;
9921		short_info = (vm_region_submap_short_info_64_t) submap_info;
9922		submap_info = NULL;
9923	} else {
9924		look_for_pages = TRUE;
9925		*count = VM_REGION_SUBMAP_INFO_V0_COUNT_64;
9926		short_info = NULL;
9927
9928		if (original_count >= VM_REGION_SUBMAP_INFO_V1_COUNT_64) {
9929			*count = VM_REGION_SUBMAP_INFO_V1_COUNT_64;
9930		}
9931	}
9932
9933	user_address = *address;
9934	user_max_depth = *nesting_depth;
9935
9936	curr_entry = NULL;
9937	curr_map = map;
9938	curr_address = user_address;
9939	curr_offset = 0;
9940	curr_skip = 0;
9941	curr_depth = 0;
9942	curr_max_above = ((vm_map_offset_t) -1) - curr_address;
9943	curr_max_below = curr_address;
9944
9945	next_entry = NULL;
9946	next_map = NULL;
9947	next_address = 0;
9948	next_offset = 0;
9949	next_skip = 0;
9950	next_depth = 0;
9951	next_max_above = (vm_map_offset_t) -1;
9952	next_max_below = (vm_map_offset_t) -1;
9953
9954	if (not_in_kdp) {
9955		vm_map_lock_read(curr_map);
9956	}
9957
9958	for (;;) {
9959		if (vm_map_lookup_entry(curr_map,
9960					curr_address,
9961					&tmp_entry)) {
9962			/* tmp_entry contains the address we're looking for */
9963			curr_entry = tmp_entry;
9964		} else {
9965			vm_map_offset_t skip;
9966			/*
9967			 * The address is not mapped.  "tmp_entry" is the
9968			 * map entry preceding the address.  We want the next
9969			 * one, if it exists.
9970			 */
9971			curr_entry = tmp_entry->vme_next;
9972
9973			if (curr_entry == vm_map_to_entry(curr_map) ||
9974			    (curr_entry->vme_start >=
9975			     curr_address + curr_max_above)) {
9976				/* no next entry at this level: stop looking */
9977				if (not_in_kdp) {
9978					vm_map_unlock_read(curr_map);
9979				}
9980				curr_entry = NULL;
9981				curr_map = NULL;
9982				curr_offset = 0;
9983				curr_depth = 0;
9984				curr_max_above = 0;
9985				curr_max_below = 0;
9986				break;
9987			}
9988
9989			/* adjust current address and offset */
9990			skip = curr_entry->vme_start - curr_address;
9991			curr_address = curr_entry->vme_start;
9992			curr_skip = skip;
9993			curr_offset += skip;
9994			curr_max_above -= skip;
9995			curr_max_below = 0;
9996		}
9997
9998		/*
9999		 * Is the next entry at this level closer to the address (or
10000		 * deeper in the submap chain) than the one we had
10001		 * so far ?
10002		 */
10003		tmp_entry = curr_entry->vme_next;
10004		if (tmp_entry == vm_map_to_entry(curr_map)) {
10005			/* no next entry at this level */
10006		} else if (tmp_entry->vme_start >=
10007			   curr_address + curr_max_above) {
10008			/*
10009			 * tmp_entry is beyond the scope of what we mapped of
10010			 * this submap in the upper level: ignore it.
10011			 */
10012		} else if ((next_entry == NULL) ||
10013			   (tmp_entry->vme_start + curr_offset <=
10014			    next_entry->vme_start + next_offset)) {
10015			/*
10016			 * We didn't have a "next_entry" or this one is
10017			 * closer to the address we're looking for:
10018			 * use this "tmp_entry" as the new "next_entry".
10019			 */
10020			if (next_entry != NULL) {
10021				/* unlock the last "next_map" */
10022				if (next_map != curr_map && not_in_kdp) {
10023					vm_map_unlock_read(next_map);
10024				}
10025			}
10026			next_entry = tmp_entry;
10027			next_map = curr_map;
10028			next_depth = curr_depth;
10029			next_address = next_entry->vme_start;
10030			next_skip = curr_skip;
10031			next_offset = curr_offset;
10032			next_offset += (next_address - curr_address);
10033			next_max_above = MIN(next_max_above, curr_max_above);
10034			next_max_above = MIN(next_max_above,
10035					     next_entry->vme_end - next_address);
10036			next_max_below = MIN(next_max_below, curr_max_below);
10037			next_max_below = MIN(next_max_below,
10038					     next_address - next_entry->vme_start);
10039		}
10040
10041		/*
10042		 * "curr_max_{above,below}" allow us to keep track of the
10043		 * portion of the submap that is actually mapped at this level:
10044		 * the rest of that submap is irrelevant to us, since it's not
10045		 * mapped here.
10046		 * The relevant portion of the submap starts at
10047		 * "curr_entry->offset" and extends for the size of "curr_entry".
10048		 */
10049		curr_max_above = MIN(curr_max_above,
10050				     curr_entry->vme_end - curr_address);
10051		curr_max_below = MIN(curr_max_below,
10052				     curr_address - curr_entry->vme_start);
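		/*
		 * Quick illustration (hypothetical numbers): if curr_entry
		 * spans [0x1000, 0x5000) and curr_address == 0x2000, then
		 * after these two statements curr_max_above is at most 0x3000
		 * and curr_max_below at most 0x1000, i.e. the window around
		 * curr_address can never extend past this entry.
		 */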
10053
10054		if (!curr_entry->is_sub_map ||
10055		    curr_depth >= user_max_depth) {
10056			/*
10057			 * We hit a leaf map or we reached the maximum depth
10058			 * we could, so stop looking.  Keep the current map
10059			 * locked.
10060			 */
10061			break;
10062		}
10063
10064		/*
10065		 * Get down to the next submap level.
10066		 */
10067
10068		/*
10069		 * Lock the next level and unlock the current level,
10070		 * unless we need to keep it locked to access the "next_entry"
10071		 * later.
10072		 */
10073		if (not_in_kdp) {
10074			vm_map_lock_read(curr_entry->object.sub_map);
10075		}
10076		if (curr_map == next_map) {
10077			/* keep "next_map" locked in case we need it */
10078		} else {
10079			/* release this map */
10080			if (not_in_kdp)
10081				vm_map_unlock_read(curr_map);
10082		}
10083
10084		/*
10085		 * Adjust the offset.  "curr_entry" maps the submap
10086		 * at relative address "curr_entry->vme_start" in the
10087		 * curr_map but skips the first "curr_entry->offset"
10088		 * bytes of the submap.
10089		 * "curr_offset" always represents the offset of a virtual
10090		 * address in the curr_map relative to the absolute address
10091		 * space (i.e. the top-level VM map).
10092		 */
10093		curr_offset +=
10094			(curr_entry->offset - curr_entry->vme_start);
10095		curr_address = user_address + curr_offset;
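		/*
		 * Example of the offset bookkeeping above (hypothetical
		 * numbers): if a top-level entry at vme_start 0x200000 maps
		 * a submap with entry->offset 0 and user_address is 0x204000,
		 * then curr_offset picks up (0 - 0x200000) and curr_address
		 * becomes 0x204000 - 0x200000 = 0x4000, the equivalent
		 * address inside the submap.
		 */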
10096		/* switch to the submap */
10097		curr_map = curr_entry->object.sub_map;
10098		curr_depth++;
10099		curr_entry = NULL;
10100	}
10101
10102	if (curr_entry == NULL) {
10103		/* no VM region contains the address... */
10104		if (next_entry == NULL) {
10105			/* ... and no VM region follows it either */
10106			return KERN_INVALID_ADDRESS;
10107		}
10108		/* ... gather info about the next VM region */
10109		curr_entry = next_entry;
10110		curr_map = next_map;	/* still locked ... */
10111		curr_address = next_address;
10112		curr_skip = next_skip;
10113		curr_offset = next_offset;
10114		curr_depth = next_depth;
10115		curr_max_above = next_max_above;
10116		curr_max_below = next_max_below;
10117		if (curr_map == map) {
10118			user_address = curr_address;
10119		}
10120	} else {
10121		/* we won't need "next_entry" after all */
10122		if (next_entry != NULL) {
10123			/* release "next_map" */
10124			if (next_map != curr_map && not_in_kdp) {
10125				vm_map_unlock_read(next_map);
10126			}
10127		}
10128	}
10129	next_entry = NULL;
10130	next_map = NULL;
10131	next_offset = 0;
10132	next_skip = 0;
10133	next_depth = 0;
10134	next_max_below = -1;
10135	next_max_above = -1;
10136
10137	*nesting_depth = curr_depth;
10138	*size = curr_max_above + curr_max_below;
10139	*address = user_address + curr_skip - curr_max_below;
10140
10141// LP64todo: all the current tools are 32-bit, so this obviously never worked
10142// for 64-bit; it should probably be a real 32-bit ID rather than a pointer.
10143// Current users just check for equality.
10144#define INFO_MAKE_OBJECT_ID(p)	((uint32_t)(uintptr_t)VM_KERNEL_ADDRPERM(p))
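/*
 * For example (illustrative): two map entries backed by the same VM object
 * produce equal INFO_MAKE_OBJECT_ID() values, while the address permutation
 * keeps the real kernel pointer from leaking to user space.  The truncation
 * to 32 bits is acceptable only because, per the note above, current users
 * just compare IDs for equality.
 */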
10145
10146	if (look_for_pages) {
10147		submap_info->user_tag = curr_entry->alias;
10148		submap_info->offset = curr_entry->offset;
10149		submap_info->protection = curr_entry->protection;
10150		submap_info->inheritance = curr_entry->inheritance;
10151		submap_info->max_protection = curr_entry->max_protection;
10152		submap_info->behavior = curr_entry->behavior;
10153		submap_info->user_wired_count = curr_entry->user_wired_count;
10154		submap_info->is_submap = curr_entry->is_sub_map;
10155		submap_info->object_id = INFO_MAKE_OBJECT_ID(curr_entry->object.vm_object);
10156	} else {
10157		short_info->user_tag = curr_entry->alias;
10158		short_info->offset = curr_entry->offset;
10159		short_info->protection = curr_entry->protection;
10160		short_info->inheritance = curr_entry->inheritance;
10161		short_info->max_protection = curr_entry->max_protection;
10162		short_info->behavior = curr_entry->behavior;
10163		short_info->user_wired_count = curr_entry->user_wired_count;
10164		short_info->is_submap = curr_entry->is_sub_map;
10165		short_info->object_id = INFO_MAKE_OBJECT_ID(curr_entry->object.vm_object);
10166	}
10167
10168	extended.pages_resident = 0;
10169	extended.pages_swapped_out = 0;
10170	extended.pages_shared_now_private = 0;
10171	extended.pages_dirtied = 0;
10172	extended.pages_reusable = 0;
10173	extended.external_pager = 0;
10174	extended.shadow_depth = 0;
10175
10176	if (not_in_kdp) {
10177		if (!curr_entry->is_sub_map) {
10178			vm_map_offset_t range_start, range_end;
10179			range_start = MAX((curr_address - curr_max_below),
10180					  curr_entry->vme_start);
10181			range_end = MIN((curr_address + curr_max_above),
10182					curr_entry->vme_end);
10183			vm_map_region_walk(curr_map,
10184					   range_start,
10185					   curr_entry,
10186					   (curr_entry->offset +
10187					    (range_start -
10188					     curr_entry->vme_start)),
10189					   range_end - range_start,
10190					   &extended,
10191					   look_for_pages, VM_REGION_EXTENDED_INFO_COUNT);
10192			if (extended.external_pager &&
10193			    extended.ref_count == 2 &&
10194			    extended.share_mode == SM_SHARED) {
10195				extended.share_mode = SM_PRIVATE;
10196			}
10197		} else {
10198			if (curr_entry->use_pmap) {
10199				extended.share_mode = SM_TRUESHARED;
10200			} else {
10201				extended.share_mode = SM_PRIVATE;
10202			}
10203			extended.ref_count =
10204				curr_entry->object.sub_map->ref_count;
10205		}
10206	}
10207
10208	if (look_for_pages) {
10209		submap_info->pages_resident = extended.pages_resident;
10210		submap_info->pages_swapped_out = extended.pages_swapped_out;
10211		submap_info->pages_shared_now_private =
10212			extended.pages_shared_now_private;
10213		submap_info->pages_dirtied = extended.pages_dirtied;
10214		submap_info->external_pager = extended.external_pager;
10215		submap_info->shadow_depth = extended.shadow_depth;
10216		submap_info->share_mode = extended.share_mode;
10217		submap_info->ref_count = extended.ref_count;
10218
10219		if (original_count >= VM_REGION_SUBMAP_INFO_V1_COUNT_64) {
10220			submap_info->pages_reusable = extended.pages_reusable;
10221		}
10222	} else {
10223		short_info->external_pager = extended.external_pager;
10224		short_info->shadow_depth = extended.shadow_depth;
10225		short_info->share_mode = extended.share_mode;
10226		short_info->ref_count = extended.ref_count;
10227	}
10228
10229	if (not_in_kdp) {
10230		vm_map_unlock_read(curr_map);
10231	}
10232
10233	return KERN_SUCCESS;
10234}
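/*
 * Illustrative user-space sketch (not part of this file) of the MIG call
 * that ultimately lands in vm_map_region_recurse_64() above.  The buffer
 * size negotiation mirrors the "original_count" logic at the top of the
 * function: pass the largest count you understand and the kernel trims it
 * to what it actually filled in.
 *
 *	#include <mach/mach.h>
 *
 *	mach_vm_address_t		addr = 0;
 *	mach_vm_size_t			size;
 *	natural_t			depth = 0;	// max submap depth to follow
 *	vm_region_submap_info_data_64_t	info;
 *	mach_msg_type_number_t		count = VM_REGION_SUBMAP_INFO_COUNT_64;
 *
 *	kern_return_t kr = mach_vm_region_recurse(mach_task_self(), &addr,
 *			&size, &depth, (vm_region_recurse_info_t)&info, &count);
 */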
10235
10236/*
10237 *	vm_region:
10238 *
10239 *	User call to obtain information about a region in
10240 *	a task's address map.  Several information flavors are
10241 *	supported (basic, basic 64-bit, extended and top info).
10242 *
10243 *	XXX The reserved and behavior fields cannot be filled
10244 *	    in until the vm merge from the IK is completed, and
10245 *	    vm_reserve is implemented.
10246 */
10247
10248kern_return_t
10249vm_map_region(
10250	vm_map_t		 map,
10251	vm_map_offset_t	*address,		/* IN/OUT */
10252	vm_map_size_t		*size,			/* OUT */
10253	vm_region_flavor_t	 flavor,		/* IN */
10254	vm_region_info_t	 info,			/* OUT */
10255	mach_msg_type_number_t	*count,	/* IN/OUT */
10256	mach_port_t		*object_name)		/* OUT */
10257{
10258	vm_map_entry_t		tmp_entry;
10259	vm_map_entry_t		entry;
10260	vm_map_offset_t		start;
10261
10262	if (map == VM_MAP_NULL)
10263		return(KERN_INVALID_ARGUMENT);
10264
10265	switch (flavor) {
10266
10267	case VM_REGION_BASIC_INFO:
10268		/* legacy for old 32-bit objects info */
10269	{
10270		vm_region_basic_info_t	basic;
10271
10272		if (*count < VM_REGION_BASIC_INFO_COUNT)
10273			return(KERN_INVALID_ARGUMENT);
10274
10275		basic = (vm_region_basic_info_t) info;
10276		*count = VM_REGION_BASIC_INFO_COUNT;
10277
10278		vm_map_lock_read(map);
10279
10280		start = *address;
10281		if (!vm_map_lookup_entry(map, start, &tmp_entry)) {
10282			if ((entry = tmp_entry->vme_next) == vm_map_to_entry(map)) {
10283				vm_map_unlock_read(map);
10284				return(KERN_INVALID_ADDRESS);
10285			}
10286		} else {
10287			entry = tmp_entry;
10288		}
10289
10290		start = entry->vme_start;
10291
10292		basic->offset = (uint32_t)entry->offset;
10293		basic->protection = entry->protection;
10294		basic->inheritance = entry->inheritance;
10295		basic->max_protection = entry->max_protection;
10296		basic->behavior = entry->behavior;
10297		basic->user_wired_count = entry->user_wired_count;
10298		basic->reserved = entry->is_sub_map;
10299		*address = start;
10300		*size = (entry->vme_end - start);
10301
10302		if (object_name) *object_name = IP_NULL;
10303		if (entry->is_sub_map) {
10304			basic->shared = FALSE;
10305		} else {
10306			basic->shared = entry->is_shared;
10307		}
10308
10309		vm_map_unlock_read(map);
10310		return(KERN_SUCCESS);
10311	}
10312
10313	case VM_REGION_BASIC_INFO_64:
10314	{
10315		vm_region_basic_info_64_t	basic;
10316
10317		if (*count < VM_REGION_BASIC_INFO_COUNT_64)
10318			return(KERN_INVALID_ARGUMENT);
10319
10320		basic = (vm_region_basic_info_64_t) info;
10321		*count = VM_REGION_BASIC_INFO_COUNT_64;
10322
10323		vm_map_lock_read(map);
10324
10325		start = *address;
10326		if (!vm_map_lookup_entry(map, start, &tmp_entry)) {
10327			if ((entry = tmp_entry->vme_next) == vm_map_to_entry(map)) {
10328				vm_map_unlock_read(map);
10329				return(KERN_INVALID_ADDRESS);
10330			}
10331		} else {
10332			entry = tmp_entry;
10333		}
10334
10335		start = entry->vme_start;
10336
10337		basic->offset = entry->offset;
10338		basic->protection = entry->protection;
10339		basic->inheritance = entry->inheritance;
10340		basic->max_protection = entry->max_protection;
10341		basic->behavior = entry->behavior;
10342		basic->user_wired_count = entry->user_wired_count;
10343		basic->reserved = entry->is_sub_map;
10344		*address = start;
10345		*size = (entry->vme_end - start);
10346
10347		if (object_name) *object_name = IP_NULL;
10348		if (entry->is_sub_map) {
10349			basic->shared = FALSE;
10350		} else {
10351			basic->shared = entry->is_shared;
10352		}
10353
10354		vm_map_unlock_read(map);
10355		return(KERN_SUCCESS);
10356	}
10357	case VM_REGION_EXTENDED_INFO:
10358		if (*count < VM_REGION_EXTENDED_INFO_COUNT)
10359			return(KERN_INVALID_ARGUMENT);
10360		/*fallthru*/
10361	case VM_REGION_EXTENDED_INFO__legacy:
10362		if (*count < VM_REGION_EXTENDED_INFO_COUNT__legacy)
10363			return KERN_INVALID_ARGUMENT;
10364
10365	{
10366		vm_region_extended_info_t	extended;
10367		mach_msg_type_number_t original_count;
10368
10369		extended = (vm_region_extended_info_t) info;
10370
10371		vm_map_lock_read(map);
10372
10373		start = *address;
10374		if (!vm_map_lookup_entry(map, start, &tmp_entry)) {
10375			if ((entry = tmp_entry->vme_next) == vm_map_to_entry(map)) {
10376				vm_map_unlock_read(map);
10377				return(KERN_INVALID_ADDRESS);
10378			}
10379		} else {
10380			entry = tmp_entry;
10381		}
10382		start = entry->vme_start;
10383
10384		extended->protection = entry->protection;
10385		extended->user_tag = entry->alias;
10386		extended->pages_resident = 0;
10387		extended->pages_swapped_out = 0;
10388		extended->pages_shared_now_private = 0;
10389		extended->pages_dirtied = 0;
10390		extended->external_pager = 0;
10391		extended->shadow_depth = 0;
10392
10393		original_count = *count;
10394		if (flavor == VM_REGION_EXTENDED_INFO__legacy) {
10395			*count = VM_REGION_EXTENDED_INFO_COUNT__legacy;
10396		} else {
10397			extended->pages_reusable = 0;
10398			*count = VM_REGION_EXTENDED_INFO_COUNT;
10399		}
10400
10401		vm_map_region_walk(map, start, entry, entry->offset, entry->vme_end - start, extended, TRUE, *count);
10402
10403		if (extended->external_pager && extended->ref_count == 2 && extended->share_mode == SM_SHARED)
10404			extended->share_mode = SM_PRIVATE;
10405
10406		if (object_name)
10407			*object_name = IP_NULL;
10408		*address = start;
10409		*size = (entry->vme_end - start);
10410
10411		vm_map_unlock_read(map);
10412		return(KERN_SUCCESS);
10413	}
10414	case VM_REGION_TOP_INFO:
10415	{
10416		vm_region_top_info_t	top;
10417
10418		if (*count < VM_REGION_TOP_INFO_COUNT)
10419			return(KERN_INVALID_ARGUMENT);
10420
10421		top = (vm_region_top_info_t) info;
10422		*count = VM_REGION_TOP_INFO_COUNT;
10423
10424		vm_map_lock_read(map);
10425
10426		start = *address;
10427		if (!vm_map_lookup_entry(map, start, &tmp_entry)) {
10428			if ((entry = tmp_entry->vme_next) == vm_map_to_entry(map)) {
10429				vm_map_unlock_read(map);
10430				return(KERN_INVALID_ADDRESS);
10431			}
10432		} else {
10433			entry = tmp_entry;
10434
10435		}
10436		start = entry->vme_start;
10437
10438		top->private_pages_resident = 0;
10439		top->shared_pages_resident = 0;
10440
10441		vm_map_region_top_walk(entry, top);
10442
10443		if (object_name)
10444			*object_name = IP_NULL;
10445		*address = start;
10446		*size = (entry->vme_end - start);
10447
10448		vm_map_unlock_read(map);
10449		return(KERN_SUCCESS);
10450	}
10451	default:
10452		return(KERN_INVALID_ARGUMENT);
10453	}
10454}
10455
10456#define OBJ_RESIDENT_COUNT(obj, entry_size)				\
10457	MIN((entry_size),						\
10458	    ((obj)->all_reusable ?					\
10459	     (obj)->wired_page_count :					\
10460	     (obj)->resident_page_count - (obj)->reusable_page_count))
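/*
 * Example (illustrative numbers): for an object with
 * resident_page_count == 100, reusable_page_count == 30 and
 * all_reusable == FALSE, an entry spanning 50 pages reports
 * MIN(50, 100 - 30) == 50 resident pages; if all_reusable were TRUE,
 * only the wired_page_count would be charged to the entry.
 */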
10461
10462void
10463vm_map_region_top_walk(
10464        vm_map_entry_t		   entry,
10465	vm_region_top_info_t       top)
10466{
10467
10468	if (entry->object.vm_object == 0 || entry->is_sub_map) {
10469		top->share_mode = SM_EMPTY;
10470		top->ref_count = 0;
10471		top->obj_id = 0;
10472		return;
10473	}
10474
10475	{
10476	        struct	vm_object *obj, *tmp_obj;
10477		int		ref_count;
10478		uint32_t	entry_size;
10479
10480		entry_size = (uint32_t) ((entry->vme_end - entry->vme_start) / PAGE_SIZE_64);
10481
10482		obj = entry->object.vm_object;
10483
10484		vm_object_lock(obj);
10485
10486		if ((ref_count = obj->ref_count) > 1 && obj->paging_in_progress)
10487			ref_count--;
10488
10489		assert(obj->reusable_page_count <= obj->resident_page_count);
10490		if (obj->shadow) {
10491			if (ref_count == 1)
10492				top->private_pages_resident =
10493					OBJ_RESIDENT_COUNT(obj, entry_size);
10494			else
10495				top->shared_pages_resident =
10496					OBJ_RESIDENT_COUNT(obj, entry_size);
10497			top->ref_count  = ref_count;
10498			top->share_mode = SM_COW;
10499
10500			while ((tmp_obj = obj->shadow)) {
10501				vm_object_lock(tmp_obj);
10502				vm_object_unlock(obj);
10503				obj = tmp_obj;
10504
10505				if ((ref_count = obj->ref_count) > 1 && obj->paging_in_progress)
10506					ref_count--;
10507
10508				assert(obj->reusable_page_count <= obj->resident_page_count);
10509				top->shared_pages_resident +=
10510					OBJ_RESIDENT_COUNT(obj, entry_size);
10511				top->ref_count += ref_count - 1;
10512			}
10513		} else {
10514			if (entry->superpage_size) {
10515				top->share_mode = SM_LARGE_PAGE;
10516				top->shared_pages_resident = 0;
10517				top->private_pages_resident = entry_size;
10518			} else if (entry->needs_copy) {
10519				top->share_mode = SM_COW;
10520				top->shared_pages_resident =
10521					OBJ_RESIDENT_COUNT(obj, entry_size);
10522			} else {
10523				if (ref_count == 1 ||
10524				    (ref_count == 2 && !(obj->pager_trusted) && !(obj->internal))) {
10525					top->share_mode = SM_PRIVATE;
10526					top->private_pages_resident =
10527						OBJ_RESIDENT_COUNT(obj,
10528								   entry_size);
10529				} else {
10530					top->share_mode = SM_SHARED;
10531					top->shared_pages_resident =
10532						OBJ_RESIDENT_COUNT(obj,
10533								  entry_size);
10534				}
10535			}
10536			top->ref_count = ref_count;
10537		}
10538		/* XXX K64: obj_id will be truncated */
10539		top->obj_id = (unsigned int) (uintptr_t)VM_KERNEL_ADDRPERM(obj);
10540
10541		vm_object_unlock(obj);
10542	}
10543}
10544
10545void
10546vm_map_region_walk(
10547	vm_map_t		   	map,
10548	vm_map_offset_t			va,
10549	vm_map_entry_t			entry,
10550	vm_object_offset_t		offset,
10551	vm_object_size_t		range,
10552	vm_region_extended_info_t	extended,
10553	boolean_t			look_for_pages,
10554	mach_msg_type_number_t count)
10555{
10556        register struct vm_object *obj, *tmp_obj;
10557	register vm_map_offset_t       last_offset;
10558	register int               i;
10559	register int               ref_count;
10560	struct vm_object	*shadow_object;
10561	int			shadow_depth;
10562
10563	if ((entry->object.vm_object == 0) ||
10564	    (entry->is_sub_map) ||
10565	    (entry->object.vm_object->phys_contiguous &&
10566	     !entry->superpage_size)) {
10567		extended->share_mode = SM_EMPTY;
10568		extended->ref_count = 0;
10569		return;
10570	}
10571
10572	if (entry->superpage_size) {
10573		extended->shadow_depth = 0;
10574		extended->share_mode = SM_LARGE_PAGE;
10575		extended->ref_count = 1;
10576		extended->external_pager = 0;
10577		extended->pages_resident = (unsigned int)(range >> PAGE_SHIFT);
10578		extended->shadow_depth = 0;
10579		return;
10580	}
10581
10582	{
10583		obj = entry->object.vm_object;
10584
10585		vm_object_lock(obj);
10586
10587		if ((ref_count = obj->ref_count) > 1 && obj->paging_in_progress)
10588			ref_count--;
10589
10590		if (look_for_pages) {
10591			for (last_offset = offset + range;
10592			     offset < last_offset;
10593			     offset += PAGE_SIZE_64, va += PAGE_SIZE) {
10594					vm_map_region_look_for_page(map, va, obj,
10595								    offset, ref_count,
10596								    0, extended, count);
10597			}
10598		} else {
10599			shadow_object = obj->shadow;
10600			shadow_depth = 0;
10601
10602			if ( !(obj->pager_trusted) && !(obj->internal))
10603				extended->external_pager = 1;
10604
10605			if (shadow_object != VM_OBJECT_NULL) {
10606				vm_object_lock(shadow_object);
10607				for (;
10608				     shadow_object != VM_OBJECT_NULL;
10609				     shadow_depth++) {
10610					vm_object_t	next_shadow;
10611
10612					if ( !(shadow_object->pager_trusted) &&
10613					     !(shadow_object->internal))
10614						extended->external_pager = 1;
10615
10616					next_shadow = shadow_object->shadow;
10617					if (next_shadow) {
10618						vm_object_lock(next_shadow);
10619					}
10620					vm_object_unlock(shadow_object);
10621					shadow_object = next_shadow;
10622				}
10623			}
10624			extended->shadow_depth = shadow_depth;
10625		}
10626
10627		if (extended->shadow_depth || entry->needs_copy)
10628			extended->share_mode = SM_COW;
10629		else {
10630			if (ref_count == 1)
10631				extended->share_mode = SM_PRIVATE;
10632			else {
10633				if (obj->true_share)
10634					extended->share_mode = SM_TRUESHARED;
10635				else
10636					extended->share_mode = SM_SHARED;
10637			}
10638		}
10639		extended->ref_count = ref_count - extended->shadow_depth;
10640
10641		for (i = 0; i < extended->shadow_depth; i++) {
10642			if ((tmp_obj = obj->shadow) == 0)
10643				break;
10644			vm_object_lock(tmp_obj);
10645			vm_object_unlock(obj);
10646
10647			if ((ref_count = tmp_obj->ref_count) > 1 && tmp_obj->paging_in_progress)
10648				ref_count--;
10649
10650			extended->ref_count += ref_count;
10651			obj = tmp_obj;
10652		}
10653		vm_object_unlock(obj);
10654
10655		if (extended->share_mode == SM_SHARED) {
10656			register vm_map_entry_t	     cur;
10657			register vm_map_entry_t	     last;
10658			int      my_refs;
10659
10660			obj = entry->object.vm_object;
10661			last = vm_map_to_entry(map);
10662			my_refs = 0;
10663
10664			if ((ref_count = obj->ref_count) > 1 && obj->paging_in_progress)
10665				ref_count--;
10666			for (cur = vm_map_first_entry(map); cur != last; cur = cur->vme_next)
10667				my_refs += vm_map_region_count_obj_refs(cur, obj);
10668
10669			if (my_refs == ref_count)
10670				extended->share_mode = SM_PRIVATE_ALIASED;
10671			else if (my_refs > 1)
10672				extended->share_mode = SM_SHARED_ALIASED;
10673		}
10674	}
10675}
10676
10677
10678/* object is locked on entry and locked on return */
10679
10680
10681static void
10682vm_map_region_look_for_page(
10683	__unused vm_map_t		map,
10684	__unused vm_map_offset_t	va,
10685	vm_object_t			object,
10686	vm_object_offset_t		offset,
10687	int				max_refcnt,
10688	int				depth,
10689	vm_region_extended_info_t	extended,
10690	mach_msg_type_number_t count)
10691{
10692        register vm_page_t	p;
10693        register vm_object_t	shadow;
10694	register int            ref_count;
10695	vm_object_t		caller_object;
10696	kern_return_t		kr;
10697	shadow = object->shadow;
10698	caller_object = object;
10699
10700
10701	while (TRUE) {
10702
10703		if ( !(object->pager_trusted) && !(object->internal))
10704			extended->external_pager = 1;
10705
10706		if ((p = vm_page_lookup(object, offset)) != VM_PAGE_NULL) {
10707	        	if (shadow && (max_refcnt == 1))
10708		    		extended->pages_shared_now_private++;
10709
10710			if (!p->fictitious &&
10711			    (p->dirty || pmap_is_modified(p->phys_page)))
10712		    		extended->pages_dirtied++;
10713			else if (count >= VM_REGION_EXTENDED_INFO_COUNT) {
10714				if (p->reusable || p->object->all_reusable) {
10715					extended->pages_reusable++;
10716				}
10717			}
10718
10719			extended->pages_resident++;
10720
10721			if(object != caller_object)
10722				vm_object_unlock(object);
10723
10724			return;
10725		}
10726#if	MACH_PAGEMAP
10727		if (object->existence_map) {
10728	    		if (vm_external_state_get(object->existence_map, offset) == VM_EXTERNAL_STATE_EXISTS) {
10729
10730	        		extended->pages_swapped_out++;
10731
10732				if(object != caller_object)
10733					vm_object_unlock(object);
10734
10735				return;
10736	    		}
10737		} else
10738#endif /* MACH_PAGEMAP */
10739		if (object->internal &&
10740		    object->alive &&
10741		    !object->terminating &&
10742		    object->pager_ready) {
10743
10744			if (COMPRESSED_PAGER_IS_ACTIVE || DEFAULT_FREEZER_COMPRESSED_PAGER_IS_ACTIVE) {
10745				if (VM_COMPRESSOR_PAGER_STATE_GET(object,
10746								  offset)
10747				    == VM_EXTERNAL_STATE_EXISTS) {
10748					/* the pager has that page */
10749					extended->pages_swapped_out++;
10750					if (object != caller_object)
10751						vm_object_unlock(object);
10752					return;
10753				}
10754			} else {
10755				memory_object_t pager;
10756
10757				vm_object_paging_begin(object);
10758				pager = object->pager;
10759				vm_object_unlock(object);
10760
10761				kr = memory_object_data_request(
10762					pager,
10763					offset + object->paging_offset,
10764					0, /* just poke the pager */
10765					VM_PROT_READ,
10766					NULL);
10767
10768				vm_object_lock(object);
10769				vm_object_paging_end(object);
10770
10771				if (kr == KERN_SUCCESS) {
10772					/* the pager has that page */
10773					extended->pages_swapped_out++;
10774					if (object != caller_object)
10775						vm_object_unlock(object);
10776					return;
10777				}
10778			}
10779		}
10780
10781		if (shadow) {
10782			vm_object_lock(shadow);
10783
10784			if ((ref_count = shadow->ref_count) > 1 && shadow->paging_in_progress)
10785			        ref_count--;
10786
10787	    		if (++depth > extended->shadow_depth)
10788	        		extended->shadow_depth = depth;
10789
10790	    		if (ref_count > max_refcnt)
10791	        		max_refcnt = ref_count;
10792
10793			if(object != caller_object)
10794				vm_object_unlock(object);
10795
10796			offset = offset + object->vo_shadow_offset;
10797			object = shadow;
10798			shadow = object->shadow;
10799			continue;
10800		}
10801		if(object != caller_object)
10802			vm_object_unlock(object);
10803		break;
10804	}
10805}
10806
10807static int
10808vm_map_region_count_obj_refs(
10809        vm_map_entry_t    entry,
10810	vm_object_t       object)
10811{
10812        register int ref_count;
10813	register vm_object_t chk_obj;
10814	register vm_object_t tmp_obj;
10815
10816	if (entry->object.vm_object == 0)
10817		return(0);
10818
10819        if (entry->is_sub_map)
10820		return(0);
10821	else {
10822		ref_count = 0;
10823
10824		chk_obj = entry->object.vm_object;
10825		vm_object_lock(chk_obj);
10826
10827		while (chk_obj) {
10828			if (chk_obj == object)
10829				ref_count++;
10830			tmp_obj = chk_obj->shadow;
10831			if (tmp_obj)
10832				vm_object_lock(tmp_obj);
10833			vm_object_unlock(chk_obj);
10834
10835			chk_obj = tmp_obj;
10836		}
10837	}
10838	return(ref_count);
10839}
10840
10841
10842/*
10843 *	Routine:	vm_map_simplify
10844 *
10845 *	Description:
10846 *		Attempt to simplify the map representation in
10847 *		the vicinity of the given starting address.
10848 *	Note:
10849 *		This routine is intended primarily to keep the
10850 *		kernel maps more compact -- they generally don't
10851 *		benefit from the "expand a map entry" technology
10852 *		at allocation time because the adjacent entry
10853 *		is often wired down.
10854 */
10855void
10856vm_map_simplify_entry(
10857	vm_map_t	map,
10858	vm_map_entry_t	this_entry)
10859{
10860	vm_map_entry_t	prev_entry;
10861
10862	counter(c_vm_map_simplify_entry_called++);
10863
10864	prev_entry = this_entry->vme_prev;
10865
10866	if ((this_entry != vm_map_to_entry(map)) &&
10867	    (prev_entry != vm_map_to_entry(map)) &&
10868
10869	    (prev_entry->vme_end == this_entry->vme_start) &&
10870
10871	    (prev_entry->is_sub_map == this_entry->is_sub_map) &&
10872
10873	    (prev_entry->object.vm_object == this_entry->object.vm_object) &&
10874	    ((prev_entry->offset + (prev_entry->vme_end -
10875				    prev_entry->vme_start))
10876	     == this_entry->offset) &&
10877
10878	    (prev_entry->map_aligned == this_entry->map_aligned) &&
10879	    (prev_entry->inheritance == this_entry->inheritance) &&
10880	    (prev_entry->protection == this_entry->protection) &&
10881	    (prev_entry->max_protection == this_entry->max_protection) &&
10882	    (prev_entry->behavior == this_entry->behavior) &&
10883	    (prev_entry->alias == this_entry->alias) &&
10884	    (prev_entry->zero_wired_pages == this_entry->zero_wired_pages) &&
10885	    (prev_entry->no_cache == this_entry->no_cache) &&
10886	    (prev_entry->wired_count == this_entry->wired_count) &&
10887	    (prev_entry->user_wired_count == this_entry->user_wired_count) &&
10888
10889	    (prev_entry->needs_copy == this_entry->needs_copy) &&
10890	    (prev_entry->permanent == this_entry->permanent) &&
10891
10892	    (prev_entry->use_pmap == FALSE) &&
10893	    (this_entry->use_pmap == FALSE) &&
10894	    (prev_entry->in_transition == FALSE) &&
10895	    (this_entry->in_transition == FALSE) &&
10896	    (prev_entry->needs_wakeup == FALSE) &&
10897	    (this_entry->needs_wakeup == FALSE) &&
10898	    (prev_entry->is_shared == FALSE) &&
10899	    (this_entry->is_shared == FALSE)
10900		) {
10901		vm_map_store_entry_unlink(map, prev_entry);
10902		assert(prev_entry->vme_start < this_entry->vme_end);
10903		if (prev_entry->map_aligned)
10904			assert(VM_MAP_PAGE_ALIGNED(prev_entry->vme_start,
10905						   VM_MAP_PAGE_MASK(map)));
10906		this_entry->vme_start = prev_entry->vme_start;
10907		this_entry->offset = prev_entry->offset;
10908		if (prev_entry->is_sub_map) {
10909			vm_map_deallocate(prev_entry->object.sub_map);
10910		} else {
10911			vm_object_deallocate(prev_entry->object.vm_object);
10912		}
10913		vm_map_entry_dispose(map, prev_entry);
10914		SAVE_HINT_MAP_WRITE(map, this_entry);
10915		counter(c_vm_map_simplified++);
10916	}
10917}
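/*
 * Illustrative example (hypothetical addresses): an entry covering
 * [0x1000, 0x3000) at object offset 0, followed by one covering
 * [0x3000, 0x5000) at offset 0x2000 into the same object, with all the
 * attributes above matching, collapses into a single entry covering
 * [0x1000, 0x5000) at offset 0 -- the prev entry is unlinked and this
 * entry's start and offset are pulled back.
 */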
10918
10919void
10920vm_map_simplify(
10921	vm_map_t	map,
10922	vm_map_offset_t	start)
10923{
10924	vm_map_entry_t	this_entry;
10925
10926	vm_map_lock(map);
10927	if (vm_map_lookup_entry(map, start, &this_entry)) {
10928		vm_map_simplify_entry(map, this_entry);
10929		vm_map_simplify_entry(map, this_entry->vme_next);
10930	}
10931	counter(c_vm_map_simplify_called++);
10932	vm_map_unlock(map);
10933}
10934
10935static void
10936vm_map_simplify_range(
10937	vm_map_t	map,
10938	vm_map_offset_t	start,
10939	vm_map_offset_t	end)
10940{
10941	vm_map_entry_t	entry;
10942
10943	/*
10944	 * The map should be locked (for "write") by the caller.
10945	 */
10946
10947	if (start >= end) {
10948		/* invalid address range */
10949		return;
10950	}
10951
10952	start = vm_map_trunc_page(start,
10953				  VM_MAP_PAGE_MASK(map));
10954	end = vm_map_round_page(end,
10955				VM_MAP_PAGE_MASK(map));
10956
10957	if (!vm_map_lookup_entry(map, start, &entry)) {
10958		/* "start" is not mapped and "entry" ends before "start" */
10959		if (entry == vm_map_to_entry(map)) {
10960			/* start with first entry in the map */
10961			entry = vm_map_first_entry(map);
10962		} else {
10963			/* start with next entry */
10964			entry = entry->vme_next;
10965		}
10966	}
10967
10968	while (entry != vm_map_to_entry(map) &&
10969	       entry->vme_start <= end) {
10970		/* try and coalesce "entry" with its previous entry */
10971		vm_map_simplify_entry(map, entry);
10972		entry = entry->vme_next;
10973	}
10974}
10975
10976
10977/*
10978 *	Routine:	vm_map_machine_attribute
10979 *	Purpose:
10980 *		Provide machine-specific attributes to mappings,
10981 *		such as cacheability, for machines that provide
10982 *		them.  NUMA architectures and machines with unusual
10983 *		caches will use this.
10984 *	Note:
10985 *		Responsibilities for locking and checking are handled here;
10986 *		everything else is handled in the pmap module.  If any
10987 *		non-volatile information must be kept, the pmap module
10988 *		should handle it itself.  [This assumes that attributes do
10989 *		not need to be inherited, which seems reasonable.]
10990 */
10991kern_return_t
10992vm_map_machine_attribute(
10993	vm_map_t			map,
10994	vm_map_offset_t		start,
10995	vm_map_offset_t		end,
10996	vm_machine_attribute_t	attribute,
10997	vm_machine_attribute_val_t* value)		/* IN/OUT */
10998{
10999	kern_return_t	ret;
11000	vm_map_size_t sync_size;
11001	vm_map_entry_t entry;
11002
11003	if (start < vm_map_min(map) || end > vm_map_max(map))
11004		return KERN_INVALID_ADDRESS;
11005
11006	/* Figure how much memory we need to flush (in page increments) */
11007	sync_size = end - start;
11008
11009	vm_map_lock(map);
11010
11011	if (attribute != MATTR_CACHE) {
11012		/* If we don't have to find physical addresses, we */
11013		/* don't have to do an explicit traversal here.    */
11014		ret = pmap_attribute(map->pmap, start, end-start,
11015				     attribute, value);
11016		vm_map_unlock(map);
11017		return ret;
11018	}
11019
11020	ret = KERN_SUCCESS;										/* Assume it all worked */
11021
11022	while(sync_size) {
11023		if (vm_map_lookup_entry(map, start, &entry)) {
11024			vm_map_size_t	sub_size;
11025			if((entry->vme_end - start) > sync_size) {
11026				sub_size = sync_size;
11027				sync_size = 0;
11028			} else {
11029				sub_size = entry->vme_end - start;
11030				sync_size -= sub_size;
11031			}
11032			if(entry->is_sub_map) {
11033				vm_map_offset_t sub_start;
11034				vm_map_offset_t sub_end;
11035
11036				sub_start = (start - entry->vme_start)
11037					+ entry->offset;
11038				sub_end = sub_start + sub_size;
11039				vm_map_machine_attribute(
11040					entry->object.sub_map,
11041					sub_start,
11042					sub_end,
11043					attribute, value);
11044			} else {
11045				if(entry->object.vm_object) {
11046					vm_page_t		m;
11047					vm_object_t		object;
11048					vm_object_t		base_object;
11049					vm_object_t		last_object;
11050					vm_object_offset_t	offset;
11051					vm_object_offset_t	base_offset;
11052					vm_map_size_t		range;
11053					range = sub_size;
11054					offset = (start - entry->vme_start)
11055						+ entry->offset;
11056					base_offset = offset;
11057					object = entry->object.vm_object;
11058					base_object = object;
11059					last_object = NULL;
11060
11061					vm_object_lock(object);
11062
11063					while (range) {
11064						m = vm_page_lookup(
11065							object, offset);
11066
11067						if (m && !m->fictitious) {
11068						        ret =
11069								pmap_attribute_cache_sync(
11070									m->phys_page,
11071									PAGE_SIZE,
11072									attribute, value);
11073
11074						} else if (object->shadow) {
11075						        offset = offset + object->vo_shadow_offset;
11076							last_object = object;
11077							object = object->shadow;
11078							vm_object_lock(last_object->shadow);
11079							vm_object_unlock(last_object);
11080							continue;
11081						}
11082						range -= PAGE_SIZE;
11083
11084						if (base_object != object) {
11085						        vm_object_unlock(object);
11086							vm_object_lock(base_object);
11087							object = base_object;
11088						}
11089						/* Bump to the next page */
11090						base_offset += PAGE_SIZE;
11091						offset = base_offset;
11092					}
11093					vm_object_unlock(object);
11094				}
11095			}
11096			start += sub_size;
11097		} else {
11098			vm_map_unlock(map);
11099			return KERN_FAILURE;
11100		}
11101
11102	}
11103
11104	vm_map_unlock(map);
11105
11106	return ret;
11107}
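/*
 * Illustrative user-space sketch (not part of this file; constant names
 * from <mach/vm_attributes.h>) of a call that reaches
 * vm_map_machine_attribute() above:
 *
 *	vm_machine_attribute_val_t value = MATTR_VAL_CACHE_FLUSH;
 *	kern_return_t kr = vm_machine_attribute(mach_task_self(),
 *			addr, len, MATTR_CACHE, &value);
 */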
11108
11109/*
11110 *	vm_map_behavior_set:
11111 *
11112 *	Sets the paging reference behavior of the specified address
11113 *	range in the target map.  Paging reference behavior affects
11114 *	how pagein operations resulting from faults on the map will be
11115 *	clustered.
11116 */
11117kern_return_t
11118vm_map_behavior_set(
11119	vm_map_t	map,
11120	vm_map_offset_t	start,
11121	vm_map_offset_t	end,
11122	vm_behavior_t	new_behavior)
11123{
11124	register vm_map_entry_t	entry;
11125	vm_map_entry_t	temp_entry;
11126
11127	XPR(XPR_VM_MAP,
11128	    "vm_map_behavior_set, 0x%X start 0x%X end 0x%X behavior %d",
11129	    map, start, end, new_behavior, 0);
11130
11131	if (start > end ||
11132	    start < vm_map_min(map) ||
11133	    end > vm_map_max(map)) {
11134		return KERN_NO_SPACE;
11135	}
11136
11137	switch (new_behavior) {
11138
11139	/*
11140	 * This first block of behaviors all set a persistent state on the specified
11141	 * memory range.  All we have to do here is to record the desired behavior
11142	 * in the vm_map_entry_t's.
11143	 */
11144
11145	case VM_BEHAVIOR_DEFAULT:
11146	case VM_BEHAVIOR_RANDOM:
11147	case VM_BEHAVIOR_SEQUENTIAL:
11148	case VM_BEHAVIOR_RSEQNTL:
11149	case VM_BEHAVIOR_ZERO_WIRED_PAGES:
11150		vm_map_lock(map);
11151
11152		/*
11153		 *	The entire address range must be valid for the map.
11154		 * 	Note that vm_map_range_check() does a
11155		 *	vm_map_lookup_entry() internally and returns the
11156		 *	entry containing the start of the address range if
11157		 *	the entire range is valid.
11158		 */
11159		if (vm_map_range_check(map, start, end, &temp_entry)) {
11160			entry = temp_entry;
11161			vm_map_clip_start(map, entry, start);
11162		}
11163		else {
11164			vm_map_unlock(map);
11165			return(KERN_INVALID_ADDRESS);
11166		}
11167
11168		while ((entry != vm_map_to_entry(map)) && (entry->vme_start < end)) {
11169			vm_map_clip_end(map, entry, end);
11170			assert(!entry->use_pmap);
11171
11172			if( new_behavior == VM_BEHAVIOR_ZERO_WIRED_PAGES ) {
11173				entry->zero_wired_pages = TRUE;
11174			} else {
11175				entry->behavior = new_behavior;
11176			}
11177			entry = entry->vme_next;
11178		}
11179
11180		vm_map_unlock(map);
11181		break;
11182
11183	/*
11184	 * The rest of these are different from the above in that they cause
11185	 * an immediate action to take place as opposed to setting a behavior that
11186	 * affects future actions.
11187	 */
11188
11189	case VM_BEHAVIOR_WILLNEED:
11190		return vm_map_willneed(map, start, end);
11191
11192	case VM_BEHAVIOR_DONTNEED:
11193		return vm_map_msync(map, start, end - start, VM_SYNC_DEACTIVATE | VM_SYNC_CONTIGUOUS);
11194
11195	case VM_BEHAVIOR_FREE:
11196		return vm_map_msync(map, start, end - start, VM_SYNC_KILLPAGES | VM_SYNC_CONTIGUOUS);
11197
11198	case VM_BEHAVIOR_REUSABLE:
11199		return vm_map_reusable_pages(map, start, end);
11200
11201	case VM_BEHAVIOR_REUSE:
11202		return vm_map_reuse_pages(map, start, end);
11203
11204	case VM_BEHAVIOR_CAN_REUSE:
11205		return vm_map_can_reuse(map, start, end);
11206
11207	default:
11208		return(KERN_INVALID_ARGUMENT);
11209	}
11210
11211	return(KERN_SUCCESS);
11212}
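/*
 * For reference (a sketch, based on the BSD layer rather than anything in
 * this file): the behaviors above are typically reached from user space
 * via madvise(2), e.g.
 *
 *	madvise(addr, len, MADV_SEQUENTIAL);	// -> VM_BEHAVIOR_SEQUENTIAL
 *	madvise(addr, len, MADV_WILLNEED);	// -> VM_BEHAVIOR_WILLNEED
 *	madvise(addr, len, MADV_FREE);		// -> VM_BEHAVIOR_FREE
 */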
11213
11214
11215/*
11216 * Internals for madvise(MADV_WILLNEED) system call.
11217 *
11218 * The present implementation is to do a read-ahead if the mapping corresponds
11219 * to a mapped regular file.  If it's an anonymous mapping, then we do nothing
11220 * and basically ignore the "advice" (which we are always free to do).
11221 */
11222
11223
11224static kern_return_t
11225vm_map_willneed(
11226	vm_map_t	map,
11227	vm_map_offset_t	start,
11228	vm_map_offset_t	end
11229)
11230{
11231	vm_map_entry_t 			entry;
11232	vm_object_t			object;
11233	memory_object_t			pager;
11234	struct vm_object_fault_info	fault_info;
11235	kern_return_t			kr;
11236	vm_object_size_t		len;
11237	vm_object_offset_t		offset;
11238
11239	/*
11240	 * Fill in static values in fault_info.  Several fields get ignored by the code
11241	 * we call, but we'll fill them in anyway since uninitialized fields are bad
11242	 * when it comes to future backwards compatibility.
11243	 */
11244
11245	fault_info.interruptible = THREAD_UNINT;		/* ignored value */
11246	fault_info.behavior      = VM_BEHAVIOR_SEQUENTIAL;
11247	fault_info.no_cache      = FALSE;			/* ignored value */
11248	fault_info.stealth	 = TRUE;
11249	fault_info.io_sync = FALSE;
11250	fault_info.cs_bypass = FALSE;
11251	fault_info.mark_zf_absent = FALSE;
11252	fault_info.batch_pmap_op = FALSE;
11253
11254	/*
11255	 * The MADV_WILLNEED operation doesn't require any changes to the
11256	 * vm_map_entry_t's, so the read lock is sufficient.
11257	 */
11258
11259	vm_map_lock_read(map);
11260
11261	/*
11262	 * The madvise semantics require that the address range be fully
11263	 * allocated with no holes.  Otherwise, we're required to return
11264	 * an error.
11265	 */
11266
11267	if (! vm_map_range_check(map, start, end, &entry)) {
11268		vm_map_unlock_read(map);
11269		return KERN_INVALID_ADDRESS;
11270	}
11271
11272	/*
11273	 * Examine each vm_map_entry_t in the range.
11274	 */
11275	for (; entry != vm_map_to_entry(map) && start < end; ) {
11276
11277		/*
11278		 * The first time through, the start address could be anywhere
11279		 * within the vm_map_entry we found.  So adjust the offset to
11280		 * correspond.  After that, the offset will always be zero to
11281		 * correspond to the beginning of the current vm_map_entry.
11282		 */
11283		offset = (start - entry->vme_start) + entry->offset;
11284
11285		/*
11286		 * Set the length so we don't go beyond the end of the
11287		 * map_entry or beyond the end of the range we were given.
11288		 * This range could also span multiple map entries, all of which
11289		 * map different files, so make sure we only do the right amount
11290		 * of I/O for each object.  Note that it's possible for there
11291		 * to be multiple map entries all referring to the same object
11292		 * but with different page permissions, but it's not worth
11293		 * trying to optimize that case.
11294		 */
11295		len = MIN(entry->vme_end - start, end - start);
11296
11297		if ((vm_size_t) len != len) {
11298			/* 32-bit overflow */
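			/* clamp to the largest page-aligned length that fits in vm_size_t */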
11299			len = (vm_size_t) (0 - PAGE_SIZE);
11300		}
11301		fault_info.cluster_size = (vm_size_t) len;
11302		fault_info.lo_offset    = offset;
11303		fault_info.hi_offset    = offset + len;
11304		fault_info.user_tag     = entry->alias;
11305
11306		/*
11307		 * If there's no read permission to this mapping, then just
11308		 * skip it.
11309		 */
11310		if ((entry->protection & VM_PROT_READ) == 0) {
11311			entry = entry->vme_next;
11312			start = entry->vme_start;
11313			continue;
11314		}
11315
11316		/*
11317		 * Find the file object backing this map entry.  If there is
11318		 * none, then we simply ignore the "will need" advice for this
11319		 * entry and go on to the next one.
11320		 */
11321		if ((object = find_vnode_object(entry)) == VM_OBJECT_NULL) {
11322			entry = entry->vme_next;
11323			start = entry->vme_start;
11324			continue;
11325		}
11326
11327		/*
11328		 * The data_request() could take a long time, so let's
11329		 * release the map lock to avoid blocking other threads.
11330		 */
11331		vm_map_unlock_read(map);
11332
11333		vm_object_paging_begin(object);
11334		pager = object->pager;
11335		vm_object_unlock(object);
11336
11337		/*
11338		 * Get the data from the object asynchronously.
11339		 *
11340		 * Note that memory_object_data_request() places limits on the
11341		 * amount of I/O it will do.  Regardless of the len we
11342		 * specified, it won't do more than MAX_UPL_TRANSFER and it
11343		 * silently truncates the len to that size.  This isn't
11344		 * necessarily bad since madvise shouldn't really be used to
11345		 * page in unlimited amounts of data.  Other Unix variants
11346		 * limit the willneed case as well.  If this turns out to be an
11347		 * issue for developers, then we can always adjust the policy
11348		 * here and still be backwards compatible since this is all
11349		 * just "advice".
11350		 */
11351		kr = memory_object_data_request(
11352			pager,
11353			offset + object->paging_offset,
11354			0,	/* ignored */
11355			VM_PROT_READ,
11356			(memory_object_fault_info_t)&fault_info);
11357
11358		vm_object_lock(object);
11359		vm_object_paging_end(object);
11360		vm_object_unlock(object);
11361
11362		/*
11363		 * If we couldn't do the I/O for some reason, just give up on
11364		 * the madvise.  We still return success to the user since
11365		 * madvise isn't supposed to fail when the advice can't be
11366		 * taken.
11367		 */
11368		if (kr != KERN_SUCCESS) {
11369			return KERN_SUCCESS;
11370		}
11371
11372		start += len;
11373		if (start >= end) {
11374			/* done */
11375			return KERN_SUCCESS;
11376		}
11377
11378		/* look up next entry */
11379		vm_map_lock_read(map);
11380		if (! vm_map_lookup_entry(map, start, &entry)) {
11381			/*
11382			 * There's a new hole in the address range.
11383			 */
11384			vm_map_unlock_read(map);
11385			return KERN_INVALID_ADDRESS;
11386		}
11387	}
11388
11389	vm_map_unlock_read(map);
11390	return KERN_SUCCESS;
11391}
11392
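/*
 *	vm_map_entry_is_reusable:
 *
 *	Sanity check used by the "reusable pages" operations below.  For
 *	malloc()-tagged entries, verify that the entry and its backing
 *	object are still in a plain, unshared, unwired state that is safe
 *	to mark reusable; for other entries, leave the decision to the
 *	caller and return TRUE.
 */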
11393static boolean_t
11394vm_map_entry_is_reusable(
11395	vm_map_entry_t entry)
11396{
11397	vm_object_t object;
11398
11399	switch (entry->alias) {
11400	case VM_MEMORY_MALLOC:
11401	case VM_MEMORY_MALLOC_SMALL:
11402	case VM_MEMORY_MALLOC_LARGE:
11403	case VM_MEMORY_REALLOC:
11404	case VM_MEMORY_MALLOC_TINY:
11405	case VM_MEMORY_MALLOC_LARGE_REUSABLE:
11406	case VM_MEMORY_MALLOC_LARGE_REUSED:
11407		/*
11408		 * This is a malloc() memory region: check if it's still
11409		 * in its original state and can be re-used for more
11410		 * malloc() allocations.
11411		 */
11412		break;
11413	default:
11414		/*
11415		 * Not a malloc() memory region: let the caller decide if
11416		 * it's re-usable.
11417		 */
11418		return TRUE;
11419	}
11420
11421	if (entry->is_shared ||
11422	    entry->is_sub_map ||
11423	    entry->in_transition ||
11424	    entry->protection != VM_PROT_DEFAULT ||
11425	    entry->max_protection != VM_PROT_ALL ||
11426	    entry->inheritance != VM_INHERIT_DEFAULT ||
11427	    entry->no_cache ||
11428	    entry->permanent ||
11429	    entry->superpage_size != FALSE ||
11430	    entry->zero_wired_pages ||
11431	    entry->wired_count != 0 ||
11432	    entry->user_wired_count != 0) {
11433		return FALSE;
11434	}
11435
11436	object = entry->object.vm_object;
11437	if (object == VM_OBJECT_NULL) {
11438		return TRUE;
11439	}
11440	if (
11441#if 0
11442		/*
11443		 * Let's proceed even if the VM object is potentially
11444		 * shared.
11445		 * We check for this later when processing the actual
11446		 * VM pages, so the contents will be safe if shared.
11447		 *
11448		 * But we can still mark this memory region as "reusable" to
11449		 * acknowledge that the caller did let us know that the memory
11450		 * could be re-used and should not be penalized for holding
11451		 * on to it.  This allows its "resident size" to not include
11452		 * the reusable range.
11453		 */
11454	    object->ref_count == 1 &&
11455#endif
11456	    object->wired_page_count == 0 &&
11457	    object->copy == VM_OBJECT_NULL &&
11458	    object->shadow == VM_OBJECT_NULL &&
11459	    object->copy_strategy == MEMORY_OBJECT_COPY_SYMMETRIC &&
11460	    object->internal &&
11461	    !object->true_share &&
11462	    object->wimg_bits == VM_WIMG_USE_DEFAULT &&
11463	    !object->code_signed) {
11464		return TRUE;
11465	}
11466	return FALSE;
11467
11468
11469}
11470
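/*
 *	vm_map_reuse_pages:
 *
 *	Internals for the VM_BEHAVIOR_REUSE case above: the caller is about
 *	to re-use a range it previously marked reusable, so undo the
 *	"reusable" accounting in the pmap and in the backing VM objects for
 *	every entry in [start, end).
 */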
11471static kern_return_t
11472vm_map_reuse_pages(
11473	vm_map_t	map,
11474	vm_map_offset_t	start,
11475	vm_map_offset_t	end)
11476{
11477	vm_map_entry_t 			entry;
11478	vm_object_t			object;
11479	vm_object_offset_t		start_offset, end_offset;
11480
11481	/*
11482	 * The MADV_REUSE operation doesn't require any changes to the
11483	 * vm_map_entry_t's, so the read lock is sufficient.
11484	 */
11485
11486	vm_map_lock_read(map);
11487
11488	/*
11489	 * The madvise semantics require that the address range be fully
11490	 * allocated with no holes.  Otherwise, we're required to return
11491	 * an error.
11492	 */
11493
11494	if (!vm_map_range_check(map, start, end, &entry)) {
11495		vm_map_unlock_read(map);
11496		vm_page_stats_reusable.reuse_pages_failure++;
11497		return KERN_INVALID_ADDRESS;
11498	}
11499
11500	/*
11501	 * Examine each vm_map_entry_t in the range.
11502	 */
11503	for (; entry != vm_map_to_entry(map) && entry->vme_start < end;
11504	     entry = entry->vme_next) {
11505		/*
11506		 * Sanity check on the VM map entry.
11507		 */
11508		if (! vm_map_entry_is_reusable(entry)) {
11509			vm_map_unlock_read(map);
11510			vm_page_stats_reusable.reuse_pages_failure++;
11511			return KERN_INVALID_ADDRESS;
11512		}
11513
11514		/*
11515		 * The first time through, the start address could be anywhere
11516		 * within the vm_map_entry we found.  So adjust the offset to
11517		 * correspond.
11518		 */
11519		if (entry->vme_start < start) {
11520			start_offset = start - entry->vme_start;
11521		} else {
11522			start_offset = 0;
11523		}
11524		end_offset = MIN(end, entry->vme_end) - entry->vme_start;
11525		start_offset += entry->offset;
11526		end_offset += entry->offset;
11527
11528		object = entry->object.vm_object;
11529		if (object != VM_OBJECT_NULL) {
11530			/* tell pmap to not count this range as "reusable" */
11531			pmap_reusable(map->pmap,
11532				      MAX(start, entry->vme_start),
11533				      MIN(end, entry->vme_end),
11534				      FALSE);
11535			vm_object_lock(object);
11536			vm_object_reuse_pages(object, start_offset, end_offset,
11537					      TRUE);
11538			vm_object_unlock(object);
11539		}
11540
11541		if (entry->alias == VM_MEMORY_MALLOC_LARGE_REUSABLE) {
11542			/*
11543			 * XXX
11544			 * We do not hold the VM map exclusively here.
11545			 * The "alias" field is not that critical, so it's
11546			 * safe to update it here, as long as it is the only
11547			 * one that can be modified while holding the VM map
11548			 * "shared".
11549			 */
11550			entry->alias = VM_MEMORY_MALLOC_LARGE_REUSED;
11551		}
11552	}
11553
11554	vm_map_unlock_read(map);
11555	vm_page_stats_reusable.reuse_pages_success++;
11556	return KERN_SUCCESS;
11557}
11558
11559
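/*
 *	vm_map_reusable_pages:
 *
 *	Internals for the VM_BEHAVIOR_REUSABLE case above: deactivate the
 *	pages backing [start, end) (discarding them outright when the
 *	object is unshared) and tell the pmap to account for the range as
 *	"reusable".
 */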
11560static kern_return_t
11561vm_map_reusable_pages(
11562	vm_map_t	map,
11563	vm_map_offset_t	start,
11564	vm_map_offset_t	end)
11565{
11566	vm_map_entry_t 			entry;
11567	vm_object_t			object;
11568	vm_object_offset_t		start_offset, end_offset;
11569
11570	/*
11571	 * The MADV_REUSABLE operation doesn't require any changes to the
11572	 * vm_map_entry_t's, so the read lock is sufficient.
11573	 */
11574
11575	vm_map_lock_read(map);
11576
11577	/*
11578	 * The madvise semantics require that the address range be fully
11579	 * allocated with no holes.  Otherwise, we're required to return
11580	 * an error.
11581	 */
11582
11583	if (!vm_map_range_check(map, start, end, &entry)) {
11584		vm_map_unlock_read(map);
11585		vm_page_stats_reusable.reusable_pages_failure++;
11586		return KERN_INVALID_ADDRESS;
11587	}
11588
11589	/*
11590	 * Examine each vm_map_entry_t in the range.
11591	 */
11592	for (; entry != vm_map_to_entry(map) && entry->vme_start < end;
11593	     entry = entry->vme_next) {
11594		int kill_pages = 0;
11595
11596		/*
11597		 * Sanity check on the VM map entry.
11598		 */
11599		if (! vm_map_entry_is_reusable(entry)) {
11600			vm_map_unlock_read(map);
11601			vm_page_stats_reusable.reusable_pages_failure++;
11602			return KERN_INVALID_ADDRESS;
11603		}
11604
11605		/*
11606		 * The first time through, the start address could be anywhere
11607		 * within the vm_map_entry we found.  So adjust the offset to
11608		 * correspond.
11609		 */
11610		if (entry->vme_start < start) {
11611			start_offset = start - entry->vme_start;
11612		} else {
11613			start_offset = 0;
11614		}
11615		end_offset = MIN(end, entry->vme_end) - entry->vme_start;
11616		start_offset += entry->offset;
11617		end_offset += entry->offset;
11618
11619		object = entry->object.vm_object;
11620		if (object == VM_OBJECT_NULL)
11621			continue;
11622
11623
11624		vm_object_lock(object);
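		/*
		 * Only discard ("kill") the pages when the object is not
		 * shared: a single reference and no shadow.  Otherwise,
		 * leave the pages alone and just count the range as shared.
		 */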
11625		if (object->ref_count == 1 && !object->shadow)
11626			kill_pages = 1;
11627		else
11628			kill_pages = -1;
11629		if (kill_pages != -1) {
11630			/* tell pmap to count this range as "reusable" */
11631			pmap_reusable(map->pmap,
11632				      MAX(start, entry->vme_start),
11633				      MIN(end, entry->vme_end),
11634				      TRUE);
11635			vm_object_deactivate_pages(object,
11636						   start_offset,
11637						   end_offset - start_offset,
11638						   kill_pages,
11639						   TRUE /*reusable_pages*/);
11640		} else {
11641			vm_page_stats_reusable.reusable_pages_shared++;
11642		}
11643		vm_object_unlock(object);
11644
11645		if (entry->alias == VM_MEMORY_MALLOC_LARGE ||
11646		    entry->alias == VM_MEMORY_MALLOC_LARGE_REUSED) {
11647			/*
11648			 * XXX
11649			 * We do not hold the VM map exclusively here.
11650			 * The "alias" field is not that critical, so it's
11651			 * safe to update it here, as long as it is the only
11652			 * one that can be modified while holding the VM map
11653			 * "shared".
11654			 */
11655			entry->alias = VM_MEMORY_MALLOC_LARGE_REUSABLE;
11656		}
11657	}
11658
11659	vm_map_unlock_read(map);
11660	vm_page_stats_reusable.reusable_pages_success++;
11661	return KERN_SUCCESS;
11662}
11663
11664
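/*
 *	vm_map_can_reuse:
 *
 *	Internals for the VM_BEHAVIOR_CAN_REUSE case above: verify that
 *	every entry in [start, end) passes the reusability sanity check,
 *	without changing any state.
 */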
11665static kern_return_t
11666vm_map_can_reuse(
11667	vm_map_t	map,
11668	vm_map_offset_t	start,
11669	vm_map_offset_t	end)
11670{
11671	vm_map_entry_t 			entry;
11672
11673	/*
11674	 * The MADV_REUSABLE operation doesn't require any changes to the
11675	 * vm_map_entry_t's, so the read lock is sufficient.
11676	 */
11677
11678	vm_map_lock_read(map);
11679
11680	/*
11681	 * The madvise semantics require that the address range be fully
11682	 * allocated with no holes.  Otherwise, we're required to return
11683	 * an error.
11684	 */
11685
11686	if (!vm_map_range_check(map, start, end, &entry)) {
11687		vm_map_unlock_read(map);
11688		vm_page_stats_reusable.can_reuse_failure++;
11689		return KERN_INVALID_ADDRESS;
11690	}
11691
11692	/*
11693	 * Examine each vm_map_entry_t in the range.
11694	 */
11695	for (; entry != vm_map_to_entry(map) && entry->vme_start < end;
11696	     entry = entry->vme_next) {
11697		/*
11698		 * Sanity check on the VM map entry.
11699		 */
11700		if (! vm_map_entry_is_reusable(entry)) {
11701			vm_map_unlock_read(map);
11702			vm_page_stats_reusable.can_reuse_failure++;
11703			return KERN_INVALID_ADDRESS;
11704		}
11705	}
11706
11707	vm_map_unlock_read(map);
11708	vm_page_stats_reusable.can_reuse_success++;
11709	return KERN_SUCCESS;
11710}
11711
11712
11713/*
11714 *	Routine:	vm_map_entry_insert
11715 *
11716 *	Description:	This routine inserts a new vm_map_entry in a locked map.
11717 */
11718vm_map_entry_t
11719vm_map_entry_insert(
11720	vm_map_t		map,
11721	vm_map_entry_t		insp_entry,
11722	vm_map_offset_t		start,
11723	vm_map_offset_t		end,
11724	vm_object_t		object,
11725	vm_object_offset_t	offset,
11726	boolean_t		needs_copy,
11727	boolean_t		is_shared,
11728	boolean_t		in_transition,
11729	vm_prot_t		cur_protection,
11730	vm_prot_t		max_protection,
11731	vm_behavior_t		behavior,
11732	vm_inherit_t		inheritance,
11733	unsigned		wired_count,
11734	boolean_t		no_cache,
11735	boolean_t		permanent,
11736	unsigned int		superpage_size,
11737	boolean_t		clear_map_aligned)
11738{
11739	vm_map_entry_t	new_entry;
11740
11741	assert(insp_entry != (vm_map_entry_t)0);
11742
11743	new_entry = vm_map_entry_create(map, !map->hdr.entries_pageable);
11744
11745	if (VM_MAP_PAGE_SHIFT(map) != PAGE_SHIFT) {
11746		new_entry->map_aligned = TRUE;
11747	} else {
11748		new_entry->map_aligned = FALSE;
11749	}
11750	if (clear_map_aligned &&
11751	    ! VM_MAP_PAGE_ALIGNED(end, VM_MAP_PAGE_MASK(map))) {
11752		new_entry->map_aligned = FALSE;
11753	}
11754
11755	new_entry->vme_start = start;
11756	new_entry->vme_end = end;
11757	assert(page_aligned(new_entry->vme_start));
11758	assert(page_aligned(new_entry->vme_end));
11759	assert(VM_MAP_PAGE_ALIGNED(new_entry->vme_start,
11760				   VM_MAP_PAGE_MASK(map)));
11761	if (new_entry->map_aligned) {
11762		assert(VM_MAP_PAGE_ALIGNED(new_entry->vme_end,
11763					   VM_MAP_PAGE_MASK(map)));
11764	}
11765	assert(new_entry->vme_start < new_entry->vme_end);
11766
11767	new_entry->object.vm_object = object;
11768	new_entry->offset = offset;
11769	new_entry->is_shared = is_shared;
11770	new_entry->is_sub_map = FALSE;
11771	new_entry->needs_copy = needs_copy;
11772	new_entry->in_transition = in_transition;
11773	new_entry->needs_wakeup = FALSE;
11774	new_entry->inheritance = inheritance;
11775	new_entry->protection = cur_protection;
11776	new_entry->max_protection = max_protection;
11777	new_entry->behavior = behavior;
11778	new_entry->wired_count = wired_count;
11779	new_entry->user_wired_count = 0;
11780	new_entry->use_pmap = FALSE;
11781	new_entry->alias = 0;
11782	new_entry->zero_wired_pages = FALSE;
11783	new_entry->no_cache = no_cache;
11784	new_entry->permanent = permanent;
11785	if (superpage_size)
11786		new_entry->superpage_size = TRUE;
11787	else
11788		new_entry->superpage_size = FALSE;
11789	new_entry->used_for_jit = FALSE;
11790
11791	/*
11792	 *	Insert the new entry into the list.
11793	 */
11794
11795	vm_map_store_entry_link(map, insp_entry, new_entry);
11796	map->size += end - start;
11797
11798	/*
11799	 *	Update the free space hint and the lookup hint.
11800	 */
11801
11802	SAVE_HINT_MAP_WRITE(map, new_entry);
11803	return new_entry;
11804}
11805
11806/*
11807 *	Routine:	vm_map_remap_extract
11808 *
11809 *	Description:	This routine returns a list of vm_map_entry's extracted from a map.
11810 */
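/*
 *	The extracted entries are linked into "map_header" and cover the
 *	source range [addr, addr+size).  If "copy" is FALSE, the source and
 *	destination share the underlying objects; if TRUE, the objects are
 *	copied (quickly when possible, slowly for wired entries).
 *	"cur_protection" and "max_protection" return the intersection of
 *	the source entries' protections.
 */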
11811static kern_return_t
11812vm_map_remap_extract(
11813	vm_map_t		map,
11814	vm_map_offset_t		addr,
11815	vm_map_size_t		size,
11816	boolean_t		copy,
11817	struct vm_map_header	*map_header,
11818	vm_prot_t		*cur_protection,
11819	vm_prot_t		*max_protection,
11820	/* What, no behavior? */
11821	vm_inherit_t		inheritance,
11822	boolean_t		pageable)
11823{
11824	kern_return_t		result;
11825	vm_map_size_t		mapped_size;
11826	vm_map_size_t		tmp_size;
11827	vm_map_entry_t		src_entry;     /* result of last map lookup */
11828	vm_map_entry_t		new_entry;
11829	vm_object_offset_t	offset;
11830	vm_map_offset_t		map_address;
11831	vm_map_offset_t		src_start;     /* start of entry to map */
11832	vm_map_offset_t		src_end;       /* end of region to be mapped */
11833	vm_object_t		object;
11834	vm_map_version_t	version;
11835	boolean_t		src_needs_copy;
11836	boolean_t		new_entry_needs_copy;
11837
11838	assert(map != VM_MAP_NULL);
11839	assert(size != 0);
11840	assert(size == vm_map_round_page(size, PAGE_MASK));
11841	assert(inheritance == VM_INHERIT_NONE ||
11842	       inheritance == VM_INHERIT_COPY ||
11843	       inheritance == VM_INHERIT_SHARE);
11844
11845	/*
11846	 *	Compute start and end of region.
11847	 */
11848	src_start = vm_map_trunc_page(addr, PAGE_MASK);
11849	src_end = vm_map_round_page(src_start + size, PAGE_MASK);
11850
11851
11852	/*
11853	 *	Initialize map_header.
11854	 */
11855	map_header->links.next = (struct vm_map_entry *)&map_header->links;
11856	map_header->links.prev = (struct vm_map_entry *)&map_header->links;
11857	map_header->nentries = 0;
11858	map_header->entries_pageable = pageable;
11859	map_header->page_shift = PAGE_SHIFT;
11860
11861	vm_map_store_init( map_header );
11862
11863	*cur_protection = VM_PROT_ALL;
11864	*max_protection = VM_PROT_ALL;
11865
11866	map_address = 0;
11867	mapped_size = 0;
11868	result = KERN_SUCCESS;
11869
11870	/*
11871	 *	The specified source virtual space might correspond to
11872	 *	multiple map entries, need to loop on them.
11873	 */
11874	vm_map_lock(map);
11875	while (mapped_size != size) {
11876		vm_map_size_t	entry_size;
11877
11878		/*
11879		 *	Find the beginning of the region.
11880		 */
11881		if (! vm_map_lookup_entry(map, src_start, &src_entry)) {
11882			result = KERN_INVALID_ADDRESS;
11883			break;
11884		}
11885
11886		if (src_start < src_entry->vme_start ||
11887		    (mapped_size && src_start != src_entry->vme_start)) {
11888			result = KERN_INVALID_ADDRESS;
11889			break;
11890		}
11891
11892		tmp_size = size - mapped_size;
11893		if (src_end > src_entry->vme_end)
11894			tmp_size -= (src_end - src_entry->vme_end);
11895
11896		entry_size = (vm_map_size_t)(src_entry->vme_end -
11897					     src_entry->vme_start);
11898
11899		if(src_entry->is_sub_map) {
11900			vm_map_reference(src_entry->object.sub_map);
11901			object = VM_OBJECT_NULL;
11902		} else {
11903			object = src_entry->object.vm_object;
11904
11905			if (object == VM_OBJECT_NULL) {
11906				object = vm_object_allocate(entry_size);
11907				src_entry->offset = 0;
11908				src_entry->object.vm_object = object;
11909			} else if (object->copy_strategy !=
11910				   MEMORY_OBJECT_COPY_SYMMETRIC) {
11911				/*
11912				 *	We are already using an asymmetric
11913				 *	copy, and therefore we already have
11914				 *	the right object.
11915				 */
11916				assert(!src_entry->needs_copy);
11917			} else if (src_entry->needs_copy || object->shadowed ||
11918				   (object->internal && !object->true_share &&
11919				    !src_entry->is_shared &&
11920				    object->vo_size > entry_size)) {
11921
11922				vm_object_shadow(&src_entry->object.vm_object,
11923						 &src_entry->offset,
11924						 entry_size);
11925
11926				if (!src_entry->needs_copy &&
11927				    (src_entry->protection & VM_PROT_WRITE)) {
11928				        vm_prot_t prot;
11929
11930				        prot = src_entry->protection & ~VM_PROT_WRITE;
11931
11932					if (override_nx(map, src_entry->alias) && prot)
11933					        prot |= VM_PROT_EXECUTE;
11934
11935					if(map->mapped_in_other_pmaps) {
11936						vm_object_pmap_protect(
11937							src_entry->object.vm_object,
11938							src_entry->offset,
11939							entry_size,
11940							PMAP_NULL,
11941							src_entry->vme_start,
11942							prot);
11943					} else {
11944						pmap_protect(vm_map_pmap(map),
11945							     src_entry->vme_start,
11946							     src_entry->vme_end,
11947							     prot);
11948					}
11949				}
11950
11951				object = src_entry->object.vm_object;
11952				src_entry->needs_copy = FALSE;
11953			}
11954
11955
11956			vm_object_lock(object);
11957			vm_object_reference_locked(object); /* object ref. for new entry */
11958			if (object->copy_strategy ==
11959			    MEMORY_OBJECT_COPY_SYMMETRIC) {
11960				object->copy_strategy =
11961					MEMORY_OBJECT_COPY_DELAY;
11962			}
11963			vm_object_unlock(object);
11964		}
11965
11966		offset = src_entry->offset + (src_start - src_entry->vme_start);
11967
11968		new_entry = _vm_map_entry_create(map_header, !map_header->entries_pageable);
11969		vm_map_entry_copy(new_entry, src_entry);
11970		new_entry->use_pmap = FALSE; /* clr address space specifics */
11971
11972		new_entry->map_aligned = FALSE;
11973
11974		new_entry->vme_start = map_address;
11975		new_entry->vme_end = map_address + tmp_size;
11976		assert(new_entry->vme_start < new_entry->vme_end);
11977		new_entry->inheritance = inheritance;
11978		new_entry->offset = offset;
11979
11980		/*
11981		 * The new region has to be copied now if required.
11982		 */
11983	RestartCopy:
11984		if (!copy) {
11985			/*
11986			 * Cannot allow an entry describing a JIT
11987			 * region to be shared across address spaces.
11988			 */
11989			if (src_entry->used_for_jit == TRUE) {
11990				result = KERN_INVALID_ARGUMENT;
11991				break;
11992			}
11993			src_entry->is_shared = TRUE;
11994			new_entry->is_shared = TRUE;
11995			if (!(new_entry->is_sub_map))
11996				new_entry->needs_copy = FALSE;
11997
11998		} else if (src_entry->is_sub_map) {
11999			/* make this a COW sub_map if not already */
12000			new_entry->needs_copy = TRUE;
12001			object = VM_OBJECT_NULL;
12002		} else if (src_entry->wired_count == 0 &&
12003			   vm_object_copy_quickly(&new_entry->object.vm_object,
12004						  new_entry->offset,
12005						  (new_entry->vme_end -
12006						   new_entry->vme_start),
12007						  &src_needs_copy,
12008						  &new_entry_needs_copy)) {
12009
12010			new_entry->needs_copy = new_entry_needs_copy;
12011			new_entry->is_shared = FALSE;
12012
12013			/*
12014			 * Handle copy_on_write semantics.
12015			 */
12016			if (src_needs_copy && !src_entry->needs_copy) {
12017			        vm_prot_t prot;
12018
12019				prot = src_entry->protection & ~VM_PROT_WRITE;
12020
12021				if (override_nx(map, src_entry->alias) && prot)
12022				        prot |= VM_PROT_EXECUTE;
12023
12024				vm_object_pmap_protect(object,
12025						       offset,
12026						       entry_size,
12027						       ((src_entry->is_shared
12028							 || map->mapped_in_other_pmaps) ?
12029							PMAP_NULL : map->pmap),
12030						       src_entry->vme_start,
12031						       prot);
12032
12033				src_entry->needs_copy = TRUE;
12034			}
12035			/*
12036			 * Throw away the old object reference of the new entry.
12037			 */
12038			vm_object_deallocate(object);
12039
12040		} else {
12041			new_entry->is_shared = FALSE;
12042
12043			/*
12044			 * The map can be safely unlocked since we
12045			 * already hold a reference on the object.
12046			 *
12047			 * Record the timestamp of the map for later
12048			 * verification, and unlock the map.
12049			 */
12050			version.main_timestamp = map->timestamp;
12051			vm_map_unlock(map); 	/* Increments timestamp once! */
12052
12053			/*
12054			 * Perform the copy.
12055			 */
12056			if (src_entry->wired_count > 0) {
12057				vm_object_lock(object);
12058				result = vm_object_copy_slowly(
12059					object,
12060					offset,
12061					entry_size,
12062					THREAD_UNINT,
12063					&new_entry->object.vm_object);
12064
12065				new_entry->offset = 0;
12066				new_entry->needs_copy = FALSE;
12067			} else {
12068				result = vm_object_copy_strategically(
12069					object,
12070					offset,
12071					entry_size,
12072					&new_entry->object.vm_object,
12073					&new_entry->offset,
12074					&new_entry_needs_copy);
12075
12076				new_entry->needs_copy = new_entry_needs_copy;
12077			}
12078
12079			/*
12080			 * Throw away the old object reference of the new entry.
12081			 */
12082			vm_object_deallocate(object);
12083
12084			if (result != KERN_SUCCESS &&
12085			    result != KERN_MEMORY_RESTART_COPY) {
12086				_vm_map_entry_dispose(map_header, new_entry);
12087				break;
12088			}
12089
12090			/*
12091			 * Verify that the map has not substantially
12092			 * changed while the copy was being made.
12093			 */
12094
12095			vm_map_lock(map);
12096			if (version.main_timestamp + 1 != map->timestamp) {
12097				/*
12098				 * Simple version comparison failed.
12099				 *
12100				 * Retry the lookup and verify that the
12101				 * same object/offset are still present.
12102				 */
12103				vm_object_deallocate(new_entry->
12104						     object.vm_object);
12105				_vm_map_entry_dispose(map_header, new_entry);
12106				if (result == KERN_MEMORY_RESTART_COPY)
12107					result = KERN_SUCCESS;
12108				continue;
12109			}
12110
12111			if (result == KERN_MEMORY_RESTART_COPY) {
12112				vm_object_reference(object);
12113				goto RestartCopy;
12114			}
12115		}
12116
12117		_vm_map_store_entry_link(map_header,
12118				   map_header->links.prev, new_entry);
12119
12120		/* Protections for submap mappings are irrelevant here */
12121		if( !src_entry->is_sub_map ) {
12122			*cur_protection &= src_entry->protection;
12123			*max_protection &= src_entry->max_protection;
12124		}
12125		map_address += tmp_size;
12126		mapped_size += tmp_size;
12127		src_start += tmp_size;
12128
12129	} /* end while */
12130
12131	vm_map_unlock(map);
12132	if (result != KERN_SUCCESS) {
12133		/*
12134		 * Free all allocated elements.
12135		 */
12136		for (src_entry = map_header->links.next;
12137		     src_entry != (struct vm_map_entry *)&map_header->links;
12138		     src_entry = new_entry) {
12139			new_entry = src_entry->vme_next;
12140			_vm_map_store_entry_unlink(map_header, src_entry);
12141			if (src_entry->is_sub_map) {
12142				vm_map_deallocate(src_entry->object.sub_map);
12143			} else {
12144				vm_object_deallocate(src_entry->object.vm_object);
12145			}
12146			_vm_map_entry_dispose(map_header, src_entry);
12147		}
12148	}
12149	return result;
12150}
12151
12152/*
12153 *	Routine:	vm_remap
12154 *
12155 *			Map a portion of a task's address space.
12156 *			The mapped region must not overlap more than
12157 *			one VM memory object.  Protection and
12158 *			inheritance attributes remain the same
12159 *			as in the original task and are out parameters.
12160 *			Source and target task can be identical.
12161 *			Other attributes are the same as for vm_map().
12162 */
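/*
 * Illustrative sketch only (names and values hypothetical): remap a copy
 * of another task's buffer into the kernel map, letting vm_map_remap()
 * pick the destination address:
 *
 *	vm_map_address_t	dst = 0;
 *	vm_prot_t		cur, max;
 *	kr = vm_map_remap(kernel_map, &dst, len, 0, VM_FLAGS_ANYWHERE,
 *			  src_map, src_addr, TRUE,
 *			  &cur, &max, VM_INHERIT_NONE);
 */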
12163kern_return_t
12164vm_map_remap(
12165	vm_map_t		target_map,
12166	vm_map_address_t	*address,
12167	vm_map_size_t		size,
12168	vm_map_offset_t		mask,
12169	int			flags,
12170	vm_map_t		src_map,
12171	vm_map_offset_t		memory_address,
12172	boolean_t		copy,
12173	vm_prot_t		*cur_protection,
12174	vm_prot_t		*max_protection,
12175	vm_inherit_t		inheritance)
12176{
12177	kern_return_t		result;
12178	vm_map_entry_t		entry;
12179	vm_map_entry_t		insp_entry = VM_MAP_ENTRY_NULL;
12180	vm_map_entry_t		new_entry;
12181	struct vm_map_header	map_header;
12182	vm_map_offset_t		offset_in_mapping;
12183
12184	if (target_map == VM_MAP_NULL)
12185		return KERN_INVALID_ARGUMENT;
12186
12187	switch (inheritance) {
12188	case VM_INHERIT_NONE:
12189	case VM_INHERIT_COPY:
12190	case VM_INHERIT_SHARE:
12191		if (size != 0 && src_map != VM_MAP_NULL)
12192			break;
12193		/*FALL THRU*/
12194	default:
12195		return KERN_INVALID_ARGUMENT;
12196	}
12197
12198	/*
12199	 * If the user is requesting that we return the address of the
12200	 * first byte of the data (rather than the base of the page),
12201	 * then we use different rounding semantics: specifically,
12202	 * we assume that (memory_address, size) describes a region
12203	 * all of whose pages we must cover, rather than a base to be truncated
12204	 * down and a size to be added to that base.  So we figure out
12205	 * the highest page that the requested region includes and make
12206	 * sure that the size will cover it.
12207	 *
12208	 * The key example we're worried about is of the form:
12209	 *
12210	 * 		memory_address = 0x1ff0, size = 0x20
12211	 *
12212	 * With the old semantics, we round down the memory_address to 0x1000
12213	 * and round up the size to 0x1000, resulting in our covering *only*
12214	 * page 0x1000.  With the new semantics, we'd realize that the region covers
12215	 * 0x1ff0-0x2010, and compute a size of 0x2000.  Thus, we cover both page
12216	 * 0x1000 and page 0x2000 in the region we remap.
12217	 */
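	/*
	 * Working that example through the code below (4K pages):
	 *	offset_in_mapping = 0x1ff0 - trunc_page(0x1ff0) = 0x0ff0
	 *	size = round_page(0x1ff0 + 0x20 - 0x1000)
	 *	     = round_page(0x1010) = 0x2000
	 */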
12218	if ((flags & VM_FLAGS_RETURN_DATA_ADDR) != 0) {
12219		offset_in_mapping = memory_address - vm_map_trunc_page(memory_address, PAGE_MASK);
12220		size = vm_map_round_page(memory_address + size - vm_map_trunc_page(memory_address, PAGE_MASK), PAGE_MASK);
12221	} else {
12222		size = vm_map_round_page(size, PAGE_MASK);
12223	}
12224
12225	result = vm_map_remap_extract(src_map, memory_address,
12226				      size, copy, &map_header,
12227				      cur_protection,
12228				      max_protection,
12229				      inheritance,
12230				      target_map->hdr.entries_pageable);
12231
12232	if (result != KERN_SUCCESS) {
12233		return result;
12234	}
12235
12236	/*
12237	 * Allocate/check a range of free virtual address
12238	 * space for the target
12239	 */
12240	*address = vm_map_trunc_page(*address,
12241				     VM_MAP_PAGE_MASK(target_map));
12242	vm_map_lock(target_map);
12243	result = vm_map_remap_range_allocate(target_map, address, size,
12244					     mask, flags, &insp_entry);
12245
12246	for (entry = map_header.links.next;
12247	     entry != (struct vm_map_entry *)&map_header.links;
12248	     entry = new_entry) {
12249		new_entry = entry->vme_next;
12250		_vm_map_store_entry_unlink(&map_header, entry);
12251		if (result == KERN_SUCCESS) {
12252			entry->vme_start += *address;
12253			entry->vme_end += *address;
12254			assert(!entry->map_aligned);
12255			vm_map_store_entry_link(target_map, insp_entry, entry);
12256			insp_entry = entry;
12257		} else {
12258			if (!entry->is_sub_map) {
12259				vm_object_deallocate(entry->object.vm_object);
12260			} else {
12261				vm_map_deallocate(entry->object.sub_map);
12262			}
12263			_vm_map_entry_dispose(&map_header, entry);
12264		}
12265	}
12266
12267	if( target_map->disable_vmentry_reuse == TRUE) {
12268		if( target_map->highest_entry_end < insp_entry->vme_end ){
12269			target_map->highest_entry_end = insp_entry->vme_end;
12270		}
12271	}
12272
12273	if (result == KERN_SUCCESS) {
12274		target_map->size += size;
12275		SAVE_HINT_MAP_WRITE(target_map, insp_entry);
12276	}
12277	vm_map_unlock(target_map);
12278
12279	if (result == KERN_SUCCESS && target_map->wiring_required)
12280		result = vm_map_wire(target_map, *address,
12281				     *address + size, *cur_protection, TRUE);
12282
12283	/*
12284	 * If requested, return the address of the data pointed to by the
12285	 * request, rather than the base of the resulting page.
12286	 */
12287	if ((flags & VM_FLAGS_RETURN_DATA_ADDR) != 0) {
12288		*address += offset_in_mapping;
12289	}
12290
12291	return result;
12292}
12293
12294/*
12295 *	Routine:	vm_map_remap_range_allocate
12296 *
12297 *	Description:
12298 *		Allocate a range in the specified virtual address map.
12299 *		returns the address and the map entry just before the allocated
12300 *		range
12301 *
12302 *	Map must be locked.
12303 */
12304
12305static kern_return_t
12306vm_map_remap_range_allocate(
12307	vm_map_t		map,
12308	vm_map_address_t	*address,	/* IN/OUT */
12309	vm_map_size_t		size,
12310	vm_map_offset_t		mask,
12311	int			flags,
12312	vm_map_entry_t		*map_entry)	/* OUT */
12313{
12314	vm_map_entry_t	entry;
12315	vm_map_offset_t	start;
12316	vm_map_offset_t	end;
12317	kern_return_t	kr;
12318
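	/*
	 * "StartAgain" is the retry point used when we had to block
	 * waiting for space (wait_for_space) and the map may have changed
	 * while it was unlocked.
	 */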
12319StartAgain: ;
12320
12321	start = *address;
12322
12323	if (flags & VM_FLAGS_ANYWHERE)
12324	{
12325		/*
12326		 *	Calculate the first possible address.
12327		 */
12328
12329		if (start < map->min_offset)
12330			start = map->min_offset;
12331		if (start > map->max_offset)
12332			return(KERN_NO_SPACE);
12333
12334		/*
12335		 *	Look for the first possible address;
12336		 *	if there's already something at this
12337		 *	address, we have to start after it.
12338		 */
12339
12340		if( map->disable_vmentry_reuse == TRUE) {
12341			VM_MAP_HIGHEST_ENTRY(map, entry, start);
12342		} else {
12343			assert(first_free_is_valid(map));
12344			if (start == map->min_offset) {
12345				if ((entry = map->first_free) != vm_map_to_entry(map))
12346					start = entry->vme_end;
12347			} else {
12348				vm_map_entry_t	tmp_entry;
12349				if (vm_map_lookup_entry(map, start, &tmp_entry))
12350					start = tmp_entry->vme_end;
12351				entry = tmp_entry;
12352			}
12353			start = vm_map_round_page(start,
12354						  VM_MAP_PAGE_MASK(map));
12355		}
12356
12357		/*
12358		 *	In any case, the "entry" always precedes
12359		 *	the proposed new region throughout the
12360		 *	loop:
12361		 */
12362
12363		while (TRUE) {
12364			register vm_map_entry_t	next;
12365
12366			/*
12367			 *	Find the end of the proposed new region.
12368			 *	Be sure we didn't go beyond the end, or
12369			 *	wrap around the address.
12370			 */
12371
12372			end = ((start + mask) & ~mask);
12373			end = vm_map_round_page(end,
12374						VM_MAP_PAGE_MASK(map));
12375			if (end < start)
12376				return(KERN_NO_SPACE);
12377			start = end;
12378			end += size;
12379
12380			if ((end > map->max_offset) || (end < start)) {
12381				if (map->wait_for_space) {
12382					if (size <= (map->max_offset -
12383						     map->min_offset)) {
12384						assert_wait((event_t) map, THREAD_INTERRUPTIBLE);
12385						vm_map_unlock(map);
12386						thread_block(THREAD_CONTINUE_NULL);
12387						vm_map_lock(map);
12388						goto StartAgain;
12389					}
12390				}
12391
12392				return(KERN_NO_SPACE);
12393			}
12394
12395			/*
12396			 *	If there are no more entries, we must win.
12397			 */
12398
12399			next = entry->vme_next;
12400			if (next == vm_map_to_entry(map))
12401				break;
12402
12403			/*
12404			 *	If there is another entry, it must be
12405			 *	after the end of the potential new region.
12406			 */
12407
12408			if (next->vme_start >= end)
12409				break;
12410
12411			/*
12412			 *	Didn't fit -- move to the next entry.
12413			 */
12414
12415			entry = next;
12416			start = entry->vme_end;
12417		}
12418		*address = start;
12419	} else {
12420		vm_map_entry_t		temp_entry;
12421
12422		/*
12423		 *	Verify that:
12424		 *		the address doesn't itself violate
12425		 *		the mask requirement.
12426		 */
12427
12428		if ((start & mask) != 0)
12429			return(KERN_NO_SPACE);
12430
12431
12432		/*
12433		 *	...	the address is within bounds
12434		 */
12435
12436		end = start + size;
12437
12438		if ((start < map->min_offset) ||
12439		    (end > map->max_offset) ||
12440		    (start >= end)) {
12441			return(KERN_INVALID_ADDRESS);
12442		}
12443
12444		/*
12445		 * If we're asked to overwrite whatever was mapped in that
12446		 * range, first deallocate that range.
12447		 */
12448		if (flags & VM_FLAGS_OVERWRITE) {
12449			vm_map_t zap_map;
12450
12451			/*
12452			 * We use a "zap_map" to avoid having to unlock
12453			 * the "map" in vm_map_delete(), which would compromise
12454			 * the atomicity of the "deallocate" and then "remap"
12455			 * combination.
12456			 */
12457			zap_map = vm_map_create(PMAP_NULL,
12458						start,
12459						end,
12460						map->hdr.entries_pageable);
12461			if (zap_map == VM_MAP_NULL) {
12462				return KERN_RESOURCE_SHORTAGE;
12463			}
12464			vm_map_set_page_shift(zap_map, VM_MAP_PAGE_SHIFT(map));
12465
12466			kr = vm_map_delete(map, start, end,
12467					   VM_MAP_REMOVE_SAVE_ENTRIES,
12468					   zap_map);
12469			if (kr == KERN_SUCCESS) {
12470				vm_map_destroy(zap_map,
12471					       VM_MAP_REMOVE_NO_PMAP_CLEANUP);
12472				zap_map = VM_MAP_NULL;
12473			}
12474		}
12475
12476		/*
12477		 *	...	the starting address isn't allocated
12478		 */
12479
12480		if (vm_map_lookup_entry(map, start, &temp_entry))
12481			return(KERN_NO_SPACE);
12482
12483		entry = temp_entry;
12484
12485		/*
12486		 *	...	the next region doesn't overlap the
12487		 *		end point.
12488		 */
12489
12490		if ((entry->vme_next != vm_map_to_entry(map)) &&
12491		    (entry->vme_next->vme_start < end))
12492			return(KERN_NO_SPACE);
12493	}
12494	*map_entry = entry;
12495	return(KERN_SUCCESS);
12496}
12497
12498/*
12499 *	vm_map_switch:
12500 *
12501 *	Set the address map for the current thread to the specified map
12502 */
12503
12504vm_map_t
12505vm_map_switch(
12506	vm_map_t	map)
12507{
12508	int		mycpu;
12509	thread_t	thread = current_thread();
12510	vm_map_t	oldmap = thread->map;
12511
12512	mp_disable_preemption();
12513	mycpu = cpu_number();
12514
12515	/*
12516	 *	Deactivate the current map and activate the requested map
12517	 */
12518	PMAP_SWITCH_USER(thread, map, mycpu);
12519
12520	mp_enable_preemption();
12521	return(oldmap);
12522}
12523
12524
12525/*
12526 *	Routine:	vm_map_write_user
12527 *
12528 *	Description:
12529 *		Copy out data from a kernel space into space in the
12530 *		destination map. The space must already exist in the
12531 *		destination map.
12532 *		NOTE:  This routine should only be called by threads
12533 *		which can block on a page fault. i.e. kernel mode user
12534 *		threads.
12535 *
12536 */
12537kern_return_t
12538vm_map_write_user(
12539	vm_map_t		map,
12540	void			*src_p,
12541	vm_map_address_t	dst_addr,
12542	vm_size_t		size)
12543{
12544	kern_return_t	kr = KERN_SUCCESS;
12545
12546	if(current_map() == map) {
12547		if (copyout(src_p, dst_addr, size)) {
12548			kr = KERN_INVALID_ADDRESS;
12549		}
12550	} else {
12551		vm_map_t	oldmap;
12552
12553		/* take on the identity of the target map while doing */
12554		/* the transfer */
12555
12556		vm_map_reference(map);
12557		oldmap = vm_map_switch(map);
12558		if (copyout(src_p, dst_addr, size)) {
12559			kr = KERN_INVALID_ADDRESS;
12560		}
12561		vm_map_switch(oldmap);
12562		vm_map_deallocate(map);
12563	}
12564	return kr;
12565}
12566
12567/*
12568 *	Routine:	vm_map_read_user
12569 *
12570 *	Description:
12571 *		Copy in data from a user space source map into the
12572 *		kernel map. The space must already exist in the
12573 *		kernel map.
12574 *		NOTE:  This routine should only be called by threads
12575 *		which can block on a page fault. i.e. kernel mode user
12576 *		threads.
12577 *
12578 */
12579kern_return_t
12580vm_map_read_user(
12581	vm_map_t		map,
12582	vm_map_address_t	src_addr,
12583	void			*dst_p,
12584	vm_size_t		size)
12585{
12586	kern_return_t	kr = KERN_SUCCESS;
12587
12588	if(current_map() == map) {
12589		if (copyin(src_addr, dst_p, size)) {
12590			kr = KERN_INVALID_ADDRESS;
12591		}
12592	} else {
12593		vm_map_t	oldmap;
12594
12595		/* take on the identity of the target map while doing */
12596		/* the transfer */
12597
12598		vm_map_reference(map);
12599		oldmap = vm_map_switch(map);
12600		if (copyin(src_addr, dst_p, size)) {
12601			kr = KERN_INVALID_ADDRESS;
12602		}
12603		vm_map_switch(oldmap);
12604		vm_map_deallocate(map);
12605	}
12606	return kr;
12607}
12608
12609
12610/*
12611 *	vm_map_check_protection:
12612 *
12613 *	Assert that the target map allows the specified
12614 *	privilege on the entire address region given.
12615 *	The entire region must be allocated.
12616 */
12617boolean_t
12618vm_map_check_protection(vm_map_t map, vm_map_offset_t start,
12619			vm_map_offset_t end, vm_prot_t protection)
12620{
12621	vm_map_entry_t entry;
12622	vm_map_entry_t tmp_entry;
12623
12624	vm_map_lock(map);
12625
12626	if (start < vm_map_min(map) || end > vm_map_max(map) || start > end)
12627	{
12628		vm_map_unlock(map);
12629		return (FALSE);
12630	}
12631
12632	if (!vm_map_lookup_entry(map, start, &tmp_entry)) {
12633		vm_map_unlock(map);
12634		return(FALSE);
12635	}
12636
12637	entry = tmp_entry;
12638
12639	while (start < end) {
12640		if (entry == vm_map_to_entry(map)) {
12641			vm_map_unlock(map);
12642			return(FALSE);
12643		}
12644
12645		/*
12646		 *	No holes allowed!
12647		 */
12648
12649		if (start < entry->vme_start) {
12650			vm_map_unlock(map);
12651			return(FALSE);
12652		}
12653
12654		/*
12655		 * Check protection associated with entry.
12656		 */
12657
12658		if ((entry->protection & protection) != protection) {
12659			vm_map_unlock(map);
12660			return(FALSE);
12661		}
12662
12663		/* go to next entry */
12664
12665		start = entry->vme_end;
12666		entry = entry->vme_next;
12667	}
12668	vm_map_unlock(map);
12669	return(TRUE);
12670}
12671
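/*
 *	vm_map_purgable_control:
 *
 *	Forward a purgeable-memory control request (set state, get state,
 *	or purge all volatile objects) to the VM object backing the map
 *	entry that contains "address".  The entry must be writable and
 *	must not be a submap.
 */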
12672kern_return_t
12673vm_map_purgable_control(
12674	vm_map_t		map,
12675	vm_map_offset_t		address,
12676	vm_purgable_t		control,
12677	int			*state)
12678{
12679	vm_map_entry_t		entry;
12680	vm_object_t		object;
12681	kern_return_t		kr;
12682
12683	/*
12684	 * Vet all the input parameters and current type and state of the
12685	 * underlying object.  Return with an error if anything is amiss.
12686	 */
12687	if (map == VM_MAP_NULL)
12688		return(KERN_INVALID_ARGUMENT);
12689
12690	if (control != VM_PURGABLE_SET_STATE &&
12691	    control != VM_PURGABLE_GET_STATE &&
12692	    control != VM_PURGABLE_PURGE_ALL)
12693		return(KERN_INVALID_ARGUMENT);
12694
12695	if (control == VM_PURGABLE_PURGE_ALL) {
12696		vm_purgeable_object_purge_all();
12697		return KERN_SUCCESS;
12698	}
12699
12700	if (control == VM_PURGABLE_SET_STATE &&
12701	    (((*state & ~(VM_PURGABLE_ALL_MASKS)) != 0) ||
12702	     ((*state & VM_PURGABLE_STATE_MASK) > VM_PURGABLE_STATE_MASK)))
12703		return(KERN_INVALID_ARGUMENT);
12704
12705	vm_map_lock_read(map);
12706
12707	if (!vm_map_lookup_entry(map, address, &entry) || entry->is_sub_map) {
12708
12709		/*
12710		 * Must pass a valid non-submap address.
12711		 */
12712		vm_map_unlock_read(map);
12713		return(KERN_INVALID_ADDRESS);
12714	}
12715
12716	if ((entry->protection & VM_PROT_WRITE) == 0) {
12717		/*
12718		 * Can't apply purgable controls to something you can't write.
12719		 */
12720		vm_map_unlock_read(map);
12721		return(KERN_PROTECTION_FAILURE);
12722	}
12723
12724	object = entry->object.vm_object;
12725	if (object == VM_OBJECT_NULL) {
12726		/*
12727		 * Object must already be present or it can't be purgable.
12728		 */
12729		vm_map_unlock_read(map);
12730		return KERN_INVALID_ARGUMENT;
12731	}
12732
12733	vm_object_lock(object);
12734
12735#if 00
12736	if (entry->offset != 0 ||
12737	    entry->vme_end - entry->vme_start != object->vo_size) {
12738		/*
12739		 * Can only apply purgable controls to the whole (existing)
12740		 * object at once.
12741		 */
12742		vm_map_unlock_read(map);
12743		vm_object_unlock(object);
12744		return KERN_INVALID_ARGUMENT;
12745	}
12746#endif
12747
12748	vm_map_unlock_read(map);
12749
12750	kr = vm_object_purgable_control(object, control, state);
12751
12752	vm_object_unlock(object);
12753
12754	return kr;
12755}
12756
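/*
 *	vm_map_page_query_internal:
 *
 *	Thin wrapper around vm_map_page_info(VM_PAGE_INFO_BASIC) that
 *	returns only the page's disposition and ref_count.
 */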
12757kern_return_t
12758vm_map_page_query_internal(
12759	vm_map_t	target_map,
12760	vm_map_offset_t	offset,
12761	int		*disposition,
12762	int		*ref_count)
12763{
12764	kern_return_t			kr;
12765	vm_page_info_basic_data_t	info;
12766	mach_msg_type_number_t		count;
12767
12768	count = VM_PAGE_INFO_BASIC_COUNT;
12769	kr = vm_map_page_info(target_map,
12770			      offset,
12771			      VM_PAGE_INFO_BASIC,
12772			      (vm_page_info_t) &info,
12773			      &count);
12774	if (kr == KERN_SUCCESS) {
12775		*disposition = info.disposition;
12776		*ref_count = info.ref_count;
12777	} else {
12778		*disposition = 0;
12779		*ref_count = 0;
12780	}
12781
12782	return kr;
12783}
12784
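/*
 *	vm_map_page_info:
 *
 *	Report the state of the page at "offset" in "map": descend through
 *	any submaps to the backing VM object, follow the shadow chain
 *	looking for the page (present, paged out, fictitious, dirty,
 *	referenced, ...), and fill in the requested info flavor.
 */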
12785kern_return_t
12786vm_map_page_info(
12787	vm_map_t		map,
12788	vm_map_offset_t		offset,
12789	vm_page_info_flavor_t	flavor,
12790	vm_page_info_t		info,
12791	mach_msg_type_number_t	*count)
12792{
12793	vm_map_entry_t		map_entry;
12794	vm_object_t		object;
12795	vm_page_t		m;
12796	kern_return_t		kr;
12797	kern_return_t		retval = KERN_SUCCESS;
12798	boolean_t		top_object;
12799	int			disposition;
12800	int 			ref_count;
12801	vm_page_info_basic_t	basic_info;
12802	int			depth;
12803	vm_map_offset_t		offset_in_page;
12804
12805	switch (flavor) {
12806	case VM_PAGE_INFO_BASIC:
12807		if (*count != VM_PAGE_INFO_BASIC_COUNT) {
12808			/*
12809			 * The "vm_page_info_basic_data" structure was not
12810			 * properly padded, so allow the size to be off by
12811			 * one to maintain backwards binary compatibility...
12812			 */
12813			if (*count != VM_PAGE_INFO_BASIC_COUNT - 1)
12814				return KERN_INVALID_ARGUMENT;
12815		}
12816		break;
12817	default:
12818		return KERN_INVALID_ARGUMENT;
12819	}
12820
12821	disposition = 0;
12822	ref_count = 0;
12823	top_object = TRUE;
12824	depth = 0;
12825
12826	retval = KERN_SUCCESS;
12827	offset_in_page = offset & PAGE_MASK;
12828	offset = vm_map_trunc_page(offset, PAGE_MASK);
12829
12830	vm_map_lock_read(map);
12831
12832	/*
12833	 * First, find the map entry covering "offset", going down
12834	 * submaps if necessary.
12835	 */
12836	for (;;) {
12837		if (!vm_map_lookup_entry(map, offset, &map_entry)) {
12838			vm_map_unlock_read(map);
12839			return KERN_INVALID_ADDRESS;
12840		}
12841		/* compute offset from this map entry's start */
12842		offset -= map_entry->vme_start;
12843		/* compute offset into this map entry's object (or submap) */
12844		offset += map_entry->offset;
12845
12846		if (map_entry->is_sub_map) {
12847			vm_map_t sub_map;
12848
12849			sub_map = map_entry->object.sub_map;
12850			vm_map_lock_read(sub_map);
12851			vm_map_unlock_read(map);
12852
12853			map = sub_map;
12854
12855			ref_count = MAX(ref_count, map->ref_count);
12856			continue;
12857		}
12858		break;
12859	}
12860
12861	object = map_entry->object.vm_object;
12862	if (object == VM_OBJECT_NULL) {
12863		/* no object -> no page */
12864		vm_map_unlock_read(map);
12865		goto done;
12866	}
12867
12868	vm_object_lock(object);
12869	vm_map_unlock_read(map);
12870
12871	/*
12872	 * Go down the VM object shadow chain until we find the page
12873	 * we're looking for.
12874	 */
12875	for (;;) {
12876		ref_count = MAX(ref_count, object->ref_count);
12877
12878		m = vm_page_lookup(object, offset);
12879
12880		if (m != VM_PAGE_NULL) {
12881			disposition |= VM_PAGE_QUERY_PAGE_PRESENT;
12882			break;
12883		} else {
12884#if MACH_PAGEMAP
12885			if (object->existence_map) {
12886				if (vm_external_state_get(object->existence_map,
12887							  offset) ==
12888				    VM_EXTERNAL_STATE_EXISTS) {
12889					/*
12890					 * this page has been paged out
12891					 */
12892				        disposition |= VM_PAGE_QUERY_PAGE_PAGED_OUT;
12893					break;
12894				}
12895			} else
12896#endif
12897			if (object->internal &&
12898			    object->alive &&
12899			    !object->terminating &&
12900			    object->pager_ready) {
12901
12902				if (COMPRESSED_PAGER_IS_ACTIVE || DEFAULT_FREEZER_COMPRESSED_PAGER_IS_ACTIVE) {
12903					if (VM_COMPRESSOR_PAGER_STATE_GET(
12904						    object,
12905						    offset)
12906					    == VM_EXTERNAL_STATE_EXISTS) {
12907						/* the pager has that page */
12908						disposition |= VM_PAGE_QUERY_PAGE_PAGED_OUT;
12909						break;
12910					}
12911				} else {
12912					memory_object_t pager;
12913
12914					vm_object_paging_begin(object);
12915					pager = object->pager;
12916					vm_object_unlock(object);
12917
12918					/*
12919					 * Ask the default pager if
12920					 * it has this page.
12921					 */
12922					kr = memory_object_data_request(
12923						pager,
12924						offset + object->paging_offset,
12925						0, /* just poke the pager */
12926						VM_PROT_READ,
12927						NULL);
12928
12929					vm_object_lock(object);
12930					vm_object_paging_end(object);
12931
12932					if (kr == KERN_SUCCESS) {
12933						/* the default pager has it */
12934						disposition |= VM_PAGE_QUERY_PAGE_PAGED_OUT;
12935						break;
12936					}
12937				}
12938			}
12939
12940			if (object->shadow != VM_OBJECT_NULL) {
12941			        vm_object_t shadow;
12942
12943				offset += object->vo_shadow_offset;
12944				shadow = object->shadow;
12945
12946				vm_object_lock(shadow);
12947				vm_object_unlock(object);
12948
12949				object = shadow;
12950				top_object = FALSE;
12951				depth++;
12952			} else {
12953//			        if (!object->internal)
12954//				        break;
12955//				retval = KERN_FAILURE;
12956//				goto done_with_object;
12957				break;
12958			}
12959		}
12960	}
12961	/* The ref_count is not strictly accurate: it measures the number    */
12962	/* of entities holding a ref on the object.  They may not be mapping */
12963	/* the object, or may not be mapping the section holding the         */
12964	/* target page, but it's still a ballpark number and, though an      */
12965	/* overcount, it picks up the copy-on-write cases.                   */
12966
12967	/* We could also get a picture of page sharing from pmap_attributes, */
12968	/* but this would undercount since only faulted-in mappings would    */
12969	/* show up.                                                           */
12970
12971	if (top_object == TRUE && object->shadow)
12972		disposition |= VM_PAGE_QUERY_PAGE_COPIED;
12973
12974	if (! object->internal)
12975		disposition |= VM_PAGE_QUERY_PAGE_EXTERNAL;
12976
12977	if (m == VM_PAGE_NULL)
12978	        goto done_with_object;
12979
12980	if (m->fictitious) {
12981		disposition |= VM_PAGE_QUERY_PAGE_FICTITIOUS;
12982		goto done_with_object;
12983	}
12984	if (m->dirty || pmap_is_modified(m->phys_page))
12985		disposition |= VM_PAGE_QUERY_PAGE_DIRTY;
12986
12987	if (m->reference || pmap_is_referenced(m->phys_page))
12988		disposition |= VM_PAGE_QUERY_PAGE_REF;
12989
12990	if (m->speculative)
12991		disposition |= VM_PAGE_QUERY_PAGE_SPECULATIVE;
12992
12993	if (m->cs_validated)
12994		disposition |= VM_PAGE_QUERY_PAGE_CS_VALIDATED;
12995	if (m->cs_tainted)
12996		disposition |= VM_PAGE_QUERY_PAGE_CS_TAINTED;
12997
12998done_with_object:
12999	vm_object_unlock(object);
13000done:
13001
13002	switch (flavor) {
13003	case VM_PAGE_INFO_BASIC:
13004		basic_info = (vm_page_info_basic_t) info;
13005		basic_info->disposition = disposition;
13006		basic_info->ref_count = ref_count;
13007		basic_info->object_id = (vm_object_id_t) (uintptr_t)
13008			VM_KERNEL_ADDRPERM(object);
13009		basic_info->offset =
13010			(memory_object_offset_t) offset + offset_in_page;
13011		basic_info->depth = depth;
13012		break;
13013	}
13014
13015	return retval;
13016}
13017
13018/*
13019 *	vm_map_msync
13020 *
13021 *	Synchronises the memory range specified with its backing store
13022 *	image by either flushing or cleaning the contents to the appropriate
13023 *	memory manager engaging in a memory object synchronize dialog with
13024 *	the manager.  The client doesn't return until the manager issues
13025 *	the m_o_s_completed message.  MIG magically converts the user task
13026 *	parameter to the task's address map.
13027 *
13028 *	interpretation of sync_flags
13029 *	VM_SYNC_INVALIDATE	- discard pages, only return precious
13030 *				  pages to manager.
13031 *
13032 *	VM_SYNC_INVALIDATE & (VM_SYNC_SYNCHRONOUS | VM_SYNC_ASYNCHRONOUS)
13033 *				- discard pages, write dirty or precious
13034 *				  pages back to memory manager.
13035 *
13036 *	VM_SYNC_SYNCHRONOUS | VM_SYNC_ASYNCHRONOUS
13037 *				- write dirty or precious pages back to
13038 *				  the memory manager.
13039 *
13040 *	VM_SYNC_CONTIGUOUS	- does everything normally, but if there
13041 *				  is a hole in the region, and we would
13042 *				  have returned KERN_SUCCESS, return
13043 *				  KERN_INVALID_ADDRESS instead.
13044 *
13045 *	NOTE
13046 *	The memory object attributes have not yet been implemented; this
13047 *	function will have to deal with the invalidate attribute.
13048 *
13049 *	RETURNS
13050 *	KERN_INVALID_TASK		Bad task parameter
13051 *	KERN_INVALID_ARGUMENT		both sync and async were specified.
13052 *	KERN_SUCCESS			The usual.
13053 *	KERN_INVALID_ADDRESS		There was a hole in the region.
13054 */
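/*
 * Illustrative example (parameters hypothetical): synchronously flush
 * and invalidate a range, roughly what the BSD msync(MS_SYNC |
 * MS_INVALIDATE) path would request:
 *
 *	kr = vm_map_msync(map, addr, len,
 *			  VM_SYNC_SYNCHRONOUS | VM_SYNC_INVALIDATE);
 */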
13055
13056kern_return_t
13057vm_map_msync(
13058	vm_map_t		map,
13059	vm_map_address_t	address,
13060	vm_map_size_t		size,
13061	vm_sync_t		sync_flags)
13062{
13063	msync_req_t		msr;
13064	msync_req_t		new_msr;
13065	queue_chain_t		req_q;	/* queue of requests for this msync */
13066	vm_map_entry_t		entry;
13067	vm_map_size_t		amount_left;
13068	vm_object_offset_t	offset;
13069	boolean_t		do_sync_req;
13070	boolean_t		had_hole = FALSE;
13071	memory_object_t		pager;
13072
13073	if ((sync_flags & VM_SYNC_ASYNCHRONOUS) &&
13074	    (sync_flags & VM_SYNC_SYNCHRONOUS))
13075		return(KERN_INVALID_ARGUMENT);
13076
13077	/*
13078	 * align address and size on page boundaries
13079	 */
13080	size = (vm_map_round_page(address + size,
13081				  VM_MAP_PAGE_MASK(map)) -
13082		vm_map_trunc_page(address,
13083				  VM_MAP_PAGE_MASK(map)));
13084	address = vm_map_trunc_page(address,
13085				    VM_MAP_PAGE_MASK(map));
13086
13087        if (map == VM_MAP_NULL)
13088                return(KERN_INVALID_TASK);
13089
13090	if (size == 0)
13091		return(KERN_SUCCESS);
13092
13093	queue_init(&req_q);
13094	amount_left = size;
13095
13096	while (amount_left > 0) {
13097		vm_object_size_t	flush_size;
13098		vm_object_t		object;
13099
13100		vm_map_lock(map);
13101		if (!vm_map_lookup_entry(map,
13102					 vm_map_trunc_page(
13103						 address,
13104						 VM_MAP_PAGE_MASK(map)),
13105					 &entry)) {
13106
13107			vm_map_size_t	skip;
13108
13109			/*
13110			 * hole in the address map.
13111			 */
13112			had_hole = TRUE;
13113
13114			/*
13115			 * Check for empty map.
13116			 */
13117			if (entry == vm_map_to_entry(map) &&
13118			    entry->vme_next == entry) {
13119				vm_map_unlock(map);
13120				break;
13121			}
13122			/*
13123			 * Check that we don't wrap and that
13124			 * we have at least one real map entry.
13125			 */
13126			if ((map->hdr.nentries == 0) ||
13127			    (entry->vme_next->vme_start < address)) {
13128				vm_map_unlock(map);
13129				break;
13130			}
13131			/*
13132			 * Move up to the next entry if needed
13133			 */
13134			skip = (entry->vme_next->vme_start - address);
13135			if (skip >= amount_left)
13136				amount_left = 0;
13137			else
13138				amount_left -= skip;
13139			address = entry->vme_next->vme_start;
13140			vm_map_unlock(map);
13141			continue;
13142		}
13143
13144		offset = address - entry->vme_start;
13145
13146		/*
13147		 * do we have more to flush than is contained in this
13148		 * entry?
13149		 */
13150		if (amount_left + entry->vme_start + offset > entry->vme_end) {
13151			flush_size = entry->vme_end -
13152				(entry->vme_start + offset);
13153		} else {
13154			flush_size = amount_left;
13155		}
13156		amount_left -= flush_size;
13157		address += flush_size;
13158
13159		if (entry->is_sub_map == TRUE) {
13160			vm_map_t	local_map;
13161			vm_map_offset_t	local_offset;
13162
13163			local_map = entry->object.sub_map;
13164			local_offset = entry->offset;
13165			vm_map_unlock(map);
13166			if (vm_map_msync(
13167				    local_map,
13168				    local_offset,
13169				    flush_size,
13170				    sync_flags) == KERN_INVALID_ADDRESS) {
13171				had_hole = TRUE;
13172			}
13173			continue;
13174		}
13175		object = entry->object.vm_object;
13176
13177		/*
13178		 * We can't sync this object if the object has not been
13179		 * created yet
13180		 */
13181		if (object == VM_OBJECT_NULL) {
13182			vm_map_unlock(map);
13183			continue;
13184		}
13185		offset += entry->offset;
13186
13187                vm_object_lock(object);
13188
13189		if (sync_flags & (VM_SYNC_KILLPAGES | VM_SYNC_DEACTIVATE)) {
13190		        int kill_pages = 0;
13191			boolean_t reusable_pages = FALSE;
13192
13193			if (sync_flags & VM_SYNC_KILLPAGES) {
13194			        if (object->ref_count == 1 && !object->shadow)
13195				        kill_pages = 1;
13196				else
13197				        kill_pages = -1;
13198			}
13199			if (kill_pages != -1)
13200			        vm_object_deactivate_pages(object, offset,
13201							   (vm_object_size_t)flush_size, kill_pages, reusable_pages);
13202			vm_object_unlock(object);
13203			vm_map_unlock(map);
13204			continue;
13205		}
13206		/*
13207		 * We can't sync this object if there isn't a pager.
13208		 * Don't bother to sync internal objects, since there can't
13209		 * be any "permanent" storage for these objects anyway.
13210		 */
13211		if ((object->pager == MEMORY_OBJECT_NULL) ||
13212		    (object->internal) || (object->private)) {
13213			vm_object_unlock(object);
13214			vm_map_unlock(map);
13215			continue;
13216		}
13217		/*
13218		 * keep reference on the object until syncing is done
13219		 */
13220		vm_object_reference_locked(object);
13221		vm_object_unlock(object);
13222
13223		vm_map_unlock(map);
13224
13225		do_sync_req = vm_object_sync(object,
13226					     offset,
13227					     flush_size,
13228					     sync_flags & VM_SYNC_INVALIDATE,
13229					     ((sync_flags & VM_SYNC_SYNCHRONOUS) ||
13230					      (sync_flags & VM_SYNC_ASYNCHRONOUS)),
13231					     sync_flags & VM_SYNC_SYNCHRONOUS);
13232		/*
		 * only send a memory_object_synchronize() if we returned pages
		 * or if the entry is writable (i.e. dirty pages may have already
		 * been sent back)
13235		 */
13236		if (!do_sync_req) {
13237			if ((sync_flags & VM_SYNC_INVALIDATE) && object->resident_page_count == 0) {
13238				/*
13239				 * clear out the clustering and read-ahead hints
13240				 */
13241				vm_object_lock(object);
13242
13243				object->pages_created = 0;
13244				object->pages_used = 0;
13245				object->sequential = 0;
13246				object->last_alloc = 0;
13247
13248				vm_object_unlock(object);
13249			}
13250			vm_object_deallocate(object);
13251			continue;
13252		}
13253		msync_req_alloc(new_msr);
13254
13255                vm_object_lock(object);
13256		offset += object->paging_offset;
13257
13258		new_msr->offset = offset;
13259		new_msr->length = flush_size;
13260		new_msr->object = object;
13261		new_msr->flag = VM_MSYNC_SYNCHRONIZING;
13262	re_iterate:
13263
13264		/*
13265		 * We can't sync this object if there isn't a pager.  The
13266		 * pager can disappear anytime we're not holding the object
13267		 * lock.  So this has to be checked anytime we goto re_iterate.
13268		 */
13269
13270		pager = object->pager;
13271
13272		if (pager == MEMORY_OBJECT_NULL) {
13273			vm_object_unlock(object);
13274			vm_object_deallocate(object);
13275			msync_req_free(new_msr);
13276			new_msr = NULL;
13277			continue;
13278		}
13279
13280		queue_iterate(&object->msr_q, msr, msync_req_t, msr_q) {
13281			/*
13282			 * need to check for overlapping entry, if found, wait
13283			 * on overlapping msr to be done, then reiterate
13284			 */
13285			msr_lock(msr);
13286			if (msr->flag == VM_MSYNC_SYNCHRONIZING &&
13287			    ((offset >= msr->offset &&
13288			      offset < (msr->offset + msr->length)) ||
13289			     (msr->offset >= offset &&
13290			      msr->offset < (offset + flush_size))))
13291			{
13292				assert_wait((event_t) msr,THREAD_INTERRUPTIBLE);
13293				msr_unlock(msr);
13294				vm_object_unlock(object);
13295				thread_block(THREAD_CONTINUE_NULL);
13296				vm_object_lock(object);
13297				goto re_iterate;
13298			}
13299			msr_unlock(msr);
13300		}/* queue_iterate */
13301
13302		queue_enter(&object->msr_q, new_msr, msync_req_t, msr_q);
13303
13304		vm_object_paging_begin(object);
13305		vm_object_unlock(object);
13306
13307		queue_enter(&req_q, new_msr, msync_req_t, req_q);
13308
13309		(void) memory_object_synchronize(
13310			pager,
13311			offset,
13312			flush_size,
13313			sync_flags & ~VM_SYNC_CONTIGUOUS);
13314
13315		vm_object_lock(object);
13316		vm_object_paging_end(object);
13317		vm_object_unlock(object);
13318	}/* while */
13319
13320	/*
	 * wait for memory_object_synchronize_completed messages from pager(s)
13322	 */
13323
13324	while (!queue_empty(&req_q)) {
13325		msr = (msync_req_t)queue_first(&req_q);
13326		msr_lock(msr);
13327		while(msr->flag != VM_MSYNC_DONE) {
13328			assert_wait((event_t) msr, THREAD_INTERRUPTIBLE);
13329			msr_unlock(msr);
13330			thread_block(THREAD_CONTINUE_NULL);
13331			msr_lock(msr);
13332		}/* while */
13333		queue_remove(&req_q, msr, msync_req_t, req_q);
13334		msr_unlock(msr);
13335		vm_object_deallocate(msr->object);
13336		msync_req_free(msr);
	}/* while */
13338
13339	/* for proper msync() behaviour */
13340	if (had_hole == TRUE && (sync_flags & VM_SYNC_CONTIGUOUS))
13341		return(KERN_INVALID_ADDRESS);
13342
13343	return(KERN_SUCCESS);
}/* vm_map_msync */
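
/*
 * Illustrative sketch only (not part of the original source): how a caller
 * such as the msync() path might use vm_map_msync() to flush a user range
 * and detect holes.  The wrapper name "example_msync_user_range" is
 * hypothetical.
 */
#if 0	/* example only -- not compiled */
static kern_return_t
example_msync_user_range(
	vm_map_address_t	addr,
	vm_map_size_t		len)
{
	kern_return_t		kr;

	/*
	 * Flush dirty pages in [addr, addr+len) to their pagers, waiting
	 * for completion, and require the range to be hole-free.
	 */
	kr = vm_map_msync(current_map(), addr, len,
			  VM_SYNC_SYNCHRONOUS | VM_SYNC_CONTIGUOUS);

	/*
	 * KERN_INVALID_ADDRESS reports a hole in the range (only because
	 * VM_SYNC_CONTIGUOUS was passed); KERN_SUCCESS means every dirty
	 * page was pushed back to its pager.
	 */
	return kr;
}
#endif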
13345
13346/*
13347 *	Routine:	convert_port_entry_to_map
13348 *	Purpose:
13349 *		Convert from a port specifying an entry or a task
13350 *		to a map. Doesn't consume the port ref; produces a map ref,
 *		which may be null.  Unlike convert_port_to_map, the
 *		port may be backed by either a task or a named entry.
13353 *	Conditions:
13354 *		Nothing locked.
13355 */
13356
13357
13358vm_map_t
13359convert_port_entry_to_map(
13360	ipc_port_t	port)
13361{
13362	vm_map_t map;
13363	vm_named_entry_t	named_entry;
13364	uint32_t	try_failed_count = 0;
13365
13366	if(IP_VALID(port) && (ip_kotype(port) == IKOT_NAMED_ENTRY)) {
13367		while(TRUE) {
13368			ip_lock(port);
13369			if(ip_active(port) && (ip_kotype(port)
13370					       == IKOT_NAMED_ENTRY)) {
13371				named_entry =
13372					(vm_named_entry_t)port->ip_kobject;
13373				if (!(lck_mtx_try_lock(&(named_entry)->Lock))) {
13374                       			ip_unlock(port);
13375
13376					try_failed_count++;
13377                       			mutex_pause(try_failed_count);
13378                       			continue;
13379                		}
13380				named_entry->ref_count++;
13381				lck_mtx_unlock(&(named_entry)->Lock);
13382				ip_unlock(port);
13383				if ((named_entry->is_sub_map) &&
13384				    (named_entry->protection
13385				     & VM_PROT_WRITE)) {
13386					map = named_entry->backing.map;
13387				} else {
13388					mach_destroy_memory_entry(port);
13389					return VM_MAP_NULL;
13390				}
13391				vm_map_reference_swap(map);
13392				mach_destroy_memory_entry(port);
13393				break;
13394			}
13395			else
13396				return VM_MAP_NULL;
13397		}
13398	}
13399	else
13400		map = convert_port_to_map(port);
13401
13402	return map;
13403}
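
/*
 * Illustrative sketch only (not part of the original source): the map
 * reference produced above must be dropped by the caller with
 * vm_map_deallocate().  The wrapper name "example_with_map_from_port" is
 * hypothetical.
 */
#if 0	/* example only -- not compiled */
static kern_return_t
example_with_map_from_port(
	ipc_port_t	port)
{
	vm_map_t	map;

	map = convert_port_entry_to_map(port);	/* takes a map reference */
	if (map == VM_MAP_NULL)
		return KERN_INVALID_ARGUMENT;

	/* ... operate on "map" ... */

	vm_map_deallocate(map);			/* drop the reference taken above */
	return KERN_SUCCESS;
}
#endif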
13404
13405/*
13406 *	Routine:	convert_port_entry_to_object
13407 *	Purpose:
13408 *		Convert from a port specifying a named entry to an
 *		object. Doesn't consume the port ref; produces an object ref,
13410 *		which may be null.
13411 *	Conditions:
13412 *		Nothing locked.
13413 */
13414
13415
13416vm_object_t
13417convert_port_entry_to_object(
13418	ipc_port_t	port)
13419{
13420	vm_object_t		object = VM_OBJECT_NULL;
13421	vm_named_entry_t	named_entry;
13422	uint32_t		try_failed_count = 0;
13423
13424	if (IP_VALID(port) &&
13425	    (ip_kotype(port) == IKOT_NAMED_ENTRY)) {
13426	try_again:
13427		ip_lock(port);
13428		if (ip_active(port) &&
13429		    (ip_kotype(port) == IKOT_NAMED_ENTRY)) {
13430			named_entry = (vm_named_entry_t)port->ip_kobject;
13431			if (!(lck_mtx_try_lock(&(named_entry)->Lock))) {
13432				ip_unlock(port);
13433				try_failed_count++;
13434				mutex_pause(try_failed_count);
13435                       		goto try_again;
13436			}
13437			named_entry->ref_count++;
13438			lck_mtx_unlock(&(named_entry)->Lock);
13439			ip_unlock(port);
13440			if (!(named_entry->is_sub_map) &&
13441			    !(named_entry->is_pager) &&
13442			    !(named_entry->is_copy) &&
13443			    (named_entry->protection & VM_PROT_WRITE)) {
13444				object = named_entry->backing.object;
13445				vm_object_reference(object);
13446			}
13447			mach_destroy_memory_entry(port);
13448		}
13449	}
13450
13451	return object;
13452}
13453
13454/*
13455 * Export routines to other components for the things we access locally through
13456 * macros.
13457 */
13458#undef current_map
13459vm_map_t
13460current_map(void)
13461{
13462	return (current_map_fast());
13463}
13464
13465/*
13466 *	vm_map_reference:
13467 *
13468 *	Most code internal to the osfmk will go through a
13469 *	macro defining this.  This is always here for the
13470 *	use of other kernel components.
13471 */
13472#undef vm_map_reference
13473void
13474vm_map_reference(
13475	register vm_map_t	map)
13476{
13477	if (map == VM_MAP_NULL)
13478		return;
13479
13480	lck_mtx_lock(&map->s_lock);
13481#if	TASK_SWAPPER
13482	assert(map->res_count > 0);
13483	assert(map->ref_count >= map->res_count);
13484	map->res_count++;
13485#endif
13486	map->ref_count++;
13487	lck_mtx_unlock(&map->s_lock);
13488}
13489
13490/*
13491 *	vm_map_deallocate:
13492 *
13493 *	Removes a reference from the specified map,
13494 *	destroying it if no references remain.
13495 *	The map should not be locked.
13496 */
13497void
13498vm_map_deallocate(
13499	register vm_map_t	map)
13500{
13501	unsigned int		ref;
13502
13503	if (map == VM_MAP_NULL)
13504		return;
13505
13506	lck_mtx_lock(&map->s_lock);
13507	ref = --map->ref_count;
13508	if (ref > 0) {
13509		vm_map_res_deallocate(map);
13510		lck_mtx_unlock(&map->s_lock);
13511		return;
13512	}
13513	assert(map->ref_count == 0);
13514	lck_mtx_unlock(&map->s_lock);
13515
13516#if	TASK_SWAPPER
13517	/*
13518	 * The map residence count isn't decremented here because
13519	 * the vm_map_delete below will traverse the entire map,
13520	 * deleting entries, and the residence counts on objects
13521	 * and sharing maps will go away then.
13522	 */
13523#endif
13524
13525	vm_map_destroy(map, VM_MAP_NO_FLAGS);
13526}
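
/*
 * Sketch, not part of the original source: external kernel components pair
 * the two exported calls above to keep a map alive while they use it
 * without holding any of its locks.  "map" here is a hypothetical local.
 */
#if 0	/* example only -- not compiled */
	vm_map_reference(map);		/* take a reference before stashing "map" */
	/* ... use the map later, possibly from another thread ... */
	vm_map_deallocate(map);		/* may destroy the map if this was the last reference */
#endif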
13527
13528
13529void
13530vm_map_disable_NX(vm_map_t map)
13531{
	if (map == NULL)
		return;
	if (map->pmap == NULL)
		return;

	pmap_disable_NX(map->pmap);
13538}
13539
13540void
13541vm_map_disallow_data_exec(vm_map_t map)
13542{
	if (map == NULL)
		return;

	map->map_disallow_data_exec = TRUE;
13547}
13548
13549/* XXX Consider making these constants (VM_MAX_ADDRESS and MACH_VM_MAX_ADDRESS)
13550 * more descriptive.
13551 */
13552void
13553vm_map_set_32bit(vm_map_t map)
13554{
13555	map->max_offset = (vm_map_offset_t)VM_MAX_ADDRESS;
13556}
13557
13558
13559void
13560vm_map_set_64bit(vm_map_t map)
13561{
13562	map->max_offset = (vm_map_offset_t)MACH_VM_MAX_ADDRESS;
13563}
13564
13565vm_map_offset_t
13566vm_compute_max_offset(unsigned is64)
13567{
13568	return (is64 ? (vm_map_offset_t)MACH_VM_MAX_ADDRESS : (vm_map_offset_t)VM_MAX_ADDRESS);
13569}
13570
13571uint64_t
13572vm_map_get_max_aslr_slide_pages(vm_map_t map)
13573{
13574	return (1 << (vm_map_is_64bit(map) ? 16 : 8));
13575}
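
/*
 * Illustrative note (not part of the original source): the slide budget
 * above is expressed in pages; with 4K pages a 64-bit map gets
 * 65536 * 4096 = 256MB of slide range and a 32-bit map gets
 * 256 * 4096 = 1MB.  "map" is a hypothetical local below.
 */
#if 0	/* example only -- not compiled */
	uint64_t	max_slide_bytes;

	max_slide_bytes = vm_map_get_max_aslr_slide_pages(map) * PAGE_SIZE;
#endif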
13576
13577boolean_t
13578vm_map_is_64bit(
13579		vm_map_t map)
13580{
13581	return map->max_offset > ((vm_map_offset_t)VM_MAX_ADDRESS);
13582}
13583
13584boolean_t
13585vm_map_has_hard_pagezero(
13586		vm_map_t 	map,
13587		vm_map_offset_t	pagezero_size)
13588{
13589	/*
13590	 * XXX FBDP
13591	 * We should lock the VM map (for read) here but we can get away
13592	 * with it for now because there can't really be any race condition:
13593	 * the VM map's min_offset is changed only when the VM map is created
13594	 * and when the zero page is established (when the binary gets loaded),
13595	 * and this routine gets called only when the task terminates and the
13596	 * VM map is being torn down, and when a new map is created via
13597	 * load_machfile()/execve().
13598	 */
13599	return (map->min_offset >= pagezero_size);
13600}
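
/*
 * Illustrative sketch only (not part of the original source): e.g. asking
 * whether a 64-bit map reserves the low 4GB as a hard page zero.  "map" is
 * a hypothetical local.
 */
#if 0	/* example only -- not compiled */
	if (vm_map_has_hard_pagezero(map, (vm_map_offset_t)0x100000000ULL)) {
		/* no address below 4GB can ever be mapped in this map */
	}
#endif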
13601
13602void
13603vm_map_set_4GB_pagezero(vm_map_t map)
13604{
13605#pragma unused(map)
13606
13607}
13608
13609void
13610vm_map_clear_4GB_pagezero(vm_map_t map)
13611{
13612#pragma unused(map)
13613}
13614
13615/*
 * Raise a VM map's maximum offset.
13617 */
13618kern_return_t
13619vm_map_raise_max_offset(
13620	vm_map_t	map,
13621	vm_map_offset_t	new_max_offset)
13622{
13623	kern_return_t	ret;
13624
13625	vm_map_lock(map);
13626	ret = KERN_INVALID_ADDRESS;
13627
13628	if (new_max_offset >= map->max_offset) {
13629		if (!vm_map_is_64bit(map)) {
13630			if (new_max_offset <= (vm_map_offset_t)VM_MAX_ADDRESS) {
13631				map->max_offset = new_max_offset;
13632				ret = KERN_SUCCESS;
13633			}
13634		} else {
13635			if (new_max_offset <= (vm_map_offset_t)MACH_VM_MAX_ADDRESS) {
13636				map->max_offset = new_max_offset;
13637				ret = KERN_SUCCESS;
13638			}
13639		}
13640	}
13641
13642	vm_map_unlock(map);
13643	return ret;
13644}
13645
13646
13647/*
 * Raise a VM map's minimum offset,
 * to strictly enforce the "page zero" reservation.
13650 */
13651kern_return_t
13652vm_map_raise_min_offset(
13653	vm_map_t	map,
13654	vm_map_offset_t	new_min_offset)
13655{
13656	vm_map_entry_t	first_entry;
13657
13658	new_min_offset = vm_map_round_page(new_min_offset,
13659					   VM_MAP_PAGE_MASK(map));
13660
13661	vm_map_lock(map);
13662
13663	if (new_min_offset < map->min_offset) {
13664		/*
13665		 * Can't move min_offset backwards, as that would expose
13666		 * a part of the address space that was previously, and for
13667		 * possibly good reasons, inaccessible.
13668		 */
13669		vm_map_unlock(map);
13670		return KERN_INVALID_ADDRESS;
13671	}
13672
13673	first_entry = vm_map_first_entry(map);
13674	if (first_entry != vm_map_to_entry(map) &&
13675	    first_entry->vme_start < new_min_offset) {
13676		/*
13677		 * Some memory was already allocated below the new
		 * minimum offset.  It's too late to change it now...
13679		 */
13680		vm_map_unlock(map);
13681		return KERN_NO_SPACE;
13682	}
13683
13684	map->min_offset = new_min_offset;
13685
13686	vm_map_unlock(map);
13687
13688	return KERN_SUCCESS;
13689}
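
/*
 * Illustrative sketch only (not part of the original source): e.g. an
 * exec-time caller could reserve a hard page zero by raising the minimum
 * offset before any user mappings exist.  "map" and "pagezero_size" are
 * hypothetical locals.
 */
#if 0	/* example only -- not compiled */
	kern_return_t	kr;

	kr = vm_map_raise_min_offset(map, (vm_map_offset_t)pagezero_size);
	if (kr == KERN_NO_SPACE) {
		/* something was already mapped below the requested minimum */
	}
#endif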
13690
13691/*
13692 * Set the limit on the maximum amount of user wired memory allowed for this map.
 * This is basically a copy of the MEMLOCK rlimit value maintained by the BSD side of
 * the kernel.  The limit is checked on the Mach VM side, so we keep a copy here to
 * avoid reaching over to the BSD data structures.
13696 */
13697
13698void
13699vm_map_set_user_wire_limit(vm_map_t 	map,
13700			   vm_size_t	limit)
13701{
13702	map->user_wire_limit = limit;
13703}
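
/*
 * Sketch, not part of the original source: e.g. when the BSD side updates
 * RLIMIT_MEMLOCK it would mirror the new value into the Mach map.
 * "new_memlock_limit" is a hypothetical local.
 */
#if 0	/* example only -- not compiled */
	vm_map_set_user_wire_limit(current_map(), (vm_size_t)new_memlock_limit);
#endif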
13704
13705
void
vm_map_switch_protect(vm_map_t	map,
		      boolean_t	val)
{
	vm_map_lock(map);
	map->switch_protect = val;
13711	vm_map_unlock(map);
13712}
13713
13714/*
13715 * IOKit has mapped a region into this map; adjust the pmap's ledgers appropriately.
13716 * phys_footprint is a composite limit consisting of iokit + physmem, so we need to
13717 * bump both counters.
13718 */
13719void
13720vm_map_iokit_mapped_region(vm_map_t map, vm_size_t bytes)
13721{
13722	pmap_t pmap = vm_map_pmap(map);
13723
13724	ledger_credit(pmap->ledger, task_ledgers.iokit_mem, bytes);
13725	ledger_credit(pmap->ledger, task_ledgers.phys_footprint, bytes);
13726}
13727
13728void
13729vm_map_iokit_unmapped_region(vm_map_t map, vm_size_t bytes)
13730{
13731	pmap_t pmap = vm_map_pmap(map);
13732
13733	ledger_debit(pmap->ledger, task_ledgers.iokit_mem, bytes);
13734	ledger_debit(pmap->ledger, task_ledgers.phys_footprint, bytes);
13735}
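
/*
 * Sketch, not part of the original source: the two calls above are meant
 * to be paired with the same size, so the ledger entries return to their
 * previous values once the IOKit mapping goes away.  "map" and "bytes"
 * are hypothetical locals.
 */
#if 0	/* example only -- not compiled */
	vm_map_iokit_mapped_region(map, bytes);		/* on map: credit iokit + footprint */
	/* ... mapping in use ... */
	vm_map_iokit_unmapped_region(map, bytes);	/* on unmap: debit the same amounts */
#endif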
13736
13737/* Add (generate) code signature for memory range */
13738#if CONFIG_DYNAMIC_CODE_SIGNING
13739kern_return_t vm_map_sign(vm_map_t map,
13740		 vm_map_offset_t start,
13741		 vm_map_offset_t end)
13742{
13743	vm_map_entry_t entry;
13744	vm_page_t m;
13745	vm_object_t object;
13746
13747	/*
13748	 * Vet all the input parameters and current type and state of the
	 * underlying object.  Return with an error if anything is amiss.
13750	 */
13751	if (map == VM_MAP_NULL)
13752		return(KERN_INVALID_ARGUMENT);
13753
13754	vm_map_lock_read(map);
13755
13756	if (!vm_map_lookup_entry(map, start, &entry) || entry->is_sub_map) {
13757		/*
13758		 * Must pass a valid non-submap address.
13759		 */
13760		vm_map_unlock_read(map);
13761		return(KERN_INVALID_ADDRESS);
13762	}
13763
13764	if((entry->vme_start > start) || (entry->vme_end < end)) {
13765		/*
13766		 * Map entry doesn't cover the requested range. Not handling
13767		 * this situation currently.
13768		 */
13769		vm_map_unlock_read(map);
13770		return(KERN_INVALID_ARGUMENT);
13771	}
13772
13773	object = entry->object.vm_object;
13774	if (object == VM_OBJECT_NULL) {
13775		/*
13776		 * Object must already be present or we can't sign.
13777		 */
13778		vm_map_unlock_read(map);
13779		return KERN_INVALID_ARGUMENT;
13780	}
13781
13782	vm_object_lock(object);
13783	vm_map_unlock_read(map);
13784
13785	while(start < end) {
13786		uint32_t refmod;
13787
13788		m = vm_page_lookup(object, start - entry->vme_start + entry->offset );
		if (m == VM_PAGE_NULL) {
			/* should we try to fault a page here? we can probably
			 * demand it exists and is locked for this request */
13792			vm_object_unlock(object);
13793			return KERN_FAILURE;
13794		}
13795		/* deal with special page status */
13796		if (m->busy ||
13797		    (m->unusual && (m->error || m->restart || m->private || m->absent))) {
13798			vm_object_unlock(object);
13799			return KERN_FAILURE;
13800		}
13801
13802		/* Page is OK... now "validate" it */
13803		/* This is the place where we'll call out to create a code
13804		 * directory, later */
13805		m->cs_validated = TRUE;
13806
13807		/* The page is now "clean" for codesigning purposes. That means
13808		 * we don't consider it as modified (wpmapped) anymore. But
13809		 * we'll disconnect the page so we note any future modification
13810		 * attempts. */
13811		m->wpmapped = FALSE;
13812		refmod = pmap_disconnect(m->phys_page);
13813
13814		/* Pull the dirty status from the pmap, since we cleared the
13815		 * wpmapped bit */
13816		if ((refmod & VM_MEM_MODIFIED) && !m->dirty) {
13817			SET_PAGE_DIRTY(m, FALSE);
13818		}
13819
13820		/* On to the next page */
13821		start += PAGE_SIZE;
13822	}
13823	vm_object_unlock(object);
13824
13825	return KERN_SUCCESS;
13826}
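
/*
 * Illustrative sketch only (not part of the original source): e.g. signing
 * a freshly generated code range.  The whole range must be covered by one
 * non-submap entry with a resident object.  "map", "start" and "end" are
 * hypothetical locals.
 */
#if 0	/* example only -- not compiled */
	kern_return_t	kr;

	kr = vm_map_sign(map,
			 vm_map_trunc_page(start, VM_MAP_PAGE_MASK(map)),
			 vm_map_round_page(end, VM_MAP_PAGE_MASK(map)));
#endif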
13827#endif
13828
13829#if CONFIG_FREEZE
13830
13831kern_return_t vm_map_freeze_walk(
13832             	vm_map_t map,
13833             	unsigned int *purgeable_count,
13834             	unsigned int *wired_count,
13835             	unsigned int *clean_count,
13836             	unsigned int *dirty_count,
13837             	unsigned int  dirty_budget,
13838             	boolean_t *has_shared)
13839{
13840	vm_map_entry_t entry;
13841
13842	vm_map_lock_read(map);
13843
13844	*purgeable_count = *wired_count = *clean_count = *dirty_count = 0;
13845	*has_shared = FALSE;
13846
13847	for (entry = vm_map_first_entry(map);
13848	     entry != vm_map_to_entry(map);
13849	     entry = entry->vme_next) {
13850		unsigned int purgeable, clean, dirty, wired;
13851		boolean_t shared;
13852
13853		if ((entry->object.vm_object == 0) ||
13854		    (entry->is_sub_map) ||
13855		    (entry->object.vm_object->phys_contiguous)) {
13856			continue;
13857		}
13858
13859		default_freezer_pack(&purgeable, &wired, &clean, &dirty, dirty_budget, &shared, entry->object.vm_object, NULL);
13860
13861		*purgeable_count += purgeable;
13862		*wired_count += wired;
13863		*clean_count += clean;
13864		*dirty_count += dirty;
13865
13866		if (shared) {
13867			*has_shared = TRUE;
13868		}
13869
13870		/* Adjust pageout budget and finish up if reached */
13871		if (dirty_budget) {
13872			dirty_budget -= dirty;
13873			if (dirty_budget == 0) {
13874				break;
13875			}
13876		}
13877	}
13878
13879	vm_map_unlock_read(map);
13880
13881	return KERN_SUCCESS;
13882}
13883
13884kern_return_t vm_map_freeze(
13885             	vm_map_t map,
13886             	unsigned int *purgeable_count,
13887             	unsigned int *wired_count,
13888             	unsigned int *clean_count,
13889             	unsigned int *dirty_count,
13890             	unsigned int dirty_budget,
13891             	boolean_t *has_shared)
13892{
13893	vm_map_entry_t	entry2 = VM_MAP_ENTRY_NULL;
13894	kern_return_t	kr = KERN_SUCCESS;
13895	boolean_t	default_freezer_active = TRUE;
13896
13897	*purgeable_count = *wired_count = *clean_count = *dirty_count = 0;
13898	*has_shared = FALSE;
13899
13900	/*
13901	 * We need the exclusive lock here so that we can
13902	 * block any page faults or lookups while we are
13903	 * in the middle of freezing this vm map.
13904	 */
13905	vm_map_lock(map);
13906
13907	if (COMPRESSED_PAGER_IS_ACTIVE || DEFAULT_FREEZER_COMPRESSED_PAGER_IS_ACTIVE) {
13908		default_freezer_active = FALSE;
13909	}
13910
13911	if (default_freezer_active) {
13912		if (map->default_freezer_handle == NULL) {
13913			map->default_freezer_handle = default_freezer_handle_allocate();
13914		}
13915
13916		if ((kr = default_freezer_handle_init(map->default_freezer_handle)) != KERN_SUCCESS) {
13917			/*
			 * Can happen if the default_freezer_handle passed in is NULL,
			 * or if a table has already been allocated and associated
			 * with this handle, i.e. the map is already frozen.
13921			 */
13922			goto done;
13923		}
13924	}
13925
13926	for (entry2 = vm_map_first_entry(map);
13927	     entry2 != vm_map_to_entry(map);
13928	     entry2 = entry2->vme_next) {
13929
13930		vm_object_t	src_object = entry2->object.vm_object;
13931
13932		if (entry2->object.vm_object && !entry2->is_sub_map && !entry2->object.vm_object->phys_contiguous) {
13933			/* If eligible, scan the entry, moving eligible pages over to our parent object */
13934			if (default_freezer_active) {
13935				unsigned int purgeable, clean, dirty, wired;
13936				boolean_t shared;
13937
13938				default_freezer_pack(&purgeable, &wired, &clean, &dirty, dirty_budget, &shared,
13939								src_object, map->default_freezer_handle);
13940
13941				*purgeable_count += purgeable;
13942				*wired_count += wired;
13943				*clean_count += clean;
13944				*dirty_count += dirty;
13945
13946				/* Adjust pageout budget and finish up if reached */
13947				if (dirty_budget) {
13948					dirty_budget -= dirty;
13949					if (dirty_budget == 0) {
13950						break;
13951					}
13952				}
13953
13954				if (shared) {
13955					*has_shared = TRUE;
13956				}
13957			} else {
13958				/*
13959				 * To the compressor.
13960				 */
13961				if (entry2->object.vm_object->internal == TRUE) {
13962					vm_object_pageout(entry2->object.vm_object);
13963				}
13964			}
13965		}
13966	}
13967
13968	if (default_freezer_active) {
13969		/* Finally, throw out the pages to swap */
13970		default_freezer_pageout(map->default_freezer_handle);
13971	}
13972
13973done:
13974	vm_map_unlock(map);
13975
13976	return kr;
13977}
13978
13979kern_return_t
13980vm_map_thaw(
13981	vm_map_t map)
13982{
13983	kern_return_t kr = KERN_SUCCESS;
13984
13985	if (COMPRESSED_PAGER_IS_ACTIVE || DEFAULT_FREEZER_COMPRESSED_PAGER_IS_ACTIVE) {
13986		/*
		 * With the compressed pager, pages are thawed on demand,
		 * so there is nothing to do here.
13988		 */
13989		return kr;
13990	}
13991
13992	vm_map_lock(map);
13993
13994	if (map->default_freezer_handle == NULL) {
13995		/*
13996		 * This map is not in a frozen state.
13997		 */
13998		kr = KERN_FAILURE;
13999		goto out;
14000	}
14001
14002	kr = default_freezer_unpack(map->default_freezer_handle);
14003out:
14004	vm_map_unlock(map);
14005
14006	return kr;
14007}
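
/*
 * Illustrative sketch only (not part of the original source): a freezer-style
 * caller pairs the two routines above; an explicit thaw only matters for the
 * default freezer, since the compressed pager brings pages back on demand.
 * "map" and the budget value are hypothetical.
 */
#if 0	/* example only -- not compiled */
	unsigned int	purgeable, wired, clean, dirty;
	boolean_t	shared;
	kern_return_t	kr;

	kr = vm_map_freeze(map, &purgeable, &wired, &clean, &dirty,
			   100 /* dirty_budget */, &shared);
	/* ... task stays suspended while frozen ... */
	kr = vm_map_thaw(map);
#endif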
14008#endif
14009
14010/*
14011 * vm_map_entry_should_cow_for_true_share:
14012 *
14013 * Determines if the map entry should be clipped and setup for copy-on-write
14014 * to avoid applying "true_share" to a large VM object when only a subset is
14015 * targeted.
14016 *
14017 * For now, we target only the map entries created for the Objective C
14018 * Garbage Collector, which initially have the following properties:
14019 *	- alias == VM_MEMORY_MALLOC
14020 * 	- wired_count == 0
14021 * 	- !needs_copy
14022 * and a VM object with:
14023 * 	- internal
14024 * 	- copy_strategy == MEMORY_OBJECT_COPY_SYMMETRIC
14025 * 	- !true_share
14026 * 	- vo_size == ANON_CHUNK_SIZE
14027 */
14028boolean_t
14029vm_map_entry_should_cow_for_true_share(
14030	vm_map_entry_t	entry)
14031{
14032	vm_object_t	object;
14033
14034	if (entry->is_sub_map) {
14035		/* entry does not point at a VM object */
14036		return FALSE;
14037	}
14038
14039	if (entry->needs_copy) {
14040		/* already set for copy_on_write: done! */
14041		return FALSE;
14042	}
14043
14044	if (entry->alias != VM_MEMORY_MALLOC) {
		/* not tagged as an Objective-C Garbage Collector entry */
14046		return FALSE;
14047	}
14048
14049	if (entry->wired_count) {
14050		/* wired: can't change the map entry... */
14051		return FALSE;
14052	}
14053
14054	object = entry->object.vm_object;
14055
14056	if (object == VM_OBJECT_NULL) {
14057		/* no object yet... */
14058		return FALSE;
14059	}
14060
14061	if (!object->internal) {
14062		/* not an internal object */
14063		return FALSE;
14064	}
14065
14066	if (object->copy_strategy != MEMORY_OBJECT_COPY_SYMMETRIC) {
14067		/* not the default copy strategy */
14068		return FALSE;
14069	}
14070
14071	if (object->true_share) {
14072		/* already true_share: too late to avoid it */
14073		return FALSE;
14074	}
14075
14076	if (object->vo_size != ANON_CHUNK_SIZE) {
14077		/* not an object created for the ObjC Garbage Collector */
14078		return FALSE;
14079	}
14080
14081	/*
14082	 * All the criteria match: we have a large object being targeted for "true_share".
14083	 * To limit the adverse side-effects linked with "true_share", tell the caller to
14084	 * try and avoid setting up the entire object for "true_share" by clipping the
14085	 * targeted range and setting it up for copy-on-write.
14086	 */
14087	return TRUE;
14088}
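
/*
 * Illustrative sketch only (not part of the original source): a caller
 * holding the map lock for writing would typically clip the entry to the
 * targeted range (using the clip helpers defined earlier in this file) and
 * mark it needs_copy, instead of setting "true_share" on the whole object.
 * "map", "entry", "start" and "end" are hypothetical locals.
 */
#if 0	/* example only -- not compiled */
	if (vm_map_entry_should_cow_for_true_share(entry)) {
		vm_map_clip_start(map, entry,
				  vm_map_trunc_page(start, VM_MAP_PAGE_MASK(map)));
		vm_map_clip_end(map, entry,
				vm_map_round_page(end, VM_MAP_PAGE_MASK(map)));
		entry->needs_copy = TRUE;	/* copy-on-write for just this range */
	}
#endif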
14089
14090vm_map_offset_t
14091vm_map_round_page_mask(
14092 	vm_map_offset_t	offset,
14093	vm_map_offset_t	mask)
14094{
14095	return VM_MAP_ROUND_PAGE(offset, mask);
14096}
14097
14098vm_map_offset_t
14099vm_map_trunc_page_mask(
14100	vm_map_offset_t	offset,
14101	vm_map_offset_t	mask)
14102{
14103	return VM_MAP_TRUNC_PAGE(offset, mask);
14104}
14105
14106int
14107vm_map_page_shift(
14108	vm_map_t map)
14109{
14110	return VM_MAP_PAGE_SHIFT(map);
14111}
14112
14113int
14114vm_map_page_size(
14115	vm_map_t map)
14116{
14117	return VM_MAP_PAGE_SIZE(map);
14118}
14119
14120int
14121vm_map_page_mask(
14122	vm_map_t map)
14123{
14124	return VM_MAP_PAGE_MASK(map);
14125}
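
/*
 * Illustrative note (not part of the original source): with a 4K page map
 * (mask 0xFFF), vm_map_trunc_page_mask(0x12345, 0xFFF) == 0x12000 and
 * vm_map_round_page_mask(0x12345, 0xFFF) == 0x13000.  "map", "addr" and
 * "size" are hypothetical locals below.
 */
#if 0	/* example only -- not compiled */
	vm_map_offset_t	start, end;

	start = vm_map_trunc_page_mask(addr, vm_map_page_mask(map));
	end   = vm_map_round_page_mask(addr + size, vm_map_page_mask(map));
#endif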
14126
14127kern_return_t
14128vm_map_set_page_shift(
14129	vm_map_t  	map,
14130	int		pageshift)
14131{
14132	if (map->hdr.nentries != 0) {
14133		/* too late to change page size */
14134		return KERN_FAILURE;
14135	}
14136
14137	map->hdr.page_shift = pageshift;
14138
14139	return KERN_SUCCESS;
14140}
14141
14142kern_return_t
14143vm_map_query_volatile(
14144	vm_map_t	map,
14145	mach_vm_size_t	*volatile_virtual_size_p,
14146	mach_vm_size_t	*volatile_resident_size_p,
14147	mach_vm_size_t	*volatile_pmap_size_p)
14148{
14149	mach_vm_size_t	volatile_virtual_size;
14150	mach_vm_size_t	volatile_resident_count;
14151	mach_vm_size_t	volatile_pmap_count;
14152	mach_vm_size_t	resident_count;
14153	vm_map_entry_t	entry;
14154	vm_object_t	object;
14155
14156	/* map should be locked by caller */
14157
14158	volatile_virtual_size = 0;
14159	volatile_resident_count = 0;
14160	volatile_pmap_count = 0;
14161
14162	for (entry = vm_map_first_entry(map);
14163	     entry != vm_map_to_entry(map);
14164	     entry = entry->vme_next) {
14165		if (entry->is_sub_map) {
14166			continue;
14167		}
14168		if (! (entry->protection & VM_PROT_WRITE)) {
14169			continue;
14170		}
14171		object = entry->object.vm_object;
14172		if (object == VM_OBJECT_NULL) {
14173			continue;
14174		}
14175		if (object->purgable != VM_PURGABLE_VOLATILE) {
14176			continue;
14177		}
14178		if (entry->offset != 0) {
14179			/*
14180			 * If the map entry has been split and the object now
14181			 * appears several times in the VM map, we don't want
14182			 * to count the object's resident_page_count more than
14183			 * once.  We count it only for the first one, starting
14184			 * at offset 0 and ignore the other VM map entries.
14185			 */
14186			continue;
14187		}
14188		resident_count = object->resident_page_count;
14189		if ((entry->offset / PAGE_SIZE) >= resident_count) {
14190			resident_count = 0;
14191		} else {
14192			resident_count -= (entry->offset / PAGE_SIZE);
14193		}
14194
14195		volatile_virtual_size += entry->vme_end - entry->vme_start;
14196		volatile_resident_count += resident_count;
14197		volatile_pmap_count += pmap_query_resident(map->pmap,
14198							   entry->vme_start,
14199							   entry->vme_end);
14200	}
14201
14202	/* map is still locked on return */
14203
14204	*volatile_virtual_size_p = volatile_virtual_size;
14205	*volatile_resident_size_p = volatile_resident_count * PAGE_SIZE;
14206	*volatile_pmap_size_p = volatile_pmap_count * PAGE_SIZE;
14207
14208	return KERN_SUCCESS;
14209}
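
/*
 * Sketch, not part of the original source: the caller provides the map
 * locking around the query, e.g. with a read lock.  "map" is a hypothetical
 * local.
 */
#if 0	/* example only -- not compiled */
	mach_vm_size_t	vol_virtual, vol_resident, vol_pmap;

	vm_map_lock_read(map);
	(void) vm_map_query_volatile(map, &vol_virtual, &vol_resident, &vol_pmap);
	vm_map_unlock_read(map);
#endif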
14210