1/*
2 * Copyright (c) 2000-2007 Apple Inc. All rights reserved.
3 *
4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5 *
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. The rights granted to you under the License
10 * may not be used to create, or enable the creation or redistribution of,
11 * unlawful or unlicensed copies of an Apple operating system, or to
12 * circumvent, violate, or enable the circumvention or violation of, any
13 * terms of an Apple operating system software license agreement.
14 *
15 * Please obtain a copy of the License at
16 * http://www.opensource.apple.com/apsl/ and read it before using this file.
17 *
18 * The Original Code and all software distributed under the License are
19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23 * Please see the License for the specific language governing rights and
24 * limitations under the License.
25 *
26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27 */
28/*
29 * @OSF_COPYRIGHT@
30 */
31/*
32 * Mach Operating System
33 * Copyright (c) 1991,1990,1989,1988,1987 Carnegie Mellon University
34 * All Rights Reserved.
35 *
36 * Permission to use, copy, modify and distribute this software and its
37 * documentation is hereby granted, provided that both the copyright
38 * notice and this permission notice appear in all copies of the
39 * software, derivative works or modified versions, and any portions
40 * thereof, and that both notices appear in supporting documentation.
41 *
42 * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
43 * CONDITION.  CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND FOR
44 * ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
45 *
46 * Carnegie Mellon requests users of this software to return to
47 *
48 *  Software Distribution Coordinator  or  Software.Distribution@CS.CMU.EDU
49 *  School of Computer Science
50 *  Carnegie Mellon University
51 *  Pittsburgh PA 15213-3890
52 *
53 * any improvements or extensions that they make and grant Carnegie Mellon
54 * the rights to redistribute these changes.
55 */
56/*
57 */
58/*
59 *	File:	vm/vm_map.c
60 *	Author:	Avadis Tevanian, Jr., Michael Wayne Young
61 *	Date:	1985
62 *
63 *	Virtual memory mapping module.
64 */
65
66#include <task_swapper.h>
67#include <mach_assert.h>
68#include <libkern/OSAtomic.h>
69
70#include <mach/kern_return.h>
71#include <mach/port.h>
72#include <mach/vm_attributes.h>
73#include <mach/vm_param.h>
74#include <mach/vm_behavior.h>
75#include <mach/vm_statistics.h>
76#include <mach/memory_object.h>
77#include <mach/mach_vm.h>
78#include <machine/cpu_capabilities.h>
79#include <mach/sdt.h>
80
81#include <kern/assert.h>
82#include <kern/counters.h>
83#include <kern/kalloc.h>
84#include <kern/zalloc.h>
85
86#include <vm/cpm.h>
87#include <vm/vm_init.h>
88#include <vm/vm_fault.h>
89#include <vm/vm_map.h>
90#include <vm/vm_object.h>
91#include <vm/vm_page.h>
92#include <vm/vm_kern.h>
93#include <ipc/ipc_port.h>
94#include <kern/sched_prim.h>
95#include <kern/misc_protos.h>
96#include <ddb/tr.h>
97#include <machine/db_machdep.h>
98#include <kern/xpr.h>
99
100#include <mach/vm_map_server.h>
101#include <mach/mach_host_server.h>
102#include <vm/vm_protos.h>
103
104#ifdef ppc
105#include <ppc/mappings.h>
106#endif /* ppc */
107
109#include <vm/vm_shared_region.h>
110
111/* Internal prototypes
112 */
113
114static void vm_map_simplify_range(
115	vm_map_t	map,
116	vm_map_offset_t	start,
117	vm_map_offset_t	end);	/* forward */
118
119static boolean_t	vm_map_range_check(
120	vm_map_t	map,
121	vm_map_offset_t	start,
122	vm_map_offset_t	end,
123	vm_map_entry_t	*entry);
124
125static vm_map_entry_t	_vm_map_entry_create(
126	struct vm_map_header	*map_header);
127
128static void		_vm_map_entry_dispose(
129	struct vm_map_header	*map_header,
130	vm_map_entry_t		entry);
131
132static void		vm_map_pmap_enter(
133	vm_map_t		map,
134	vm_map_offset_t 	addr,
135	vm_map_offset_t		end_addr,
136	vm_object_t 		object,
137	vm_object_offset_t	offset,
138	vm_prot_t		protection);
139
140static void		_vm_map_clip_end(
141	struct vm_map_header	*map_header,
142	vm_map_entry_t		entry,
143	vm_map_offset_t		end);
144
145static void		_vm_map_clip_start(
146	struct vm_map_header	*map_header,
147	vm_map_entry_t		entry,
148	vm_map_offset_t		start);
149
150static void		vm_map_entry_delete(
151	vm_map_t	map,
152	vm_map_entry_t	entry);
153
154static kern_return_t	vm_map_delete(
155	vm_map_t	map,
156	vm_map_offset_t	start,
157	vm_map_offset_t	end,
158	int		flags,
159	vm_map_t	zap_map);
160
161static kern_return_t	vm_map_copy_overwrite_unaligned(
162	vm_map_t	dst_map,
163	vm_map_entry_t	entry,
164	vm_map_copy_t	copy,
165	vm_map_address_t start);
166
167static kern_return_t	vm_map_copy_overwrite_aligned(
168	vm_map_t	dst_map,
169	vm_map_entry_t	tmp_entry,
170	vm_map_copy_t	copy,
171	vm_map_offset_t start,
172	pmap_t		pmap);
173
174static kern_return_t	vm_map_copyin_kernel_buffer(
175	vm_map_t	src_map,
176	vm_map_address_t src_addr,
177	vm_map_size_t	len,
178	boolean_t	src_destroy,
179	vm_map_copy_t	*copy_result);  /* OUT */
180
181static kern_return_t	vm_map_copyout_kernel_buffer(
182	vm_map_t	map,
183	vm_map_address_t *addr,	/* IN/OUT */
184	vm_map_copy_t	copy,
185	boolean_t	overwrite);
186
187static void		vm_map_fork_share(
188	vm_map_t	old_map,
189	vm_map_entry_t	old_entry,
190	vm_map_t	new_map);
191
192static boolean_t	vm_map_fork_copy(
193	vm_map_t	old_map,
194	vm_map_entry_t	*old_entry_p,
195	vm_map_t	new_map);
196
197void		vm_map_region_top_walk(
198	vm_map_entry_t		   entry,
199	vm_region_top_info_t       top);
200
201void		vm_map_region_walk(
202	vm_map_t		   map,
203	vm_map_offset_t		   va,
204	vm_map_entry_t		   entry,
205	vm_object_offset_t	   offset,
206	vm_object_size_t	   range,
207	vm_region_extended_info_t  extended,
208	boolean_t		   look_for_pages);
209
210static kern_return_t	vm_map_wire_nested(
211	vm_map_t		   map,
212	vm_map_offset_t		   start,
213	vm_map_offset_t		   end,
214	vm_prot_t		   access_type,
215	boolean_t		   user_wire,
216	pmap_t			   map_pmap,
217	vm_map_offset_t		   pmap_addr);
218
219static kern_return_t	vm_map_unwire_nested(
220	vm_map_t		   map,
221	vm_map_offset_t		   start,
222	vm_map_offset_t		   end,
223	boolean_t		   user_wire,
224	pmap_t			   map_pmap,
225	vm_map_offset_t		   pmap_addr);
226
227static kern_return_t	vm_map_overwrite_submap_recurse(
228	vm_map_t		   dst_map,
229	vm_map_offset_t		   dst_addr,
230	vm_map_size_t		   dst_size);
231
232static kern_return_t	vm_map_copy_overwrite_nested(
233	vm_map_t		   dst_map,
234	vm_map_offset_t		   dst_addr,
235	vm_map_copy_t		   copy,
236	boolean_t		   interruptible,
237	pmap_t			   pmap);
238
239static kern_return_t	vm_map_remap_extract(
240	vm_map_t		map,
241	vm_map_offset_t		addr,
242	vm_map_size_t		size,
243	boolean_t		copy,
244	struct vm_map_header 	*map_header,
245	vm_prot_t		*cur_protection,
246	vm_prot_t		*max_protection,
247	vm_inherit_t		inheritance,
248	boolean_t		pageable);
249
250static kern_return_t	vm_map_remap_range_allocate(
251	vm_map_t		map,
252	vm_map_address_t	*address,
253	vm_map_size_t		size,
254	vm_map_offset_t		mask,
255	boolean_t		anywhere,
256	vm_map_entry_t		*map_entry);
257
258static void		vm_map_region_look_for_page(
259	vm_map_t		   map,
260	vm_map_offset_t            va,
261	vm_object_t		   object,
262	vm_object_offset_t	   offset,
263	int                        max_refcnt,
264	int                        depth,
265	vm_region_extended_info_t  extended);
266
267static int		vm_map_region_count_obj_refs(
268	vm_map_entry_t    	   entry,
269	vm_object_t       	   object);
270
271/*
272 * Macros to copy a vm_map_entry. We must be careful to correctly
273 * manage the wired page count. vm_map_entry_copy() creates a new
274 * map entry to the same memory - the wired count in the new entry
275 * must be set to zero. vm_map_entry_copy_full() creates a new
276 * entry that is identical to the old entry.  This preserves the
277 * wire count; it's used for map splitting and zone changing in
278 * vm_map_copyout.
279 */
280#define vm_map_entry_copy(NEW,OLD) \
281MACRO_BEGIN                                     \
282	*(NEW) = *(OLD);                \
283	(NEW)->is_shared = FALSE;	\
284	(NEW)->needs_wakeup = FALSE;    \
285	(NEW)->in_transition = FALSE;   \
286	(NEW)->wired_count = 0;         \
287	(NEW)->user_wired_count = 0;    \
288MACRO_END
289
290#define vm_map_entry_copy_full(NEW,OLD)        (*(NEW) = *(OLD))
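/*
 * Illustrative usage (a hypothetical sketch, not compiled; "new_entry"
 * and "old_entry" are placeholder names): duplicating an entry into a
 * new mapping must reset the wired counts, while an exact clone of an
 * existing entry keeps them.
 */
#if 0
	vm_map_entry_copy(new_entry, old_entry);	/* new mapping: wired counts reset to 0 */
	vm_map_entry_copy_full(new_entry, old_entry);	/* exact clone: wired counts preserved */
#endif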
291
292/*
293 *	Decide if we want to allow processes to execute from their data or stack areas.
294 *	override_nx() returns true if we do.  Data/stack execution can be enabled independently
295 *	for 32 and 64 bit processes.  Set the VM_ABI_32 or VM_ABI_64 flags in allow_data_exec
296 *	or allow_stack_exec to enable data execution for that type of data area for that particular
297 *	ABI (or both by or'ing the flags together).  These are initialized in the architecture
298 *	specific pmap files since the default behavior varies according to architecture.  The
299 *	main reason it varies is because of the need to provide binary compatibility with old
300 *	applications that were written before these restrictions came into being.  In the old
301 *	days, an app could execute anything it could read, but this has slowly been tightened
302 *	up over time.  The default behavior is:
303 *
304 *	32-bit PPC apps		may execute from both stack and data areas
305 *	32-bit Intel apps	may execute from data areas but not stack
306 *	64-bit PPC/Intel apps	may not execute from either data or stack
307 *
308 *	An application on any architecture may override these defaults by explicitly
309 *	adding PROT_EXEC permission to the page in question with the mprotect(2)
310 *	system call.  This code here just determines what happens when an app tries to
311 * 	execute from a page that lacks execute permission.
312 *
313 *	Note that allow_data_exec or allow_stack_exec may also be modified by sysctl to change the
314 *	default behavior for both 32 and 64 bit apps on a system-wide basis.
315 */
316
317extern int allow_data_exec, allow_stack_exec;
318
319int
320override_nx(vm_map_t map, uint32_t user_tag) /* map unused on arm */
321{
322	int current_abi;
323
324	/*
325	 * Determine if the app is running in 32 or 64 bit mode.
326	 */
327
328	if (vm_map_is_64bit(map))
329		current_abi = VM_ABI_64;
330	else
331		current_abi = VM_ABI_32;
332
333	/*
334	 * Determine if we should allow the execution based on whether it's a
335	 * stack or data area and the current architecture.
336	 */
337
338	if (user_tag == VM_MEMORY_STACK)
339		return allow_stack_exec & current_abi;
340
341	return allow_data_exec & current_abi;
342}
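/*
 * Illustrative (hypothetical) caller, not compiled here: a fault handler
 * that takes an execute fault on a page lacking execute permission could
 * consult override_nx() to decide whether to grant execute access anyway
 * for compatibility.  "example_adjust_fault_prot" is a made-up name used
 * only for this sketch.
 */
#if 0
static vm_prot_t
example_adjust_fault_prot(
	vm_map_t	map,
	uint32_t	user_tag,	/* e.g. the map entry's alias */
	vm_prot_t	fault_type,
	vm_prot_t	prot)
{
	if ((fault_type & VM_PROT_EXECUTE) &&
	    !(prot & VM_PROT_EXECUTE) &&
	    override_nx(map, user_tag)) {
		/* data/stack execution is allowed for this ABI: grant it */
		prot |= VM_PROT_EXECUTE;
	}
	return prot;
}
#endif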
343
344
345/*
346 *	Virtual memory maps provide for the mapping, protection,
347 *	and sharing of virtual memory objects.  In addition,
348 *	this module provides for an efficient virtual copy of
349 *	memory from one map to another.
350 *
351 *	Synchronization is required prior to most operations.
352 *
353 *	Maps consist of an ordered doubly-linked list of simple
354 *	entries; a single hint is used to speed up lookups.
355 *
356 *	Sharing maps have been deleted from this version of Mach.
357 *	All shared objects are now mapped directly into the respective
358 *	maps.  This requires a change in the copy on write strategy;
359 *	the asymmetric (delayed) strategy is used for shared temporary
360 *	objects instead of the symmetric (shadow) strategy.  All maps
361 *	are now "top level" maps (either task map, kernel map or submap
362 *	of the kernel map).
363 *
364 *	Since portions of maps are specified by start/end addresses,
365 *	which may not align with existing map entries, all
366 *	routines merely "clip" entries to these start/end values.
367 *	[That is, an entry is split into two, bordering at a
368 *	start or end value.]  Note that these clippings may not
369 *	always be necessary (as the two resulting entries are then
370 *	not changed); however, the clipping is done for convenience.
371 *	No attempt is currently made to "glue back together" two
372 *	abutting entries.
373 *
374 *	The asymmetric (delayed) copy strategy implements virtual copy
375 *	by copying VM object references from one map to
376 *	another, and then marking both regions as copy-on-write.
377 *	It is important to note that only one writeable reference
378 *	to a VM object region exists in any map when this strategy
379 *	is used -- this means that shadow object creation can be
380 *	delayed until a write operation occurs.  The symmetric (shadow)
381 *	strategy allows multiple maps to have writeable references to
382 *	the same region of a vm object, and hence cannot delay creating
383 *	its copy objects.  See vm_object_copy_quickly() in vm_object.c.
384 *	Copying of permanent objects is completely different; see
385 *	vm_object_copy_strategically() in vm_object.c.
386 */
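/*
 * A minimal sketch (hypothetical, not compiled) of the clipping pattern
 * described above, as used by range operations: split the entry
 * containing "start" at "start", then split any entry straddling "end"
 * at "end", so the loop only ever sees entries wholly inside the range.
 * vm_map_clip_start()/vm_map_clip_end() are the clipping routines whose
 * internal forms are prototyped above.
 */
#if 0
	if (vm_map_lookup_entry(map, start, &entry))
		vm_map_clip_start(map, entry, start);
	else
		entry = entry->vme_next;

	while ((entry != vm_map_to_entry(map)) && (entry->vme_start < end)) {
		vm_map_clip_end(map, entry, end);
		/* ... operate on this entry, now fully inside [start, end) ... */
		entry = entry->vme_next;
	}
#endif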
387
388static zone_t	vm_map_zone;		/* zone for vm_map structures */
389static zone_t	vm_map_entry_zone;	/* zone for vm_map_entry structures */
390static zone_t	vm_map_kentry_zone;	/* zone for kernel entry structures */
391static zone_t	vm_map_copy_zone;	/* zone for vm_map_copy structures */
392
393
394/*
395 *	Placeholder object for submap operations.  This object is dropped
396 *	into the range by a call to vm_map_find, and removed when
397 *	vm_map_submap creates the submap.
398 */
399
400vm_object_t	vm_submap_object;
401
402static void		*map_data;
403static vm_map_size_t	map_data_size;
404static void		*kentry_data;
405static vm_map_size_t	kentry_data_size;
406static int		kentry_count = 2048;		/* to init kentry_data_size */
407
408#define         NO_COALESCE_LIMIT  (1024 * 128)
409
410
411/* Skip acquiring locks if we're in the midst of a kernel core dump */
412extern unsigned int not_in_kdp;
413
414#if CONFIG_CODE_DECRYPTION
415/*
416 * vm_map_apple_protected:
417 * This remaps the requested part of the object with an object backed by
418 * the decrypting pager.
419 * crypt_info contains entry points and session data for the crypt module.
420 * The crypt_info block will be copied by vm_map_apple_protected. The data structures
421 * referenced in crypt_info must remain valid until crypt_info->crypt_end() is called.
422 */
423kern_return_t
424vm_map_apple_protected(
425	vm_map_t	map,
426	vm_map_offset_t	start,
427	vm_map_offset_t	end,
428	struct pager_crypt_info *crypt_info)
429{
430	boolean_t	map_locked;
431	kern_return_t	kr;
432	vm_map_entry_t	map_entry;
433	memory_object_t	protected_mem_obj;
434	vm_object_t	protected_object;
435	vm_map_offset_t	map_addr;
436
437	vm_map_lock_read(map);
438	map_locked = TRUE;
439
440	/* lookup the protected VM object */
441	if (!vm_map_lookup_entry(map,
442				 start,
443				 &map_entry) ||
444	    map_entry->vme_end < end ||
445	    map_entry->is_sub_map) {
446		/* that memory is not properly mapped */
447		kr = KERN_INVALID_ARGUMENT;
448		goto done;
449	}
450	protected_object = map_entry->object.vm_object;
451	if (protected_object == VM_OBJECT_NULL) {
452		/* there should be a VM object here at this point */
453		kr = KERN_INVALID_ARGUMENT;
454		goto done;
455	}
456
457	/*
458	 * Lookup (and create if necessary) the protected memory object
459	 * matching that VM object.
460	 * If successful, this also grabs a reference on the memory object,
461	 * to guarantee that it doesn't go away before we get a chance to map
462	 * it.
463	 */
464
465	protected_mem_obj = apple_protect_pager_setup(protected_object, crypt_info);
466	if (protected_mem_obj == NULL) {
467		kr = KERN_FAILURE;
468		goto done;
469	}
470
471	vm_map_unlock_read(map);
472	map_locked = FALSE;
473
474	/* map this memory object in place of the current one */
475	map_addr = start;
476	kr = vm_map_enter_mem_object(map,
477				     &map_addr,
478				     end - start,
479				     (mach_vm_offset_t) 0,
480				     VM_FLAGS_FIXED | VM_FLAGS_OVERWRITE,
481				     (ipc_port_t) protected_mem_obj,
482				     (map_entry->offset +
483				      (start - map_entry->vme_start)),
484				     TRUE,
485				     map_entry->protection,
486				     map_entry->max_protection,
487				     map_entry->inheritance);
488	assert(map_addr == start);
489	/*
490	 * Release the reference obtained by apple_protect_pager_setup().
491	 * The mapping (if it succeeded) is now holding a reference on the
492	 * memory object.
493	 */
494	memory_object_deallocate(protected_mem_obj);
495
496done:
497	if (map_locked) {
498		vm_map_unlock_read(map);
499	}
500	return kr;
501}
502#endif	/* CONFIG_CODE_DECRYPTION */
503
504
505/*
506 *	vm_map_init:
507 *
508 *	Initialize the vm_map module.  Must be called before
509 *	any other vm_map routines.
510 *
511 *	Map and entry structures are allocated from zones -- we must
512 *	initialize those zones.
513 *
514 *	There are three zones of interest:
515 *
516 *	vm_map_zone:		used to allocate maps.
517 *	vm_map_entry_zone:	used to allocate map entries.
518 *	vm_map_kentry_zone:	used to allocate map entries for the kernel.
519 *
520 *	The kernel allocates map entries from a special zone that is initially
521 *	"crammed" with memory.  It would be difficult (perhaps impossible) for
522 *	the kernel to allocate more memory to an entry zone when it became
523 *	empty since the very act of allocating memory implies the creation
524 *	of a new entry.
525 */
526void
527vm_map_init(
528	void)
529{
530	vm_map_zone = zinit((vm_map_size_t) sizeof(struct _vm_map), 40*1024,
531			    PAGE_SIZE, "maps");
532
533	vm_map_entry_zone = zinit((vm_map_size_t) sizeof(struct vm_map_entry),
534				  1024*1024, PAGE_SIZE*5,
535				  "non-kernel map entries");
536
537	vm_map_kentry_zone = zinit((vm_map_size_t) sizeof(struct vm_map_entry),
538				   kentry_data_size, kentry_data_size,
539				   "kernel map entries");
540
541	vm_map_copy_zone = zinit((vm_map_size_t) sizeof(struct vm_map_copy),
542				 16*1024, PAGE_SIZE, "map copies");
543
544	/*
545	 *	Cram the map and kentry zones with initial data.
546	 *	Set kentry_zone non-collectible to aid zone_gc().
547	 */
548	zone_change(vm_map_zone, Z_COLLECT, FALSE);
549	zone_change(vm_map_kentry_zone, Z_COLLECT, FALSE);
550	zone_change(vm_map_kentry_zone, Z_EXPAND, FALSE);
551	zcram(vm_map_zone, map_data, map_data_size);
552	zcram(vm_map_kentry_zone, kentry_data, kentry_data_size);
553}
554
555void
556vm_map_steal_memory(
557	void)
558{
559	map_data_size = vm_map_round_page(10 * sizeof(struct _vm_map));
560	map_data = pmap_steal_memory(map_data_size);
561
562#if 0
563	/*
564	 * Limiting worst case: vm_map_kentry_zone needs to map each "available"
565	 * physical page (i.e. that beyond the kernel image and page tables)
566	 * individually; we guess at most one entry per eight pages in the
567	 * real world. This works out to roughly .1 of 1% of physical memory,
568	 * or roughly 1900 entries (64K) for a 64M machine with 4K pages.
569	 */
570#endif
571	kentry_count = pmap_free_pages() / 8;
572
573
574	kentry_data_size =
575		vm_map_round_page(kentry_count * sizeof(struct vm_map_entry));
576	kentry_data = pmap_steal_memory(kentry_data_size);
577}
578
579/*
580 *	vm_map_create:
581 *
582 *	Creates and returns a new empty VM map with
583 *	the given physical map structure, and having
584 *	the given lower and upper address bounds.
585 */
586vm_map_t
587vm_map_create(
588	pmap_t			pmap,
589	vm_map_offset_t	min,
590	vm_map_offset_t	max,
591	boolean_t		pageable)
592{
593	static int		color_seed = 0;
594	register vm_map_t	result;
595
596	result = (vm_map_t) zalloc(vm_map_zone);
597	if (result == VM_MAP_NULL)
598		panic("vm_map_create");
599
600	vm_map_first_entry(result) = vm_map_to_entry(result);
601	vm_map_last_entry(result)  = vm_map_to_entry(result);
602	result->hdr.nentries = 0;
603	result->hdr.entries_pageable = pageable;
604
605	result->size = 0;
606	result->user_wire_limit = MACH_VM_MAX_ADDRESS;	/* default limit is unlimited */
607	result->user_wire_size  = 0;
608	result->ref_count = 1;
609#if	TASK_SWAPPER
610	result->res_count = 1;
611	result->sw_state = MAP_SW_IN;
612#endif	/* TASK_SWAPPER */
613	result->pmap = pmap;
614	result->min_offset = min;
615	result->max_offset = max;
616	result->wiring_required = FALSE;
617	result->no_zero_fill = FALSE;
618	result->mapped = FALSE;
619#if CONFIG_EMBEDDED
620	result->prot_copy_allow = FALSE;
621#else
622	result->prot_copy_allow = TRUE;
623#endif
624	result->wait_for_space = FALSE;
625	result->first_free = vm_map_to_entry(result);
626	result->hint = vm_map_to_entry(result);
627	result->color_rr = (color_seed++) & vm_color_mask;
628	vm_map_lock_init(result);
629	mutex_init(&result->s_lock, 0);
630
631	return(result);
632}
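/*
 * Example use, as seen later in this file: vm_map_enter() builds a
 * temporary "zap" map with no pmap of its own, spanning only the range
 * being replaced, with pageable entries:
 *
 *	zap_old_map = vm_map_create(PMAP_NULL,
 *				    *address,
 *				    *address + size,
 *				    TRUE);
 */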
633
634/*
635 *	vm_map_entry_create:	[ internal use only ]
636 *
637 *	Allocates a VM map entry for insertion in the
638 *	given map (or map copy).  No fields are filled.
639 */
640#define	vm_map_entry_create(map) \
641	_vm_map_entry_create(&(map)->hdr)
642
643#define	vm_map_copy_entry_create(copy) \
644	_vm_map_entry_create(&(copy)->cpy_hdr)
645
646static vm_map_entry_t
647_vm_map_entry_create(
648	register struct vm_map_header	*map_header)
649{
650	register zone_t	zone;
651	register vm_map_entry_t	entry;
652
653	if (map_header->entries_pageable)
654		zone = vm_map_entry_zone;
655	else
656		zone = vm_map_kentry_zone;
657
658	entry = (vm_map_entry_t) zalloc(zone);
659	if (entry == VM_MAP_ENTRY_NULL)
660		panic("vm_map_entry_create");
661
662	return(entry);
663}
664
665/*
666 *	vm_map_entry_dispose:	[ internal use only ]
667 *
668 *	Inverse of vm_map_entry_create.
669 *
670 * 	write map lock held so no need to
671 *	do anything special to ensure correctness
672 * 	of the stores
673 */
674#define	vm_map_entry_dispose(map, entry)			\
675	MACRO_BEGIN						\
676	if((entry) == (map)->first_free)			\
677		(map)->first_free = vm_map_to_entry(map);	\
678	if((entry) == (map)->hint)				\
679		(map)->hint = vm_map_to_entry(map);		\
680	_vm_map_entry_dispose(&(map)->hdr, (entry));		\
681	MACRO_END
682
683#define	vm_map_copy_entry_dispose(copy, entry) \
684	_vm_map_entry_dispose(&(copy)->cpy_hdr, (entry))
685
686static void
687_vm_map_entry_dispose(
688	register struct vm_map_header	*map_header,
689	register vm_map_entry_t		entry)
690{
691	register zone_t		zone;
692
693	if (map_header->entries_pageable)
694		zone = vm_map_entry_zone;
695	else
696		zone = vm_map_kentry_zone;
697
698	zfree(zone, entry);
699}
700
701#if MACH_ASSERT
702static boolean_t first_free_is_valid(vm_map_t map);	/* forward */
703static boolean_t first_free_check = FALSE;
704static boolean_t
705first_free_is_valid(
706	vm_map_t	map)
707{
708	vm_map_entry_t	entry, next;
709
710	if (!first_free_check)
711		return TRUE;
712
713	entry = vm_map_to_entry(map);
714	next = entry->vme_next;
715	while (vm_map_trunc_page(next->vme_start) == vm_map_trunc_page(entry->vme_end) ||
716	       (vm_map_trunc_page(next->vme_start) == vm_map_trunc_page(entry->vme_start) &&
717		next != vm_map_to_entry(map))) {
718		entry = next;
719		next = entry->vme_next;
720		if (entry == vm_map_to_entry(map))
721			break;
722	}
723	if (map->first_free != entry) {
724		printf("Bad first_free for map %p: %p should be %p\n",
725		       map, map->first_free, entry);
726		return FALSE;
727	}
728	return TRUE;
729}
730#endif /* MACH_ASSERT */
731
732/*
733 *	UPDATE_FIRST_FREE:
734 *
735 *	Updates the map->first_free pointer to the
736 *	entry immediately before the first hole in the map.
737 * 	The map should be locked.
738 */
739#define UPDATE_FIRST_FREE(map, new_first_free) 				\
740	MACRO_BEGIN							\
741	vm_map_t	UFF_map; 					\
742	vm_map_entry_t	UFF_first_free; 				\
743	vm_map_entry_t	UFF_next_entry; 				\
744	UFF_map = (map); 						\
745	UFF_first_free = (new_first_free);				\
746	UFF_next_entry = UFF_first_free->vme_next; 			\
747	while (vm_map_trunc_page(UFF_next_entry->vme_start) == 		\
748	       vm_map_trunc_page(UFF_first_free->vme_end) || 			\
749	       (vm_map_trunc_page(UFF_next_entry->vme_start) == 		\
750		vm_map_trunc_page(UFF_first_free->vme_start) &&		\
751		UFF_next_entry != vm_map_to_entry(UFF_map))) { 		\
752		UFF_first_free = UFF_next_entry; 			\
753		UFF_next_entry = UFF_first_free->vme_next; 		\
754		if (UFF_first_free == vm_map_to_entry(UFF_map)) 	\
755			break; 						\
756	} 								\
757	UFF_map->first_free = UFF_first_free; 				\
758	assert(first_free_is_valid(UFF_map));				\
759	MACRO_END
760
761/*
762 *	vm_map_entry_{un,}link:
763 *
764 *	Insert/remove entries from maps (or map copies).
765 */
766#define vm_map_entry_link(map, after_where, entry)			\
767	MACRO_BEGIN							\
768	vm_map_t VMEL_map; 						\
769	vm_map_entry_t VMEL_entry; 					\
770	VMEL_map = (map);						\
771	VMEL_entry = (entry); 						\
772	_vm_map_entry_link(&VMEL_map->hdr, after_where, VMEL_entry); 	\
773	UPDATE_FIRST_FREE(VMEL_map, VMEL_map->first_free); 		\
774	MACRO_END
775
776
777#define vm_map_copy_entry_link(copy, after_where, entry)		\
778	_vm_map_entry_link(&(copy)->cpy_hdr, after_where, (entry))
779
780#define _vm_map_entry_link(hdr, after_where, entry)			\
781	MACRO_BEGIN							\
782	(hdr)->nentries++;						\
783	(entry)->vme_prev = (after_where);				\
784	(entry)->vme_next = (after_where)->vme_next;			\
785	(entry)->vme_prev->vme_next = (entry)->vme_next->vme_prev = (entry); \
786	MACRO_END
787
788#define vm_map_entry_unlink(map, entry)					\
789	MACRO_BEGIN							\
790	vm_map_t VMEU_map; 						\
791	vm_map_entry_t VMEU_entry; 					\
792	vm_map_entry_t VMEU_first_free;					\
793	VMEU_map = (map); 						\
794	VMEU_entry = (entry); 						\
795	if (VMEU_entry->vme_start <= VMEU_map->first_free->vme_start)	\
796		VMEU_first_free = VMEU_entry->vme_prev;			\
797	else								\
798		VMEU_first_free = VMEU_map->first_free;			\
799	_vm_map_entry_unlink(&VMEU_map->hdr, VMEU_entry); 		\
800	UPDATE_FIRST_FREE(VMEU_map, VMEU_first_free);			\
801	MACRO_END
802
803#define vm_map_copy_entry_unlink(copy, entry)				\
804	_vm_map_entry_unlink(&(copy)->cpy_hdr, (entry))
805
806#define _vm_map_entry_unlink(hdr, entry)				\
807	MACRO_BEGIN							\
808	(hdr)->nentries--;						\
809	(entry)->vme_next->vme_prev = (entry)->vme_prev; 		\
810	(entry)->vme_prev->vme_next = (entry)->vme_next; 		\
811	MACRO_END
812
813#if	MACH_ASSERT && TASK_SWAPPER
814/*
815 *	vm_map_res_reference:
816 *
817 *	Adds another valid residence count to the given map.
818 *
819 *	Map is locked so this function can be called from
820 *	vm_map_swapin.
821 *
822 */
823void vm_map_res_reference(register vm_map_t map)
824{
825	/* assert map is locked */
826	assert(map->res_count >= 0);
827	assert(map->ref_count >= map->res_count);
828	if (map->res_count == 0) {
829		mutex_unlock(&map->s_lock);
830		vm_map_lock(map);
831		vm_map_swapin(map);
832		mutex_lock(&map->s_lock);
833		++map->res_count;
834		vm_map_unlock(map);
835	} else
836		++map->res_count;
837}
838
839/*
840 *	vm_map_reference_swap:
841 *
842 *	Adds valid reference and residence counts to the given map.
843 *
844 *	The map may not be in memory (i.e. zero residence count).
845 *
846 */
847void vm_map_reference_swap(register vm_map_t map)
848{
849	assert(map != VM_MAP_NULL);
850	mutex_lock(&map->s_lock);
851	assert(map->res_count >= 0);
852	assert(map->ref_count >= map->res_count);
853	map->ref_count++;
854	vm_map_res_reference(map);
855	mutex_unlock(&map->s_lock);
856}
857
858/*
859 *	vm_map_res_deallocate:
860 *
861 *	Decrement residence count on a map; possibly causing swapout.
862 *
863 *	The map must be in memory (i.e. non-zero residence count).
864 *
865 *	The map is locked, so this function is callable from vm_map_deallocate.
866 *
867 */
868void vm_map_res_deallocate(register vm_map_t map)
869{
870	assert(map->res_count > 0);
871	if (--map->res_count == 0) {
872		mutex_unlock(&map->s_lock);
873		vm_map_lock(map);
874		vm_map_swapout(map);
875		vm_map_unlock(map);
876		mutex_lock(&map->s_lock);
877	}
878	assert(map->ref_count >= map->res_count);
879}
880#endif	/* MACH_ASSERT && TASK_SWAPPER */
881
882/*
883 *	vm_map_destroy:
884 *
885 *	Actually destroy a map.
886 */
887void
888vm_map_destroy(
889	vm_map_t	map,
890	int		flags)
891{
892	vm_map_lock(map);
893
894	/* clean up regular map entries */
895	(void) vm_map_delete(map, map->min_offset, map->max_offset,
896			     flags, VM_MAP_NULL);
897	/* clean up leftover special mappings (commpage, etc...) */
898#ifdef __ppc__
899	/*
900	 * PPC51: ppc64 is limited to 51-bit addresses.
901	 * Memory beyond this 51-bit limit is mapped specially at the
902	 * pmap level, so do not interfere.
903	 * On PPC64, the commpage is mapped beyond the addressable range
904	 * via a special pmap hack, so ask pmap to clean it explicitly...
905	 */
906	if (map->pmap) {
907		pmap_unmap_sharedpage(map->pmap);
908	}
909	/* ... and do not let regular pmap cleanup apply here */
910	flags |= VM_MAP_REMOVE_NO_PMAP_CLEANUP;
911#endif /* __ppc__ */
912	(void) vm_map_delete(map, 0x0, 0xFFFFFFFFFFFFF000ULL,
913			     flags, VM_MAP_NULL);
914	vm_map_unlock(map);
915
916	assert(map->hdr.nentries == 0);
917
918	if(map->pmap)
919		pmap_destroy(map->pmap);
920
921	zfree(vm_map_zone, map);
922}
923
924#if	TASK_SWAPPER
925/*
926 * vm_map_swapin/vm_map_swapout
927 *
928 * Swap a map in and out, either referencing or releasing its resources.
929 * These functions are internal use only; however, they must be exported
930 * because they may be called from macros, which are exported.
931 *
932 * In the case of swapout, there could be races on the residence count,
933 * so if the residence count is up, we return, assuming that a
934 * vm_map_deallocate() call in the near future will bring us back.
935 *
936 * Locking:
937 *	-- We use the map write lock for synchronization among races.
938 *	-- The map write lock, and not the simple s_lock, protects the
939 *	   swap state of the map.
940 *	-- If a map entry is a share map, then we hold both locks, in
941 *	   hierarchical order.
942 *
943 * Synchronization Notes:
944 *	1) If a vm_map_swapin() call happens while swapout in progress, it
945 *	will block on the map lock and proceed when swapout is through.
946 *	2) A vm_map_reference() call at this time is illegal, and will
947 *	cause a panic.  vm_map_reference() is only allowed on resident
948 *	maps, since it refuses to block.
949 *	3) A vm_map_swapin() call during a swapin will block, and
950 *	proceed when the first swapin is done, turning into a nop.
951 *	This is the reason the res_count is not incremented until
952 *	after the swapin is complete.
953 *	4) There is a timing hole after the checks of the res_count, before
954 *	the map lock is taken, during which a swapin may get the lock
955 *	before a swapout that is about to happen.  If this happens, the swapin
956 *	will detect the state and increment the reference count, causing
957 *	the swapout to be a nop, thereby delaying it until a later
958 *	vm_map_deallocate.  If the swapout gets the lock first, then
959 *	the swapin will simply block until the swapout is done, and
960 *	then proceed.
961 *
962 * Because vm_map_swapin() is potentially an expensive operation, it
963 * should be used with caution.
964 *
965 * Invariants:
966 *	1) A map with a residence count of zero is either swapped, or
967 *	   being swapped.
968 *	2) A map with a non-zero residence count is either resident,
969 *	   or being swapped in.
970 */
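/*
 * A rough sketch (hypothetical, not compiled) of the pairing described
 * above: a caller that may touch a swapped-out map takes both a
 * reference and a residence count, and drops them again when done.
 */
#if 0
	vm_map_reference_swap(map);	/* ref_count++ and res_count++; swaps the map in if needed */
	/* ... operate on the map ... */
	vm_map_deallocate(map);		/* drops the counts; a residence count of 0 permits swapout */
#endif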
971
972int vm_map_swap_enable = 1;
973
974void vm_map_swapin (vm_map_t map)
975{
976	register vm_map_entry_t entry;
977
978	if (!vm_map_swap_enable)	/* debug */
979		return;
980
981	/*
982	 * Map is locked
983	 * First deal with various races.
984	 */
985	if (map->sw_state == MAP_SW_IN)
986		/*
987		 * we raced with swapout and won.  Returning will incr.
988		 * the res_count, turning the swapout into a nop.
989		 */
990		return;
991
992	/*
993	 * The residence count must be zero.  If we raced with another
994	 * swapin, the state would have been IN; if we raced with a
995	 * swapout (after another competing swapin), we must have lost
996	 * the race to get here (see above comment), in which case
997	 * res_count is still 0.
998	 */
999	assert(map->res_count == 0);
1000
1001	/*
1002	 * There are no intermediate states of a map going out or
1003	 * coming in, since the map is locked during the transition.
1004	 */
1005	assert(map->sw_state == MAP_SW_OUT);
1006
1007	/*
1008	 * We now operate upon each map entry.  If the entry is a sub-
1009	 * or share-map, we call vm_map_res_reference upon it.
1010	 * If the entry is an object, we call vm_object_res_reference
1011	 * (this may iterate through the shadow chain).
1012	 * Note that we hold the map locked the entire time,
1013	 * even if we get back here via a recursive call in
1014	 * vm_map_res_reference.
1015	 */
1016	entry = vm_map_first_entry(map);
1017
1018	while (entry != vm_map_to_entry(map)) {
1019		if (entry->object.vm_object != VM_OBJECT_NULL) {
1020			if (entry->is_sub_map) {
1021				vm_map_t lmap = entry->object.sub_map;
1022				mutex_lock(&lmap->s_lock);
1023				vm_map_res_reference(lmap);
1024				mutex_unlock(&lmap->s_lock);
1025			} else {
1026				vm_object_t object = entry->object.vm_object;
1027				vm_object_lock(object);
1028				/*
1029				 * This call may iterate through the
1030				 * shadow chain.
1031				 */
1032				vm_object_res_reference(object);
1033				vm_object_unlock(object);
1034			}
1035		}
1036		entry = entry->vme_next;
1037	}
1038	assert(map->sw_state == MAP_SW_OUT);
1039	map->sw_state = MAP_SW_IN;
1040}
1041
1042void vm_map_swapout(vm_map_t map)
1043{
1044	register vm_map_entry_t entry;
1045
1046	/*
1047	 * Map is locked
1048	 * First deal with various races.
1049	 * If we raced with a swapin and lost, the residence count
1050	 * will have been incremented to 1, and we simply return.
1051	 */
1052	mutex_lock(&map->s_lock);
1053	if (map->res_count != 0) {
1054		mutex_unlock(&map->s_lock);
1055		return;
1056	}
1057	mutex_unlock(&map->s_lock);
1058
1059	/*
1060	 * There are no intermediate states of a map going out or
1061	 * coming in, since the map is locked during the transition.
1062	 */
1063	assert(map->sw_state == MAP_SW_IN);
1064
1065	if (!vm_map_swap_enable)
1066		return;
1067
1068	/*
1069	 * We now operate upon each map entry.  If the entry is a sub-
1070	 * or share-map, we call vm_map_res_deallocate upon it.
1071	 * If the entry is an object, we call vm_object_res_deallocate
1072	 * (this may iterate through the shadow chain).
1073	 * Note that we hold the map locked the entire time,
1074	 * even if we get back here via a recursive call in
1075	 * vm_map_res_deallocate.
1076	 */
1077	entry = vm_map_first_entry(map);
1078
1079	while (entry != vm_map_to_entry(map)) {
1080		if (entry->object.vm_object != VM_OBJECT_NULL) {
1081			if (entry->is_sub_map) {
1082				vm_map_t lmap = entry->object.sub_map;
1083				mutex_lock(&lmap->s_lock);
1084				vm_map_res_deallocate(lmap);
1085				mutex_unlock(&lmap->s_lock);
1086			} else {
1087				vm_object_t object = entry->object.vm_object;
1088				vm_object_lock(object);
1089				/*
1090				 * This call may take a long time,
1091				 * since it could actively push
1092				 * out pages (if we implement it
1093				 * that way).
1094				 */
1095				vm_object_res_deallocate(object);
1096				vm_object_unlock(object);
1097			}
1098		}
1099		entry = entry->vme_next;
1100	}
1101	assert(map->sw_state == MAP_SW_IN);
1102	map->sw_state = MAP_SW_OUT;
1103}
1104
1105#endif	/* TASK_SWAPPER */
1106
1107
1108/*
1109 *	SAVE_HINT_MAP_READ:
1110 *
1111 *	Saves the specified entry as the hint for
1112 *	future lookups.  only a read lock is held on map,
1113 * 	so make sure the store is atomic... OSCompareAndSwap
1114 *	guarantees this... also, we don't care if we collide
1115 *	and someone else wins and stores their 'hint'
1116 */
1117#define	SAVE_HINT_MAP_READ(map,value) \
1118	MACRO_BEGIN							\
1119	OSCompareAndSwap((UInt32)((map)->hint), (UInt32)value, (UInt32 *)(&(map)->hint)); \
1120	MACRO_END
1121
1122
1123/*
1124 *	SAVE_HINT_MAP_WRITE:
1125 *
1126 *	Saves the specified entry as the hint for
1127 *	future lookups.  write lock held on map,
1128 * 	so no one else can be writing or looking
1129 * 	until the lock is dropped, so it's safe
1130 * 	to just do an assignment
1131 */
1132#define	SAVE_HINT_MAP_WRITE(map,value) \
1133	MACRO_BEGIN		       \
1134	(map)->hint = (value);	       \
1135	MACRO_END
1136
1137/*
1138 *	vm_map_lookup_entry:	[ internal use only ]
1139 *
1140 *	Finds the map entry containing (or
1141 *	immediately preceding) the specified address
1142 *	in the given map; the entry is returned
1143 *	in the "entry" parameter.  The boolean
1144 *	result indicates whether the address is
1145 *	actually contained in the map.
1146 */
1147boolean_t
1148vm_map_lookup_entry(
1149	register vm_map_t		map,
1150	register vm_map_offset_t	address,
1151	vm_map_entry_t		*entry)		/* OUT */
1152{
1153	register vm_map_entry_t		cur;
1154	register vm_map_entry_t		last;
1155
1156	/*
1157	 *	Start looking either from the head of the
1158	 *	list, or from the hint.
1159	 */
1160	cur = map->hint;
1161
1162	if (cur == vm_map_to_entry(map))
1163		cur = cur->vme_next;
1164
1165	if (address >= cur->vme_start) {
1166		/*
1167		 *	Go from hint to end of list.
1168		 *
1169		 *	But first, make a quick check to see if
1170		 *	we are already looking at the entry we
1171		 *	want (which is usually the case).
1172		 *	Note also that we don't need to save the hint
1173		 *	here... it is the same hint (unless we are
1174		 *	at the header, in which case the hint didn't
1175		 *	buy us anything anyway).
1176		 */
1177		last = vm_map_to_entry(map);
1178		if ((cur != last) && (cur->vme_end > address)) {
1179			*entry = cur;
1180			return(TRUE);
1181		}
1182	}
1183	else {
1184		/*
1185		 *	Go from start to hint, *inclusively*
1186		 */
1187		last = cur->vme_next;
1188		cur = vm_map_first_entry(map);
1189	}
1190
1191	/*
1192	 *	Search linearly
1193	 */
1194
1195	while (cur != last) {
1196		if (cur->vme_end > address) {
1197			if (address >= cur->vme_start) {
1198				/*
1199				 *	Save this lookup for future
1200				 *	hints, and return
1201				 */
1202
1203				*entry = cur;
1204				SAVE_HINT_MAP_READ(map, cur);
1205
1206				return(TRUE);
1207			}
1208			break;
1209		}
1210		cur = cur->vme_next;
1211	}
1212	*entry = cur->vme_prev;
1213	SAVE_HINT_MAP_READ(map, *entry);
1214
1215	return(FALSE);
1216}
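/*
 * Typical use (illustrative sketch, not compiled): the boolean result
 * distinguishes "address falls inside *entry" from "*entry is the entry
 * immediately preceding the hole that contains the address".
 */
#if 0
	vm_map_entry_t	entry;

	if (vm_map_lookup_entry(map, addr, &entry)) {
		/* addr lies in [entry->vme_start, entry->vme_end) */
	} else {
		/* addr is unmapped; "entry" precedes the hole (possibly the map header) */
	}
#endif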
1217
1218/*
1219 *	Routine:	vm_map_find_space
1220 *	Purpose:
1221 *		Allocate a range in the specified virtual address map,
1222 *		returning the entry allocated for that range.
1223 *		Used by kmem_alloc, etc.
1224 *
1225 *		The map must NOT be locked. It will be returned locked
1226 *		on KERN_SUCCESS, unlocked on failure.
1227 *
1228 *		If an entry is allocated, the object/offset fields
1229 *		are initialized to zero.
1230 */
1231kern_return_t
1232vm_map_find_space(
1233	register vm_map_t	map,
1234	vm_map_offset_t		*address,	/* OUT */
1235	vm_map_size_t		size,
1236	vm_map_offset_t		mask,
1237	int			flags,
1238	vm_map_entry_t		*o_entry)	/* OUT */
1239{
1240	register vm_map_entry_t	entry, new_entry;
1241	register vm_map_offset_t	start;
1242	register vm_map_offset_t	end;
1243
1244	if (size == 0) {
1245		*address = 0;
1246		return KERN_INVALID_ARGUMENT;
1247	}
1248
1249	if (flags & VM_FLAGS_GUARD_AFTER) {
1250		/* account for the back guard page in the size */
1251		size += PAGE_SIZE_64;
1252	}
1253
1254	new_entry = vm_map_entry_create(map);
1255
1256	/*
1257	 *	Look for the first possible address; if there's already
1258	 *	something at this address, we have to start after it.
1259	 */
1260
1261	vm_map_lock(map);
1262
1263	assert(first_free_is_valid(map));
1264	if ((entry = map->first_free) == vm_map_to_entry(map))
1265		start = map->min_offset;
1266	else
1267		start = entry->vme_end;
1268
1269	/*
1270	 *	In any case, the "entry" always precedes
1271	 *	the proposed new region throughout the loop:
1272	 */
1273
1274	while (TRUE) {
1275		register vm_map_entry_t	next;
1276
1277		/*
1278		 *	Find the end of the proposed new region.
1279		 *	Be sure we didn't go beyond the end, or
1280		 *	wrap around the address.
1281		 */
1282
1283		if (flags & VM_FLAGS_GUARD_BEFORE) {
1284			/* reserve space for the front guard page */
1285			start += PAGE_SIZE_64;
1286		}
1287		end = ((start + mask) & ~mask);
1288
1289		if (end < start) {
1290			vm_map_entry_dispose(map, new_entry);
1291			vm_map_unlock(map);
1292			return(KERN_NO_SPACE);
1293		}
1294		start = end;
1295		end += size;
1296
1297		if ((end > map->max_offset) || (end < start)) {
1298			vm_map_entry_dispose(map, new_entry);
1299			vm_map_unlock(map);
1300			return(KERN_NO_SPACE);
1301		}
1302
1303		/*
1304		 *	If there are no more entries, we must win.
1305		 */
1306
1307		next = entry->vme_next;
1308		if (next == vm_map_to_entry(map))
1309			break;
1310
1311		/*
1312		 *	If there is another entry, it must be
1313		 *	after the end of the potential new region.
1314		 */
1315
1316		if (next->vme_start >= end)
1317			break;
1318
1319		/*
1320		 *	Didn't fit -- move to the next entry.
1321		 */
1322
1323		entry = next;
1324		start = entry->vme_end;
1325	}
1326
1327	/*
1328	 *	At this point,
1329	 *		"start" and "end" should define the endpoints of the
1330	 *			available new range, and
1331	 *		"entry" should refer to the region before the new
1332	 *			range, and
1333	 *
1334	 *		the map should be locked.
1335	 */
1336
1337	if (flags & VM_FLAGS_GUARD_BEFORE) {
1338		/* go back for the front guard page */
1339		start -= PAGE_SIZE_64;
1340	}
1341	*address = start;
1342
1343	new_entry->vme_start = start;
1344	new_entry->vme_end = end;
1345	assert(page_aligned(new_entry->vme_start));
1346	assert(page_aligned(new_entry->vme_end));
1347
1348	new_entry->is_shared = FALSE;
1349	new_entry->is_sub_map = FALSE;
1350	new_entry->use_pmap = FALSE;
1351	new_entry->object.vm_object = VM_OBJECT_NULL;
1352	new_entry->offset = (vm_object_offset_t) 0;
1353
1354	new_entry->needs_copy = FALSE;
1355
1356	new_entry->inheritance = VM_INHERIT_DEFAULT;
1357	new_entry->protection = VM_PROT_DEFAULT;
1358	new_entry->max_protection = VM_PROT_ALL;
1359	new_entry->behavior = VM_BEHAVIOR_DEFAULT;
1360	new_entry->wired_count = 0;
1361	new_entry->user_wired_count = 0;
1362
1363	new_entry->in_transition = FALSE;
1364	new_entry->needs_wakeup = FALSE;
1365	new_entry->no_cache = FALSE;
1366
1367	new_entry->alias = 0;
1368
1369	VM_GET_FLAGS_ALIAS(flags, new_entry->alias);
1370
1371	/*
1372	 *	Insert the new entry into the list
1373	 */
1374
1375	vm_map_entry_link(map, entry, new_entry);
1376
1377	map->size += size;
1378
1379	/*
1380	 *	Update the lookup hint
1381	 */
1382	SAVE_HINT_MAP_WRITE(map, new_entry);
1383
1384	*o_entry = new_entry;
1385	return(KERN_SUCCESS);
1386}
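/*
 * A rough sketch (hypothetical, not compiled) of how a kmem_alloc-style
 * caller in vm_kern.c uses this routine: on success the map comes back
 * locked with a zero-filled entry, so the caller installs its own
 * object/offset (taking a reference on the object) and then unlocks.
 */
#if 0
	kr = vm_map_find_space(kernel_map, &addr, size,
			       (vm_map_offset_t) 0, 0, &entry);
	if (kr != KERN_SUCCESS)
		return kr;
	vm_object_reference(object);
	entry->object.vm_object = object;
	entry->offset = (vm_object_offset_t) 0;
	vm_map_unlock(kernel_map);
#endif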
1387
1388int vm_map_pmap_enter_print = FALSE;
1389int vm_map_pmap_enter_enable = FALSE;
1390
1391/*
1392 *	Routine:	vm_map_pmap_enter [internal only]
1393 *
1394 *	Description:
1395 *		Force pages from the specified object to be entered into
1396 *		the pmap at the specified address if they are present.
1397 *		As soon as a page is not found in the object, the scan ends.
1398 *
1399 *	Returns:
1400 *		Nothing.
1401 *
1402 *	In/out conditions:
1403 *		The source map should not be locked on entry.
1404 */
1405static void
1406vm_map_pmap_enter(
1407	vm_map_t		map,
1408	register vm_map_offset_t 	addr,
1409	register vm_map_offset_t	end_addr,
1410	register vm_object_t 	object,
1411	vm_object_offset_t	offset,
1412	vm_prot_t		protection)
1413{
1414	int			type_of_fault;
1415	kern_return_t		kr;
1416
1417	if(map->pmap == 0)
1418		return;
1419
1420	while (addr < end_addr) {
1421		register vm_page_t	m;
1422
1423		vm_object_lock(object);
1424
1425		m = vm_page_lookup(object, offset);
1426		/*
1427		 * ENCRYPTED SWAP:
1428		 * The user should never see encrypted data, so do not
1429		 * enter an encrypted page in the page table.
1430		 */
1431		if (m == VM_PAGE_NULL || m->busy || m->encrypted ||
1432		    m->fictitious ||
1433		    (m->unusual && ( m->error || m->restart || m->absent))) {
1434			vm_object_unlock(object);
1435			return;
1436		}
1437
1438		if (vm_map_pmap_enter_print) {
1439			printf("vm_map_pmap_enter:");
1440			printf("map: %p, addr: %llx, object: %p, offset: %llx\n",
1441			       map, (unsigned long long)addr, object, (unsigned long long)offset);
1442		}
1443		type_of_fault = DBG_CACHE_HIT_FAULT;
1444		kr = vm_fault_enter(m, map->pmap, addr, protection,
1445				    m->wire_count != 0, FALSE, FALSE,
1446				    &type_of_fault);
1447
1448		vm_object_unlock(object);
1449
1450		offset += PAGE_SIZE_64;
1451		addr += PAGE_SIZE;
1452	}
1453}
1454
1455boolean_t vm_map_pmap_is_empty(
1456	vm_map_t	map,
1457	vm_map_offset_t	start,
1458	vm_map_offset_t end);
1459boolean_t vm_map_pmap_is_empty(
1460	vm_map_t	map,
1461	vm_map_offset_t	start,
1462	vm_map_offset_t	end)
1463{
1464#ifdef MACHINE_PMAP_IS_EMPTY
1465	return pmap_is_empty(map->pmap, start, end);
1466#else 	/* MACHINE_PMAP_IS_EMPTY */
1467	vm_map_offset_t	offset;
1468	ppnum_t		phys_page;
1469
1470	if (map->pmap == NULL) {
1471		return TRUE;
1472	}
1473
1474	for (offset = start;
1475	     offset < end;
1476	     offset += PAGE_SIZE) {
1477		phys_page = pmap_find_phys(map->pmap, offset);
1478		if (phys_page) {
1479			kprintf("vm_map_pmap_is_empty(%p,0x%llx,0x%llx): "
1480				"page %d at 0x%llx\n",
1481				map, (long long)start, (long long)end,
1482				phys_page, (long long)offset);
1483			return FALSE;
1484		}
1485	}
1486	return TRUE;
1487#endif	/* MACHINE_PMAP_IS_EMPTY */
1488}
1489
1490/*
1491 *	Routine:	vm_map_enter
1492 *
1493 *	Description:
1494 *		Allocate a range in the specified virtual address map.
1495 *		The resulting range will refer to memory defined by
1496 *		the given memory object and offset into that object.
1497 *
1498 *		Arguments are as defined in the vm_map call.
1499 */
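/*
 * Illustrative call (hypothetical, not compiled): map "size" bytes of
 * anonymous, lazily-allocated memory anywhere in "map", letting the
 * kernel choose the address.
 */
#if 0
	vm_map_offset_t	map_addr = 0;

	kr = vm_map_enter(map,
			  &map_addr,			/* OUT: chosen address */
			  size,
			  (vm_map_offset_t) 0,		/* no alignment mask */
			  VM_FLAGS_ANYWHERE,
			  VM_OBJECT_NULL,		/* anonymous object, allocated on demand */
			  (vm_object_offset_t) 0,
			  FALSE,			/* needs_copy */
			  VM_PROT_DEFAULT,
			  VM_PROT_ALL,
			  VM_INHERIT_DEFAULT);
#endif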
1500int _map_enter_debug = 0;
1501static unsigned int vm_map_enter_restore_successes = 0;
1502static unsigned int vm_map_enter_restore_failures = 0;
1503kern_return_t
1504vm_map_enter(
1505	vm_map_t		map,
1506	vm_map_offset_t		*address,	/* IN/OUT */
1507	vm_map_size_t		size,
1508	vm_map_offset_t		mask,
1509	int			flags,
1510	vm_object_t		object,
1511	vm_object_offset_t	offset,
1512	boolean_t		needs_copy,
1513	vm_prot_t		cur_protection,
1514	vm_prot_t		max_protection,
1515	vm_inherit_t		inheritance)
1516{
1517	vm_map_entry_t		entry, new_entry;
1518	vm_map_offset_t		start, tmp_start, tmp_offset;
1519	vm_map_offset_t		end, tmp_end;
1520	kern_return_t		result = KERN_SUCCESS;
1521	vm_map_t		zap_old_map = VM_MAP_NULL;
1522	vm_map_t		zap_new_map = VM_MAP_NULL;
1523	boolean_t		map_locked = FALSE;
1524	boolean_t		pmap_empty = TRUE;
1525	boolean_t		new_mapping_established = FALSE;
1526	boolean_t		anywhere = ((flags & VM_FLAGS_ANYWHERE) != 0);
1527	boolean_t		purgable = ((flags & VM_FLAGS_PURGABLE) != 0);
1528	boolean_t		overwrite = ((flags & VM_FLAGS_OVERWRITE) != 0);
1529	boolean_t		no_cache = ((flags & VM_FLAGS_NO_CACHE) != 0);
1530	boolean_t		is_submap = ((flags & VM_FLAGS_SUBMAP) != 0);
1531	char			alias;
1532	vm_map_offset_t		effective_min_offset, effective_max_offset;
1533	kern_return_t		kr;
1534
1535#if CONFIG_EMBEDDED
1536	if (cur_protection & VM_PROT_WRITE) {
1537		if (cur_protection & VM_PROT_EXECUTE) {
1538			printf("EMBEDDED: %s curprot cannot be write+execute. turning off execute\n", __PRETTY_FUNCTION__);
1539			cur_protection &= ~VM_PROT_EXECUTE;
1540		}
1541	}
1542	if (max_protection & VM_PROT_WRITE) {
1543		if (max_protection & VM_PROT_EXECUTE) {
1544			/* Right now all kinds of data segments are RWX. No point in logging that. */
1545			/* printf("EMBEDDED: %s maxprot cannot be write+execute. turning off execute\n", __PRETTY_FUNCTION__); */
1546
1547			/* Try to take a hint from curprot. If curprot is not writable,
1548			 * make maxprot not writable. Otherwise make it not executable.
1549			 */
1550			if((cur_protection & VM_PROT_WRITE) == 0) {
1551				max_protection &= ~VM_PROT_WRITE;
1552			} else {
1553				max_protection &= ~VM_PROT_EXECUTE;
1554			}
1555		}
1556	}
1557	assert ((cur_protection | max_protection) == max_protection);
1558#endif /* CONFIG_EMBEDDED */
1559
1560	if (is_submap) {
1561		if (purgable) {
1562			/* submaps can not be purgeable */
1563			return KERN_INVALID_ARGUMENT;
1564		}
1565		if (object == VM_OBJECT_NULL) {
1566			/* submaps can not be created lazily */
1567			return KERN_INVALID_ARGUMENT;
1568		}
1569	}
1570	if (flags & VM_FLAGS_ALREADY) {
1571		/*
1572		 * VM_FLAGS_ALREADY says that it's OK if the same mapping
1573		 * is already present.  For it to be meaningful, the requested
1574		 * mapping has to be at a fixed address (!VM_FLAGS_ANYWHERE) and
1575		 * we shouldn't try and remove what was mapped there first
1576		 * (!VM_FLAGS_OVERWRITE).
1577		 */
1578		if ((flags & VM_FLAGS_ANYWHERE) ||
1579		    (flags & VM_FLAGS_OVERWRITE)) {
1580			return KERN_INVALID_ARGUMENT;
1581		}
1582	}
1583
1584	effective_min_offset = map->min_offset;
1585	if (flags & VM_FLAGS_BEYOND_MAX) {
1586		/*
1587		 * Allow an insertion beyond the map's official top boundary.
1588		 */
1589		if (vm_map_is_64bit(map))
1590			effective_max_offset = 0xFFFFFFFFFFFFF000ULL;
1591		else
1592			effective_max_offset = 0x00000000FFFFF000ULL;
1593	} else {
1594		effective_max_offset = map->max_offset;
1595	}
1596
1597	if (size == 0 ||
1598	    (offset & PAGE_MASK_64) != 0) {
1599		*address = 0;
1600		return KERN_INVALID_ARGUMENT;
1601	}
1602
1603	VM_GET_FLAGS_ALIAS(flags, alias);
1604
1605#define	RETURN(value)	{ result = value; goto BailOut; }
1606
1607	assert(page_aligned(*address));
1608	assert(page_aligned(size));
1609
1610	/*
1611	 * Only zero-fill objects are allowed to be purgable.
1612	 * LP64todo - limit purgable objects to 32-bits for now
1613	 */
1614	if (purgable &&
1615	    (offset != 0 ||
1616	     (object != VM_OBJECT_NULL &&
1617	      (object->size != size ||
1618	       object->purgable == VM_PURGABLE_DENY))
1619	     || size > VM_MAX_ADDRESS)) /* LP64todo: remove when dp capable */
1620		return KERN_INVALID_ARGUMENT;
1621
1622	if (!anywhere && overwrite) {
1623		/*
1624		 * Create a temporary VM map to hold the old mappings in the
1625		 * affected area while we create the new one.
1626		 * This avoids releasing the VM map lock in
1627		 * vm_map_entry_delete() and allows atomicity
1628		 * when we want to replace some mappings with a new one.
1629		 * It also allows us to restore the old VM mappings if the
1630		 * new mapping fails.
1631		 */
1632		zap_old_map = vm_map_create(PMAP_NULL,
1633					    *address,
1634					    *address + size,
1635					    TRUE);
1636	}
1637
1638StartAgain: ;
1639
1640	start = *address;
1641
1642	if (anywhere) {
1643		vm_map_lock(map);
1644		map_locked = TRUE;
1645
1646		/*
1647		 *	Calculate the first possible address.
1648		 */
1649
1650		if (start < effective_min_offset)
1651			start = effective_min_offset;
1652		if (start > effective_max_offset)
1653			RETURN(KERN_NO_SPACE);
1654
1655		/*
1656		 *	Look for the first possible address;
1657		 *	if there's already something at this
1658		 *	address, we have to start after it.
1659		 */
1660
1661		assert(first_free_is_valid(map));
1662		if (start == effective_min_offset) {
1663			if ((entry = map->first_free) != vm_map_to_entry(map))
1664				start = entry->vme_end;
1665		} else {
1666			vm_map_entry_t	tmp_entry;
1667			if (vm_map_lookup_entry(map, start, &tmp_entry))
1668				start = tmp_entry->vme_end;
1669			entry = tmp_entry;
1670		}
1671
1672		/*
1673		 *	In any case, the "entry" always precedes
1674		 *	the proposed new region throughout the
1675		 *	loop:
1676		 */
1677
1678		while (TRUE) {
1679			register vm_map_entry_t	next;
1680
1681			/*
1682			 *	Find the end of the proposed new region.
1683			 *	Be sure we didn't go beyond the end, or
1684			 *	wrap around the address.
1685			 */
1686
1687			end = ((start + mask) & ~mask);
1688			if (end < start)
1689				RETURN(KERN_NO_SPACE);
1690			start = end;
1691			end += size;
1692
1693			if ((end > effective_max_offset) || (end < start)) {
1694				if (map->wait_for_space) {
1695					if (size <= (effective_max_offset -
1696						     effective_min_offset)) {
1697						assert_wait((event_t)map,
1698							    THREAD_ABORTSAFE);
1699						vm_map_unlock(map);
1700						map_locked = FALSE;
1701						thread_block(THREAD_CONTINUE_NULL);
1702						goto StartAgain;
1703					}
1704				}
1705				RETURN(KERN_NO_SPACE);
1706			}
1707
1708			/*
1709			 *	If there are no more entries, we must win.
1710			 */
1711
1712			next = entry->vme_next;
1713			if (next == vm_map_to_entry(map))
1714				break;
1715
1716			/*
1717			 *	If there is another entry, it must be
1718			 *	after the end of the potential new region.
1719			 */
1720
1721			if (next->vme_start >= end)
1722				break;
1723
1724			/*
1725			 *	Didn't fit -- move to the next entry.
1726			 */
1727
1728			entry = next;
1729			start = entry->vme_end;
1730		}
1731		*address = start;
1732	} else {
1733		/*
1734		 *	Verify that:
1735		 *		the address doesn't itself violate
1736		 *		the mask requirement.
1737		 */
1738
1739		vm_map_lock(map);
1740		map_locked = TRUE;
1741		if ((start & mask) != 0)
1742			RETURN(KERN_NO_SPACE);
1743
1744		/*
1745		 *	...	the address is within bounds
1746		 */
1747
1748		end = start + size;
1749
1750		if ((start < effective_min_offset) ||
1751		    (end > effective_max_offset) ||
1752		    (start >= end)) {
1753			RETURN(KERN_INVALID_ADDRESS);
1754		}
1755
1756		if (overwrite && zap_old_map != VM_MAP_NULL) {
1757			/*
1758			 * Fixed mapping and "overwrite" flag: attempt to
1759			 * remove all existing mappings in the specified
1760			 * address range, saving them in our "zap_old_map".
1761			 */
1762			(void) vm_map_delete(map, start, end,
1763					     VM_MAP_REMOVE_SAVE_ENTRIES,
1764					     zap_old_map);
1765		}
1766
1767		/*
1768		 *	...	the starting address isn't allocated
1769		 */
1770
1771		if (vm_map_lookup_entry(map, start, &entry)) {
1772			if (! (flags & VM_FLAGS_ALREADY)) {
1773				RETURN(KERN_NO_SPACE);
1774			}
1775			/*
1776			 * Check if what's already there is what we want.
1777			 */
1778			tmp_start = start;
1779			tmp_offset = offset;
1780			if (entry->vme_start < start) {
1781				tmp_start -= start - entry->vme_start;
1782				tmp_offset -= start - entry->vme_start;
1783
1784			}
1785			for (; entry->vme_start < end;
1786			     entry = entry->vme_next) {
1787				/*
1788				 * Check if the mapping's attributes
1789				 * match the existing map entry.
1790				 */
1791				if (entry == vm_map_to_entry(map) ||
1792				    entry->vme_start != tmp_start ||
1793				    entry->is_sub_map != is_submap ||
1794				    entry->offset != tmp_offset ||
1795				    entry->needs_copy != needs_copy ||
1796				    entry->protection != cur_protection ||
1797				    entry->max_protection != max_protection ||
1798				    entry->inheritance != inheritance ||
1799				    entry->alias != alias) {
1800					/* not the same mapping ! */
1801					RETURN(KERN_NO_SPACE);
1802				}
1803				/*
1804				 * Check if the same object is being mapped.
1805				 */
1806				if (is_submap) {
1807					if (entry->object.sub_map !=
1808					    (vm_map_t) object) {
1809						/* not the same submap */
1810						RETURN(KERN_NO_SPACE);
1811					}
1812				} else {
1813					if (entry->object.vm_object != object) {
1814						/* not the same VM object... */
1815						vm_object_t obj2;
1816
1817						obj2 = entry->object.vm_object;
1818						if ((obj2 == VM_OBJECT_NULL ||
1819						     obj2->internal) &&
1820						    (object == VM_OBJECT_NULL ||
1821						     object->internal)) {
1822							/*
1823							 * ... but both are
1824							 * anonymous memory,
1825							 * so equivalent.
1826							 */
1827						} else {
1828							RETURN(KERN_NO_SPACE);
1829						}
1830					}
1831				}
1832
1833				tmp_offset += entry->vme_end - entry->vme_start;
1834				tmp_start += entry->vme_end - entry->vme_start;
1835				if (entry->vme_end >= end) {
1836					/* reached the end of our mapping */
1837					break;
1838				}
1839			}
1840			/* it all matches:  let's use what's already there ! */
1841			RETURN(KERN_MEMORY_PRESENT);
1842		}
1843
1844		/*
1845		 *	...	the next region doesn't overlap the
1846		 *		end point.
1847		 */
1848
1849		if ((entry->vme_next != vm_map_to_entry(map)) &&
1850		    (entry->vme_next->vme_start < end))
1851			RETURN(KERN_NO_SPACE);
1852	}
1853
1854	/*
1855	 *	At this point,
1856	 *		"start" and "end" should define the endpoints of the
1857	 *			available new range, and
1858	 *		"entry" should refer to the region before the new
1859	 *			range, and
1860	 *
1861	 *		the map should be locked.
1862	 */
1863
1864	/*
1865	 *	See whether we can avoid creating a new entry (and object) by
1866	 *	extending one of our neighbors.  [So far, we only attempt to
1867	 *	extend from below.]  Note that we can never extend/join
1868	 *	purgable objects because they need to remain distinct
1869	 *	entities in order to implement their "volatile object"
1870	 *	semantics.
1871	 */
1872
1873	if (purgable) {
1874		if (object == VM_OBJECT_NULL) {
1875			object = vm_object_allocate(size);
1876			object->copy_strategy = MEMORY_OBJECT_COPY_NONE;
1877			object->purgable = VM_PURGABLE_NONVOLATILE;
1878			offset = (vm_object_offset_t)0;
1879		}
1880	} else if ((is_submap == FALSE) &&
1881		   (object == VM_OBJECT_NULL) &&
1882		   (entry != vm_map_to_entry(map)) &&
1883		   (entry->vme_end == start) &&
1884		   (!entry->is_shared) &&
1885		   (!entry->is_sub_map) &&
1886		   (entry->alias == alias) &&
1887		   (entry->inheritance == inheritance) &&
1888		   (entry->protection == cur_protection) &&
1889		   (entry->max_protection == max_protection) &&
1890		   (entry->behavior == VM_BEHAVIOR_DEFAULT) &&
1891		   (entry->in_transition == 0) &&
1892		   (entry->no_cache == no_cache) &&
1893		   ((alias == VM_MEMORY_REALLOC) ||
1894		    ((entry->vme_end - entry->vme_start) + size < NO_COALESCE_LIMIT)) &&
1895		   (entry->wired_count == 0)) { /* implies user_wired_count == 0 */
1896		if (vm_object_coalesce(entry->object.vm_object,
1897				       VM_OBJECT_NULL,
1898				       entry->offset,
1899				       (vm_object_offset_t) 0,
1900				       (vm_map_size_t)(entry->vme_end - entry->vme_start),
1901				       (vm_map_size_t)(end - entry->vme_end))) {
1902
1903			/*
1904			 *	Coalesced the two objects - can extend
1905			 *	the previous map entry to include the
1906			 *	new range.
1907			 */
1908			map->size += (end - entry->vme_end);
1909			entry->vme_end = end;
1910			UPDATE_FIRST_FREE(map, map->first_free);
1911			RETURN(KERN_SUCCESS);
1912		}
1913	}
1914
1915	/*
1916	 *	Create a new entry
1917	 *	LP64todo - for now, we can only allocate 4GB internal objects
1918	 *	because the default pager can't page bigger ones.  Remove this
1919	 *	when it can.
1920	 *
1921	 * XXX FBDP
1922	 * The reserved "page zero" in each process's address space can
1923	 * be arbitrarily large.  Splitting it into separate 4GB objects and
1924	 * therefore different VM map entries serves no purpose and just
1925	 * slows down operations on the VM map, so let's not split the
1926	 * allocation into 4GB chunks if the max protection is NONE.  That
1927	 * memory should never be accessible, so it will never get to the
1928	 * default pager.
1929	 */
1930	tmp_start = start;
1931	if (object == VM_OBJECT_NULL &&
1932	    size > (vm_map_size_t)VM_MAX_ADDRESS &&
1933	    max_protection != VM_PROT_NONE)
1934		tmp_end = tmp_start + (vm_map_size_t)VM_MAX_ADDRESS;
1935	else
1936		tmp_end = end;
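	/*
	 * Insert the new range as one or more map entries.  Each pass of
	 * the loop below covers at most VM_MAX_ADDRESS bytes (see the
	 * LP64todo note above); tmp_start/tmp_end are advanced in the
	 * loop condition until the whole [start, end) range is covered.
	 */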
1937	do {
1938		new_entry = vm_map_entry_insert(map, entry, tmp_start, tmp_end,
1939						object,	offset, needs_copy,
1940						FALSE, FALSE,
1941						cur_protection, max_protection,
1942						VM_BEHAVIOR_DEFAULT,
1943						inheritance, 0, no_cache);
1944		new_entry->alias = alias;
1945		if (is_submap) {
1946			vm_map_t	submap;
1947			boolean_t	submap_is_64bit;
1948			boolean_t	use_pmap;
1949
1950			new_entry->is_sub_map = TRUE;
1951			submap = (vm_map_t) object;
1952			submap_is_64bit = vm_map_is_64bit(submap);
1953			use_pmap = (alias == VM_MEMORY_SHARED_PMAP);
1954#ifndef NO_NESTED_PMAP
1955			if (use_pmap && submap->pmap == NULL) {
1956				/* we need a sub pmap to nest... */
1957				submap->pmap = pmap_create(0, submap_is_64bit);
1958				if (submap->pmap == NULL) {
1959					/* let's proceed without nesting... */
1960				}
1961			}
1962			if (use_pmap && submap->pmap != NULL) {
1963				kr = pmap_nest(map->pmap,
1964					       submap->pmap,
1965					       tmp_start,
1966					       tmp_start,
1967					       tmp_end - tmp_start);
1968				if (kr != KERN_SUCCESS) {
1969					printf("vm_map_enter: "
1970					       "pmap_nest(0x%llx,0x%llx) "
1971					       "error 0x%x\n",
1972					       (long long)tmp_start,
1973					       (long long)tmp_end,
1974					       kr);
1975				} else {
1976					/* we're now nested ! */
1977					new_entry->use_pmap = TRUE;
1978					pmap_empty = FALSE;
1979				}
1980			}
1981#endif /* NO_NESTED_PMAP */
1982		}
1983		entry = new_entry;
1984	} while (tmp_end != end &&
1985		 (tmp_start = tmp_end) &&
1986		 (tmp_end = (end - tmp_end > (vm_map_size_t)VM_MAX_ADDRESS) ?
1987		  tmp_end + (vm_map_size_t)VM_MAX_ADDRESS : end));
1988
1989	vm_map_unlock(map);
1990	map_locked = FALSE;
1991
1992	new_mapping_established = TRUE;
1993
1994	/*	Wire down the new entry if the user
1995	 *	requested all new map entries be wired.
1996	 */
1997	if (map->wiring_required) {
1998		pmap_empty = FALSE; /* pmap won't be empty */
1999		result = vm_map_wire(map, start, end,
2000				     new_entry->protection, TRUE);
2001		RETURN(result);
2002	}
2003
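	/*
	 * For small (< 128KB), fixed-address, non-copy-on-write mappings
	 * of an existing object, pre-populate the pmap up front (when
	 * vm_map_pmap_enter_enable is set) so that the first accesses
	 * don't have to take soft faults.
	 */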
2004	if ((object != VM_OBJECT_NULL) &&
2005	    (vm_map_pmap_enter_enable) &&
2006	    (!anywhere)	 &&
2007	    (!needs_copy) &&
2008	    (size < (128*1024))) {
2009		pmap_empty = FALSE; /* pmap won't be empty */
2010
2011		if (override_nx(map, alias) && cur_protection)
2012		        cur_protection |= VM_PROT_EXECUTE;
2013
2014		vm_map_pmap_enter(map, start, end,
2015				  object, offset, cur_protection);
2016	}
2017
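	/*
	 * The RETURN() macro used above jumps here with "result" set.
	 * Both paths converge below: on success we notify the pager of
	 * the new mapping, on failure we undo any partial work.
	 */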
2018BailOut: ;
2019	if (result == KERN_SUCCESS) {
2020		vm_prot_t pager_prot;
2021		memory_object_t pager;
2022
2023		if (pmap_empty &&
2024		    !(flags & VM_FLAGS_NO_PMAP_CHECK)) {
2025			assert(vm_map_pmap_is_empty(map,
2026						    *address,
2027						    *address+size));
2028		}
2029
2030		/*
2031		 * For "named" VM objects, let the pager know that the
2032		 * memory object is being mapped.  Some pagers need to keep
2033		 * track of this, to know when they can reclaim the memory
2034		 * object, for example.
2035		 * VM calls memory_object_map() for each mapping (specifying
2036		 * the protection of each mapping) and calls
2037		 * memory_object_last_unmap() when all the mappings are gone.
2038		 */
2039		pager_prot = max_protection;
2040		if (needs_copy) {
2041			/*
2042			 * Copy-On-Write mapping: won't modify
2043			 * the memory object.
2044			 */
2045			pager_prot &= ~VM_PROT_WRITE;
2046		}
2047		if (!is_submap &&
2048		    object != VM_OBJECT_NULL &&
2049		    object->named &&
2050		    object->pager != MEMORY_OBJECT_NULL) {
2051			vm_object_lock(object);
2052			pager = object->pager;
2053			if (object->named &&
2054			    pager != MEMORY_OBJECT_NULL) {
2055				assert(object->pager_ready);
2056				vm_object_mapping_wait(object, THREAD_UNINT);
2057				vm_object_mapping_begin(object);
2058				vm_object_unlock(object);
2059
2060				kr = memory_object_map(pager, pager_prot);
2061				assert(kr == KERN_SUCCESS);
2062
2063				vm_object_lock(object);
2064				vm_object_mapping_end(object);
2065			}
2066			vm_object_unlock(object);
2067		}
2068	} else {
2069		if (new_mapping_established) {
2070			/*
2071			 * We have to get rid of the new mappings since we
2072			 * won't make them available to the user.
			 * Try to do that atomically, to minimize the risk
			 * that someone else creates new mappings in that range.
2075			 */
2076			zap_new_map = vm_map_create(PMAP_NULL,
2077						    *address,
2078						    *address + size,
2079						    TRUE);
2080			if (!map_locked) {
2081				vm_map_lock(map);
2082				map_locked = TRUE;
2083			}
2084			(void) vm_map_delete(map, *address, *address+size,
2085					     VM_MAP_REMOVE_SAVE_ENTRIES,
2086					     zap_new_map);
2087		}
2088		if (zap_old_map != VM_MAP_NULL &&
2089		    zap_old_map->hdr.nentries != 0) {
2090			vm_map_entry_t	entry1, entry2;
2091
2092			/*
2093			 * The new mapping failed.  Attempt to restore
2094			 * the old mappings, saved in the "zap_old_map".
2095			 */
2096			if (!map_locked) {
2097				vm_map_lock(map);
2098				map_locked = TRUE;
2099			}
2100
2101			/* first check if the coast is still clear */
2102			start = vm_map_first_entry(zap_old_map)->vme_start;
2103			end = vm_map_last_entry(zap_old_map)->vme_end;
2104			if (vm_map_lookup_entry(map, start, &entry1) ||
2105			    vm_map_lookup_entry(map, end, &entry2) ||
2106			    entry1 != entry2) {
2107				/*
2108				 * Part of that range has already been
2109				 * re-mapped:  we can't restore the old
2110				 * mappings...
2111				 */
2112				vm_map_enter_restore_failures++;
2113			} else {
2114				/*
2115				 * Transfer the saved map entries from
2116				 * "zap_old_map" to the original "map",
2117				 * inserting them all after "entry1".
2118				 */
2119				for (entry2 = vm_map_first_entry(zap_old_map);
2120				     entry2 != vm_map_to_entry(zap_old_map);
2121				     entry2 = vm_map_first_entry(zap_old_map)) {
2122					vm_map_size_t entry_size;
2123
2124					entry_size = (entry2->vme_end -
2125						      entry2->vme_start);
2126					vm_map_entry_unlink(zap_old_map,
2127							    entry2);
2128					zap_old_map->size -= entry_size;
2129					vm_map_entry_link(map, entry1, entry2);
2130					map->size += entry_size;
2131					entry1 = entry2;
2132				}
2133				if (map->wiring_required) {
2134					/*
2135					 * XXX TODO: we should rewire the
2136					 * old pages here...
2137					 */
2138				}
2139				vm_map_enter_restore_successes++;
2140			}
2141		}
2142	}
2143
2144	if (map_locked) {
2145		vm_map_unlock(map);
2146	}
2147
2148	/*
2149	 * Get rid of the "zap_maps" and all the map entries that
2150	 * they may still contain.
2151	 */
2152	if (zap_old_map != VM_MAP_NULL) {
2153		vm_map_destroy(zap_old_map, VM_MAP_REMOVE_NO_PMAP_CLEANUP);
2154		zap_old_map = VM_MAP_NULL;
2155	}
2156	if (zap_new_map != VM_MAP_NULL) {
2157		vm_map_destroy(zap_new_map, VM_MAP_REMOVE_NO_PMAP_CLEANUP);
2158		zap_new_map = VM_MAP_NULL;
2159	}
2160
2161	return result;
2162
2163#undef	RETURN
2164}
2165
2166kern_return_t
2167vm_map_enter_mem_object(
2168	vm_map_t		target_map,
2169	vm_map_offset_t		*address,
2170	vm_map_size_t		initial_size,
2171	vm_map_offset_t		mask,
2172	int			flags,
2173	ipc_port_t		port,
2174	vm_object_offset_t	offset,
2175	boolean_t		copy,
2176	vm_prot_t		cur_protection,
2177	vm_prot_t		max_protection,
2178	vm_inherit_t		inheritance)
2179{
2180	vm_map_address_t	map_addr;
2181	vm_map_size_t		map_size;
2182	vm_object_t		object;
2183	vm_object_size_t	size;
2184	kern_return_t		result;
2185
2186	/*
2187	 * Check arguments for validity
2188	 */
2189	if ((target_map == VM_MAP_NULL) ||
2190	    (cur_protection & ~VM_PROT_ALL) ||
2191	    (max_protection & ~VM_PROT_ALL) ||
2192	    (inheritance > VM_INHERIT_LAST_VALID) ||
2193	    initial_size == 0)
2194		return KERN_INVALID_ARGUMENT;
2195
2196	map_addr = vm_map_trunc_page(*address);
2197	map_size = vm_map_round_page(initial_size);
2198	size = vm_object_round_page(initial_size);
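	/*
	 * "map_addr"/"map_size" describe the mapping in the target map's
	 * page units; "size" is the same request rounded separately for
	 * object-level bookkeeping (e.g. the named entry bounds checks
	 * below).
	 */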
2199
2200	/*
2201	 * Find the vm object (if any) corresponding to this port.
2202	 */
2203	if (!IP_VALID(port)) {
2204		object = VM_OBJECT_NULL;
2205		offset = 0;
2206		copy = FALSE;
2207	} else if (ip_kotype(port) == IKOT_NAMED_ENTRY) {
2208		vm_named_entry_t	named_entry;
2209
2210		named_entry = (vm_named_entry_t) port->ip_kobject;
2211		/* a few checks to make sure user is obeying rules */
2212		if (size == 0) {
2213			if (offset >= named_entry->size)
2214				return KERN_INVALID_RIGHT;
2215			size = named_entry->size - offset;
2216		}
2217		if ((named_entry->protection & max_protection) !=
2218		    max_protection)
2219			return KERN_INVALID_RIGHT;
2220		if ((named_entry->protection & cur_protection) !=
2221		    cur_protection)
2222			return KERN_INVALID_RIGHT;
2223		if (named_entry->size < (offset + size))
2224			return KERN_INVALID_ARGUMENT;
2225
		/* the caller's "offset" parameter is relative to the start */
		/* of the named entry; convert it to an offset in the object */
2228		offset = offset + named_entry->offset;
2229
2230		named_entry_lock(named_entry);
2231		if (named_entry->is_sub_map) {
2232			vm_map_t		submap;
2233
2234			submap = named_entry->backing.map;
2235			vm_map_lock(submap);
2236			vm_map_reference(submap);
2237			vm_map_unlock(submap);
2238			named_entry_unlock(named_entry);
2239
2240			result = vm_map_enter(target_map,
2241					      &map_addr,
2242					      map_size,
2243					      mask,
2244					      flags | VM_FLAGS_SUBMAP,
2245					      (vm_object_t) submap,
2246					      offset,
2247					      copy,
2248					      cur_protection,
2249					      max_protection,
2250					      inheritance);
2251			if (result != KERN_SUCCESS) {
2252				vm_map_deallocate(submap);
2253			} else {
2254				/*
2255				 * No need to lock "submap" just to check its
2256				 * "mapped" flag: that flag is never reset
2257				 * once it's been set and if we race, we'll
2258				 * just end up setting it twice, which is OK.
2259				 */
2260				if (submap->mapped == FALSE) {
2261					/*
2262					 * This submap has never been mapped.
2263					 * Set its "mapped" flag now that it
2264					 * has been mapped.
2265					 * This happens only for the first ever
2266					 * mapping of a "submap".
2267					 */
2268					vm_map_lock(submap);
2269					submap->mapped = TRUE;
2270					vm_map_unlock(submap);
2271				}
2272				*address = map_addr;
2273			}
2274			return result;
2275
2276		} else if (named_entry->is_pager) {
2277			unsigned int	access;
2278			vm_prot_t	protections;
2279			unsigned int	wimg_mode;
2280			boolean_t	cache_attr;
2281
2282			protections = named_entry->protection & VM_PROT_ALL;
2283			access = GET_MAP_MEM(named_entry->protection);
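			/*
			 * The named entry's "protection" field carries both
			 * the VM protection bits and the cache access mode
			 * (MAP_MEM_*), which is translated into WIMG bits
			 * further down.
			 */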
2284
2285			object = vm_object_enter(named_entry->backing.pager,
2286						 named_entry->size,
2287						 named_entry->internal,
2288						 FALSE,
2289						 FALSE);
2290			if (object == VM_OBJECT_NULL) {
2291				named_entry_unlock(named_entry);
2292				return KERN_INVALID_OBJECT;
2293			}
2294
2295			/* JMM - drop reference on pager here */
2296
2297			/* create an extra ref for the named entry */
2298			vm_object_lock(object);
2299			vm_object_reference_locked(object);
2300			named_entry->backing.object = object;
2301			named_entry->is_pager = FALSE;
2302			named_entry_unlock(named_entry);
2303
2304			wimg_mode = object->wimg_bits;
2305			if (access == MAP_MEM_IO) {
2306				wimg_mode = VM_WIMG_IO;
2307			} else if (access == MAP_MEM_COPYBACK) {
2308				wimg_mode = VM_WIMG_USE_DEFAULT;
2309			} else if (access == MAP_MEM_WTHRU) {
2310				wimg_mode = VM_WIMG_WTHRU;
2311			} else if (access == MAP_MEM_WCOMB) {
2312				wimg_mode = VM_WIMG_WCOMB;
2313			}
2314			if (wimg_mode == VM_WIMG_IO ||
2315			    wimg_mode == VM_WIMG_WCOMB)
2316				cache_attr = TRUE;
2317			else
2318				cache_attr = FALSE;
2319
2320			/* wait for object (if any) to be ready */
2321			if (!named_entry->internal) {
2322				while (!object->pager_ready) {
2323					vm_object_wait(
2324						object,
2325						VM_OBJECT_EVENT_PAGER_READY,
2326						THREAD_UNINT);
2327					vm_object_lock(object);
2328				}
2329			}
2330
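			/*
			 * If the cache attributes changed, disconnect any
			 * existing pmap mappings of the object's resident
			 * pages so that they will be re-entered with the
			 * new WIMG bits.
			 */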
2331			if (object->wimg_bits != wimg_mode) {
2332				vm_page_t p;
2333
2334				vm_object_paging_wait(object, THREAD_UNINT);
2335
2336				object->wimg_bits = wimg_mode;
2337				queue_iterate(&object->memq, p, vm_page_t, listq) {
2338					if (!p->fictitious) {
2339					        if (p->pmapped)
2340						        pmap_disconnect(p->phys_page);
2341						if (cache_attr)
2342						        pmap_sync_page_attributes_phys(p->phys_page);
2343					}
2344				}
2345			}
2346			object->true_share = TRUE;
2347			if (object->copy_strategy == MEMORY_OBJECT_COPY_SYMMETRIC)
2348				object->copy_strategy = MEMORY_OBJECT_COPY_DELAY;
2349			vm_object_unlock(object);
2350		} else {
			/* This is the case where we are going to map  */
			/* an already mapped object.  If the object is */
			/* not ready, it is internal.  An external     */
			/* object cannot be mapped until it is ready,  */
			/* so we can skip the ready check in this      */
			/* case.  */
2357			object = named_entry->backing.object;
2358			assert(object != VM_OBJECT_NULL);
2359			named_entry_unlock(named_entry);
2360			vm_object_reference(object);
2361		}
2362	} else if (ip_kotype(port) == IKOT_MEMORY_OBJECT) {
2363		/*
2364		 * JMM - This is temporary until we unify named entries
2365		 * and raw memory objects.
2366		 *
2367		 * Detected fake ip_kotype for a memory object.  In
2368		 * this case, the port isn't really a port at all, but
2369		 * instead is just a raw memory object.
2370		 */
2371
2372		object = vm_object_enter((memory_object_t)port,
2373					 size, FALSE, FALSE, FALSE);
2374		if (object == VM_OBJECT_NULL)
2375			return KERN_INVALID_OBJECT;
2376
2377		/* wait for object (if any) to be ready */
2378		if (object != VM_OBJECT_NULL) {
2379			if (object == kernel_object) {
2380				printf("Warning: Attempt to map kernel object"
2381					" by a non-private kernel entity\n");
2382				return KERN_INVALID_OBJECT;
2383			}
2384			vm_object_lock(object);
2385			while (!object->pager_ready) {
2386				vm_object_wait(object,
2387					       VM_OBJECT_EVENT_PAGER_READY,
2388					       THREAD_UNINT);
2389				vm_object_lock(object);
2390			}
2391			vm_object_unlock(object);
2392		}
2393	} else {
2394		return KERN_INVALID_OBJECT;
2395	}
2396
2397	if (object != VM_OBJECT_NULL &&
2398	    object->named &&
2399	    object->pager != MEMORY_OBJECT_NULL &&
2400	    object->copy_strategy != MEMORY_OBJECT_COPY_NONE) {
2401		memory_object_t pager;
2402		vm_prot_t	pager_prot;
2403		kern_return_t	kr;
2404
2405		/*
2406		 * For "named" VM objects, let the pager know that the
2407		 * memory object is being mapped.  Some pagers need to keep
2408		 * track of this, to know when they can reclaim the memory
2409		 * object, for example.
2410		 * VM calls memory_object_map() for each mapping (specifying
2411		 * the protection of each mapping) and calls
2412		 * memory_object_last_unmap() when all the mappings are gone.
2413		 */
2414		pager_prot = max_protection;
2415		if (copy) {
2416			/*
2417			 * Copy-On-Write mapping: won't modify the
2418			 * memory object.
2419			 */
2420			pager_prot &= ~VM_PROT_WRITE;
2421		}
2422		vm_object_lock(object);
2423		pager = object->pager;
2424		if (object->named &&
2425		    pager != MEMORY_OBJECT_NULL &&
2426		    object->copy_strategy != MEMORY_OBJECT_COPY_NONE) {
2427			assert(object->pager_ready);
2428			vm_object_mapping_wait(object, THREAD_UNINT);
2429			vm_object_mapping_begin(object);
2430			vm_object_unlock(object);
2431
2432			kr = memory_object_map(pager, pager_prot);
2433			assert(kr == KERN_SUCCESS);
2434
2435			vm_object_lock(object);
2436			vm_object_mapping_end(object);
2437		}
2438		vm_object_unlock(object);
2439	}
2440
2441	/*
2442	 *	Perform the copy if requested
2443	 */
2444
2445	if (copy) {
2446		vm_object_t		new_object;
2447		vm_object_offset_t	new_offset;
2448
2449		result = vm_object_copy_strategically(object, offset, size,
2450						      &new_object, &new_offset,
2451						      &copy);
2452
2453
2454		if (result == KERN_MEMORY_RESTART_COPY) {
2455			boolean_t success;
2456			boolean_t src_needs_copy;
2457
2458			/*
2459			 * XXX
2460			 * We currently ignore src_needs_copy.
2461			 * This really is the issue of how to make
2462			 * MEMORY_OBJECT_COPY_SYMMETRIC safe for
2463			 * non-kernel users to use. Solution forthcoming.
2464			 * In the meantime, since we don't allow non-kernel
2465			 * memory managers to specify symmetric copy,
2466			 * we won't run into problems here.
2467			 */
2468			new_object = object;
2469			new_offset = offset;
2470			success = vm_object_copy_quickly(&new_object,
2471							 new_offset, size,
2472							 &src_needs_copy,
2473							 &copy);
2474			assert(success);
2475			result = KERN_SUCCESS;
2476		}
2477		/*
2478		 *	Throw away the reference to the
2479		 *	original object, as it won't be mapped.
2480		 */
2481
2482		vm_object_deallocate(object);
2483
2484		if (result != KERN_SUCCESS)
2485			return result;
2486
2487		object = new_object;
2488		offset = new_offset;
2489	}
2490
2491	result = vm_map_enter(target_map,
2492			      &map_addr, map_size,
2493			      (vm_map_offset_t)mask,
2494			      flags,
2495			      object, offset,
2496			      copy,
2497			      cur_protection, max_protection, inheritance);
2498	if (result != KERN_SUCCESS)
2499		vm_object_deallocate(object);
2500	*address = map_addr;
2501	return result;
2502}
2503
2504#if	VM_CPM
2505
2506#ifdef MACH_ASSERT
2507extern pmap_paddr_t	avail_start, avail_end;
2508#endif
2509
2510/*
2511 *	Allocate memory in the specified map, with the caveat that
2512 *	the memory is physically contiguous.  This call may fail
2513 *	if the system can't find sufficient contiguous memory.
2514 *	This call may cause or lead to heart-stopping amounts of
2515 *	paging activity.
2516 *
2517 *	Memory obtained from this call should be freed in the
2518 *	normal way, viz., via vm_deallocate.
2519 */
2520kern_return_t
2521vm_map_enter_cpm(
2522	vm_map_t		map,
2523	vm_map_offset_t	*addr,
2524	vm_map_size_t		size,
2525	int			flags)
2526{
2527	vm_object_t		cpm_obj;
2528	pmap_t			pmap;
2529	vm_page_t		m, pages;
2530	kern_return_t		kr;
2531	vm_map_offset_t		va, start, end, offset;
2532#if	MACH_ASSERT
2533	vm_map_offset_t		prev_addr;
2534#endif	/* MACH_ASSERT */
2535
2536	boolean_t		anywhere = ((VM_FLAGS_ANYWHERE & flags) != 0);
2537
2538	if (!vm_allocate_cpm_enabled)
2539		return KERN_FAILURE;
2540
2541	if (size == 0) {
2542		*addr = 0;
2543		return KERN_SUCCESS;
2544	}
2545	if (anywhere)
2546		*addr = vm_map_min(map);
2547	else
2548		*addr = vm_map_trunc_page(*addr);
2549	size = vm_map_round_page(size);
2550
2551	/*
2552	 * LP64todo - cpm_allocate should probably allow
2553	 * allocations of >4GB, but not with the current
2554	 * algorithm, so just cast down the size for now.
2555	 */
2556	if (size > VM_MAX_ADDRESS)
2557		return KERN_RESOURCE_SHORTAGE;
2558	if ((kr = cpm_allocate(CAST_DOWN(vm_size_t, size),
2559			       &pages, 0, TRUE)) != KERN_SUCCESS)
2560		return kr;
2561
2562	cpm_obj = vm_object_allocate((vm_object_size_t)size);
2563	assert(cpm_obj != VM_OBJECT_NULL);
2564	assert(cpm_obj->internal);
2565	assert(cpm_obj->size == (vm_object_size_t)size);
2566	assert(cpm_obj->can_persist == FALSE);
2567	assert(cpm_obj->pager_created == FALSE);
2568	assert(cpm_obj->pageout == FALSE);
2569	assert(cpm_obj->shadow == VM_OBJECT_NULL);
2570
2571	/*
2572	 *	Insert pages into object.
2573	 */
2574
2575	vm_object_lock(cpm_obj);
2576	for (offset = 0; offset < size; offset += PAGE_SIZE) {
2577		m = pages;
2578		pages = NEXT_PAGE(m);
2579		*(NEXT_PAGE_PTR(m)) = VM_PAGE_NULL;
2580
2581		assert(!m->gobbled);
2582		assert(!m->wanted);
2583		assert(!m->pageout);
2584		assert(!m->tabled);
2585		assert(m->wire_count);
2586		/*
2587		 * ENCRYPTED SWAP:
2588		 * "m" is not supposed to be pageable, so it
2589		 * should not be encrypted.  It wouldn't be safe
2590		 * to enter it in a new VM object while encrypted.
2591		 */
2592		ASSERT_PAGE_DECRYPTED(m);
2593		assert(m->busy);
		assert(m->phys_page >= (avail_start >> PAGE_SHIFT) &&
		       m->phys_page <= (avail_end >> PAGE_SHIFT));
2595
2596		m->busy = FALSE;
2597		vm_page_insert(m, cpm_obj, offset);
2598	}
2599	assert(cpm_obj->resident_page_count == size / PAGE_SIZE);
2600	vm_object_unlock(cpm_obj);
2601
2602	/*
2603	 *	Hang onto a reference on the object in case a
2604	 *	multi-threaded application for some reason decides
2605	 *	to deallocate the portion of the address space into
2606	 *	which we will insert this object.
2607	 *
2608	 *	Unfortunately, we must insert the object now before
2609	 *	we can talk to the pmap module about which addresses
2610	 *	must be wired down.  Hence, the race with a multi-
2611	 *	threaded app.
2612	 */
2613	vm_object_reference(cpm_obj);
2614
2615	/*
2616	 *	Insert object into map.
2617	 */
2618
2619	kr = vm_map_enter(
2620		map,
2621		addr,
2622		size,
2623		(vm_map_offset_t)0,
2624		flags,
2625		cpm_obj,
2626		(vm_object_offset_t)0,
2627		FALSE,
2628		VM_PROT_ALL,
2629		VM_PROT_ALL,
2630		VM_INHERIT_DEFAULT);
2631
2632	if (kr != KERN_SUCCESS) {
2633		/*
2634		 *	A CPM object doesn't have can_persist set,
2635		 *	so all we have to do is deallocate it to
2636		 *	free up these pages.
2637		 */
2638		assert(cpm_obj->pager_created == FALSE);
2639		assert(cpm_obj->can_persist == FALSE);
2640		assert(cpm_obj->pageout == FALSE);
2641		assert(cpm_obj->shadow == VM_OBJECT_NULL);
2642		vm_object_deallocate(cpm_obj); /* kill acquired ref */
2643		vm_object_deallocate(cpm_obj); /* kill creation ref */
2644	}
2645
2646	/*
2647	 *	Inform the physical mapping system that the
2648	 *	range of addresses may not fault, so that
2649	 *	page tables and such can be locked down as well.
2650	 */
2651	start = *addr;
2652	end = start + size;
2653	pmap = vm_map_pmap(map);
2654	pmap_pageable(pmap, start, end, FALSE);
2655
2656	/*
2657	 *	Enter each page into the pmap, to avoid faults.
2658	 *	Note that this loop could be coded more efficiently,
2659	 *	if the need arose, rather than looking up each page
2660	 *	again.
2661	 */
2662	for (offset = 0, va = start; offset < size;
2663	     va += PAGE_SIZE, offset += PAGE_SIZE) {
2664	        int type_of_fault;
2665
2666		vm_object_lock(cpm_obj);
2667		m = vm_page_lookup(cpm_obj, (vm_object_offset_t)offset);
2668		assert(m != VM_PAGE_NULL);
2669
2670		vm_page_zero_fill(m);
2671
2672		type_of_fault = DBG_ZERO_FILL_FAULT;
2673
2674		vm_fault_enter(m, pmap, va, VM_PROT_ALL,
2675			       m->wire_count != 0, FALSE, FALSE,
2676			       &type_of_fault);
2677
2678		vm_object_unlock(cpm_obj);
2679	}
2680
2681#if	MACH_ASSERT
2682	/*
2683	 *	Verify ordering in address space.
2684	 */
2685	for (offset = 0; offset < size; offset += PAGE_SIZE) {
2686		vm_object_lock(cpm_obj);
2687		m = vm_page_lookup(cpm_obj, (vm_object_offset_t)offset);
2688		vm_object_unlock(cpm_obj);
2689		if (m == VM_PAGE_NULL)
2690			panic("vm_allocate_cpm:  obj 0x%x off 0x%x no page",
2691			      cpm_obj, offset);
2692		assert(m->tabled);
2693		assert(!m->busy);
2694		assert(!m->wanted);
2695		assert(!m->fictitious);
2696		assert(!m->private);
2697		assert(!m->absent);
2698		assert(!m->error);
2699		assert(!m->cleaning);
2700		assert(!m->precious);
2701		assert(!m->clustered);
2702		if (offset != 0) {
2703			if (m->phys_page != prev_addr + 1) {
2704				printf("start 0x%x end 0x%x va 0x%x\n",
2705				       start, end, va);
2706				printf("obj 0x%x off 0x%x\n", cpm_obj, offset);
2707				printf("m 0x%x prev_address 0x%x\n", m,
2708				       prev_addr);
2709				panic("vm_allocate_cpm:  pages not contig!");
2710			}
2711		}
2712		prev_addr = m->phys_page;
2713	}
2714#endif	/* MACH_ASSERT */
2715
2716	vm_object_deallocate(cpm_obj); /* kill extra ref */
2717
2718	return kr;
2719}
2720
2721
2722#else	/* VM_CPM */
2723
2724/*
2725 *	Interface is defined in all cases, but unless the kernel
2726 *	is built explicitly for this option, the interface does
2727 *	nothing.
2728 */
2729
2730kern_return_t
2731vm_map_enter_cpm(
2732	__unused vm_map_t	map,
2733	__unused vm_map_offset_t	*addr,
2734	__unused vm_map_size_t	size,
2735	__unused int		flags)
2736{
2737	return KERN_FAILURE;
2738}
2739#endif /* VM_CPM */
2740
2741/*
2742 * Clip and unnest a portion of a nested submap mapping.
2743 */
2744static void
2745vm_map_clip_unnest(
2746	vm_map_t	map,
2747	vm_map_entry_t	entry,
2748	vm_map_offset_t	start_unnest,
2749	vm_map_offset_t	end_unnest)
2750{
2751	assert(entry->is_sub_map);
2752	assert(entry->object.sub_map != NULL);
2753
2754	if (entry->vme_start > start_unnest ||
2755	    entry->vme_end < end_unnest) {
2756		panic("vm_map_clip_unnest(0x%llx,0x%llx): "
2757		      "bad nested entry: start=0x%llx end=0x%llx\n",
2758		      (long long)start_unnest, (long long)end_unnest,
2759		      (long long)entry->vme_start, (long long)entry->vme_end);
2760	}
2761	if (start_unnest > entry->vme_start) {
2762		_vm_map_clip_start(&map->hdr,
2763				   entry,
2764				   start_unnest);
2765		UPDATE_FIRST_FREE(map, map->first_free);
2766	}
2767	if (entry->vme_end > end_unnest) {
2768		_vm_map_clip_end(&map->hdr,
2769				 entry,
2770				 end_unnest);
2771		UPDATE_FIRST_FREE(map, map->first_free);
2772	}
2773
2774	pmap_unnest(map->pmap,
2775		    entry->vme_start,
2776		    entry->vme_end - entry->vme_start);
2777	if ((map->mapped) && (map->ref_count)) {
2778		/* clean up parent map/maps */
2779		vm_map_submap_pmap_clean(
2780			map, entry->vme_start,
2781			entry->vme_end,
2782			entry->object.sub_map,
2783			entry->offset);
2784	}
2785	entry->use_pmap = FALSE;
2786}
2787
2788/*
2789 *	vm_map_clip_start:	[ internal use only ]
2790 *
2791 *	Asserts that the given entry begins at or after
2792 *	the specified address; if necessary,
2793 *	it splits the entry into two.
2794 */
2795static void
2796vm_map_clip_start(
2797	vm_map_t	map,
2798	vm_map_entry_t	entry,
2799	vm_map_offset_t	startaddr)
2800{
2801#ifndef NO_NESTED_PMAP
2802	if (entry->use_pmap &&
2803	    startaddr >= entry->vme_start) {
2804		vm_map_offset_t	start_unnest, end_unnest;
2805
2806		/*
2807		 * Make sure "startaddr" is no longer in a nested range
2808		 * before we clip.  Unnest only the minimum range the platform
2809		 * can handle.
2810		 */
2811		start_unnest = startaddr & ~(pmap_nesting_size_min - 1);
2812		end_unnest = start_unnest + pmap_nesting_size_min;
2813		vm_map_clip_unnest(map, entry, start_unnest, end_unnest);
2814	}
2815#endif /* NO_NESTED_PMAP */
2816	if (startaddr > entry->vme_start) {
2817		if (entry->object.vm_object &&
2818		    !entry->is_sub_map &&
2819		    entry->object.vm_object->phys_contiguous) {
2820			pmap_remove(map->pmap,
2821				    (addr64_t)(entry->vme_start),
2822				    (addr64_t)(entry->vme_end));
2823		}
2824		_vm_map_clip_start(&map->hdr, entry, startaddr);
2825		UPDATE_FIRST_FREE(map, map->first_free);
2826	}
2827}
2828
2829
2830#define vm_map_copy_clip_start(copy, entry, startaddr) \
2831	MACRO_BEGIN \
2832	if ((startaddr) > (entry)->vme_start) \
2833		_vm_map_clip_start(&(copy)->cpy_hdr,(entry),(startaddr)); \
2834	MACRO_END
2835
2836/*
2837 *	This routine is called only when it is known that
2838 *	the entry must be split.
2839 */
2840static void
2841_vm_map_clip_start(
2842	register struct vm_map_header	*map_header,
2843	register vm_map_entry_t		entry,
2844	register vm_map_offset_t		start)
2845{
2846	register vm_map_entry_t	new_entry;
2847
2848	/*
2849	 *	Split off the front portion --
2850	 *	note that we must insert the new
2851	 *	entry BEFORE this one, so that
2852	 *	this entry has the specified starting
2853	 *	address.
2854	 */
2855
2856	new_entry = _vm_map_entry_create(map_header);
2857	vm_map_entry_copy_full(new_entry, entry);
2858
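	/*
	 * "new_entry" keeps the front portion [vme_start, start);
	 * the original entry is advanced to begin at "start" and its
	 * object offset is bumped by the amount clipped off the front.
	 */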
2859	new_entry->vme_end = start;
2860	entry->offset += (start - entry->vme_start);
2861	entry->vme_start = start;
2862
2863	_vm_map_entry_link(map_header, entry->vme_prev, new_entry);
2864
2865	if (entry->is_sub_map)
2866		vm_map_reference(new_entry->object.sub_map);
2867	else
2868		vm_object_reference(new_entry->object.vm_object);
2869}
2870
2871
2872/*
2873 *	vm_map_clip_end:	[ internal use only ]
2874 *
2875 *	Asserts that the given entry ends at or before
2876 *	the specified address; if necessary,
2877 *	it splits the entry into two.
2878 */
2879static void
2880vm_map_clip_end(
2881	vm_map_t	map,
2882	vm_map_entry_t	entry,
2883	vm_map_offset_t	endaddr)
2884{
2885	if (endaddr > entry->vme_end) {
2886		/*
2887		 * Within the scope of this clipping, limit "endaddr" to
2888		 * the end of this map entry...
2889		 */
2890		endaddr = entry->vme_end;
2891	}
2892#ifndef NO_NESTED_PMAP
2893	if (entry->use_pmap) {
2894		vm_map_offset_t	start_unnest, end_unnest;
2895
2896		/*
2897		 * Make sure the range between the start of this entry and
2898		 * the new "endaddr" is no longer nested before we clip.
2899		 * Unnest only the minimum range the platform can handle.
2900		 */
2901		start_unnest = entry->vme_start;
2902		end_unnest =
2903			(endaddr + pmap_nesting_size_min - 1) &
2904			~(pmap_nesting_size_min - 1);
2905		vm_map_clip_unnest(map, entry, start_unnest, end_unnest);
2906	}
2907#endif /* NO_NESTED_PMAP */
2908	if (endaddr < entry->vme_end) {
2909		if (entry->object.vm_object &&
2910		    !entry->is_sub_map &&
2911		    entry->object.vm_object->phys_contiguous) {
2912			pmap_remove(map->pmap,
2913				    (addr64_t)(entry->vme_start),
2914				    (addr64_t)(entry->vme_end));
2915		}
2916		_vm_map_clip_end(&map->hdr, entry, endaddr);
2917		UPDATE_FIRST_FREE(map, map->first_free);
2918	}
2919}
2920
2921
2922#define vm_map_copy_clip_end(copy, entry, endaddr) \
2923	MACRO_BEGIN \
2924	if ((endaddr) < (entry)->vme_end) \
2925		_vm_map_clip_end(&(copy)->cpy_hdr,(entry),(endaddr)); \
2926	MACRO_END
2927
2928/*
2929 *	This routine is called only when it is known that
2930 *	the entry must be split.
2931 */
2932static void
2933_vm_map_clip_end(
2934	register struct vm_map_header	*map_header,
2935	register vm_map_entry_t		entry,
2936	register vm_map_offset_t	end)
2937{
2938	register vm_map_entry_t	new_entry;
2939
2940	/*
2941	 *	Create a new entry and insert it
2942	 *	AFTER the specified entry
2943	 */
2944
2945	new_entry = _vm_map_entry_create(map_header);
2946	vm_map_entry_copy_full(new_entry, entry);
2947
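	/*
	 * "new_entry" covers the tail [end, vme_end); its object offset
	 * is advanced past the portion that stays in the original entry.
	 */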
2948	new_entry->vme_start = entry->vme_end = end;
2949	new_entry->offset += (end - entry->vme_start);
2950
2951	_vm_map_entry_link(map_header, entry, new_entry);
2952
2953	if (entry->is_sub_map)
2954		vm_map_reference(new_entry->object.sub_map);
2955	else
2956		vm_object_reference(new_entry->object.vm_object);
2957}
2958
2959
2960/*
2961 *	VM_MAP_RANGE_CHECK:	[ internal use only ]
2962 *
2963 *	Asserts that the starting and ending region
2964 *	addresses fall within the valid range of the map.
2965 */
2966#define	VM_MAP_RANGE_CHECK(map, start, end)	\
2967	MACRO_BEGIN				\
2968	if (start < vm_map_min(map))		\
2969		start = vm_map_min(map);	\
2970	if (end > vm_map_max(map))		\
2971		end = vm_map_max(map);		\
2972	if (start > end)			\
2973		start = end;			\
2974	MACRO_END
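/*
 * Note that VM_MAP_RANGE_CHECK silently clamps the range to the map's
 * bounds (and collapses it to empty if start > end) rather than
 * returning an error; callers such as vm_map_inherit and
 * vm_map_wire_nested below use it that way.
 */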
2975
2976/*
2977 *	vm_map_range_check:	[ internal use only ]
2978 *
 *	Check that the region defined by the specified start and
 *	end addresses is wholly contained within a single map
 *	entry or set of adjacent map entries of the specified map,
2982 *	i.e. the specified region contains no unmapped space.
2983 *	If any or all of the region is unmapped, FALSE is returned.
2984 *	Otherwise, TRUE is returned and if the output argument 'entry'
2985 *	is not NULL it points to the map entry containing the start
2986 *	of the region.
2987 *
2988 *	The map is locked for reading on entry and is left locked.
2989 */
2990static boolean_t
2991vm_map_range_check(
2992	register vm_map_t	map,
2993	register vm_map_offset_t	start,
2994	register vm_map_offset_t	end,
2995	vm_map_entry_t		*entry)
2996{
2997	vm_map_entry_t		cur;
2998	register vm_map_offset_t	prev;
2999
3000	/*
3001	 * 	Basic sanity checks first
3002	 */
3003	if (start < vm_map_min(map) || end > vm_map_max(map) || start > end)
3004		return (FALSE);
3005
3006	/*
3007	 * 	Check first if the region starts within a valid
3008	 *	mapping for the map.
3009	 */
3010	if (!vm_map_lookup_entry(map, start, &cur))
3011		return (FALSE);
3012
3013	/*
3014	 *	Optimize for the case that the region is contained
3015	 *	in a single map entry.
3016	 */
3017	if (entry != (vm_map_entry_t *) NULL)
3018		*entry = cur;
3019	if (end <= cur->vme_end)
3020		return (TRUE);
3021
3022	/*
3023	 * 	If the region is not wholly contained within a
3024	 * 	single entry, walk the entries looking for holes.
3025	 */
3026	prev = cur->vme_end;
3027	cur = cur->vme_next;
3028	while ((cur != vm_map_to_entry(map)) && (prev == cur->vme_start)) {
3029		if (end <= cur->vme_end)
3030			return (TRUE);
3031		prev = cur->vme_end;
3032		cur = cur->vme_next;
3033	}
3034	return (FALSE);
3035}
3036
3037/*
3038 *	vm_map_submap:		[ kernel use only ]
3039 *
3040 *	Mark the given range as handled by a subordinate map.
3041 *
3042 *	This range must have been created with vm_map_find using
3043 *	the vm_submap_object, and no other operations may have been
3044 *	performed on this range prior to calling vm_map_submap.
3045 *
3046 *	Only a limited number of operations can be performed
 *	within this range after calling vm_map_submap:
3048 *		vm_fault
3049 *	[Don't try vm_map_copyin!]
3050 *
3051 *	To remove a submapping, one must first remove the
3052 *	range from the superior map, and then destroy the
3053 *	submap (if desired).  [Better yet, don't try it.]
3054 */
3055kern_return_t
3056vm_map_submap(
3057	vm_map_t		map,
3058	vm_map_offset_t	start,
3059	vm_map_offset_t	end,
3060	vm_map_t		submap,
3061	vm_map_offset_t	offset,
3062#ifdef NO_NESTED_PMAP
3063	__unused
3064#endif	/* NO_NESTED_PMAP */
3065	boolean_t		use_pmap)
3066{
3067	vm_map_entry_t		entry;
3068	register kern_return_t	result = KERN_INVALID_ARGUMENT;
3069	register vm_object_t	object;
3070
3071	vm_map_lock(map);
3072
3073	if (! vm_map_lookup_entry(map, start, &entry)) {
3074		entry = entry->vme_next;
3075	}
3076
3077	if (entry == vm_map_to_entry(map) ||
3078	    entry->is_sub_map) {
3079		vm_map_unlock(map);
3080		return KERN_INVALID_ARGUMENT;
3081	}
3082
3083	assert(!entry->use_pmap); /* we don't want to unnest anything here */
3084	vm_map_clip_start(map, entry, start);
3085	vm_map_clip_end(map, entry, end);
3086
3087	if ((entry->vme_start == start) && (entry->vme_end == end) &&
3088	    (!entry->is_sub_map) &&
3089	    ((object = entry->object.vm_object) == vm_submap_object) &&
3090	    (object->resident_page_count == 0) &&
3091	    (object->copy == VM_OBJECT_NULL) &&
3092	    (object->shadow == VM_OBJECT_NULL) &&
3093	    (!object->pager_created)) {
3094		entry->offset = (vm_object_offset_t)offset;
3095		entry->object.vm_object = VM_OBJECT_NULL;
3096		vm_object_deallocate(object);
3097		entry->is_sub_map = TRUE;
3098		entry->object.sub_map = submap;
3099		vm_map_reference(submap);
3100		submap->mapped = TRUE;
3101
3102#ifndef NO_NESTED_PMAP
3103		if (use_pmap) {
3104			/* nest if platform code will allow */
3105			if(submap->pmap == NULL) {
3106				submap->pmap = pmap_create((vm_map_size_t) 0, FALSE);
3107				if(submap->pmap == PMAP_NULL) {
3108					vm_map_unlock(map);
3109					return(KERN_NO_SPACE);
3110				}
3111			}
3112			result = pmap_nest(map->pmap,
3113					   (entry->object.sub_map)->pmap,
3114					   (addr64_t)start,
3115					   (addr64_t)start,
3116					   (uint64_t)(end - start));
3117			if(result)
3118				panic("vm_map_submap: pmap_nest failed, rc = %08X\n", result);
3119			entry->use_pmap = TRUE;
3120		}
3121#else	/* NO_NESTED_PMAP */
3122		pmap_remove(map->pmap, (addr64_t)start, (addr64_t)end);
3123#endif	/* NO_NESTED_PMAP */
3124		result = KERN_SUCCESS;
3125	}
3126	vm_map_unlock(map);
3127
3128	return(result);
3129}
3130
3131/*
3132 *	vm_map_protect:
3133 *
3134 *	Sets the protection of the specified address
3135 *	region in the target map.  If "set_max" is
3136 *	specified, the maximum protection is to be set;
3137 *	otherwise, only the current protection is affected.
3138 */
3139kern_return_t
3140vm_map_protect(
3141	register vm_map_t	map,
3142	register vm_map_offset_t	start,
3143	register vm_map_offset_t	end,
3144	register vm_prot_t	new_prot,
3145	register boolean_t	set_max)
3146{
3147	register vm_map_entry_t		current;
3148	register vm_map_offset_t	prev;
3149	vm_map_entry_t			entry;
3150	vm_prot_t			new_max;
3151
3152	XPR(XPR_VM_MAP,
3153	    "vm_map_protect, 0x%X start 0x%X end 0x%X, new 0x%X %d",
3154	    (integer_t)map, start, end, new_prot, set_max);
3155
3156	vm_map_lock(map);
3157
3158	if ((new_prot & VM_PROT_COPY) && !map->prot_copy_allow) {
3159		vm_map_unlock(map);
3160		return(KERN_PROTECTION_FAILURE);
3161	}
3162
3163	/* LP64todo - remove this check when vm_map_commpage64()
3164	 * no longer has to stuff in a map_entry for the commpage
3165	 * above the map's max_offset.
3166	 */
3167	if (start >= map->max_offset) {
3168		vm_map_unlock(map);
3169		return(KERN_INVALID_ADDRESS);
3170	}
3171
3172	/*
3173	 * 	Lookup the entry.  If it doesn't start in a valid
3174	 *	entry, return an error.
3175	 */
3176	if (! vm_map_lookup_entry(map, start, &entry)) {
3177		vm_map_unlock(map);
3178		return(KERN_INVALID_ADDRESS);
3179	}
3180
3181	/*
3182	 *	Make a first pass to check for protection and address
3183	 *	violations.
3184	 */
3185
3186	current = entry;
3187	prev = current->vme_start;
3188	while ((current != vm_map_to_entry(map)) &&
3189	       (current->vme_start < end)) {
3190
3191		/*
3192		 * If there is a hole, return an error.
3193		 */
3194		if (current->vme_start != prev) {
3195			vm_map_unlock(map);
3196			return(KERN_INVALID_ADDRESS);
3197		}
3198
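		/*
		 * VM_PROT_COPY asks for a copy-on-write copy of the mapped
		 * data, which implies the entry will become writable, so
		 * validate the request against max_protection | VM_PROT_WRITE.
		 */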
3199		new_max = current->max_protection;
3200		if(new_prot & VM_PROT_COPY) {
3201			new_max |= VM_PROT_WRITE;
3202			if ((new_prot & (new_max | VM_PROT_COPY)) != new_prot) {
3203				vm_map_unlock(map);
3204				return(KERN_PROTECTION_FAILURE);
3205			}
3206		} else {
3207			if ((new_prot & new_max) != new_prot) {
3208				vm_map_unlock(map);
3209				return(KERN_PROTECTION_FAILURE);
3210			}
3211		}
3212
3213#if CONFIG_EMBEDDED
3214		if (new_prot & VM_PROT_WRITE) {
3215			if (new_prot & VM_PROT_EXECUTE) {
3216				printf("EMBEDDED: %s can't have both write and exec at the same time\n", __FUNCTION__);
3217				new_prot &= ~VM_PROT_EXECUTE;
3218			}
3219		}
3220#endif
3221
3222		prev = current->vme_end;
3223		current = current->vme_next;
3224	}
3225	if (end > prev) {
3226		vm_map_unlock(map);
3227		return(KERN_INVALID_ADDRESS);
3228	}
3229
3230	/*
3231	 *	Go back and fix up protections.
3232	 *	Clip to start here if the range starts within
3233	 *	the entry.
3234	 */
3235
3236	current = entry;
3237	if (current != vm_map_to_entry(map)) {
3238		/* clip and unnest if necessary */
3239		vm_map_clip_start(map, current, start);
3240	}
3241
3242	while ((current != vm_map_to_entry(map)) &&
3243	       (current->vme_start < end)) {
3244
3245		vm_prot_t	old_prot;
3246
3247		vm_map_clip_end(map, current, end);
3248
3249		assert(!current->use_pmap); /* clipping did unnest if needed */
3250
3251		old_prot = current->protection;
3252
3253		if(new_prot & VM_PROT_COPY) {
			/* caller is asking specifically to copy the      */
			/* mapped data; this implies that max protection  */
			/* will include write.  Caller must be prepared   */
			/* for loss of shared memory communication in the */
			/* target area after taking this step.            */
3259			current->needs_copy = TRUE;
3260			current->max_protection |= VM_PROT_WRITE;
3261		}
3262
3263		if (set_max)
3264			current->protection =
3265				(current->max_protection =
3266				 new_prot & ~VM_PROT_COPY) &
3267				old_prot;
3268		else
3269			current->protection = new_prot & ~VM_PROT_COPY;
3270
3271		/*
3272		 *	Update physical map if necessary.
		 *	If the request is to turn off write protection,
		 *	we won't do it for real (in pmap).  This is because
		 *	it would cause copy-on-write to fail.  We've already
		 *	set the new protection in the map, so if a
		 *	write-protect fault occurs, it will be fixed up
		 *	properly, COW or not.
3279		 */
3280		if (current->protection != old_prot) {
			/* Look one level in: we support nested pmaps   */
			/* from mapped submaps which are direct entries */
			/* in our map */
3284
3285			vm_prot_t prot;
3286
3287			prot = current->protection & ~VM_PROT_WRITE;
3288
3289			if (override_nx(map, current->alias) && prot)
3290			        prot |= VM_PROT_EXECUTE;
3291
3292			if (current->is_sub_map && current->use_pmap) {
3293				pmap_protect(current->object.sub_map->pmap,
3294					     current->vme_start,
3295					     current->vme_end,
3296					     prot);
3297			} else {
3298				pmap_protect(map->pmap,
3299					     current->vme_start,
3300					     current->vme_end,
3301					     prot);
3302			}
3303		}
3304		current = current->vme_next;
3305	}
3306
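	/*
	 * Third pass: coalesce any neighboring entries whose attributes
	 * now match again after the protection change.
	 */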
3307	current = entry;
3308	while ((current != vm_map_to_entry(map)) &&
3309	       (current->vme_start <= end)) {
3310		vm_map_simplify_entry(map, current);
3311		current = current->vme_next;
3312	}
3313
3314	vm_map_unlock(map);
3315	return(KERN_SUCCESS);
3316}
3317
3318/*
3319 *	vm_map_inherit:
3320 *
3321 *	Sets the inheritance of the specified address
3322 *	range in the target map.  Inheritance
3323 *	affects how the map will be shared with
3324 *	child maps at the time of vm_map_fork.
3325 */
3326kern_return_t
3327vm_map_inherit(
3328	register vm_map_t	map,
3329	register vm_map_offset_t	start,
3330	register vm_map_offset_t	end,
3331	register vm_inherit_t	new_inheritance)
3332{
3333	register vm_map_entry_t	entry;
3334	vm_map_entry_t	temp_entry;
3335
3336	vm_map_lock(map);
3337
3338	VM_MAP_RANGE_CHECK(map, start, end);
3339
3340	if (vm_map_lookup_entry(map, start, &temp_entry)) {
3341		entry = temp_entry;
3342	}
3343	else {
3344		temp_entry = temp_entry->vme_next;
3345		entry = temp_entry;
3346	}
3347
	/* First, check the entire range for submaps which can't support the */
	/* given inheritance. */
3350	while ((entry != vm_map_to_entry(map)) && (entry->vme_start < end)) {
3351		if(entry->is_sub_map) {
3352			if(new_inheritance == VM_INHERIT_COPY) {
3353				vm_map_unlock(map);
3354				return(KERN_INVALID_ARGUMENT);
3355			}
3356		}
3357
3358		entry = entry->vme_next;
3359	}
3360
3361	entry = temp_entry;
3362	if (entry != vm_map_to_entry(map)) {
3363		/* clip and unnest if necessary */
3364		vm_map_clip_start(map, entry, start);
3365	}
3366
3367	while ((entry != vm_map_to_entry(map)) && (entry->vme_start < end)) {
3368		vm_map_clip_end(map, entry, end);
3369		assert(!entry->use_pmap); /* clip did unnest if needed */
3370
3371		entry->inheritance = new_inheritance;
3372
3373		entry = entry->vme_next;
3374	}
3375
3376	vm_map_unlock(map);
3377	return(KERN_SUCCESS);
3378}
3379
3380/*
3381 * Update the accounting for the amount of wired memory in this map.  If the user has
3382 * exceeded the defined limits, then we fail.  Wiring on behalf of the kernel never fails.
3383 */
3384
3385static kern_return_t
3386add_wire_counts(
3387	vm_map_t	map,
3388	vm_map_entry_t	entry,
3389	boolean_t	user_wire)
3390{
3391	vm_map_size_t	size;
3392
3393	if (user_wire) {
3394
3395		/*
3396		 * We're wiring memory at the request of the user.  Check if this is the first time the user is wiring
3397		 * this map entry.
3398		 */
3399
3400		if (entry->user_wired_count == 0) {
3401			size = entry->vme_end - entry->vme_start;
3402
3403			/*
			 * Since this is the first time the user is wiring this map entry, check to see if we're
			 * exceeding the user wire limits.  There is a per-map limit, which is the smaller of the
			 * process's rlimit and the global vm_user_wire_limit.  There is also a system-wide limit
			 * on the amount of memory all users can wire.  If the user is over either limit, then
			 * we fail.
			 */
3410
3411			if(size + map->user_wire_size > MIN(map->user_wire_limit, vm_user_wire_limit) ||
3412		    	   size + ptoa_64(vm_page_wire_count) > vm_global_user_wire_limit)
3413				return KERN_RESOURCE_SHORTAGE;
3414
3415			/*
3416			 * The first time the user wires an entry, we also increment the wired_count and add this to
3417			 * the total that has been wired in the map.
3418			 */
3419
3420			if (entry->wired_count >= MAX_WIRE_COUNT)
3421				return KERN_FAILURE;
3422
3423			entry->wired_count++;
3424			map->user_wire_size += size;
3425		}
3426
3427		if (entry->user_wired_count >= MAX_WIRE_COUNT)
3428			return KERN_FAILURE;
3429
3430		entry->user_wired_count++;
3431
3432	} else {
3433
3434		/*
3435		 * The kernel's wiring the memory.  Just bump the count and continue.
3436		 */
3437
3438		if (entry->wired_count >= MAX_WIRE_COUNT)
3439			panic("vm_map_wire: too many wirings");
3440
3441		entry->wired_count++;
3442	}
3443
3444	return KERN_SUCCESS;
3445}
3446
3447/*
3448 * Update the memory wiring accounting now that the given map entry is being unwired.
3449 */
3450
3451static void
3452subtract_wire_counts(
3453	vm_map_t	map,
3454	vm_map_entry_t	entry,
3455	boolean_t	user_wire)
3456{
3457
3458	if (user_wire) {
3459
3460		/*
3461		 * We're unwiring memory at the request of the user.  See if we're removing the last user wire reference.
3462		 */
3463
3464		if (entry->user_wired_count == 1) {
3465
3466			/*
3467			 * We're removing the last user wire reference.  Decrement the wired_count and the total
3468			 * user wired memory for this map.
3469			 */
3470
3471			assert(entry->wired_count >= 1);
3472			entry->wired_count--;
3473			map->user_wire_size -= entry->vme_end - entry->vme_start;
3474		}
3475
3476		assert(entry->user_wired_count >= 1);
3477		entry->user_wired_count--;
3478
3479	} else {
3480
3481		/*
3482		 * The kernel is unwiring the memory.   Just update the count.
3483		 */
3484
3485		assert(entry->wired_count >= 1);
3486		entry->wired_count--;
3487	}
3488}
3489
3490/*
3491 *	vm_map_wire:
3492 *
3493 *	Sets the pageability of the specified address range in the
3494 *	target map as wired.  Regions specified as not pageable require
3495 *	locked-down physical memory and physical page maps.  The
3496 *	access_type variable indicates types of accesses that must not
3497 *	generate page faults.  This is checked against protection of
3498 *	memory being locked-down.
3499 *
3500 *	The map must not be locked, but a reference must remain to the
3501 *	map throughout the call.
3502 */
3503static kern_return_t
3504vm_map_wire_nested(
3505	register vm_map_t	map,
3506	register vm_map_offset_t	start,
3507	register vm_map_offset_t	end,
3508	register vm_prot_t	access_type,
3509	boolean_t		user_wire,
3510	pmap_t			map_pmap,
3511	vm_map_offset_t		pmap_addr)
3512{
3513	register vm_map_entry_t	entry;
3514	struct vm_map_entry	*first_entry, tmp_entry;
3515	vm_map_t		real_map;
3516	register vm_map_offset_t	s,e;
3517	kern_return_t		rc;
3518	boolean_t		need_wakeup;
3519	boolean_t		main_map = FALSE;
3520	wait_interrupt_t	interruptible_state;
3521	thread_t		cur_thread;
3522	unsigned int		last_timestamp;
3523	vm_map_size_t		size;
3524
3525	vm_map_lock(map);
3526	if(map_pmap == NULL)
3527		main_map = TRUE;
3528	last_timestamp = map->timestamp;
3529
3530	VM_MAP_RANGE_CHECK(map, start, end);
3531	assert(page_aligned(start));
3532	assert(page_aligned(end));
3533	if (start == end) {
3534		/* We wired what the caller asked for, zero pages */
3535		vm_map_unlock(map);
3536		return KERN_SUCCESS;
3537	}
3538
3539	need_wakeup = FALSE;
3540	cur_thread = current_thread();
3541
3542	s = start;
3543	rc = KERN_SUCCESS;
3544
3545	if (vm_map_lookup_entry(map, s, &first_entry)) {
3546		entry = first_entry;
3547		/*
3548		 * vm_map_clip_start will be done later.
3549		 * We don't want to unnest any nested submaps here !
3550		 */
3551	} else {
3552		/* Start address is not in map */
3553		rc = KERN_INVALID_ADDRESS;
3554		goto done;
3555	}
3556
3557	while ((entry != vm_map_to_entry(map)) && (s < end)) {
3558		/*
3559		 * At this point, we have wired from "start" to "s".
3560		 * We still need to wire from "s" to "end".
3561		 *
3562		 * "entry" hasn't been clipped, so it could start before "s"
3563		 * and/or end after "end".
3564		 */
3565
3566		/* "e" is how far we want to wire in this entry */
3567		e = entry->vme_end;
3568		if (e > end)
3569			e = end;
3570
3571		/*
		 * If another thread is wiring/unwiring this entry then
		 * block after informing the other thread to wake us up.
3574		 */
3575		if (entry->in_transition) {
3576			wait_result_t wait_result;
3577
3578			/*
3579			 * We have not clipped the entry.  Make sure that
3580			 * the start address is in range so that the lookup
3581			 * below will succeed.
3582			 * "s" is the current starting point: we've already
3583			 * wired from "start" to "s" and we still have
3584			 * to wire from "s" to "end".
3585			 */
3586
3587			entry->needs_wakeup = TRUE;
3588
3589			/*
3590			 * wake up anybody waiting on entries that we have
3591			 * already wired.
3592			 */
3593			if (need_wakeup) {
3594				vm_map_entry_wakeup(map);
3595				need_wakeup = FALSE;
3596			}
3597			/*
3598			 * User wiring is interruptible
3599			 */
3600			wait_result = vm_map_entry_wait(map,
3601							(user_wire) ? THREAD_ABORTSAFE :
3602							THREAD_UNINT);
3603			if (user_wire && wait_result ==	THREAD_INTERRUPTED) {
3604				/*
				 * undo the wirings we have done so far.
3606				 * We do not clear the needs_wakeup flag,
3607				 * because we cannot tell if we were the
3608				 * only one waiting.
3609				 */
3610				rc = KERN_FAILURE;
3611				goto done;
3612			}
3613
3614			/*
3615			 * Cannot avoid a lookup here. reset timestamp.
3616			 */
3617			last_timestamp = map->timestamp;
3618
3619			/*
			 * The entry could have been clipped, so look it up again.
			 * The worst that can happen is that it no longer exists.
3622			 */
3623			if (!vm_map_lookup_entry(map, s, &first_entry)) {
3624				if (!user_wire)
3625					panic("vm_map_wire: re-lookup failed");
3626
3627				/*
				 * User: undo everything up to the previous
				 * entry.  Let vm_map_unwire worry about
				 * checking the validity of the range.
3631				 */
3632				rc = KERN_FAILURE;
3633				goto done;
3634			}
3635			entry = first_entry;
3636			continue;
3637		}
3638
3639		if (entry->is_sub_map) {
3640			vm_map_offset_t	sub_start;
3641			vm_map_offset_t	sub_end;
3642			vm_map_offset_t	local_start;
3643			vm_map_offset_t	local_end;
3644			pmap_t		pmap;
3645
3646			vm_map_clip_start(map, entry, s);
3647			vm_map_clip_end(map, entry, end);
3648
3649			sub_start = entry->offset;
3650			sub_end = entry->vme_end;
3651			sub_end += entry->offset - entry->vme_start;
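			/*
			 * "sub_start"/"sub_end" are the corresponding
			 * addresses within the submap: entry->offset is
			 * where entry->vme_start lands in the submap's
			 * address space.
			 */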
3652
3653			local_end = entry->vme_end;
3654			if(map_pmap == NULL) {
3655				vm_object_t		object;
3656				vm_object_offset_t	offset;
3657				vm_prot_t		prot;
3658				boolean_t		wired;
3659				vm_map_entry_t		local_entry;
3660				vm_map_version_t	 version;
3661				vm_map_t		lookup_map;
3662
3663				if(entry->use_pmap) {
3664					pmap = entry->object.sub_map->pmap;
					/* the ppc implementation requires  */
					/* that the submap's pmap address   */
					/* ranges line up with the parent map */
3668#ifdef notdef
3669					pmap_addr = sub_start;
3670#endif
3671					pmap_addr = s;
3672				} else {
3673					pmap = map->pmap;
3674					pmap_addr = s;
3675				}
3676
3677				if (entry->wired_count) {
3678					if ((rc = add_wire_counts(map, entry, user_wire)) != KERN_SUCCESS)
3679						goto done;
3680
3681					/*
3682					 * The map was not unlocked:
3683					 * no need to goto re-lookup.
3684					 * Just go directly to next entry.
3685					 */
3686					entry = entry->vme_next;
3687					s = entry->vme_start;
3688					continue;
3689
3690				}
3691
				/* call vm_map_lookup_locked to     */
				/* cause any "needs_copy" to be     */
				/* evaluated */
3695				local_start = entry->vme_start;
3696				lookup_map = map;
3697				vm_map_lock_write_to_read(map);
3698				if(vm_map_lookup_locked(
3699					   &lookup_map, local_start,
3700					   access_type,
3701					   OBJECT_LOCK_EXCLUSIVE,
3702					   &version, &object,
3703					   &offset, &prot, &wired,
3704					   NULL,
3705					   &real_map)) {
3706
3707					vm_map_unlock_read(lookup_map);
3708					vm_map_unwire(map, start,
3709						      s, user_wire);
3710					return(KERN_FAILURE);
3711				}
3712				if(real_map != lookup_map)
3713					vm_map_unlock(real_map);
3714				vm_map_unlock_read(lookup_map);
3715				vm_map_lock(map);
3716				vm_object_unlock(object);
3717
3718				/* we unlocked, so must re-lookup */
3719				if (!vm_map_lookup_entry(map,
3720							 local_start,
3721							 &local_entry)) {
3722					rc = KERN_FAILURE;
3723					goto done;
3724				}
3725
3726				/*
3727				 * entry could have been "simplified",
3728				 * so re-clip
3729				 */
3730				entry = local_entry;
3731				assert(s == local_start);
3732				vm_map_clip_start(map, entry, s);
3733				vm_map_clip_end(map, entry, end);
3734				/* re-compute "e" */
3735				e = entry->vme_end;
3736				if (e > end)
3737					e = end;
3738
3739				/* did we have a change of type? */
3740				if (!entry->is_sub_map) {
3741					last_timestamp = map->timestamp;
3742					continue;
3743				}
3744			} else {
3745				local_start = entry->vme_start;
3746				pmap = map_pmap;
3747			}
3748
3749			if ((rc = add_wire_counts(map, entry, user_wire)) != KERN_SUCCESS)
3750				goto done;
3751
3752			entry->in_transition = TRUE;
3753
3754			vm_map_unlock(map);
3755			rc = vm_map_wire_nested(entry->object.sub_map,
3756						sub_start, sub_end,
3757						access_type,
3758						user_wire, pmap, pmap_addr);
3759			vm_map_lock(map);
3760
3761			/*
3762			 * Find the entry again.  It could have been clipped
3763			 * after we unlocked the map.
3764			 */
3765			if (!vm_map_lookup_entry(map, local_start,
3766						 &first_entry))
3767				panic("vm_map_wire: re-lookup failed");
3768			entry = first_entry;
3769
3770			assert(local_start == s);
3771			/* re-compute "e" */
3772			e = entry->vme_end;
3773			if (e > end)
3774				e = end;
3775
3776			last_timestamp = map->timestamp;
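			/*
			 * Clear the "in_transition" flag on the entries that
			 * cover the range we marked before recursing into the
			 * submap, note any waiters, and roll back the wire
			 * counts if the nested wiring failed.
			 */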
3777			while ((entry != vm_map_to_entry(map)) &&
3778			       (entry->vme_start < e)) {
3779				assert(entry->in_transition);
3780				entry->in_transition = FALSE;
3781				if (entry->needs_wakeup) {
3782					entry->needs_wakeup = FALSE;
3783					need_wakeup = TRUE;
3784				}
3785				if (rc != KERN_SUCCESS) {/* from vm_*_wire */
3786					subtract_wire_counts(map, entry, user_wire);
3787				}
3788				entry = entry->vme_next;
3789			}
3790			if (rc != KERN_SUCCESS) {	/* from vm_*_wire */
3791				goto done;
3792			}
3793
3794			/* no need to relookup again */
3795			s = entry->vme_start;
3796			continue;
3797		}
3798
3799		/*
3800		 * If this entry is already wired then increment
3801		 * the appropriate wire reference count.
3802		 */
3803		if (entry->wired_count) {
3804			/*
3805			 * entry is already wired down, get our reference
3806			 * after clipping to our range.
3807			 */
3808			vm_map_clip_start(map, entry, s);
3809			vm_map_clip_end(map, entry, end);
3810
3811			if ((rc = add_wire_counts(map, entry, user_wire)) != KERN_SUCCESS)
3812				goto done;
3813
3814			/* map was not unlocked: no need to relookup */
3815			entry = entry->vme_next;
3816			s = entry->vme_start;
3817			continue;
3818		}
3819
3820		/*
3821		 * Unwired entry or wire request transmitted via submap
3822		 */
3823
3824
3825		/*
3826		 * Perform actions of vm_map_lookup that need the write
3827		 * lock on the map: create a shadow object for a
3828		 * copy-on-write region, or an object for a zero-fill
3829		 * region.
3830		 */
3831		size = entry->vme_end - entry->vme_start;
3832		/*
3833		 * If wiring a copy-on-write page, we need to copy it now
3834		 * even if we're only (currently) requesting read access.
3835		 * This is aggressive, but once it's wired we can't move it.
3836		 */
3837		if (entry->needs_copy) {
3838			vm_object_shadow(&entry->object.vm_object,
3839					 &entry->offset, size);
3840			entry->needs_copy = FALSE;
3841		} else if (entry->object.vm_object == VM_OBJECT_NULL) {
3842			entry->object.vm_object = vm_object_allocate(size);
3843			entry->offset = (vm_object_offset_t)0;
3844		}
3845
3846		vm_map_clip_start(map, entry, s);
3847		vm_map_clip_end(map, entry, end);
3848
3849		/* re-compute "e" */
3850		e = entry->vme_end;
3851		if (e > end)
3852			e = end;
3853
3854		/*
3855		 * Check for holes and protection mismatch.
3856		 * Holes: Next entry should be contiguous unless this
3857		 *	  is the end of the region.
3858		 * Protection: Access requested must be allowed, unless
3859		 *	wiring is by protection class
3860		 */
3861		if ((entry->vme_end < end) &&
3862		    ((entry->vme_next == vm_map_to_entry(map)) ||
3863		     (entry->vme_next->vme_start > entry->vme_end))) {
3864			/* found a hole */
3865			rc = KERN_INVALID_ADDRESS;
3866			goto done;
3867		}
3868		if ((entry->protection & access_type) != access_type) {
3869			/* found a protection problem */
3870			rc = KERN_PROTECTION_FAILURE;
3871			goto done;
3872		}
3873
3874		assert(entry->wired_count == 0 && entry->user_wired_count == 0);
3875
3876		if ((rc = add_wire_counts(map, entry, user_wire)) != KERN_SUCCESS)
3877			goto done;
3878
3879		entry->in_transition = TRUE;
3880
3881		/*
3882		 * This entry might get split once we unlock the map.
3883		 * In vm_fault_wire(), we need the current range as
3884		 * defined by this entry.  In order for this to work
3885		 * along with a simultaneous clip operation, we make a
3886		 * temporary copy of this entry and use that for the
3887		 * wiring.  Note that the underlying objects do not
3888		 * change during a clip.
3889		 */
3890		tmp_entry = *entry;
3891
3892		/*
3893		 * The in_transition state guarantees that the entry
3894		 * (or entries for this range, if a split occurred) will be
3895		 * there when the map lock is acquired for the second time.
3896		 */
3897		vm_map_unlock(map);
3898
3899		if (!user_wire && cur_thread != THREAD_NULL)
3900			interruptible_state = thread_interrupt_level(THREAD_UNINT);
3901		else
3902			interruptible_state = THREAD_UNINT;
3903
3904		if(map_pmap)
3905			rc = vm_fault_wire(map,
3906					   &tmp_entry, map_pmap, pmap_addr);
3907		else
3908			rc = vm_fault_wire(map,
3909					   &tmp_entry, map->pmap,
3910					   tmp_entry.vme_start);
3911
3912		if (!user_wire && cur_thread != THREAD_NULL)
3913			thread_interrupt_level(interruptible_state);
3914
3915		vm_map_lock(map);
3916
3917		if (last_timestamp+1 != map->timestamp) {
3918			/*
3919			 * Find the entry again.  It could have been clipped
3920			 * after we unlocked the map.
3921			 */
3922			if (!vm_map_lookup_entry(map, tmp_entry.vme_start,
3923						 &first_entry))
3924				panic("vm_map_wire: re-lookup failed");
3925
3926			entry = first_entry;
3927		}
3928
3929		last_timestamp = map->timestamp;
3930
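		/*
		 * Clear the "in_transition" flag on all constituent entries
		 * that were covered by the original entry (saved in
		 * tmp_entry), note any waiters, and drop the wire counts we
		 * added above if the wiring fault failed.
		 */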
3931		while ((entry != vm_map_to_entry(map)) &&
3932		       (entry->vme_start < tmp_entry.vme_end)) {
3933			assert(entry->in_transition);
3934			entry->in_transition = FALSE;
3935			if (entry->needs_wakeup) {
3936				entry->needs_wakeup = FALSE;
3937				need_wakeup = TRUE;
3938			}
3939			if (rc != KERN_SUCCESS) {	/* from vm_*_wire */
3940				subtract_wire_counts(map, entry, user_wire);
3941			}
3942			entry = entry->vme_next;
3943		}
3944
3945		if (rc != KERN_SUCCESS) {		/* from vm_*_wire */
3946			goto done;
3947		}
3948
3949		s = entry->vme_start;
3950	} /* end while loop through map entries */
3951
3952done:
3953	if (rc == KERN_SUCCESS) {
3954		/* repair any damage we may have made to the VM map */
3955		vm_map_simplify_range(map, start, end);
3956	}
3957
3958	vm_map_unlock(map);
3959
3960	/*
3961	 * wake up anybody waiting on entries we wired.
3962	 */
3963	if (need_wakeup)
3964		vm_map_entry_wakeup(map);
3965
3966	if (rc != KERN_SUCCESS) {
3967		/* undo what has been wired so far */
3968		vm_map_unwire(map, start, s, user_wire);
3969	}
3970
3971	return rc;
3972
3973}
3974
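/*
 *	vm_map_wire:
 *
 *	Wire the given address range in the target map: fault the pages
 *	in and keep the underlying physical memory resident.  This is the
 *	exported form of vm_map_wire_nested, operating on the map's own
 *	pmap.  The map must not be locked, but a reference must remain to
 *	the map throughout the call.
 */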
3975kern_return_t
3976vm_map_wire(
3977	register vm_map_t	map,
3978	register vm_map_offset_t	start,
3979	register vm_map_offset_t	end,
3980	register vm_prot_t	access_type,
3981	boolean_t		user_wire)
3982{
3983
3984	kern_return_t	kret;
3985
3986#ifdef ppc
3987        /*
3988	 * the calls to mapping_prealloc and mapping_relpre
3989	 * (along with the VM_MAP_RANGE_CHECK to ensure a
3990	 * reasonable range was passed in) are
3991	 * currently necessary because
3992	 * we haven't enabled kernel preemption
3993	 * and/or the pmap_enter cannot purge and re-use
3994	 * existing mappings
3995	 */
3996	VM_MAP_RANGE_CHECK(map, start, end);
3997	mapping_prealloc(end - start);
3998#endif
3999	kret = vm_map_wire_nested(map, start, end, access_type,
4000				  user_wire, (pmap_t)NULL, 0);
4001#ifdef ppc
4002	mapping_relpre();
4003#endif
4004	return kret;
4005}
4006
4007/*
4008 *	vm_map_unwire:
4009 *
4010 *	Sets the pageability of the specified address range in the target
4011 *	map as pageable.  Regions specified must have been wired previously.
4012 *
4013 *	The map must not be locked, but a reference must remain to the map
4014 *	throughout the call.
4015 *
4016 *	Kernel will panic on failures.  User unwire ignores holes and
4017 *	unwired and in-transition entries to avoid losing memory by
4018 *	leaving it wired.
4019 */
4020static kern_return_t
4021vm_map_unwire_nested(
4022	register vm_map_t	map,
4023	register vm_map_offset_t	start,
4024	register vm_map_offset_t	end,
4025	boolean_t		user_wire,
4026	pmap_t			map_pmap,
4027	vm_map_offset_t		pmap_addr)
4028{
4029	register vm_map_entry_t	entry;
4030	struct vm_map_entry	*first_entry, tmp_entry;
4031	boolean_t		need_wakeup;
4032	boolean_t		main_map = FALSE;
4033	unsigned int		last_timestamp;
4034
4035	vm_map_lock(map);
4036	if(map_pmap == NULL)
4037		main_map = TRUE;
4038	last_timestamp = map->timestamp;
4039
4040	VM_MAP_RANGE_CHECK(map, start, end);
4041	assert(page_aligned(start));
4042	assert(page_aligned(end));
4043
4044	if (start == end) {
4045		/* We unwired what the caller asked for: zero pages */
4046		vm_map_unlock(map);
4047		return KERN_SUCCESS;
4048	}
4049
4050	if (vm_map_lookup_entry(map, start, &first_entry)) {
4051		entry = first_entry;
4052		/*
4053		 * vm_map_clip_start will be done later.
4054		 * We don't want to unnest any nested sub maps here !
4055		 */
4056	}
4057	else {
4058		if (!user_wire) {
4059			panic("vm_map_unwire: start not found");
4060		}
4061		/*	Start address is not in map. */
4062		vm_map_unlock(map);
4063		return(KERN_INVALID_ADDRESS);
4064	}
4065
4066	need_wakeup = FALSE;
4067	while ((entry != vm_map_to_entry(map)) && (entry->vme_start < end)) {
4068		if (entry->in_transition) {
4069			/*
4070			 * 1)
4071			 * Another thread is wiring down this entry.  Note
4072			 * that, were it not for that other thread, we would
4073			 * be unwiring an unwired entry.  This is not
4074			 * permitted.  If we wait, we will be unwiring memory
4075			 * we did not wire.
4076			 *
4077			 * 2)
4078			 * Another thread is unwiring this entry.  We did not
4079			 * have a reference to it, because if we did, this
4080			 * entry would not be getting unwired now.
4081			 */
4082			if (!user_wire) {
4083				/*
4084				 * XXX FBDP
4085				 * This could happen:  there could be some
4086				 * overlapping vslock/vsunlock operations
4087				 * going on.
4088				 * We should probably just wait and retry,
4089				 * but then we have to be careful that this
4090				 * entry could get "simplified" after
4091				 * "in_transition" gets unset and before
4092				 * we re-lookup the entry, so we would
4093				 * have to re-clip the entry to avoid
4094				 * re-unwiring what we have already unwired...
4095				 * See vm_map_wire_nested().
4096				 *
4097				 * Or we could just ignore "in_transition"
4098				 * here and proceed to decrement the wired
4099				 * count(s) on this entry.  That should be fine
4100				 * as long as "wired_count" doesn't drop all
4101				 * the way to 0 (and we should panic if THAT
4102				 * happens).
4103				 */
4104				panic("vm_map_unwire: in_transition entry");
4105			}
4106
4107			entry = entry->vme_next;
4108			continue;
4109		}
4110
4111		if (entry->is_sub_map) {
4112			vm_map_offset_t	sub_start;
4113			vm_map_offset_t	sub_end;
4114			vm_map_offset_t	local_end;
4115			pmap_t		pmap;
4116
4117			vm_map_clip_start(map, entry, start);
4118			vm_map_clip_end(map, entry, end);
4119
4120			sub_start = entry->offset;
4121			sub_end = entry->vme_end - entry->vme_start;
4122			sub_end += entry->offset;
4123			local_end = entry->vme_end;
4124			if(map_pmap == NULL) {
4125				if(entry->use_pmap) {
4126					pmap = entry->object.sub_map->pmap;
4127					pmap_addr = sub_start;
4128				} else {
4129					pmap = map->pmap;
4130					pmap_addr = start;
4131				}
4132				if (entry->wired_count == 0 ||
4133				    (user_wire && entry->user_wired_count == 0)) {
4134					if (!user_wire)
4135						panic("vm_map_unwire: entry is unwired");
4136					entry = entry->vme_next;
4137					continue;
4138				}
4139
4140				/*
4141				 * Check for holes
4142				 * Holes: Next entry should be contiguous unless
4143				 * this is the end of the region.
4144				 */
4145				if (((entry->vme_end < end) &&
4146				     ((entry->vme_next == vm_map_to_entry(map)) ||
4147				      (entry->vme_next->vme_start
4148				       > entry->vme_end)))) {
4149					if (!user_wire)
4150						panic("vm_map_unwire: non-contiguous region");
4151/*
4152					entry = entry->vme_next;
4153					continue;
4154*/
4155				}
4156
4157				subtract_wire_counts(map, entry, user_wire);
4158
4159				if (entry->wired_count != 0) {
4160					entry = entry->vme_next;
4161					continue;
4162				}
4163
4164				entry->in_transition = TRUE;
4165				tmp_entry = *entry;/* see comment in vm_map_wire() */
4166
4167				/*
4168				 * We can unlock the map now. The in_transition state
4169				 * guarantees the existence of the entry.
4170				 */
4171				vm_map_unlock(map);
4172				vm_map_unwire_nested(entry->object.sub_map,
4173						     sub_start, sub_end, user_wire, pmap, pmap_addr);
4174				vm_map_lock(map);
4175
4176				if (last_timestamp+1 != map->timestamp) {
4177					/*
4178					 * Find the entry again.  It could have been
4179					 * clipped or deleted after we unlocked the map.
4180					 */
4181					if (!vm_map_lookup_entry(map,
4182								 tmp_entry.vme_start,
4183								 &first_entry)) {
4184						if (!user_wire)
4185							panic("vm_map_unwire: re-lookup failed");
4186						entry = first_entry->vme_next;
4187					} else
4188						entry = first_entry;
4189				}
4190				last_timestamp = map->timestamp;
4191
4192				/*
4193				 * clear transition bit for all constituent entries
4194				 * that were in the original entry (saved in
4195				 * tmp_entry).  Also check for waiters.
4196				 */
4197				while ((entry != vm_map_to_entry(map)) &&
4198				       (entry->vme_start < tmp_entry.vme_end)) {
4199					assert(entry->in_transition);
4200					entry->in_transition = FALSE;
4201					if (entry->needs_wakeup) {
4202						entry->needs_wakeup = FALSE;
4203						need_wakeup = TRUE;
4204					}
4205					entry = entry->vme_next;
4206				}
4207				continue;
4208			} else {
4209				vm_map_unlock(map);
4210				vm_map_unwire_nested(entry->object.sub_map,
4211						     sub_start, sub_end, user_wire, map_pmap,
4212						     pmap_addr);
4213				vm_map_lock(map);
4214
4215				if (last_timestamp+1 != map->timestamp) {
4216					/*
4217					 * Find the entry again.  It could have been
4218					 * clipped or deleted after we unlocked the map.
4219					 */
4220					if (!vm_map_lookup_entry(map,
4221								 tmp_entry.vme_start,
4222								 &first_entry)) {
4223						if (!user_wire)
4224							panic("vm_map_unwire: re-lookup failed");
4225						entry = first_entry->vme_next;
4226					} else
4227						entry = first_entry;
4228				}
4229				last_timestamp = map->timestamp;
4230			}
4231		}
4232
4233
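		/*
		 * Regular (non-submap) entry: decrement its wire counts and,
		 * once the last wiring goes away, unwire the pages through
		 * vm_fault_unwire().
		 */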
4234		if ((entry->wired_count == 0) ||
4235		    (user_wire && entry->user_wired_count == 0)) {
4236			if (!user_wire)
4237				panic("vm_map_unwire: entry is unwired");
4238
4239			entry = entry->vme_next;
4240			continue;
4241		}
4242
4243		assert(entry->wired_count > 0 &&
4244		       (!user_wire || entry->user_wired_count > 0));
4245
4246		vm_map_clip_start(map, entry, start);
4247		vm_map_clip_end(map, entry, end);
4248
4249		/*
4250		 * Check for holes
4251		 * Holes: Next entry should be contiguous unless
4252		 *	  this is the end of the region.
4253		 */
4254		if (((entry->vme_end < end) &&
4255		     ((entry->vme_next == vm_map_to_entry(map)) ||
4256		      (entry->vme_next->vme_start > entry->vme_end)))) {
4257
4258			if (!user_wire)
4259				panic("vm_map_unwire: non-contiguous region");
4260			entry = entry->vme_next;
4261			continue;
4262		}
4263
4264		subtract_wire_counts(map, entry, user_wire);
4265
4266		if (entry->wired_count != 0) {
4267			entry = entry->vme_next;
4268			continue;
4269		}
4270
4271		entry->in_transition = TRUE;
4272		tmp_entry = *entry;	/* see comment in vm_map_wire() */
4273
4274		/*
4275		 * We can unlock the map now. The in_transition state
4276		 * guarantees the existence of the entry.
4277		 */
4278		vm_map_unlock(map);
4279		if(map_pmap) {
4280			vm_fault_unwire(map,
4281					&tmp_entry, FALSE, map_pmap, pmap_addr);
4282		} else {
4283			vm_fault_unwire(map,
4284					&tmp_entry, FALSE, map->pmap,
4285					tmp_entry.vme_start);
4286		}
4287		vm_map_lock(map);
4288
4289		if (last_timestamp+1 != map->timestamp) {
4290			/*
4291			 * Find the entry again.  It could have been clipped
4292			 * or deleted after we unlocked the map.
4293			 */
4294			if (!vm_map_lookup_entry(map, tmp_entry.vme_start,
4295						 &first_entry)) {
4296				if (!user_wire)
4297					panic("vm_map_unwire: re-lookup failed");
4298				entry = first_entry->vme_next;
4299			} else
4300				entry = first_entry;
4301		}
4302		last_timestamp = map->timestamp;
4303
4304		/*
4305		 * clear transition bit for all constituent entries that
4306		 * were in the original entry (saved in tmp_entry).  Also
4307		 * check for waiters.
4308		 */
4309		while ((entry != vm_map_to_entry(map)) &&
4310		       (entry->vme_start < tmp_entry.vme_end)) {
4311			assert(entry->in_transition);
4312			entry->in_transition = FALSE;
4313			if (entry->needs_wakeup) {
4314				entry->needs_wakeup = FALSE;
4315				need_wakeup = TRUE;
4316			}
4317			entry = entry->vme_next;
4318		}
4319	}
4320
4321	/*
4322	 * We might have fragmented the address space when we wired this
4323	 * range of addresses.  Attempt to re-coalesce these VM map entries
4324	 * with their neighbors now that they're no longer wired.
4325	 * Under some circumstances, address space fragmentation can
4326	 * prevent VM object shadow chain collapsing, which can cause
4327	 * swap space leaks.
4328	 */
4329	vm_map_simplify_range(map, start, end);
4330
4331	vm_map_unlock(map);
4332	/*
4333	 * wake up anybody waiting on entries that we have unwired.
4334	 */
4335	if (need_wakeup)
4336		vm_map_entry_wakeup(map);
4337	return(KERN_SUCCESS);
4338
4339}
4340
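/*
 *	vm_map_unwire:
 *
 *	Exported form of vm_map_unwire_nested (documented above): unwire
 *	the given range using the map's own pmap.
 */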
4341kern_return_t
4342vm_map_unwire(
4343	register vm_map_t	map,
4344	register vm_map_offset_t	start,
4345	register vm_map_offset_t	end,
4346	boolean_t		user_wire)
4347{
4348	return vm_map_unwire_nested(map, start, end,
4349				    user_wire, (pmap_t)NULL, 0);
4350}
4351
4352
4353/*
4354 *	vm_map_entry_delete:	[ internal use only ]
4355 *
4356 *	Deallocate the given entry from the target map.
4357 */
4358static void
4359vm_map_entry_delete(
4360	register vm_map_t	map,
4361	register vm_map_entry_t	entry)
4362{
4363	register vm_map_offset_t	s, e;
4364	register vm_object_t	object;
4365	register vm_map_t	submap;
4366
4367	s = entry->vme_start;
4368	e = entry->vme_end;
4369	assert(page_aligned(s));
4370	assert(page_aligned(e));
4371	assert(entry->wired_count == 0);
4372	assert(entry->user_wired_count == 0);
4373
4374	if (entry->is_sub_map) {
4375		object = NULL;
4376		submap = entry->object.sub_map;
4377	} else {
4378		submap = NULL;
4379		object = entry->object.vm_object;
4380	}
4381
4382	vm_map_entry_unlink(map, entry);
4383	map->size -= e - s;
4384
4385	vm_map_entry_dispose(map, entry);
4386
4387	vm_map_unlock(map);
4388	/*
4389	 *	Deallocate the object only after removing all
4390	 *	pmap entries pointing to its pages.
4391	 */
4392	if (submap)
4393		vm_map_deallocate(submap);
4394	else
4395		vm_object_deallocate(object);
4396
4397}
4398
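/*
 *	vm_map_submap_pmap_clean:
 *
 *	Remove the physical mappings backing the range [start, end) of
 *	"map", where that range is covered by a submap entry and "offset"
 *	is the corresponding start address within "sub_map".  Nested
 *	submaps are handled recursively.  If the parent map may itself be
 *	mapped into other address spaces, the mappings are flushed through
 *	the backing VM objects; otherwise they are removed directly from
 *	the parent map's pmap.
 */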
4399void
4400vm_map_submap_pmap_clean(
4401	vm_map_t	map,
4402	vm_map_offset_t	start,
4403	vm_map_offset_t	end,
4404	vm_map_t	sub_map,
4405	vm_map_offset_t	offset)
4406{
4407	vm_map_offset_t	submap_start;
4408	vm_map_offset_t	submap_end;
4409	vm_map_size_t	remove_size;
4410	vm_map_entry_t	entry;
4411
4412	submap_end = offset + (end - start);
4413	submap_start = offset;
4414	if(vm_map_lookup_entry(sub_map, offset, &entry)) {
4415
4416		remove_size = (entry->vme_end - entry->vme_start);
4417		if(offset > entry->vme_start)
4418			remove_size -= offset - entry->vme_start;
4419
4420
4421		if(submap_end < entry->vme_end) {
4422			remove_size -=
4423				entry->vme_end - submap_end;
4424		}
4425		if(entry->is_sub_map) {
4426			vm_map_submap_pmap_clean(
4427				sub_map,
4428				start,
4429				start + remove_size,
4430				entry->object.sub_map,
4431				entry->offset);
4432		} else {
4433
4434			if((map->mapped) && (map->ref_count)
4435			   && (entry->object.vm_object != NULL)) {
4436				vm_object_pmap_protect(
4437					entry->object.vm_object,
4438					entry->offset,
4439					remove_size,
4440					PMAP_NULL,
4441					entry->vme_start,
4442					VM_PROT_NONE);
4443			} else {
4444				pmap_remove(map->pmap,
4445					    (addr64_t)start,
4446					    (addr64_t)(start + remove_size));
4447			}
4448		}
4449	}
4450
4451	entry = entry->vme_next;
4452
4453	while((entry != vm_map_to_entry(sub_map))
4454	      && (entry->vme_start < submap_end)) {
4455		remove_size = (entry->vme_end - entry->vme_start);
4456		if(submap_end < entry->vme_end) {
4457			remove_size -= entry->vme_end - submap_end;
4458		}
4459		if(entry->is_sub_map) {
4460			vm_map_submap_pmap_clean(
4461				sub_map,
4462				(start + entry->vme_start) - offset,
4463				((start + entry->vme_start) - offset) + remove_size,
4464				entry->object.sub_map,
4465				entry->offset);
4466		} else {
4467			if((map->mapped) && (map->ref_count)
4468			   && (entry->object.vm_object != NULL)) {
4469				vm_object_pmap_protect(
4470					entry->object.vm_object,
4471					entry->offset,
4472					remove_size,
4473					PMAP_NULL,
4474					entry->vme_start,
4475					VM_PROT_NONE);
4476			} else {
4477				pmap_remove(map->pmap,
4478					    (addr64_t)((start + entry->vme_start)
4479						       - offset),
4480					    (addr64_t)(((start + entry->vme_start)
4481							- offset) + remove_size));
4482			}
4483		}
4484		entry = entry->vme_next;
4485	}
4486	return;
4487}
4488
4489/*
4490 *	vm_map_delete:	[ internal use only ]
4491 *
4492 *	Deallocates the given address range from the target map.
4493 *	Removes all user wirings. Unwires one kernel wiring if
4494 *	VM_MAP_REMOVE_KUNWIRE is set.  Waits for kernel wirings to go
4495 *	away if VM_MAP_REMOVE_WAIT_FOR_KWIRE is set.  Sleeps
4496 *	interruptibly if VM_MAP_REMOVE_INTERRUPTIBLE is set.
4497 *
4498 *	This routine is called with map locked and leaves map locked.
4499 */
4500static kern_return_t
4501vm_map_delete(
4502	vm_map_t		map,
4503	vm_map_offset_t		start,
4504	vm_map_offset_t		end,
4505	int			flags,
4506	vm_map_t		zap_map)
4507{
4508	vm_map_entry_t		entry, next;
4509	struct	 vm_map_entry	*first_entry, tmp_entry;
4510	register vm_map_offset_t s;
4511	register vm_object_t	object;
4512	boolean_t		need_wakeup;
4513	unsigned int		last_timestamp = ~0; /* unlikely value */
4514	int			interruptible;
4515
4516	interruptible = (flags & VM_MAP_REMOVE_INTERRUPTIBLE) ?
4517		THREAD_ABORTSAFE : THREAD_UNINT;
4518
4519	/*
4520	 * All our DMA I/O operations in IOKit are currently done by
4521	 * wiring through the map entries of the task requesting the I/O.
4522	 * Because of this, we must always wait for kernel wirings
4523	 * to go away on the entries before deleting them.
4524	 *
4525	 * Any caller who wants to actually remove a kernel wiring
4526	 * should explicitly set the VM_MAP_REMOVE_KUNWIRE flag to
4527	 * properly remove one wiring instead of blasting through
4528	 * them all.
4529	 */
4530	flags |= VM_MAP_REMOVE_WAIT_FOR_KWIRE;
4531
4532	/*
4533	 *	Find the start of the region, and clip it
4534	 */
4535	if (vm_map_lookup_entry(map, start, &first_entry)) {
4536		entry = first_entry;
4537		if (start == entry->vme_start) {
4538			/*
4539			 * No need to clip.  We don't want to cause
4540			 * any unnecessary unnesting in this case...
4541			 */
4542		} else {
4543			vm_map_clip_start(map, entry, start);
4544		}
4545
4546		/*
4547		 *	Fix the lookup hint now, rather than each
4548		 *	time through the loop.
4549		 */
4550		SAVE_HINT_MAP_WRITE(map, entry->vme_prev);
4551	} else {
4552		entry = first_entry->vme_next;
4553	}
4554
4555	need_wakeup = FALSE;
4556	/*
4557	 *	Step through all entries in this region
4558	 */
4559	s = entry->vme_start;
4560	while ((entry != vm_map_to_entry(map)) && (s < end)) {
4561		/*
4562		 * At this point, we have deleted all the memory entries
4563		 * between "start" and "s".  We still need to delete
4564		 * all memory entries between "s" and "end".
4565		 * While we were blocked and the map was unlocked, some
4566		 * new memory entries could have been re-allocated between
4567		 * "start" and "s" and we don't want to mess with those.
4568		 * Some of those entries could even have been re-assembled
4569		 * with an entry after "s" (in vm_map_simplify_entry()), so
4570		 * we may have to vm_map_clip_start() again.
4571		 */
4572
4573		if (entry->vme_start >= s) {
4574			/*
4575			 * This entry starts on or after "s"
4576			 * so no need to clip its start.
4577			 */
4578		} else {
4579			/*
4580			 * This entry has been re-assembled by a
4581			 * vm_map_simplify_entry().  We need to
4582			 * re-clip its start.
4583			 */
4584			vm_map_clip_start(map, entry, s);
4585		}
4586		if (entry->vme_end <= end) {
4587			/*
4588			 * This entry is going away completely, so no need
4589			 * to clip and possibly cause an unnecessary unnesting.
4590			 */
4591		} else {
4592			vm_map_clip_end(map, entry, end);
4593		}
4594		if (entry->in_transition) {
4595			wait_result_t wait_result;
4596
4597			/*
4598			 * Another thread is wiring/unwiring this entry.
4599			 * Let the other thread know we are waiting.
4600			 */
4601			assert(s == entry->vme_start);
4602			entry->needs_wakeup = TRUE;
4603
4604			/*
4605			 * wake up anybody waiting on entries that we have
4606			 * already unwired/deleted.
4607			 */
4608			if (need_wakeup) {
4609				vm_map_entry_wakeup(map);
4610				need_wakeup = FALSE;
4611			}
4612
4613			wait_result = vm_map_entry_wait(map, interruptible);
4614
4615			if (interruptible &&
4616			    wait_result == THREAD_INTERRUPTED) {
4617				/*
4618				 * We do not clear the needs_wakeup flag,
4619				 * since we cannot tell if we were the only one.
4620				 */
4621				vm_map_unlock(map);
4622				return KERN_ABORTED;
4623			}
4624
4625			/*
4626			 * The entry could have been clipped or it
4627			 * may not exist anymore.  Look it up again.
4628			 */
4629			if (!vm_map_lookup_entry(map, s, &first_entry)) {
4630				assert((map != kernel_map) &&
4631				       (!entry->is_sub_map));
4632				/*
4633				 * User: use the next entry
4634				 */
4635				entry = first_entry->vme_next;
4636				s = entry->vme_start;
4637			} else {
4638				entry = first_entry;
4639				SAVE_HINT_MAP_WRITE(map, entry->vme_prev);
4640			}
4641			last_timestamp = map->timestamp;
4642			continue;
4643		} /* end in_transition */
4644
4645		if (entry->wired_count) {
4646			boolean_t	user_wire;
4647
4648			user_wire = entry->user_wired_count > 0;
4649
4650			/*
4651			 * 	Remove a kernel wiring if requested or if
4652			 *	there are user wirings.
4653			 */
4654			if ((flags & VM_MAP_REMOVE_KUNWIRE) ||
4655			    (entry->user_wired_count > 0))
4656				entry->wired_count--;
4657
4658			/* remove all user wire references */
4659			entry->user_wired_count = 0;
4660
4661			if (entry->wired_count != 0) {
4662				assert(map != kernel_map);
4663				/*
4664				 * Cannot continue.  Typical case is when
4665				 * a user thread has physical I/O pending
4666				 * on this page.  Either wait for the
4667				 * kernel wiring to go away or return an
4668				 * error.
4669				 */
4670				if (flags & VM_MAP_REMOVE_WAIT_FOR_KWIRE) {
4671					wait_result_t wait_result;
4672
4673					assert(s == entry->vme_start);
4674					entry->needs_wakeup = TRUE;
4675					wait_result = vm_map_entry_wait(map,
4676									interruptible);
4677
4678					if (interruptible &&
4679					    wait_result == THREAD_INTERRUPTED) {
4680						/*
4681						 * We do not clear the
4682						 * needs_wakeup flag, since we
4683						 * cannot tell if we were the
4684						 * only one.
4685						 */
4686						vm_map_unlock(map);
4687						return KERN_ABORTED;
4688					}
4689
4690					/*
4691					 * The entry could have been clipped or
4692					 * it may not exist anymore.  Look it
4693					 * up again.
4694					 */
4695					if (!vm_map_lookup_entry(map, s,
4696								 &first_entry)) {
4697						assert(map != kernel_map);
4698						/*
4699						 * User: use the next entry
4700						 */
4701						entry = first_entry->vme_next;
4702						s = entry->vme_start;
4703					} else {
4704						entry = first_entry;
4705						SAVE_HINT_MAP_WRITE(map, entry->vme_prev);
4706					}
4707					last_timestamp = map->timestamp;
4708					continue;
4709				}
4710				else {
4711					return KERN_FAILURE;
4712				}
4713			}
4714
4715			entry->in_transition = TRUE;
4716			/*
4717			 * copy current entry.  see comment in vm_map_wire()
4718			 */
4719			tmp_entry = *entry;
4720			assert(s == entry->vme_start);
4721
4722			/*
4723			 * We can unlock the map now. The in_transition
4724			 * state guarantees the existence of the entry.
4725			 */
4726			vm_map_unlock(map);
4727
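			/*
			 * Unwire the entry: recurse through a submap entry,
			 * otherwise unwire the faulted-in pages directly.
			 */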
4728			if (tmp_entry.is_sub_map) {
4729				vm_map_t sub_map;
4730				vm_map_offset_t sub_start, sub_end;
4731				pmap_t pmap;
4732				vm_map_offset_t pmap_addr;
4733
4734
4735				sub_map = tmp_entry.object.sub_map;
4736				sub_start = tmp_entry.offset;
4737				sub_end = sub_start + (tmp_entry.vme_end -
4738						       tmp_entry.vme_start);
4739				if (tmp_entry.use_pmap) {
4740					pmap = sub_map->pmap;
4741					pmap_addr = tmp_entry.vme_start;
4742				} else {
4743					pmap = map->pmap;
4744					pmap_addr = tmp_entry.vme_start;
4745				}
4746				(void) vm_map_unwire_nested(sub_map,
4747							    sub_start, sub_end,
4748							    user_wire,
4749							    pmap, pmap_addr);
4750			} else {
4751
4752				vm_fault_unwire(map, &tmp_entry,
4753						tmp_entry.object.vm_object == kernel_object,
4754						map->pmap, tmp_entry.vme_start);
4755			}
4756
4757			vm_map_lock(map);
4758
4759			if (last_timestamp+1 != map->timestamp) {
4760				/*
4761				 * Find the entry again.  It could have
4762				 * been clipped after we unlocked the map.
4763				 */
4764				if (!vm_map_lookup_entry(map, s, &first_entry)){
4765					assert((map != kernel_map) &&
4766					       (!entry->is_sub_map));
4767					first_entry = first_entry->vme_next;
4768					s = first_entry->vme_start;
4769				} else {
4770					SAVE_HINT_MAP_WRITE(map, entry->vme_prev);
4771				}
4772			} else {
4773				SAVE_HINT_MAP_WRITE(map, entry->vme_prev);
4774				first_entry = entry;
4775			}
4776
4777			last_timestamp = map->timestamp;
4778
4779			entry = first_entry;
4780			while ((entry != vm_map_to_entry(map)) &&
4781			       (entry->vme_start < tmp_entry.vme_end)) {
4782				assert(entry->in_transition);
4783				entry->in_transition = FALSE;
4784				if (entry->needs_wakeup) {
4785					entry->needs_wakeup = FALSE;
4786					need_wakeup = TRUE;
4787				}
4788				entry = entry->vme_next;
4789			}
4790			/*
4791			 * We have unwired the entry(s).  Go back and
4792			 * delete them.
4793			 */
4794			entry = first_entry;
4795			continue;
4796		}
4797
4798		/* entry is unwired */
4799		assert(entry->wired_count == 0);
4800		assert(entry->user_wired_count == 0);
4801
4802		assert(s == entry->vme_start);
4803
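		/*
		 * Flush any pmap mappings for this entry before it is
		 * deleted, unless the caller asked us to skip the pmap
		 * cleanup (see the VM_MAP_REMOVE_NO_PMAP_CLEANUP case below).
		 */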
4804		if (flags & VM_MAP_REMOVE_NO_PMAP_CLEANUP) {
4805			/*
4806			 * XXX with the VM_MAP_REMOVE_SAVE_ENTRIES flag to
4807			 * vm_map_delete(), some map entries might have been
4808			 * transferred to a "zap_map", which doesn't have a
4809			 * pmap.  The original pmap has already been flushed
4810			 * in the vm_map_delete() call targeting the original
4811			 * map, but when we get to destroying the "zap_map",
4812			 * we don't have any pmap to flush, so let's just skip
4813			 * all this.
4814			 */
4815		} else if (entry->is_sub_map) {
4816			if (entry->use_pmap) {
4817#ifndef NO_NESTED_PMAP
4818				pmap_unnest(map->pmap,
4819					    (addr64_t)entry->vme_start,
4820					    entry->vme_end - entry->vme_start);
4821#endif	/* NO_NESTED_PMAP */
4822				if ((map->mapped) && (map->ref_count)) {
4823					/* clean up parent map/maps */
4824					vm_map_submap_pmap_clean(
4825						map, entry->vme_start,
4826						entry->vme_end,
4827						entry->object.sub_map,
4828						entry->offset);
4829				}
4830			} else {
4831				vm_map_submap_pmap_clean(
4832					map, entry->vme_start, entry->vme_end,
4833					entry->object.sub_map,
4834					entry->offset);
4835			}
4836		} else if (entry->object.vm_object != kernel_object) {
4837			object = entry->object.vm_object;
4838			if((map->mapped) && (map->ref_count)) {
4839				vm_object_pmap_protect(
4840					object, entry->offset,
4841					entry->vme_end - entry->vme_start,
4842					PMAP_NULL,
4843					entry->vme_start,
4844					VM_PROT_NONE);
4845			} else {
4846				pmap_remove(map->pmap,
4847					    (addr64_t)entry->vme_start,
4848					    (addr64_t)entry->vme_end);
4849			}
4850		}
4851
4852		/*
4853		 * All pmap mappings for this map entry must have been
4854		 * cleared by now.
4855		 */
4856		assert(vm_map_pmap_is_empty(map,
4857					    entry->vme_start,
4858					    entry->vme_end));
4859
4860		next = entry->vme_next;
4861		s = next->vme_start;
4862		last_timestamp = map->timestamp;
4863
4864		if ((flags & VM_MAP_REMOVE_SAVE_ENTRIES) &&
4865		    zap_map != VM_MAP_NULL) {
4866			vm_map_size_t entry_size;
4867			/*
4868			 * The caller wants to save the affected VM map entries
4869			 * into the "zap_map".  The caller will take care of
4870			 * these entries.
4871			 */
4872			/* unlink the entry from "map" ... */
4873			vm_map_entry_unlink(map, entry);
4874			/* ... and add it to the end of the "zap_map" */
4875			vm_map_entry_link(zap_map,
4876					  vm_map_last_entry(zap_map),
4877					  entry);
4878			entry_size = entry->vme_end - entry->vme_start;
4879			map->size -= entry_size;
4880			zap_map->size += entry_size;
4881			/* we didn't unlock the map, so no timestamp increase */
4882			last_timestamp--;
4883		} else {
4884			vm_map_entry_delete(map, entry);
4885			/* vm_map_entry_delete unlocks the map */
4886			vm_map_lock(map);
4887		}
4888
4889		entry = next;
4890
4891		if(entry == vm_map_to_entry(map)) {
4892			break;
4893		}
4894		if (last_timestamp+1 != map->timestamp) {
4895			/*
4896			 * We are responsible for deleting everything
4897			 * from the given space; if someone has interfered,
4898			 * we pick up where we left off.  Back-fills should
4899			 * be all right for anyone except vm_map_delete, and
4900			 * we have to assume that the task has been fully
4901			 * disabled before we get here.
4902			 */
4903			if (!vm_map_lookup_entry(map, s, &entry)){
4904				entry = entry->vme_next;
4905				s = entry->vme_start;
4906			} else {
4907				SAVE_HINT_MAP_WRITE(map, entry->vme_prev);
4908			}
4909			/*
4910			 * Others can not only allocate behind us; we can
4911			 * also see coalescing while we don't hold the map lock.
4912			 */
4913			if(entry == vm_map_to_entry(map)) {
4914				break;
4915			}
4916		}
4917		last_timestamp = map->timestamp;
4918	}
4919
4920	if (map->wait_for_space)
4921		thread_wakeup((event_t) map);
4922	/*
4923	 * wake up anybody waiting on entries that we have already deleted.
4924	 */
4925	if (need_wakeup)
4926		vm_map_entry_wakeup(map);
4927
4928	return KERN_SUCCESS;
4929}
4930
4931/*
4932 *	vm_map_remove:
4933 *
4934 *	Remove the given address range from the target map.
4935 *	This is the exported form of vm_map_delete.
4936 */
4937kern_return_t
4938vm_map_remove(
4939	register vm_map_t	map,
4940	register vm_map_offset_t	start,
4941	register vm_map_offset_t	end,
4942	register boolean_t	flags)
4943{
4944	register kern_return_t	result;
4945
4946	vm_map_lock(map);
4947	VM_MAP_RANGE_CHECK(map, start, end);
4948	result = vm_map_delete(map, start, end, flags, VM_MAP_NULL);
4949	vm_map_unlock(map);
4950
4951	return(result);
4952}
4953
4954
4955/*
4956 *	Routine:	vm_map_copy_discard
4957 *
4958 *	Description:
4959 *		Dispose of a map copy object (returned by
4960 *		vm_map_copyin).
4961 */
4962void
4963vm_map_copy_discard(
4964	vm_map_copy_t	copy)
4965{
4966	TR_DECL("vm_map_copy_discard");
4967
4968/*	tr3("enter: copy 0x%x type %d", copy, copy->type);*/
4969
4970	if (copy == VM_MAP_COPY_NULL)
4971		return;
4972
4973	switch (copy->type) {
4974	case VM_MAP_COPY_ENTRY_LIST:
4975		while (vm_map_copy_first_entry(copy) !=
4976		       vm_map_copy_to_entry(copy)) {
4977			vm_map_entry_t	entry = vm_map_copy_first_entry(copy);
4978
4979			vm_map_copy_entry_unlink(copy, entry);
4980			vm_object_deallocate(entry->object.vm_object);
4981			vm_map_copy_entry_dispose(copy, entry);
4982		}
4983		break;
4984        case VM_MAP_COPY_OBJECT:
4985		vm_object_deallocate(copy->cpy_object);
4986		break;
4987	case VM_MAP_COPY_KERNEL_BUFFER:
4988
4989		/*
4990		 * The vm_map_copy_t and possibly the data buffer were
4991		 * allocated by a single call to kalloc(), i.e. the
4992		 * vm_map_copy_t was not allocated out of the zone.
4993		 */
4994		kfree(copy, copy->cpy_kalloc_size);
4995		return;
4996	}
4997	zfree(vm_map_copy_zone, copy);
4998}
4999
5000/*
5001 *	Routine:	vm_map_copy_copy
5002 *
5003 *	Description:
5004 *			Move the information in a map copy object to
5005 *			a new map copy object, leaving the old one
5006 *			empty.
5007 *
5008 *			This is used by kernel routines that need
5009 *			to look at out-of-line data (in copyin form)
5010 *			before deciding whether to return SUCCESS.
5011 *			If the routine returns FAILURE, the original
5012 *			copy object will be deallocated; therefore,
5013 *			these routines must make a copy of the copy
5014 *			object and leave the original empty so that
5015 *			deallocation will not fail.
5016 */
5017vm_map_copy_t
5018vm_map_copy_copy(
5019	vm_map_copy_t	copy)
5020{
5021	vm_map_copy_t	new_copy;
5022
5023	if (copy == VM_MAP_COPY_NULL)
5024		return VM_MAP_COPY_NULL;
5025
5026	/*
5027	 * Allocate a new copy object, and copy the information
5028	 * from the old one into it.
5029	 */
5030
5031	new_copy = (vm_map_copy_t) zalloc(vm_map_copy_zone);
5032	*new_copy = *copy;
5033
5034	if (copy->type == VM_MAP_COPY_ENTRY_LIST) {
5035		/*
5036		 * The links in the entry chain must be
5037		 * changed to point to the new copy object.
5038		 */
5039		vm_map_copy_first_entry(copy)->vme_prev
5040			= vm_map_copy_to_entry(new_copy);
5041		vm_map_copy_last_entry(copy)->vme_next
5042			= vm_map_copy_to_entry(new_copy);
5043	}
5044
5045	/*
5046	 * Change the old copy object into one that contains
5047	 * nothing to be deallocated.
5048	 */
5049	copy->type = VM_MAP_COPY_OBJECT;
5050	copy->cpy_object = VM_OBJECT_NULL;
5051
5052	/*
5053	 * Return the new object.
5054	 */
5055	return new_copy;
5056}
5057
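/*
 *	vm_map_overwrite_submap_recurse:
 *
 *	Check that the destination range [dst_addr, dst_addr + dst_size)
 *	of "dst_map" can be overwritten: the range must be contiguous and
 *	entirely writeable.  Submap entries are checked recursively, and
 *	entries that are "in transition" are waited for.
 */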
5058static kern_return_t
5059vm_map_overwrite_submap_recurse(
5060	vm_map_t	dst_map,
5061	vm_map_offset_t	dst_addr,
5062	vm_map_size_t	dst_size)
5063{
5064	vm_map_offset_t	dst_end;
5065	vm_map_entry_t	tmp_entry;
5066	vm_map_entry_t	entry;
5067	kern_return_t	result;
5068	boolean_t	encountered_sub_map = FALSE;
5069
5070
5071
5072	/*
5073	 *	Verify that the destination is all writeable
5074	 *	initially.  We have to trunc the destination
5075	 *	address and round the copy size or we'll end up
5076	 *	splitting entries in strange ways.
5077	 */
5078
5079	dst_end = vm_map_round_page(dst_addr + dst_size);
5080	vm_map_lock(dst_map);
5081
5082start_pass_1:
5083	if (!vm_map_lookup_entry(dst_map, dst_addr, &tmp_entry)) {
5084		vm_map_unlock(dst_map);
5085		return(KERN_INVALID_ADDRESS);
5086	}
5087
5088	vm_map_clip_start(dst_map, tmp_entry, vm_map_trunc_page(dst_addr));
5089	assert(!tmp_entry->use_pmap); /* clipping did unnest if needed */
5090
5091	for (entry = tmp_entry;;) {
5092		vm_map_entry_t	next;
5093
5094		next = entry->vme_next;
5095		while(entry->is_sub_map) {
5096			vm_map_offset_t	sub_start;
5097			vm_map_offset_t	sub_end;
5098			vm_map_offset_t	local_end;
5099
5100			if (entry->in_transition) {
5101				/*
5102				 * Say that we are waiting, and wait for entry.
5103				 */
5104                        	entry->needs_wakeup = TRUE;
5105                        	vm_map_entry_wait(dst_map, THREAD_UNINT);
5106
5107				goto start_pass_1;
5108			}
5109
5110			encountered_sub_map = TRUE;
5111			sub_start = entry->offset;
5112
5113			if(entry->vme_end < dst_end)
5114				sub_end = entry->vme_end;
5115			else
5116				sub_end = dst_end;
5117			sub_end -= entry->vme_start;
5118			sub_end += entry->offset;
5119			local_end = entry->vme_end;
5120			vm_map_unlock(dst_map);
5121
5122			result = vm_map_overwrite_submap_recurse(
5123				entry->object.sub_map,
5124				sub_start,
5125				sub_end - sub_start);
5126
5127			if(result != KERN_SUCCESS)
5128				return result;
5129			if (dst_end <= entry->vme_end)
5130				return KERN_SUCCESS;
5131			vm_map_lock(dst_map);
5132			if(!vm_map_lookup_entry(dst_map, local_end,
5133						&tmp_entry)) {
5134				vm_map_unlock(dst_map);
5135				return(KERN_INVALID_ADDRESS);
5136			}
5137			entry = tmp_entry;
5138			next = entry->vme_next;
5139		}
5140
5141		if ( ! (entry->protection & VM_PROT_WRITE)) {
5142			vm_map_unlock(dst_map);
5143			return(KERN_PROTECTION_FAILURE);
5144		}
5145
5146		/*
5147		 *	If the entry is in transition, we must wait
5148		 *	for it to exit that state.  Anything could happen
5149		 *	when we unlock the map, so start over.
5150		 */
5151                if (entry->in_transition) {
5152
5153                        /*
5154                         * Say that we are waiting, and wait for entry.
5155                         */
5156                        entry->needs_wakeup = TRUE;
5157                        vm_map_entry_wait(dst_map, THREAD_UNINT);
5158
5159			goto start_pass_1;
5160		}
5161
5162/*
5163 *		our range is contained completely within this map entry
5164 */
5165		if (dst_end <= entry->vme_end) {
5166			vm_map_unlock(dst_map);
5167			return KERN_SUCCESS;
5168		}
5169/*
5170 *		check that range specified is contiguous region
5171 */
5172		if ((next == vm_map_to_entry(dst_map)) ||
5173		    (next->vme_start != entry->vme_end)) {
5174			vm_map_unlock(dst_map);
5175			return(KERN_INVALID_ADDRESS);
5176		}
5177
5178		/*
5179		 *	Check for permanent objects in the destination.
5180		 */
5181		if ((entry->object.vm_object != VM_OBJECT_NULL) &&
5182		    ((!entry->object.vm_object->internal) ||
5183		     (entry->object.vm_object->true_share))) {
5184			if(encountered_sub_map) {
5185				vm_map_unlock(dst_map);
5186				return(KERN_FAILURE);
5187			}
5188		}
5189
5190
5191		entry = next;
5192	}/* for */
5193	vm_map_unlock(dst_map);
5194	return(KERN_SUCCESS);
5195}
5196
5197/*
5198 *	Routine:	vm_map_copy_overwrite
5199 *
5200 *	Description:
5201 *		Copy the memory described by the map copy
5202 *		object (copy; returned by vm_map_copyin) onto
5203 *		the specified destination region (dst_map, dst_addr).
5204 *		The destination must be writeable.
5205 *
5206 *		Unlike vm_map_copyout, this routine actually
5207 *		writes over previously-mapped memory.  If the
5208 *		previous mapping was to a permanent (user-supplied)
5209 *		memory object, it is preserved.
5210 *
5211 *		The attributes (protection and inheritance) of the
5212 *		destination region are preserved.
5213 *
5214 *		If successful, consumes the copy object.
5215 *		Otherwise, the caller is responsible for it.
5216 *
5217 *	Implementation notes:
5218 *		To overwrite aligned temporary virtual memory, it is
5219 *		sufficient to remove the previous mapping and insert
5220 *		the new copy.  This replacement is done either on
5221 *		the whole region (if no permanent virtual memory
5222 *		objects are embedded in the destination region) or
5223 *		in individual map entries.
5224 *
5225 *		To overwrite permanent virtual memory, it is necessary
5226 *		to copy each page, as the external memory management
5227 *		interface currently does not provide any optimizations.
5228 *
5229 *		Unaligned memory also has to be copied.  It is possible
5230 *		to use 'vm_trickery' to copy the aligned data.  This is
5231 *		not done but not hard to implement.
5232 *
5233 *		Once a page of permanent memory has been overwritten,
5234 *		it is impossible to interrupt this function; otherwise,
5235 *		the call would be neither atomic nor location-independent.
5236 *		The kernel-state portion of a user thread must be
5237 *		interruptible.
5238 *
5239 *		It may be expensive to forward all requests that might
5240 *		overwrite permanent memory (vm_write, vm_copy) to
5241 *		uninterruptible kernel threads.  This routine may be
5242 *		called by interruptible threads; however, success is
5243 *		not guaranteed -- if the request cannot be performed
5244 *		atomically and interruptibly, an error indication is
5245 *		returned.
5246 */
5247
5248static kern_return_t
5249vm_map_copy_overwrite_nested(
5250	vm_map_t		dst_map,
5251	vm_map_address_t	dst_addr,
5252	vm_map_copy_t		copy,
5253	boolean_t		interruptible,
5254	pmap_t			pmap)
5255{
5256	vm_map_offset_t		dst_end;
5257	vm_map_entry_t		tmp_entry;
5258	vm_map_entry_t		entry;
5259	kern_return_t		kr;
5260	boolean_t		aligned = TRUE;
5261	boolean_t		contains_permanent_objects = FALSE;
5262	boolean_t		encountered_sub_map = FALSE;
5263	vm_map_offset_t		base_addr;
5264	vm_map_size_t		copy_size;
5265	vm_map_size_t		total_size;
5266
5267
5268	/*
5269	 *	Check for null copy object.
5270	 */
5271
5272	if (copy == VM_MAP_COPY_NULL)
5273		return(KERN_SUCCESS);
5274
5275	/*
5276	 *	Check for special kernel buffer allocated
5277	 *	by new_ipc_kmsg_copyin.
5278	 */
5279
5280	if (copy->type == VM_MAP_COPY_KERNEL_BUFFER) {
5281		return(vm_map_copyout_kernel_buffer(
5282			       dst_map, &dst_addr,
5283			       copy, TRUE));
5284	}
5285
5286	/*
5287	 *      Only works for entry lists at the moment.  Will
5288	 *	support page lists later.
5289	 */
5290
5291	assert(copy->type == VM_MAP_COPY_ENTRY_LIST);
5292
5293	if (copy->size == 0) {
5294		vm_map_copy_discard(copy);
5295		return(KERN_SUCCESS);
5296	}
5297
5298	/*
5299	 *	Verify that the destination is all writeable
5300	 *	initially.  We have to trunc the destination
5301	 *	address and round the copy size or we'll end up
5302	 *	splitting entries in strange ways.
5303	 */
5304
5305	if (!page_aligned(copy->size) ||
5306	    !page_aligned (copy->offset) ||
5307	    !page_aligned (dst_addr))
5308	{
5309		aligned = FALSE;
5310		dst_end = vm_map_round_page(dst_addr + copy->size);
5311	} else {
5312		dst_end = dst_addr + copy->size;
5313	}
5314
5315	vm_map_lock(dst_map);
5316
5317	/* LP64todo - remove this check when vm_map_commpage64()
5318	 * no longer has to stuff in a map_entry for the commpage
5319	 * above the map's max_offset.
5320	 */
5321	if (dst_addr >= dst_map->max_offset) {
5322		vm_map_unlock(dst_map);
5323		return(KERN_INVALID_ADDRESS);
5324	}
5325
5326start_pass_1:
5327	if (!vm_map_lookup_entry(dst_map, dst_addr, &tmp_entry)) {
5328		vm_map_unlock(dst_map);
5329		return(KERN_INVALID_ADDRESS);
5330	}
5331	vm_map_clip_start(dst_map, tmp_entry, vm_map_trunc_page(dst_addr));
5332	for (entry = tmp_entry;;) {
5333		vm_map_entry_t	next = entry->vme_next;
5334
5335		while(entry->is_sub_map) {
5336			vm_map_offset_t	sub_start;
5337			vm_map_offset_t	sub_end;
5338			vm_map_offset_t	local_end;
5339
5340                	if (entry->in_transition) {
5341
5342				/*
5343				 * Say that we are waiting, and wait for entry.
5344				 */
5345                        	entry->needs_wakeup = TRUE;
5346                        	vm_map_entry_wait(dst_map, THREAD_UNINT);
5347
5348				goto start_pass_1;
5349			}
5350
5351			local_end = entry->vme_end;
5352			if (!(entry->needs_copy)) {
5353				/* If needs_copy is set we are a COW submap; */
5354				/* in that case we just replace the entry,   */
5355				/* so there is no need for the following     */
5356				/* check.                                    */
5357				encountered_sub_map = TRUE;
5358				sub_start = entry->offset;
5359
5360				if(entry->vme_end < dst_end)
5361					sub_end = entry->vme_end;
5362				else
5363					sub_end = dst_end;
5364				sub_end -= entry->vme_start;
5365				sub_end += entry->offset;
5366				vm_map_unlock(dst_map);
5367
5368				kr = vm_map_overwrite_submap_recurse(
5369					entry->object.sub_map,
5370					sub_start,
5371					sub_end - sub_start);
5372				if(kr != KERN_SUCCESS)
5373					return kr;
5374				vm_map_lock(dst_map);
5375			}
5376
5377			if (dst_end <= entry->vme_end)
5378				goto start_overwrite;
5379			if(!vm_map_lookup_entry(dst_map, local_end,
5380						&entry)) {
5381				vm_map_unlock(dst_map);
5382				return(KERN_INVALID_ADDRESS);
5383			}
5384			next = entry->vme_next;
5385		}
5386
5387		if ( ! (entry->protection & VM_PROT_WRITE)) {
5388			vm_map_unlock(dst_map);
5389			return(KERN_PROTECTION_FAILURE);
5390		}
5391
5392		/*
5393		 *	If the entry is in transition, we must wait
5394		 *	for it to exit that state.  Anything could happen
5395		 *	when we unlock the map, so start over.
5396		 */
5397                if (entry->in_transition) {
5398
5399                        /*
5400                         * Say that we are waiting, and wait for entry.
5401                         */
5402                        entry->needs_wakeup = TRUE;
5403                        vm_map_entry_wait(dst_map, THREAD_UNINT);
5404
5405			goto start_pass_1;
5406		}
5407
5408/*
5409 *		our range is contained completely within this map entry
5410 */
5411		if (dst_end <= entry->vme_end)
5412			break;
5413/*
5414 *		check that range specified is contiguous region
5415 */
5416		if ((next == vm_map_to_entry(dst_map)) ||
5417		    (next->vme_start != entry->vme_end)) {
5418			vm_map_unlock(dst_map);
5419			return(KERN_INVALID_ADDRESS);
5420		}
5421
5422
5423		/*
5424		 *	Check for permanent objects in the destination.
5425		 */
5426		if ((entry->object.vm_object != VM_OBJECT_NULL) &&
5427		    ((!entry->object.vm_object->internal) ||
5428		     (entry->object.vm_object->true_share))) {
5429			contains_permanent_objects = TRUE;
5430		}
5431
5432		entry = next;
5433	}/* for */
5434
5435start_overwrite:
5436	/*
5437	 *	If there are permanent objects in the destination, then
5438	 *	the copy cannot be interrupted.
5439	 */
5440
5441	if (interruptible && contains_permanent_objects) {
5442		vm_map_unlock(dst_map);
5443		return(KERN_FAILURE);	/* XXX */
5444	}
5445
5446	/*
5447 	 *
5448	 *	Make a second pass, overwriting the data
5449	 *	At the beginning of each loop iteration,
5450	 *	the next entry to be overwritten is "tmp_entry"
5451	 *	(initially, the value returned from the lookup above),
5452	 *	and the starting address expected in that entry
5453	 *	is "start".
5454	 */
5455
5456	total_size = copy->size;
5457	if(encountered_sub_map) {
5458		copy_size = 0;
5459		/* re-calculate tmp_entry since we've had the map */
5460		/* unlocked */
5461		if (!vm_map_lookup_entry( dst_map, dst_addr, &tmp_entry)) {
5462			vm_map_unlock(dst_map);
5463			return(KERN_INVALID_ADDRESS);
5464		}
5465	} else {
5466		copy_size = copy->size;
5467	}
5468
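	/*
	 * Overwrite the destination in chunks.  "base_addr" tracks our
	 * progress; at each submap boundary the copy object is clipped so
	 * that only "copy_size" bytes are pushed down, and the remainder
	 * is carried over to the following passes.
	 */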
5469	base_addr = dst_addr;
5470	while(TRUE) {
5471		/* deconstruct the copy object and do it in parts */
5472		/* only in the sub_map, interruptible case */
5473		vm_map_entry_t	copy_entry;
5474		vm_map_entry_t	previous_prev = VM_MAP_ENTRY_NULL;
5475		vm_map_entry_t	next_copy = VM_MAP_ENTRY_NULL;
5476		int		nentries;
5477		int		remaining_entries = 0;
5478		int		new_offset = 0;
5479
5480		for (entry = tmp_entry; copy_size == 0;) {
5481			vm_map_entry_t	next;
5482
5483			next = entry->vme_next;
5484
5485			/* tmp_entry and the base address are moved along */
5486			/* each time we encounter a sub-map.  Otherwise, */
5487			/* entry can outpace tmp_entry, and the copy_size */
5488			/* may reflect the distance between them. */
5489			/* If the current entry is found to be in transition, */
5490			/* we will start over at the beginning or at the last */
5491			/* encounter of a submap, as dictated by base_addr, */
5492			/* and we will zero copy_size accordingly. */
5493			if (entry->in_transition) {
5494                       		/*
5495                       		 * Say that we are waiting, and wait for entry.
5496                       		 */
5497                       		entry->needs_wakeup = TRUE;
5498                       		vm_map_entry_wait(dst_map, THREAD_UNINT);
5499
5500				if(!vm_map_lookup_entry(dst_map, base_addr,
5501							&tmp_entry)) {
5502					vm_map_unlock(dst_map);
5503					return(KERN_INVALID_ADDRESS);
5504				}
5505				copy_size = 0;
5506				entry = tmp_entry;
5507				continue;
5508			}
5509			if(entry->is_sub_map) {
5510				vm_map_offset_t	sub_start;
5511				vm_map_offset_t	sub_end;
5512				vm_map_offset_t	local_end;
5513
5514		        	if (entry->needs_copy) {
5515					/* if this is a COW submap */
5516					/* just back the range with an */
5517					/* anonymous entry */
5518					if(entry->vme_end < dst_end)
5519						sub_end = entry->vme_end;
5520					else
5521						sub_end = dst_end;
5522					if(entry->vme_start < base_addr)
5523						sub_start = base_addr;
5524					else
5525						sub_start = entry->vme_start;
5526					vm_map_clip_end(
5527						dst_map, entry, sub_end);
5528					vm_map_clip_start(
5529						dst_map, entry, sub_start);
5530					assert(!entry->use_pmap);
5531					entry->is_sub_map = FALSE;
5532					vm_map_deallocate(
5533						entry->object.sub_map);
5534					entry->object.sub_map = NULL;
5535					entry->is_shared = FALSE;
5536					entry->needs_copy = FALSE;
5537					entry->offset = 0;
5538					/*
5539					 * XXX FBDP
5540					 * We should propagate the protections
5541					 * of the submap entry here instead
5542					 * of forcing them to VM_PROT_ALL...
5543					 * Or better yet, we should inherit
5544					 * the protection of the copy_entry.
5545					 */
5546					entry->protection = VM_PROT_ALL;
5547					entry->max_protection = VM_PROT_ALL;
5548					entry->wired_count = 0;
5549					entry->user_wired_count = 0;
5550					if(entry->inheritance
5551					   == VM_INHERIT_SHARE)
5552						entry->inheritance = VM_INHERIT_COPY;
5553					continue;
5554				}
5555				/* first take care of any non-sub_map */
5556				/* entries to send */
5557				if(base_addr < entry->vme_start) {
5558					/* stuff to send */
5559					copy_size =
5560						entry->vme_start - base_addr;
5561					break;
5562				}
5563				sub_start = entry->offset;
5564
5565				if(entry->vme_end < dst_end)
5566					sub_end = entry->vme_end;
5567				else
5568					sub_end = dst_end;
5569				sub_end -= entry->vme_start;
5570				sub_end += entry->offset;
5571				local_end = entry->vme_end;
5572				vm_map_unlock(dst_map);
5573				copy_size = sub_end - sub_start;
5574
5575				/* adjust the copy object */
5576				if (total_size > copy_size) {
5577					vm_map_size_t	local_size = 0;
5578					vm_map_size_t	entry_size;
5579
5580					nentries = 1;
5581					new_offset = copy->offset;
5582					copy_entry = vm_map_copy_first_entry(copy);
5583					while(copy_entry !=
5584					      vm_map_copy_to_entry(copy)){
5585						entry_size = copy_entry->vme_end -
5586							copy_entry->vme_start;
5587						if((local_size < copy_size) &&
5588						   ((local_size + entry_size)
5589						    >= copy_size)) {
5590							vm_map_copy_clip_end(copy,
5591									     copy_entry,
5592									     copy_entry->vme_start +
5593									     (copy_size - local_size));
5594							entry_size = copy_entry->vme_end -
5595								copy_entry->vme_start;
5596							local_size += entry_size;
5597							new_offset += entry_size;
5598						}
5599						if(local_size >= copy_size) {
5600							next_copy = copy_entry->vme_next;
5601							copy_entry->vme_next =
5602								vm_map_copy_to_entry(copy);
5603							previous_prev =
5604								copy->cpy_hdr.links.prev;
5605							copy->cpy_hdr.links.prev = copy_entry;
5606							copy->size = copy_size;
5607							remaining_entries =
5608								copy->cpy_hdr.nentries;
5609							remaining_entries -= nentries;
5610							copy->cpy_hdr.nentries = nentries;
5611							break;
5612						} else {
5613							local_size += entry_size;
5614							new_offset += entry_size;
5615							nentries++;
5616						}
5617						copy_entry = copy_entry->vme_next;
5618					}
5619				}
5620
5621				if((entry->use_pmap) && (pmap == NULL)) {
5622					kr = vm_map_copy_overwrite_nested(
5623						entry->object.sub_map,
5624						sub_start,
5625						copy,
5626						interruptible,
5627						entry->object.sub_map->pmap);
5628				} else if (pmap != NULL) {
5629					kr = vm_map_copy_overwrite_nested(
5630						entry->object.sub_map,
5631						sub_start,
5632						copy,
5633						interruptible, pmap);
5634				} else {
5635					kr = vm_map_copy_overwrite_nested(
5636						entry->object.sub_map,
5637						sub_start,
5638						copy,
5639						interruptible,
5640						dst_map->pmap);
5641				}
5642				if(kr != KERN_SUCCESS) {
5643					if(next_copy != NULL) {
5644						copy->cpy_hdr.nentries +=
5645							remaining_entries;
5646						copy->cpy_hdr.links.prev->vme_next =
5647							next_copy;
5648						copy->cpy_hdr.links.prev
5649							= previous_prev;
5650						copy->size = total_size;
5651					}
5652					return kr;
5653				}
5654				if (dst_end <= local_end) {
5655					return(KERN_SUCCESS);
5656				}
5657				/* otherwise the copy no longer exists; it was */
5658				/* destroyed after the successful copy_overwrite */
5659			        copy = (vm_map_copy_t)
5660					zalloc(vm_map_copy_zone);
5661				vm_map_copy_first_entry(copy) =
5662					vm_map_copy_last_entry(copy) =
5663					vm_map_copy_to_entry(copy);
5664				copy->type = VM_MAP_COPY_ENTRY_LIST;
5665				copy->offset = new_offset;
5666
5667				total_size -= copy_size;
5668				copy_size = 0;
5669				/* put back remainder of copy in container */
5670				if(next_copy != NULL) {
5671					copy->cpy_hdr.nentries = remaining_entries;
5672					copy->cpy_hdr.links.next = next_copy;
5673					copy->cpy_hdr.links.prev = previous_prev;
5674					copy->size = total_size;
5675					next_copy->vme_prev =
5676						vm_map_copy_to_entry(copy);
5677					next_copy = NULL;
5678				}
5679				base_addr = local_end;
5680				vm_map_lock(dst_map);
5681				if(!vm_map_lookup_entry(dst_map,
5682							local_end, &tmp_entry)) {
5683					vm_map_unlock(dst_map);
5684					return(KERN_INVALID_ADDRESS);
5685				}
5686				entry = tmp_entry;
5687				continue;
5688			}
5689			if (dst_end <= entry->vme_end) {
5690				copy_size = dst_end - base_addr;
5691				break;
5692			}
5693
5694			if ((next == vm_map_to_entry(dst_map)) ||
5695			    (next->vme_start != entry->vme_end)) {
5696				vm_map_unlock(dst_map);
5697				return(KERN_INVALID_ADDRESS);
5698			}
5699
5700			entry = next;
5701		}/* for */
5702
5703		next_copy = NULL;
5704		nentries = 1;
5705
5706		/* adjust the copy object */
5707		if (total_size > copy_size) {
5708			vm_map_size_t	local_size = 0;
5709			vm_map_size_t	entry_size;
5710
5711			new_offset = copy->offset;
5712			copy_entry = vm_map_copy_first_entry(copy);
5713			while(copy_entry != vm_map_copy_to_entry(copy)) {
5714				entry_size = copy_entry->vme_end -
5715					copy_entry->vme_start;
5716				if((local_size < copy_size) &&
5717				   ((local_size + entry_size)
5718				    >= copy_size)) {
5719					vm_map_copy_clip_end(copy, copy_entry,
5720							     copy_entry->vme_start +
5721							     (copy_size - local_size));
5722					entry_size = copy_entry->vme_end -
5723						copy_entry->vme_start;
5724					local_size += entry_size;
5725					new_offset += entry_size;
5726				}
5727				if(local_size >= copy_size) {
5728					next_copy = copy_entry->vme_next;
5729					copy_entry->vme_next =
5730						vm_map_copy_to_entry(copy);
5731					previous_prev =
5732						copy->cpy_hdr.links.prev;
5733					copy->cpy_hdr.links.prev = copy_entry;
5734					copy->size = copy_size;
5735					remaining_entries =
5736						copy->cpy_hdr.nentries;
5737					remaining_entries -= nentries;
5738					copy->cpy_hdr.nentries = nentries;
5739					break;
5740				} else {
5741					local_size += entry_size;
5742					new_offset += entry_size;
5743					nentries++;
5744				}
5745				copy_entry = copy_entry->vme_next;
5746			}
5747		}
5748
5749		if (aligned) {
5750			pmap_t	local_pmap;
5751
5752			if(pmap)
5753				local_pmap = pmap;
5754			else
5755				local_pmap = dst_map->pmap;
5756
5757			if ((kr =  vm_map_copy_overwrite_aligned(
5758				     dst_map, tmp_entry, copy,
5759				     base_addr, local_pmap)) != KERN_SUCCESS) {
5760				if(next_copy != NULL) {
5761					copy->cpy_hdr.nentries +=
5762						remaining_entries;
5763				        copy->cpy_hdr.links.prev->vme_next =
5764						next_copy;
5765			       		copy->cpy_hdr.links.prev =
5766						previous_prev;
5767					copy->size += copy_size;
5768				}
5769				return kr;
5770			}
5771			vm_map_unlock(dst_map);
5772		} else {
5773			/*
5774			 * Performance gain:
5775			 *
5776			 * If the copy and dst addresses are misaligned but share the
5777			 * same offset within the page, we can copy the misaligned
5778			 * parts unaligned and copy the rest aligned.  If they are
5779			 * aligned but len is unaligned, we simply need to copy
5780			 * the end bit unaligned.  We'll need to split the misaligned
5781			 * bits of the region in that case!
5782			 */
5783			/* ALWAYS UNLOCKS THE dst_map MAP */
5784			if ((kr =  vm_map_copy_overwrite_unaligned( dst_map,
5785								    tmp_entry, copy, base_addr)) != KERN_SUCCESS) {
5786				if(next_copy != NULL) {
5787					copy->cpy_hdr.nentries +=
5788						remaining_entries;
5789			       		copy->cpy_hdr.links.prev->vme_next =
5790						next_copy;
5791			       		copy->cpy_hdr.links.prev =
5792						previous_prev;
5793					copy->size += copy_size;
5794				}
5795				return kr;
5796			}
5797		}
5798		total_size -= copy_size;
5799		if(total_size == 0)
5800			break;
5801		base_addr += copy_size;
5802		copy_size = 0;
5803		copy->offset = new_offset;
5804		if(next_copy != NULL) {
5805			copy->cpy_hdr.nentries = remaining_entries;
5806			copy->cpy_hdr.links.next = next_copy;
5807			copy->cpy_hdr.links.prev = previous_prev;
5808			next_copy->vme_prev = vm_map_copy_to_entry(copy);
5809			copy->size = total_size;
5810		}
5811		vm_map_lock(dst_map);
5812		while(TRUE) {
5813			if (!vm_map_lookup_entry(dst_map,
5814						 base_addr, &tmp_entry)) {
5815				vm_map_unlock(dst_map);
5816				return(KERN_INVALID_ADDRESS);
5817			}
5818			if (tmp_entry->in_transition) {
5819				entry->needs_wakeup = TRUE;
5820				vm_map_entry_wait(dst_map, THREAD_UNINT);
5821			} else {
5822				break;
5823			}
5824		}
5825		vm_map_clip_start(dst_map, tmp_entry, vm_map_trunc_page(base_addr));
5826
5827		entry = tmp_entry;
5828	} /* while */
5829
5830	/*
5831	 *	Throw away the vm_map_copy object
5832	 */
5833	vm_map_copy_discard(copy);
5834
5835	return(KERN_SUCCESS);
5836}/* vm_map_copy_overwrite */
5837
5838kern_return_t
5839vm_map_copy_overwrite(
5840	vm_map_t	dst_map,
5841	vm_map_offset_t	dst_addr,
5842	vm_map_copy_t	copy,
5843	boolean_t	interruptible)
5844{
5845	return vm_map_copy_overwrite_nested(
5846		dst_map, dst_addr, copy, interruptible, (pmap_t) NULL);
5847}
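
/*
 * Illustrative usage sketch (not taken from this file): a typical in-kernel
 * caller pairs vm_map_copyin() with vm_map_copy_overwrite() to replace the
 * contents of an already-mapped destination range.  The maps, addresses and
 * length below are hypothetical; on failure the copy is typically still the
 * caller's to discard.
 *
 *	vm_map_copy_t	copy;
 *	kern_return_t	kr;
 *
 *	kr = vm_map_copyin(src_map, src_addr, len, FALSE, &copy);
 *	if (kr == KERN_SUCCESS) {
 *		kr = vm_map_copy_overwrite(dst_map, dst_addr, copy, TRUE);
 *		if (kr != KERN_SUCCESS)
 *			vm_map_copy_discard(copy);
 *	}
 */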
5848
5849
5850/*
5851 *	Routine: vm_map_copy_overwrite_unaligned	[internal use only]
5852 *
5853 *	Description:
5854 *	Physically copy unaligned data
5855 *
5856 *	Implementation:
5857 *	Unaligned parts of pages have to be physically copied.  We use
5858 *	a modified form of vm_fault_copy (which understands non-aligned
5859 *	page offsets and sizes) to do the copy.  We attempt to copy as
5860 *	much memory in one go as possible; however, vm_fault_copy copies
5861 *	within one memory object, so we have to find the smallest of "amount
5862 *	left", "source object data size" and "target object data size".  With
5863 *	unaligned data we don't need to split regions, so the source
5864 *	(copy) object should be a single map entry; the target range may,
5865 *	however, be split over multiple map entries.  In any event we are
5866 *	pessimistic about these assumptions.
5867 *
5868 *	Assumptions:
5869 *	dst_map is locked on entry and is returned locked on success,
5870 *	unlocked on error.
5871 */
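
/*
 * A minimal sketch (illustration only, restating the loop below): each pass
 * copies the smallest of the bytes remaining overall, the bytes left in the
 * current destination entry, and the bytes left in the current source copy
 * entry.
 *
 *	copy_size = MIN(amount_left,
 *			MIN(entry->vme_end - start,
 *			    copy_entry->vme_end -
 *				(copy_entry->vme_start + src_offset)));
 */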
5872
5873static kern_return_t
5874vm_map_copy_overwrite_unaligned(
5875	vm_map_t	dst_map,
5876	vm_map_entry_t	entry,
5877	vm_map_copy_t	copy,
5878	vm_map_offset_t	start)
5879{
5880	vm_map_entry_t		copy_entry = vm_map_copy_first_entry(copy);
5881	vm_map_version_t	version;
5882	vm_object_t		dst_object;
5883	vm_object_offset_t	dst_offset;
5884	vm_object_offset_t	src_offset;
5885	vm_object_offset_t	entry_offset;
5886	vm_map_offset_t		entry_end;
5887	vm_map_size_t		src_size,
5888				dst_size,
5889				copy_size,
5890				amount_left;
5891	kern_return_t		kr = KERN_SUCCESS;
5892
5893	vm_map_lock_write_to_read(dst_map);
5894
5895	src_offset = copy->offset - vm_object_trunc_page(copy->offset);
5896	amount_left = copy->size;
5897/*
5898 *	The copy is unaligned, so we never clipped this entry; we need the
5899 *	offset into the vm_object, not just into the data.
5900 */
5901	while (amount_left > 0) {
5902
5903		if (entry == vm_map_to_entry(dst_map)) {
5904			vm_map_unlock_read(dst_map);
5905			return KERN_INVALID_ADDRESS;
5906		}
5907
5908		/* "start" must be within the current map entry */
5909		assert ((start>=entry->vme_start) && (start<entry->vme_end));
5910
5911		dst_offset = start - entry->vme_start;
5912
5913		dst_size = entry->vme_end - start;
5914
5915		src_size = copy_entry->vme_end -
5916			(copy_entry->vme_start + src_offset);
5917
5918		if (dst_size < src_size) {
5919/*
5920 *			we can only copy dst_size bytes before
5921 *			we have to get the next destination entry
5922 */
5923			copy_size = dst_size;
5924		} else {
5925/*
5926 *			we can only copy src_size bytes before
5927 *			we have to get the next source copy entry
5928 */
5929			copy_size = src_size;
5930		}
5931
5932		if (copy_size > amount_left) {
5933			copy_size = amount_left;
5934		}
5935/*
5936 *		Entry needs copy; create a shadow object for the
5937 *		copy-on-write region.
5938 */
5939		if (entry->needs_copy &&
5940		    ((entry->protection & VM_PROT_WRITE) != 0))
5941		{
5942			if (vm_map_lock_read_to_write(dst_map)) {
5943				vm_map_lock_read(dst_map);
5944				goto RetryLookup;
5945			}
5946			vm_object_shadow(&entry->object.vm_object,
5947					 &entry->offset,
5948					 (vm_map_size_t)(entry->vme_end
5949							 - entry->vme_start));
5950			entry->needs_copy = FALSE;
5951			vm_map_lock_write_to_read(dst_map);
5952		}
5953		dst_object = entry->object.vm_object;
5954/*
5955 *		Unlike with the virtual (aligned) copy, we're going to
5956 *		fault on the destination, so we need a target object.
5957 */
5958		if (dst_object == VM_OBJECT_NULL) {
5959			if (vm_map_lock_read_to_write(dst_map)) {
5960				vm_map_lock_read(dst_map);
5961				goto RetryLookup;
5962			}
5963			dst_object = vm_object_allocate((vm_map_size_t)
5964							entry->vme_end - entry->vme_start);
5965			entry->object.vm_object = dst_object;
5966			entry->offset = 0;
5967			vm_map_lock_write_to_read(dst_map);
5968		}
5969/*
5970 *		Take an object reference and unlock map. The "entry" may
5971 *		disappear or change when the map is unlocked.
5972 */
5973		vm_object_reference(dst_object);
5974		version.main_timestamp = dst_map->timestamp;
5975		entry_offset = entry->offset;
5976		entry_end = entry->vme_end;
5977		vm_map_unlock_read(dst_map);
5978/*
5979 *		Copy as much as possible in one pass
5980 */
5981		kr = vm_fault_copy(
5982			copy_entry->object.vm_object,
5983			copy_entry->offset + src_offset,
5984			&copy_size,
5985			dst_object,
5986			entry_offset + dst_offset,
5987			dst_map,
5988			&version,
5989			THREAD_UNINT );
5990
5991		start += copy_size;
5992		src_offset += copy_size;
5993		amount_left -= copy_size;
5994/*
5995 *		Release the object reference
5996 */
5997		vm_object_deallocate(dst_object);
5998/*
5999 *		If a hard error occurred, return it now
6000 */
6001		if (kr != KERN_SUCCESS)
6002			return kr;
6003
6004		if ((copy_entry->vme_start + src_offset) == copy_entry->vme_end
6005		    || amount_left == 0)
6006		{
6007/*
6008 *			all done with this copy entry, dispose.
6009 */
6010			vm_map_copy_entry_unlink(copy, copy_entry);
6011			vm_object_deallocate(copy_entry->object.vm_object);
6012			vm_map_copy_entry_dispose(copy, copy_entry);
6013
6014			if ((copy_entry = vm_map_copy_first_entry(copy))
6015			    == vm_map_copy_to_entry(copy) && amount_left) {
6016/*
6017 *				not finished copying but ran out of source
6018 */
6019				return KERN_INVALID_ADDRESS;
6020			}
6021			src_offset = 0;
6022		}
6023
6024		if (amount_left == 0)
6025			return KERN_SUCCESS;
6026
6027		vm_map_lock_read(dst_map);
6028		if (version.main_timestamp == dst_map->timestamp) {
6029			if (start == entry_end) {
6030/*
6031 *				destination region is split.  Use the version
6032 *				information to avoid a lookup in the normal
6033 *				case.
6034 */
6035				entry = entry->vme_next;
6036/*
6037 *				should be contiguous. Fail if we encounter
6038 *				a hole in the destination.
6039 */
6040				if (start != entry->vme_start) {
6041					vm_map_unlock_read(dst_map);
6042					return KERN_INVALID_ADDRESS;
6043				}
6044			}
6045		} else {
6046/*
6047 *			Map version check failed.
6048 *			we must lookup the entry because somebody
6049 *			might have changed the map behind our backs.
6050 */
6051		RetryLookup:
6052			if (!vm_map_lookup_entry(dst_map, start, &entry))
6053			{
6054				vm_map_unlock_read(dst_map);
6055				return KERN_INVALID_ADDRESS;
6056			}
6057		}
6058	}/* while */
6059
6060	return KERN_SUCCESS;
6061}/* vm_map_copy_overwrite_unaligned */
6062
6063/*
6064 *	Routine: vm_map_copy_overwrite_aligned	[internal use only]
6065 *
6066 *	Description:
6067 *	Does all the vm_trickery possible for whole pages.
6068 *
6069 *	Implementation:
6070 *
6071 *	If there are no permanent objects in the destination,
6072 *	and the source and destination map entry zones match,
6073 *	and the destination map entry is not shared,
6074 *	then the map entries can be deleted and replaced
6075 *	with those from the copy.  The following code is the
6076 *	basic idea of what to do, but there are lots of annoying
6077 *	little details about getting protection and inheritance
6078 *	right.  Should add protection, inheritance, and sharing checks
6079 *	to the above pass and make sure that no wiring is involved.
6080 */
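
/*
 * Illustrative restatement of the fast-path test used below: a destination
 * entry's pages can simply be thrown away and replaced when the entry is
 * unshared and backed by nothing or by temporary internal (non-true-shared)
 * memory, or when it still needs a copy.  "fast_path" is a hypothetical name
 * for the sake of the sketch.
 *
 *	fast_path = (!entry->is_shared &&
 *		     (object == VM_OBJECT_NULL ||
 *		      (object->internal && !object->true_share))) ||
 *		    entry->needs_copy;
 */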
6081
6082static kern_return_t
6083vm_map_copy_overwrite_aligned(
6084	vm_map_t	dst_map,
6085	vm_map_entry_t	tmp_entry,
6086	vm_map_copy_t	copy,
6087	vm_map_offset_t	start,
6088	__unused pmap_t	pmap)
6089{
6090	vm_object_t	object;
6091	vm_map_entry_t	copy_entry;
6092	vm_map_size_t	copy_size;
6093	vm_map_size_t	size;
6094	vm_map_entry_t	entry;
6095
6096	while ((copy_entry = vm_map_copy_first_entry(copy))
6097	       != vm_map_copy_to_entry(copy))
6098	{
6099		copy_size = (copy_entry->vme_end - copy_entry->vme_start);
6100
6101		entry = tmp_entry;
6102		assert(!entry->use_pmap); /* unnested when clipped earlier */
6103		if (entry == vm_map_to_entry(dst_map)) {
6104			vm_map_unlock(dst_map);
6105			return KERN_INVALID_ADDRESS;
6106		}
6107		size = (entry->vme_end - entry->vme_start);
6108		/*
6109		 *	Make sure that no holes popped up in the
6110		 *	address map, and that the protection is
6111		 *	still valid, in case the map was unlocked
6112		 *	earlier.
6113		 */
6114
6115		if ((entry->vme_start != start) || ((entry->is_sub_map)
6116						    && !entry->needs_copy)) {
6117			vm_map_unlock(dst_map);
6118			return(KERN_INVALID_ADDRESS);
6119		}
6120		assert(entry != vm_map_to_entry(dst_map));
6121
6122		/*
6123		 *	Check protection again
6124		 */
6125
6126		if ( ! (entry->protection & VM_PROT_WRITE)) {
6127			vm_map_unlock(dst_map);
6128			return(KERN_PROTECTION_FAILURE);
6129		}
6130
6131		/*
6132		 *	Adjust to source size first
6133		 */
6134
6135		if (copy_size < size) {
6136			vm_map_clip_end(dst_map, entry, entry->vme_start + copy_size);
6137			size = copy_size;
6138		}
6139
6140		/*
6141		 *	Adjust to destination size
6142		 */
6143
6144		if (size < copy_size) {
6145			vm_map_copy_clip_end(copy, copy_entry,
6146					     copy_entry->vme_start + size);
6147			copy_size = size;
6148		}
6149
6150		assert((entry->vme_end - entry->vme_start) == size);
6151		assert((tmp_entry->vme_end - tmp_entry->vme_start) == size);
6152		assert((copy_entry->vme_end - copy_entry->vme_start) == size);
6153
6154		/*
6155		 *	If the destination contains temporary unshared memory,
6156		 *	we can perform the copy by throwing it away and
6157		 *	installing the source data.
6158		 */
6159
6160		object = entry->object.vm_object;
6161		if ((!entry->is_shared &&
6162		     ((object == VM_OBJECT_NULL) ||
6163		      (object->internal && !object->true_share))) ||
6164		    entry->needs_copy) {
6165			vm_object_t	old_object = entry->object.vm_object;
6166			vm_object_offset_t	old_offset = entry->offset;
6167			vm_object_offset_t	offset;
6168
6169			/*
6170			 * Ensure that the source and destination aren't
6171			 * identical
6172			 */
6173			if (old_object == copy_entry->object.vm_object &&
6174			    old_offset == copy_entry->offset) {
6175				vm_map_copy_entry_unlink(copy, copy_entry);
6176				vm_map_copy_entry_dispose(copy, copy_entry);
6177
6178				if (old_object != VM_OBJECT_NULL)
6179					vm_object_deallocate(old_object);
6180
6181				start = tmp_entry->vme_end;
6182				tmp_entry = tmp_entry->vme_next;
6183				continue;
6184			}
6185
6186			if (old_object != VM_OBJECT_NULL) {
6187				if(entry->is_sub_map) {
6188					if(entry->use_pmap) {
6189#ifndef NO_NESTED_PMAP
6190						pmap_unnest(dst_map->pmap,
6191							    (addr64_t)entry->vme_start,
6192							    entry->vme_end - entry->vme_start);
6193#endif	/* NO_NESTED_PMAP */
6194						if(dst_map->mapped) {
6195							/* clean up parent */
6196							/* map/maps */
6197							vm_map_submap_pmap_clean(
6198								dst_map, entry->vme_start,
6199								entry->vme_end,
6200								entry->object.sub_map,
6201								entry->offset);
6202						}
6203					} else {
6204						vm_map_submap_pmap_clean(
6205							dst_map, entry->vme_start,
6206							entry->vme_end,
6207							entry->object.sub_map,
6208							entry->offset);
6209					}
6210				   	vm_map_deallocate(
6211						entry->object.sub_map);
6212			   	} else {
6213					if(dst_map->mapped) {
6214						vm_object_pmap_protect(
6215							entry->object.vm_object,
6216							entry->offset,
6217							entry->vme_end
6218							- entry->vme_start,
6219							PMAP_NULL,
6220							entry->vme_start,
6221							VM_PROT_NONE);
6222					} else {
6223						pmap_remove(dst_map->pmap,
6224							    (addr64_t)(entry->vme_start),
6225							    (addr64_t)(entry->vme_end));
6226					}
6227					vm_object_deallocate(old_object);
6228			   	}
6229			}
6230
6231			entry->is_sub_map = FALSE;
6232			entry->object = copy_entry->object;
6233			object = entry->object.vm_object;
6234			entry->needs_copy = copy_entry->needs_copy;
6235			entry->wired_count = 0;
6236			entry->user_wired_count = 0;
6237			offset = entry->offset = copy_entry->offset;
6238
6239			vm_map_copy_entry_unlink(copy, copy_entry);
6240			vm_map_copy_entry_dispose(copy, copy_entry);
6241
6242			/*
6243			 * We could try to push pages into the pmap at this point, BUT
6244			 * this optimization only saved on average 2 us per page if ALL
6245			 * the pages in the source were currently mapped
6246			 * and ALL the pages in the dest were touched.  If fewer than
6247			 * 2/3 of the pages were touched, this optimization actually cost more cycles.
6248			 * It also puts a lot of pressure on the pmap layer w/r to mapping structures.
6249			 */
6250
6251			/*
6252			 *	Set up for the next iteration.  The map
6253			 *	has not been unlocked, so the next
6254			 *	address should be at the end of this
6255			 *	entry, and the next map entry should be
6256			 *	the one following it.
6257			 */
6258
6259			start = tmp_entry->vme_end;
6260			tmp_entry = tmp_entry->vme_next;
6261		} else {
6262			vm_map_version_t	version;
6263			vm_object_t		dst_object = entry->object.vm_object;
6264			vm_object_offset_t	dst_offset = entry->offset;
6265			kern_return_t		r;
6266
6267			/*
6268			 *	Take an object reference, and record
6269			 *	the map version information so that the
6270			 *	map can be safely unlocked.
6271			 */
6272
6273			vm_object_reference(dst_object);
6274
6275			/* account for unlock bumping up timestamp */
6276			version.main_timestamp = dst_map->timestamp + 1;
6277
6278			vm_map_unlock(dst_map);
6279
6280			/*
6281			 *	Copy as much as possible in one pass
6282			 */
6283
6284			copy_size = size;
6285			r = vm_fault_copy(
6286				copy_entry->object.vm_object,
6287				copy_entry->offset,
6288				&copy_size,
6289				dst_object,
6290				dst_offset,
6291				dst_map,
6292				&version,
6293				THREAD_UNINT );
6294
6295			/*
6296			 *	Release the object reference
6297			 */
6298
6299			vm_object_deallocate(dst_object);
6300
6301			/*
6302			 *	If a hard error occurred, return it now
6303			 */
6304
6305			if (r != KERN_SUCCESS)
6306				return(r);
6307
6308			if (copy_size != 0) {
6309				/*
6310				 *	Dispose of the copied region
6311				 */
6312
6313				vm_map_copy_clip_end(copy, copy_entry,
6314						     copy_entry->vme_start + copy_size);
6315				vm_map_copy_entry_unlink(copy, copy_entry);
6316				vm_object_deallocate(copy_entry->object.vm_object);
6317				vm_map_copy_entry_dispose(copy, copy_entry);
6318			}
6319
6320			/*
6321			 *	Pick up in the destination map where we left off.
6322			 *
6323			 *	Use the version information to avoid a lookup
6324			 *	in the normal case.
6325			 */
6326
6327			start += copy_size;
6328			vm_map_lock(dst_map);
6329			if (version.main_timestamp == dst_map->timestamp) {
6330				/* We can safely use saved tmp_entry value */
6331
6332				vm_map_clip_end(dst_map, tmp_entry, start);
6333				tmp_entry = tmp_entry->vme_next;
6334			} else {
6335				/* Must do lookup of tmp_entry */
6336
6337				if (!vm_map_lookup_entry(dst_map, start, &tmp_entry)) {
6338					vm_map_unlock(dst_map);
6339					return(KERN_INVALID_ADDRESS);
6340				}
6341				vm_map_clip_start(dst_map, tmp_entry, start);
6342			}
6343		}
6344	}/* while */
6345
6346	return(KERN_SUCCESS);
6347}/* vm_map_copy_overwrite_aligned */
6348
6349/*
6350 *	Routine: vm_map_copyin_kernel_buffer [internal use only]
6351 *
6352 *	Description:
6353 *		Copy in data to a kernel buffer from space in the
6354 *		source map. The original space may be optionally
6355 *		deallocated.
6356 *
6357 *		If successful, returns a new copy object.
6358 */
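
/*
 * Layout note (illustrative): the kernel-buffer copy is a single kalloc'd
 * block, with the vm_map_copy header immediately followed by the raw data,
 * so cpy_kdata simply points just past the header:
 *
 *	+--------------------+------------------------+
 *	| struct vm_map_copy |    len bytes of data   |
 *	+--------------------+------------------------+
 *	^ copy                 ^ copy->cpy_kdata == (void *)(copy + 1)
 */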
6359static kern_return_t
6360vm_map_copyin_kernel_buffer(
6361	vm_map_t	src_map,
6362	vm_map_offset_t	src_addr,
6363	vm_map_size_t	len,
6364	boolean_t	src_destroy,
6365	vm_map_copy_t	*copy_result)
6366{
6367	kern_return_t kr;
6368	vm_map_copy_t copy;
6369	vm_map_size_t kalloc_size = sizeof(struct vm_map_copy) + len;
6370
6371	copy = (vm_map_copy_t) kalloc(kalloc_size);
6372	if (copy == VM_MAP_COPY_NULL) {
6373		return KERN_RESOURCE_SHORTAGE;
6374	}
6375	copy->type = VM_MAP_COPY_KERNEL_BUFFER;
6376	copy->size = len;
6377	copy->offset = 0;
6378	copy->cpy_kdata = (void *) (copy + 1);
6379	copy->cpy_kalloc_size = kalloc_size;
6380
6381	kr = copyinmap(src_map, src_addr, copy->cpy_kdata, len);
6382	if (kr != KERN_SUCCESS) {
6383		kfree(copy, kalloc_size);
6384		return kr;
6385	}
6386	if (src_destroy) {
6387		(void) vm_map_remove(src_map, vm_map_trunc_page(src_addr),
6388				     vm_map_round_page(src_addr + len),
6389				     (VM_MAP_REMOVE_INTERRUPTIBLE |
6390				      VM_MAP_REMOVE_WAIT_FOR_KWIRE |
6391				      ((src_map == kernel_map) ?
6392				       VM_MAP_REMOVE_KUNWIRE : 0)));
6393	}
6394	*copy_result = copy;
6395	return KERN_SUCCESS;
6396}
6397
6398/*
6399 *	Routine: vm_map_copyout_kernel_buffer	[internal use only]
6400 *
6401 *	Description:
6402 *		Copy out data from a kernel buffer into space in the
6403 *		destination map. The space may optionally be dynamically
6404 *		allocated.
6405 *
6406 *		If successful, consumes the copy object.
6407 *		Otherwise, the caller is responsible for it.
6408 */
6409static int vm_map_copyout_kernel_buffer_failures = 0;
6410static kern_return_t
6411vm_map_copyout_kernel_buffer(
6412	vm_map_t		map,
6413	vm_map_address_t	*addr,	/* IN/OUT */
6414	vm_map_copy_t		copy,
6415	boolean_t		overwrite)
6416{
6417	kern_return_t kr = KERN_SUCCESS;
6418	thread_t thread = current_thread();
6419
6420	if (!overwrite) {
6421
6422		/*
6423		 * Allocate space in the target map for the data
6424		 */
6425		*addr = 0;
6426		kr = vm_map_enter(map,
6427				  addr,
6428				  vm_map_round_page(copy->size),
6429				  (vm_map_offset_t) 0,
6430				  VM_FLAGS_ANYWHERE,
6431				  VM_OBJECT_NULL,
6432				  (vm_object_offset_t) 0,
6433				  FALSE,
6434				  VM_PROT_DEFAULT,
6435				  VM_PROT_ALL,
6436				  VM_INHERIT_DEFAULT);
6437		if (kr != KERN_SUCCESS)
6438			return kr;
6439	}
6440
6441	/*
6442	 * Copyout the data from the kernel buffer to the target map.
6443	 */
6444	if (thread->map == map) {
6445
6446		/*
6447		 * If the target map is the current map, just do
6448		 * the copy.
6449		 */
6450		if (copyout(copy->cpy_kdata, *addr, copy->size)) {
6451			kr = KERN_INVALID_ADDRESS;
6452		}
6453	}
6454	else {
6455		vm_map_t oldmap;
6456
6457		/*
6458		 * If the target map is another map, assume the
6459		 * target's address space identity for the duration
6460		 * of the copy.
6461		 */
6462		vm_map_reference(map);
6463		oldmap = vm_map_switch(map);
6464
6465		if (copyout(copy->cpy_kdata, *addr, copy->size)) {
6466			vm_map_copyout_kernel_buffer_failures++;
6467			kr = KERN_INVALID_ADDRESS;
6468		}
6469
6470		(void) vm_map_switch(oldmap);
6471		vm_map_deallocate(map);
6472	}
6473
6474	if (kr != KERN_SUCCESS) {
6475		/* the copy failed, clean up */
6476		if (!overwrite) {
6477			/*
6478			 * Deallocate the space we allocated in the target map.
6479			 */
6480			(void) vm_map_remove(map,
6481					     vm_map_trunc_page(*addr),
6482					     vm_map_round_page(*addr +
6483							       vm_map_round_page(copy->size)),
6484					     VM_MAP_NO_FLAGS);
6485			*addr = 0;
6486		}
6487	} else {
6488		/* copy was successful, discard the copy structure */
6489		kfree(copy, copy->cpy_kalloc_size);
6490	}
6491
6492	return kr;
6493}
6494
6495/*
6496 *	Macro:		vm_map_copy_insert
6497 *
6498 *	Description:
6499 *		Link a copy chain ("copy") into a map at the
6500 *		specified location (after "where").
6501 *	Side effects:
6502 *		The copy chain is destroyed.
6503 *	Warning:
6504 *		The arguments are evaluated multiple times.
6505 */
6506#define	vm_map_copy_insert(map, where, copy)				\
6507MACRO_BEGIN								\
6508	vm_map_t VMCI_map;						\
6509	vm_map_entry_t VMCI_where;					\
6510	vm_map_copy_t VMCI_copy;					\
6511	VMCI_map = (map);						\
6512	VMCI_where = (where);						\
6513	VMCI_copy = (copy);						\
6514	((VMCI_where->vme_next)->vme_prev = vm_map_copy_last_entry(VMCI_copy))\
6515		->vme_next = (VMCI_where->vme_next);			\
6516	((VMCI_where)->vme_next = vm_map_copy_first_entry(VMCI_copy))	\
6517		->vme_prev = VMCI_where;				\
6518	VMCI_map->hdr.nentries += VMCI_copy->cpy_hdr.nentries;		\
6519	UPDATE_FIRST_FREE(VMCI_map, VMCI_map->first_free);		\
6520	zfree(vm_map_copy_zone, VMCI_copy);				\
6521MACRO_END
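
/*
 * In effect (an illustrative restatement of the splice above): the copy's
 * entry chain is linked in after "where" in O(1), the destination map's
 * entry count grows by the copy's, and the now-empty copy header is freed.
 * The caller is expected to hold the map lock for writing, as
 * vm_map_copyout() does when it uses this macro.
 */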
6522
6523/*
6524 *	Routine:	vm_map_copyout
6525 *
6526 *	Description:
6527 *		Copy out a copy chain ("copy") into newly-allocated
6528 *		space in the destination map.
6529 *
6530 *		If successful, consumes the copy object.
6531 *		Otherwise, the caller is responsible for it.
6532 */
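
/*
 * Hedged usage sketch (hypothetical maps, addresses and length): moving a
 * range from one map into freshly allocated space in another.  Per the
 * description above, success consumes the copy object; on failure the
 * caller remains responsible for it.
 *
 *	vm_map_copy_t		copy;
 *	vm_map_address_t	new_addr;
 *	kern_return_t		kr;
 *
 *	kr = vm_map_copyin(src_map, src_addr, len, FALSE, &copy);
 *	if (kr == KERN_SUCCESS) {
 *		kr = vm_map_copyout(dst_map, &new_addr, copy);
 *		if (kr != KERN_SUCCESS)
 *			vm_map_copy_discard(copy);
 *	}
 */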
6533kern_return_t
6534vm_map_copyout(
6535	vm_map_t		dst_map,
6536	vm_map_address_t	*dst_addr,	/* OUT */
6537	vm_map_copy_t		copy)
6538{
6539	vm_map_size_t		size;
6540	vm_map_size_t		adjustment;
6541	vm_map_offset_t		start;
6542	vm_object_offset_t	vm_copy_start;
6543	vm_map_entry_t		last;
6544	register
6545	vm_map_entry_t		entry;
6546
6547	/*
6548	 *	Check for null copy object.
6549	 */
6550
6551	if (copy == VM_MAP_COPY_NULL) {
6552		*dst_addr = 0;
6553		return(KERN_SUCCESS);
6554	}
6555
6556	/*
6557	 *	Check for special copy object, created
6558	 *	by vm_map_copyin_object.
6559	 */
6560
6561	if (copy->type == VM_MAP_COPY_OBJECT) {
6562		vm_object_t 		object = copy->cpy_object;
6563		kern_return_t 		kr;
6564		vm_object_offset_t	offset;
6565
6566		offset = vm_object_trunc_page(copy->offset);
6567		size = vm_map_round_page(copy->size +
6568					 (vm_map_size_t)(copy->offset - offset));
6569		*dst_addr = 0;
6570		kr = vm_map_enter(dst_map, dst_addr, size,
6571				  (vm_map_offset_t) 0, VM_FLAGS_ANYWHERE,
6572				  object, offset, FALSE,
6573				  VM_PROT_DEFAULT, VM_PROT_ALL,
6574				  VM_INHERIT_DEFAULT);
6575		if (kr != KERN_SUCCESS)
6576			return(kr);
6577		/* Account for non-pagealigned copy object */
6578		*dst_addr += (vm_map_offset_t)(copy->offset - offset);
6579		zfree(vm_map_copy_zone, copy);
6580		return(KERN_SUCCESS);
6581	}
6582
6583	/*
6584	 *	Check for special kernel buffer allocated
6585	 *	by new_ipc_kmsg_copyin.
6586	 */
6587
6588	if (copy->type == VM_MAP_COPY_KERNEL_BUFFER) {
6589		return(vm_map_copyout_kernel_buffer(dst_map, dst_addr,
6590						    copy, FALSE));
6591	}
6592
6593	/*
6594	 *	Find space for the data
6595	 */
6596
6597	vm_copy_start = vm_object_trunc_page(copy->offset);
6598	size =	vm_map_round_page((vm_map_size_t)copy->offset + copy->size)
6599		- vm_copy_start;
6600
6601StartAgain: ;
6602
6603	vm_map_lock(dst_map);
6604	assert(first_free_is_valid(dst_map));
6605	start = ((last = dst_map->first_free) == vm_map_to_entry(dst_map)) ?
6606		vm_map_min(dst_map) : last->vme_end;
6607
6608	while (TRUE) {
6609		vm_map_entry_t	next = last->vme_next;
6610		vm_map_offset_t	end = start + size;
6611
6612		if ((end > dst_map->max_offset) || (end < start)) {
6613			if (dst_map->wait_for_space) {
6614				if (size <= (dst_map->max_offset - dst_map->min_offset)) {
6615					assert_wait((event_t) dst_map,
6616						    THREAD_INTERRUPTIBLE);
6617					vm_map_unlock(dst_map);
6618					thread_block(THREAD_CONTINUE_NULL);
6619					goto StartAgain;
6620				}
6621			}
6622			vm_map_unlock(dst_map);
6623			return(KERN_NO_SPACE);
6624		}
6625
6626		if ((next == vm_map_to_entry(dst_map)) ||
6627		    (next->vme_start >= end))
6628			break;
6629
6630		last = next;
6631		start = last->vme_end;
6632	}
6633
6634	/*
6635	 *	Since we're going to just drop the map
6636	 *	entries from the copy into the destination
6637	 *	map, they must come from the same pool.
6638	 */
6639
6640	if (copy->cpy_hdr.entries_pageable != dst_map->hdr.entries_pageable) {
6641		/*
6642		 * Mismatches occur when dealing with the default
6643		 * pager.
6644		 */
6645		zone_t		old_zone;
6646		vm_map_entry_t	next, new;
6647
6648		/*
6649		 * Find the zone that the copies were allocated from
6650		 */
6651		old_zone = (copy->cpy_hdr.entries_pageable)
6652			? vm_map_entry_zone
6653			: vm_map_kentry_zone;
6654		entry = vm_map_copy_first_entry(copy);
6655
6656		/*
6657		 * Reinitialize the copy so that vm_map_copy_entry_link
6658		 * will work.
6659		 */
6660		copy->cpy_hdr.nentries = 0;
6661		copy->cpy_hdr.entries_pageable = dst_map->hdr.entries_pageable;
6662		vm_map_copy_first_entry(copy) =
6663			vm_map_copy_last_entry(copy) =
6664			vm_map_copy_to_entry(copy);
6665
6666		/*
6667		 * Copy each entry.
6668		 */
6669		while (entry != vm_map_copy_to_entry(copy)) {
6670			new = vm_map_copy_entry_create(copy);
6671			vm_map_entry_copy_full(new, entry);
6672			new->use_pmap = FALSE;	/* clr address space specifics */
6673			vm_map_copy_entry_link(copy,
6674					       vm_map_copy_last_entry(copy),
6675					       new);
6676			next = entry->vme_next;
6677			zfree(old_zone, entry);
6678			entry = next;
6679		}
6680	}
6681
6682	/*
6683	 *	Adjust the addresses in the copy chain, and
6684	 *	reset the region attributes.
6685	 */
6686
6687	adjustment = start - vm_copy_start;
6688	for (entry = vm_map_copy_first_entry(copy);
6689	     entry != vm_map_copy_to_entry(copy);
6690	     entry = entry->vme_next) {
6691		entry->vme_start += adjustment;
6692		entry->vme_end += adjustment;
6693
6694		entry->inheritance = VM_INHERIT_DEFAULT;
6695		entry->protection = VM_PROT_DEFAULT;
6696		entry->max_protection = VM_PROT_ALL;
6697		entry->behavior = VM_BEHAVIOR_DEFAULT;
6698
6699		/*
6700		 * If the entry is now wired,
6701		 * map the pages into the destination map.
6702		 */
6703		if (entry->wired_count != 0) {
6704			register vm_map_offset_t va;
6705			vm_object_offset_t	 offset;
6706			register vm_object_t object;
6707			vm_prot_t prot;
6708			int	type_of_fault;
6709
6710			object = entry->object.vm_object;
6711			offset = entry->offset;
6712			va = entry->vme_start;
6713
6714			pmap_pageable(dst_map->pmap,
6715				      entry->vme_start,
6716				      entry->vme_end,
6717				      TRUE);
6718
6719			while (va < entry->vme_end) {
6720				register vm_page_t	m;
6721
6722				/*
6723				 * Look up the page in the object.
6724				 * Assert that the page will be found in the
6725				 * top object:
6726				 * either
6727				 *	the object was newly created by
6728				 *	vm_object_copy_slowly, and has
6729				 *	copies of all of the pages from
6730				 *	the source object
6731				 * or
6732				 *	the object was moved from the old
6733				 *	map entry; because the old map
6734				 *	entry was wired, all of the pages
6735				 *	were in the top-level object.
6736				 *	(XXX not true if we wire pages for
6737				 *	 reading)
6738				 */
6739				vm_object_lock(object);
6740
6741				m = vm_page_lookup(object, offset);
6742				if (m == VM_PAGE_NULL || m->wire_count == 0 ||
6743				    m->absent)
6744					panic("vm_map_copyout: wiring %p", m);
6745
6746				/*
6747				 * ENCRYPTED SWAP:
6748				 * The page is assumed to be wired here, so it
6749				 * shouldn't be encrypted.  Otherwise, we
6750				 * couldn't enter it in the page table, since
6751				 * we don't want the user to see the encrypted
6752				 * data.
6753				 */
6754				ASSERT_PAGE_DECRYPTED(m);
6755
6756				prot = entry->protection;
6757
6758				if (override_nx(dst_map, entry->alias) && prot)
6759				        prot |= VM_PROT_EXECUTE;
6760
6761				type_of_fault = DBG_CACHE_HIT_FAULT;
6762
6763				vm_fault_enter(m, dst_map->pmap, va, prot,
6764					       m->wire_count != 0, FALSE, FALSE,
6765					       &type_of_fault);
6766
6767				vm_object_unlock(object);
6768
6769				offset += PAGE_SIZE_64;
6770				va += PAGE_SIZE;
6771			}
6772		}
6773	}
6774
6775	/*
6776	 *	Correct the page alignment for the result
6777	 */
6778
6779	*dst_addr = start + (copy->offset - vm_copy_start);
6780
6781	/*
6782	 *	Update the hints and the map size
6783	 */
6784
6785	SAVE_HINT_MAP_WRITE(dst_map, vm_map_copy_last_entry(copy));
6786
6787	dst_map->size += size;
6788
6789	/*
6790	 *	Link in the copy
6791	 */
6792
6793	vm_map_copy_insert(dst_map, last, copy);
6794
6795	vm_map_unlock(dst_map);
6796
6797	/*
6798	 * XXX	If wiring_required, call vm_map_pageable
6799	 */
6800
6801	return(KERN_SUCCESS);
6802}
6803
6804/*
6805 *	Routine:	vm_map_copyin
6806 *
6807 *	Description:
6808 *		see vm_map_copyin_common.  Exported via Unsupported.exports.
6809 *
6810 */
6811
6812#undef vm_map_copyin
6813
6814kern_return_t
6815vm_map_copyin(
6816	vm_map_t			src_map,
6817	vm_map_address_t	src_addr,
6818	vm_map_size_t		len,
6819	boolean_t			src_destroy,
6820	vm_map_copy_t		*copy_result)	/* OUT */
6821{
6822	return(vm_map_copyin_common(src_map, src_addr, len, src_destroy,
6823					FALSE, copy_result, FALSE));
6824}
6825
6826/*
6827 *	Routine:	vm_map_copyin_common
6828 *
6829 *	Description:
6830 *		Copy the specified region (src_addr, len) from the
6831 *		source address space (src_map), possibly removing
6832 *		the region from the source address space (src_destroy).
6833 *
6834 *	Returns:
6835 *		A vm_map_copy_t object (copy_result), suitable for
6836 *		insertion into another address space (using vm_map_copyout),
6837 *		copying over another address space region (using
6838 *		vm_map_copy_overwrite).  If the copy is unused, it
6839 *		should be destroyed (using vm_map_copy_discard).
6840 *
6841 *	In/out conditions:
6842 *		The source map should not be locked on entry.
6843 */
6844
6845typedef struct submap_map {
6846	vm_map_t	parent_map;
6847	vm_map_offset_t	base_start;
6848	vm_map_offset_t	base_end;
6849	vm_map_size_t	base_len;
6850	struct submap_map *next;
6851} submap_map_t;
6852
6853kern_return_t
6854vm_map_copyin_common(
6855	vm_map_t	src_map,
6856	vm_map_address_t src_addr,
6857	vm_map_size_t	len,
6858	boolean_t	src_destroy,
6859	__unused boolean_t	src_volatile,
6860	vm_map_copy_t	*copy_result,	/* OUT */
6861	boolean_t	use_maxprot)
6862{
6863	vm_map_entry_t	tmp_entry;	/* Result of last map lookup --
6864					 * in multi-level lookup, this
6865					 * entry contains the actual
6866					 * vm_object/offset.
6867					 */
6868	register
6869	vm_map_entry_t	new_entry = VM_MAP_ENTRY_NULL;	/* Map entry for copy */
6870
6871	vm_map_offset_t	src_start;	/* Start of current entry --
6872					 * where copy is taking place now
6873					 */
6874	vm_map_offset_t	src_end;	/* End of entire region to be
6875					 * copied */
6876	vm_map_offset_t src_base;
6877	vm_map_t	base_map = src_map;
6878	boolean_t	map_share=FALSE;
6879	submap_map_t	*parent_maps = NULL;
6880
6881	register
6882	vm_map_copy_t	copy;		/* Resulting copy */
6883	vm_map_address_t	copy_addr;
6884
6885	/*
6886	 *	Check for copies of zero bytes.
6887	 */
6888
6889	if (len == 0) {
6890		*copy_result = VM_MAP_COPY_NULL;
6891		return(KERN_SUCCESS);
6892	}
6893
6894	/*
6895	 *	Check that the end address doesn't overflow
6896	 */
6897	src_end = src_addr + len;
6898	if (src_end < src_addr)
6899		return KERN_INVALID_ADDRESS;
6900
6901	/*
6902	 * If the copy is sufficiently small, use a kernel buffer instead
6903	 * of making a virtual copy.  The theory being that the cost of
6904	 * setting up VM (and taking C-O-W faults) dominates the copy costs
6905	 * for small regions.
6906	 */
6907	if ((len < msg_ool_size_small) && !use_maxprot)
6908		return vm_map_copyin_kernel_buffer(src_map, src_addr, len,
6909						   src_destroy, copy_result);
6910
6911	/*
6912	 *	Compute (page aligned) start and end of region
6913	 */
6914	src_start = vm_map_trunc_page(src_addr);
6915	src_end = vm_map_round_page(src_end);
6916
6917	XPR(XPR_VM_MAP, "vm_map_copyin_common map 0x%x addr 0x%x len 0x%x dest %d\n", (natural_t)src_map, src_addr, len, src_destroy, 0);
6918
6919	/*
6920	 *	Allocate a header element for the list.
6921	 *
6922	 *	Use the start and end in the header to
6923	 *	remember the endpoints prior to rounding.
6924	 */
6925
6926	copy = (vm_map_copy_t) zalloc(vm_map_copy_zone);
6927	vm_map_copy_first_entry(copy) =
6928		vm_map_copy_last_entry(copy) = vm_map_copy_to_entry(copy);
6929	copy->type = VM_MAP_COPY_ENTRY_LIST;
6930	copy->cpy_hdr.nentries = 0;
6931	copy->cpy_hdr.entries_pageable = TRUE;
6932
6933	copy->offset = src_addr;
6934	copy->size = len;
6935
6936	new_entry = vm_map_copy_entry_create(copy);
6937
6938#define	RETURN(x)						\
6939	MACRO_BEGIN						\
6940	vm_map_unlock(src_map);					\
6941	if(src_map != base_map)					\
6942		vm_map_deallocate(src_map);			\
6943	if (new_entry != VM_MAP_ENTRY_NULL)			\
6944		vm_map_copy_entry_dispose(copy,new_entry);	\
6945	vm_map_copy_discard(copy);				\
6946	{							\
6947		submap_map_t	*_ptr;				\
6948								\
6949		for(_ptr = parent_maps; _ptr != NULL; _ptr = parent_maps) { \
6950			parent_maps=parent_maps->next;		\
6951			if (_ptr->parent_map != base_map)	\
6952				vm_map_deallocate(_ptr->parent_map);	\
6953			kfree(_ptr, sizeof(submap_map_t));	\
6954		}						\
6955	}							\
6956	MACRO_RETURN(x);					\
6957	MACRO_END
6958
6959	/*
6960	 *	Find the beginning of the region.
6961	 */
6962
6963 	vm_map_lock(src_map);
6964
6965	if (!vm_map_lookup_entry(src_map, src_start, &tmp_entry))
6966		RETURN(KERN_INVALID_ADDRESS);
6967	if(!tmp_entry->is_sub_map) {
6968		vm_map_clip_start(src_map, tmp_entry, src_start);
6969	}
6970	/* set for later submap fix-up */
6971	copy_addr = src_start;
6972
6973	/*
6974	 *	Go through entries until we get to the end.
6975	 */
6976
6977	while (TRUE) {
6978		register
6979		vm_map_entry_t	src_entry = tmp_entry;	/* Top-level entry */
6980		vm_map_size_t	src_size;		/* Size of source
6981							 * map entry (in both
6982							 * maps)
6983							 */
6984
6985		register
6986		vm_object_t		src_object;	/* Object to copy */
6987		vm_object_offset_t	src_offset;
6988
6989		boolean_t	src_needs_copy;		/* Should source map
6990							 * be made read-only
6991							 * for copy-on-write?
6992							 */
6993
6994		boolean_t	new_entry_needs_copy;	/* Will new entry be COW? */
6995
6996		boolean_t	was_wired;		/* Was source wired? */
6997		vm_map_version_t version;		/* Version before locks
6998							 * dropped to make copy
6999							 */
7000		kern_return_t	result;			/* Return value from
7001							 * copy_strategically.
7002							 */
7003		while(tmp_entry->is_sub_map) {
7004			vm_map_size_t submap_len;
7005			submap_map_t *ptr;
7006
7007			ptr = (submap_map_t *)kalloc(sizeof(submap_map_t));
7008			ptr->next = parent_maps;
7009			parent_maps = ptr;
7010			ptr->parent_map = src_map;
7011			ptr->base_start = src_start;
7012			ptr->base_end = src_end;
7013			submap_len = tmp_entry->vme_end - src_start;
7014			if(submap_len > (src_end-src_start))
7015				submap_len = src_end-src_start;
7016			ptr->base_len = submap_len;
7017
7018			src_start -= tmp_entry->vme_start;
7019			src_start += tmp_entry->offset;
7020			src_end = src_start + submap_len;
7021			src_map = tmp_entry->object.sub_map;
7022			vm_map_lock(src_map);
7023			/* keep an outstanding reference for all maps in */
7024			/* the parents tree except the base map */
7025			vm_map_reference(src_map);
7026			vm_map_unlock(ptr->parent_map);
7027			if (!vm_map_lookup_entry(
7028				    src_map, src_start, &tmp_entry))
7029				RETURN(KERN_INVALID_ADDRESS);
7030			map_share = TRUE;
7031			if(!tmp_entry->is_sub_map)
7032				vm_map_clip_start(src_map, tmp_entry, src_start);
7033			src_entry = tmp_entry;
7034		}
7035		/* we are now in the lowest level submap... */
7036
7037		if ((tmp_entry->object.vm_object != VM_OBJECT_NULL) &&
7038		    (tmp_entry->object.vm_object->phys_contiguous)) {
7039			/* This is not supported for now.  In the    */
7040			/* future we will need to detect the         */
7041			/* phys_contig condition and then upgrade    */
7042			/* copy_slowly to do a physical copy from    */
7043			/* the device-memory-based object.  We can   */
7044			/* piggy-back off of the was_wired boolean   */
7045			/* to set up the proper handling.            */
7046			RETURN(KERN_PROTECTION_FAILURE);
7047		}
7048		/*
7049		 *	Create a new address map entry to hold the result.
7050		 *	Fill in the fields from the appropriate source entries.
7051		 *	We must unlock the source map to do this if we need
7052		 *	to allocate a map entry.
7053		 */
7054		if (new_entry == VM_MAP_ENTRY_NULL) {
7055			version.main_timestamp = src_map->timestamp;
7056			vm_map_unlock(src_map);
7057
7058			new_entry = vm_map_copy_entry_create(copy);
7059
7060			vm_map_lock(src_map);
7061			if ((version.main_timestamp + 1) != src_map->timestamp) {
7062				if (!vm_map_lookup_entry(src_map, src_start,
7063							 &tmp_entry)) {
7064					RETURN(KERN_INVALID_ADDRESS);
7065				}
7066				if (!tmp_entry->is_sub_map)
7067					vm_map_clip_start(src_map, tmp_entry, src_start);
7068				continue; /* restart w/ new tmp_entry */
7069			}
7070		}
7071
7072		/*
7073		 *	Verify that the region can be read.
7074		 */
7075		if (((src_entry->protection & VM_PROT_READ) == VM_PROT_NONE &&
7076		     !use_maxprot) ||
7077		    (src_entry->max_protection & VM_PROT_READ) == 0)
7078			RETURN(KERN_PROTECTION_FAILURE);
7079
7080		/*
7081		 *	Clip against the endpoints of the entire region.
7082		 */
7083
7084		vm_map_clip_end(src_map, src_entry, src_end);
7085
7086		src_size = src_entry->vme_end - src_start;
7087		src_object = src_entry->object.vm_object;
7088		src_offset = src_entry->offset;
7089		was_wired = (src_entry->wired_count != 0);
7090
7091		vm_map_entry_copy(new_entry, src_entry);
7092		new_entry->use_pmap = FALSE; /* clr address space specifics */
7093
7094		/*
7095		 *	Attempt non-blocking copy-on-write optimizations.
7096		 */
7097
7098		if (src_destroy &&
7099		    (src_object == VM_OBJECT_NULL ||
7100		     (src_object->internal && !src_object->true_share
7101		      && !map_share))) {
7102			/*
7103			 * If we are destroying the source, and the object
7104			 * is internal, we can move the object reference
7105			 * from the source to the copy.  The copy is
7106			 * copy-on-write only if the source is.
7107			 * We make another reference to the object, because
7108			 * destroying the source entry will deallocate it.
7109			 */
7110			vm_object_reference(src_object);
7111
7112			/*
7113			 * Copy is always unwired.  vm_map_entry_copy
7114			 * set its wired count to zero.
7115			 */
7116
7117			goto CopySuccessful;
7118		}
7119
7120
7121	RestartCopy:
7122		XPR(XPR_VM_MAP, "vm_map_copyin_common src_obj 0x%x ent 0x%x obj 0x%x was_wired %d\n",
7123		    src_object, new_entry, new_entry->object.vm_object,
7124		    was_wired, 0);
7125		if ((src_object == VM_OBJECT_NULL ||
7126		     (!was_wired && !map_share && !tmp_entry->is_shared)) &&
7127		    vm_object_copy_quickly(
7128			    &new_entry->object.vm_object,
7129			    src_offset,
7130			    src_size,
7131			    &src_needs_copy,
7132			    &new_entry_needs_copy)) {
7133
7134			new_entry->needs_copy = new_entry_needs_copy;
7135
7136			/*
7137			 *	Handle copy-on-write obligations
7138			 */
7139
7140			if (src_needs_copy && !tmp_entry->needs_copy) {
7141			        vm_prot_t prot;
7142
7143				prot = src_entry->protection & ~VM_PROT_WRITE;
7144
7145				if (override_nx(src_map, src_entry->alias) && prot)
7146				        prot |= VM_PROT_EXECUTE;
7147
7148				vm_object_pmap_protect(
7149					src_object,
7150					src_offset,
7151					src_size,
7152			      		(src_entry->is_shared ?
7153					 PMAP_NULL
7154					 : src_map->pmap),
7155					src_entry->vme_start,
7156					prot);
7157
7158				tmp_entry->needs_copy = TRUE;
7159			}
7160
7161			/*
7162			 *	The map has never been unlocked, so it's safe
7163			 *	to move to the next entry rather than doing
7164			 *	another lookup.
7165			 */
7166
7167			goto CopySuccessful;
7168		}
7169
7170		/*
7171		 *	Take an object reference, so that we may
7172		 *	release the map lock(s).
7173		 */
7174
7175		assert(src_object != VM_OBJECT_NULL);
7176		vm_object_reference(src_object);
7177
7178		/*
7179		 *	Record the timestamp for later verification.
7180		 *	Unlock the map.
7181		 */
7182
7183		version.main_timestamp = src_map->timestamp;
7184		vm_map_unlock(src_map);	/* Increments timestamp once! */
7185
7186		/*
7187		 *	Perform the copy
7188		 */
7189
7190		if (was_wired) {
7191		CopySlowly:
7192			vm_object_lock(src_object);
7193			result = vm_object_copy_slowly(
7194				src_object,
7195				src_offset,
7196				src_size,
7197				THREAD_UNINT,
7198				&new_entry->object.vm_object);
7199			new_entry->offset = 0;
7200			new_entry->needs_copy = FALSE;
7201
7202		}
7203		else if (src_object->copy_strategy == MEMORY_OBJECT_COPY_SYMMETRIC &&
7204			 (tmp_entry->is_shared  || map_share)) {
7205		  	vm_object_t new_object;
7206
7207			vm_object_lock_shared(src_object);
7208			new_object = vm_object_copy_delayed(
7209				src_object,
7210				src_offset,
7211				src_size,
7212				TRUE);
7213			if (new_object == VM_OBJECT_NULL)
7214			  	goto CopySlowly;
7215
7216			new_entry->object.vm_object = new_object;
7217			new_entry->needs_copy = TRUE;
7218			result = KERN_SUCCESS;
7219
7220		} else {
7221			result = vm_object_copy_strategically(src_object,
7222							      src_offset,
7223							      src_size,
7224							      &new_entry->object.vm_object,
7225							      &new_entry->offset,
7226							      &new_entry_needs_copy);
7227
7228			new_entry->needs_copy = new_entry_needs_copy;
7229		}
7230
7231		if (result != KERN_SUCCESS &&
7232		    result != KERN_MEMORY_RESTART_COPY) {
7233			vm_map_lock(src_map);
7234			RETURN(result);
7235		}
7236
7237		/*
7238		 *	Throw away the extra reference
7239		 */
7240
7241		vm_object_deallocate(src_object);
7242
7243		/*
7244		 *	Verify that the map has not substantially
7245		 *	changed while the copy was being made.
7246		 */
7247
7248		vm_map_lock(src_map);
7249
7250		if ((version.main_timestamp + 1) == src_map->timestamp)
7251			goto VerificationSuccessful;
7252
7253		/*
7254		 *	Simple version comparison failed.
7255		 *
7256		 *	Retry the lookup and verify that the
7257		 *	same object/offset are still present.
7258		 *
7259		 *	[Note: a memory manager that colludes with
7260		 *	the calling task can detect that we have
7261		 *	cheated.  While the map was unlocked, the
7262		 *	mapping could have been changed and restored.]
7263		 */
7264
7265		if (!vm_map_lookup_entry(src_map, src_start, &tmp_entry)) {
7266			RETURN(KERN_INVALID_ADDRESS);
7267		}
7268
7269		src_entry = tmp_entry;
7270		vm_map_clip_start(src_map, src_entry, src_start);
7271
7272		if ((((src_entry->protection & VM_PROT_READ) == VM_PROT_NONE) &&
7273		     !use_maxprot) ||
7274		    ((src_entry->max_protection & VM_PROT_READ) == 0))
7275			goto VerificationFailed;
7276
7277		if (src_entry->vme_end < new_entry->vme_end)
7278			src_size = (new_entry->vme_end = src_entry->vme_end) - src_start;
7279
7280		if ((src_entry->object.vm_object != src_object) ||
7281		    (src_entry->offset != src_offset) ) {
7282
7283			/*
7284			 *	Verification failed.
7285			 *
7286			 *	Start over with this top-level entry.
7287			 */
7288
7289		VerificationFailed: ;
7290
7291			vm_object_deallocate(new_entry->object.vm_object);
7292			tmp_entry = src_entry;
7293			continue;
7294		}
7295
7296		/*
7297		 *	Verification succeeded.
7298		 */
7299
7300	VerificationSuccessful: ;
7301
7302		if (result == KERN_MEMORY_RESTART_COPY)
7303			goto RestartCopy;
7304
7305		/*
7306		 *	Copy succeeded.
7307		 */
7308
7309	CopySuccessful: ;
7310
7311		/*
7312		 *	Link in the new copy entry.
7313		 */
7314
7315		vm_map_copy_entry_link(copy, vm_map_copy_last_entry(copy),
7316				       new_entry);
7317
7318		/*
7319		 *	Determine whether the entire region
7320		 *	has been copied.
7321		 */
7322		src_base = src_start;
7323		src_start = new_entry->vme_end;
7324		new_entry = VM_MAP_ENTRY_NULL;
7325		while ((src_start >= src_end) && (src_end != 0)) {
7326			if (src_map != base_map) {
7327				submap_map_t	*ptr;
7328
7329				ptr = parent_maps;
7330				assert(ptr != NULL);
7331				parent_maps = parent_maps->next;
7332
7333				/* fix up the damage we did in that submap */
7334				vm_map_simplify_range(src_map,
7335						      src_base,
7336						      src_end);
7337
7338				vm_map_unlock(src_map);
7339				vm_map_deallocate(src_map);
7340				vm_map_lock(ptr->parent_map);
7341				src_map = ptr->parent_map;
7342				src_base = ptr->base_start;
7343				src_start = ptr->base_start + ptr->base_len;
7344				src_end = ptr->base_end;
7345				if ((src_end > src_start) &&
7346				    !vm_map_lookup_entry(
7347					    src_map, src_start, &tmp_entry))
7348					RETURN(KERN_INVALID_ADDRESS);
7349				kfree(ptr, sizeof(submap_map_t));
7350				if(parent_maps == NULL)
7351					map_share = FALSE;
7352				src_entry = tmp_entry->vme_prev;
7353			} else
7354				break;
7355		}
7356		if ((src_start >= src_end) && (src_end != 0))
7357			break;
7358
7359		/*
7360		 *	Verify that there are no gaps in the region
7361		 */
7362
7363		tmp_entry = src_entry->vme_next;
7364		if ((tmp_entry->vme_start != src_start) ||
7365		    (tmp_entry == vm_map_to_entry(src_map)))
7366			RETURN(KERN_INVALID_ADDRESS);
7367	}
7368
7369	/*
7370	 * If the source should be destroyed, do it now, since the
7371	 * copy was successful.
7372	 */
7373	if (src_destroy) {
7374		(void) vm_map_delete(src_map,
7375				     vm_map_trunc_page(src_addr),
7376				     src_end,
7377				     (src_map == kernel_map) ?
7378				     VM_MAP_REMOVE_KUNWIRE :
7379				     VM_MAP_NO_FLAGS,
7380				     VM_MAP_NULL);
7381	} else {
7382		/* fix up the damage we did in the base map */
7383		vm_map_simplify_range(src_map,
7384				      vm_map_trunc_page(src_addr),
7385				      vm_map_round_page(src_end));
7386	}
7387
7388	vm_map_unlock(src_map);
7389
7390	/* Fix-up start and end points in copy.  This is necessary */
7391	/* when the various entries in the copy object were picked */
7392	/* up from different sub-maps */
7393
7394	tmp_entry = vm_map_copy_first_entry(copy);
7395	while (tmp_entry != vm_map_copy_to_entry(copy)) {
7396		tmp_entry->vme_end = copy_addr +
7397			(tmp_entry->vme_end - tmp_entry->vme_start);
7398		tmp_entry->vme_start = copy_addr;
7399		copy_addr += tmp_entry->vme_end - tmp_entry->vme_start;
7400		tmp_entry = (struct vm_map_entry *)tmp_entry->vme_next;
7401	}
7402
7403	*copy_result = copy;
7404	return(KERN_SUCCESS);
7405
7406#undef	RETURN
7407}
7408
7409/*
7410 *	vm_map_copyin_object:
7411 *
7412 *	Create a copy object from an object.
7413 *	Our caller donates an object reference.
7414 */
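
/*
 * Hedged illustration (hypothetical caller, names not from this file):
 * wrapping a donated object reference so the region can later be mapped
 * into a task with vm_map_copyout().
 *
 *	vm_map_copy_t	copy;
 *
 *	vm_object_reference(object);		donate a reference
 *	vm_map_copyin_object(object, 0, size, &copy);
 *	... later ...
 *	vm_map_copyout(dst_map, &addr, copy);
 */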
7415
7416kern_return_t
7417vm_map_copyin_object(
7418	vm_object_t		object,
7419	vm_object_offset_t	offset,	/* offset of region in object */
7420	vm_object_size_t	size,	/* size of region in object */
7421	vm_map_copy_t	*copy_result)	/* OUT */
7422{
7423	vm_map_copy_t	copy;		/* Resulting copy */
7424
7425	/*
7426	 *	We drop the object into a special copy object
7427	 *	that contains the object directly.
7428	 */
7429
7430	copy = (vm_map_copy_t) zalloc(vm_map_copy_zone);
7431	copy->type = VM_MAP_COPY_OBJECT;
7432	copy->cpy_object = object;
7433	copy->offset = offset;
7434	copy->size = size;
7435
7436	*copy_result = copy;
7437	return(KERN_SUCCESS);
7438}
7439
7440static void
7441vm_map_fork_share(
7442	vm_map_t	old_map,
7443	vm_map_entry_t	old_entry,
7444	vm_map_t	new_map)
7445{
7446	vm_object_t 	object;
7447	vm_map_entry_t 	new_entry;
7448
7449	/*
7450	 *	New sharing code.  New map entry
7451	 *	references original object.  Internal
7452	 *	objects use asynchronous copy algorithm for
7453	 *	future copies.  First make sure we have
7454	 *	the right object.  If we need a shadow,
7455	 *	or someone else already has one, then
7456	 *	make a new shadow and share it.
7457	 */
7458
7459	object = old_entry->object.vm_object;
7460	if (old_entry->is_sub_map) {
7461		assert(old_entry->wired_count == 0);
7462#ifndef NO_NESTED_PMAP
7463		if(old_entry->use_pmap) {
7464			kern_return_t	result;
7465
7466			result = pmap_nest(new_map->pmap,
7467					   (old_entry->object.sub_map)->pmap,
7468					   (addr64_t)old_entry->vme_start,
7469					   (addr64_t)old_entry->vme_start,
7470					   (uint64_t)(old_entry->vme_end - old_entry->vme_start));
7471			if(result)
7472				panic("vm_map_fork_share: pmap_nest failed!");
7473		}
7474#endif	/* NO_NESTED_PMAP */
7475	} else if (object == VM_OBJECT_NULL) {
7476		object = vm_object_allocate((vm_map_size_t)(old_entry->vme_end -
7477							    old_entry->vme_start));
7478		old_entry->offset = 0;
7479		old_entry->object.vm_object = object;
7480		assert(!old_entry->needs_copy);
7481	} else if (object->copy_strategy !=
7482		   MEMORY_OBJECT_COPY_SYMMETRIC) {
7483
7484		/*
7485		 *	We are already using an asymmetric
7486		 *	copy, and therefore we already have
7487		 *	the right object.
7488		 */
7489
7490		assert(! old_entry->needs_copy);
7491	}
7492	else if (old_entry->needs_copy ||	/* case 1 */
7493		 object->shadowed ||		/* case 2 */
7494		 (!object->true_share && 	/* case 3 */
7495		  !old_entry->is_shared &&
7496		  (object->size >
7497		   (vm_map_size_t)(old_entry->vme_end -
7498				   old_entry->vme_start)))) {
7499
7500		/*
7501		 *	We need to create a shadow.
7502		 *	There are three cases here.
7503		 *	In the first case, we need to
7504		 *	complete a deferred symmetrical
7505		 *	copy that we participated in.
7506		 *	In the second and third cases,
7507		 *	we need to create the shadow so
7508		 *	that changes that we make to the
7509		 *	object do not interfere with
7510		 *	any symmetrical copies which
7511 *	have occurred (case 2) or which
7512		 *	might occur (case 3).
7513		 *
7514		 *	The first case is when we had
7515		 *	deferred shadow object creation
7516		 *	via the entry->needs_copy mechanism.
7517		 *	This mechanism only works when
7518		 *	only one entry points to the source
7519		 *	object, and we are about to create
7520		 *	a second entry pointing to the
7521		 *	same object. The problem is that
7522		 *	there is no way of mapping from
7523		 *	an object to the entries pointing
7524		 *	to it. (Deferred shadow creation
7525 *	works with one entry because it occurs
7526		 *	at fault time, and we walk from the
7527		 *	entry to the object when handling
7528		 *	the fault.)
7529		 *
7530		 *	The second case is when the object
7531		 *	to be shared has already been copied
7532		 *	with a symmetric copy, but we point
7533		 *	directly to the object without
7534		 *	needs_copy set in our entry. (This
7535		 *	can happen because different ranges
7536		 *	of an object can be pointed to by
7537		 *	different entries. In particular,
7538		 *	a single entry pointing to an object
7539		 *	can be split by a call to vm_inherit,
7540		 *	which, combined with task_create, can
7541		 *	result in the different entries
7542		 *	having different needs_copy values.)
7543		 *	The shadowed flag in the object allows
7544		 *	us to detect this case. The problem
7545		 *	with this case is that if this object
7546		 *	has or will have shadows, then we
7547		 *	must not perform an asymmetric copy
7548		 *	of this object, since such a copy
7549		 *	allows the object to be changed, which
7550		 *	will break the previous symmetrical
7551		 *	copies (which rely upon the object
7552		 *	not changing). In a sense, the shadowed
7553		 *	flag says "don't change this object".
7554		 *	We fix this by creating a shadow
7555		 *	object for this object, and sharing
7556		 *	that. This works because we are free
7557		 *	to change the shadow object (and thus
7558		 *	to use an asymmetric copy strategy);
7559		 *	this is also semantically correct,
7560		 *	since this object is temporary, and
7561		 *	therefore a copy of the object is
7562		 *	as good as the object itself. (This
7563		 *	is not true for permanent objects,
7564		 *	since the pager needs to see changes,
7565		 *	which won't happen if the changes
7566		 *	are made to a copy.)
7567		 *
7568		 *	The third case is when the object
7569		 *	to be shared has parts sticking
7570		 *	outside of the entry we're working
7571		 *	with, and thus may in the future
7572		 *	be subject to a symmetrical copy.
7573		 *	(This is a preemptive version of
7574		 *	case 2.)
7575		 */
7576
7577		vm_object_shadow(&old_entry->object.vm_object,
7578				 &old_entry->offset,
7579				 (vm_map_size_t) (old_entry->vme_end -
7580						  old_entry->vme_start));
7581
7582		/*
7583		 *	If we're making a shadow for other than
7584		 *	copy on write reasons, then we have
7585		 *	to remove write permission.
7586		 */
7587
7588		if (!old_entry->needs_copy &&
7589		    (old_entry->protection & VM_PROT_WRITE)) {
7590		        vm_prot_t prot;
7591
7592			prot = old_entry->protection & ~VM_PROT_WRITE;
7593
7594			if (override_nx(old_map, old_entry->alias) && prot)
7595			        prot |= VM_PROT_EXECUTE;
7596
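			/*
			 * If this map has itself been mapped into other maps
			 * ("mapped" is set), its own pmap may not be the only
			 * pmap holding translations for these pages, so the
			 * write permission is removed through the object
			 * (i.e. from every pmap) rather than from
			 * old_map->pmap alone.
			 */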
7597			if (old_map->mapped) {
7598				vm_object_pmap_protect(
7599					old_entry->object.vm_object,
7600					old_entry->offset,
7601					(old_entry->vme_end -
7602					 old_entry->vme_start),
7603					PMAP_NULL,
7604					old_entry->vme_start,
7605					prot);
7606			} else {
7607				pmap_protect(old_map->pmap,
7608					     old_entry->vme_start,
7609					     old_entry->vme_end,
7610					     prot);
7611			}
7612		}
7613
7614		old_entry->needs_copy = FALSE;
7615		object = old_entry->object.vm_object;
7616	}
7617
7618	/*
7619	 *	If object was using a symmetric copy strategy,
7620	 *	change its copy strategy to the default
7621	 *	asymmetric copy strategy, which is copy_delay
7622	 *	in the non-norma case and copy_call in the
7623	 *	norma case. Bump the reference count for the
7624	 *	new entry.
7625	 */
7626
7627	if(old_entry->is_sub_map) {
7628		vm_map_lock(old_entry->object.sub_map);
7629		vm_map_reference(old_entry->object.sub_map);
7630		vm_map_unlock(old_entry->object.sub_map);
7631	} else {
7632		vm_object_lock(object);
7633		vm_object_reference_locked(object);
7634		if (object->copy_strategy == MEMORY_OBJECT_COPY_SYMMETRIC) {
7635			object->copy_strategy = MEMORY_OBJECT_COPY_DELAY;
7636		}
7637		vm_object_unlock(object);
7638	}
7639
7640	/*
7641	 *	Clone the entry, using object ref from above.
7642	 *	Mark both entries as shared.
7643	 */
7644
7645	new_entry = vm_map_entry_create(new_map);
7646	vm_map_entry_copy(new_entry, old_entry);
7647	old_entry->is_shared = TRUE;
7648	new_entry->is_shared = TRUE;
7649
7650	/*
7651	 *	Insert the entry into the new map -- we
7652	 *	know we're inserting at the end of the new
7653	 *	map.
7654	 */
7655
7656	vm_map_entry_link(new_map, vm_map_last_entry(new_map), new_entry);
7657
7658	/*
7659	 *	Update the physical map
7660	 */
7661
7662	if (old_entry->is_sub_map) {
7663		/* Bill Angell pmap support goes here */
7664	} else {
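		/*
		 * Note: pmap_copy() is only an optimization hint to the
		 * pmap layer; an implementation may legitimately do nothing
		 * here, in which case the child simply rebuilds its
		 * translations through soft faults.
		 */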
7665		pmap_copy(new_map->pmap, old_map->pmap, new_entry->vme_start,
7666			  old_entry->vme_end - old_entry->vme_start,
7667			  old_entry->vme_start);
7668	}
7669}
7670
7671static boolean_t
7672vm_map_fork_copy(
7673	vm_map_t	old_map,
7674	vm_map_entry_t	*old_entry_p,
7675	vm_map_t	new_map)
7676{
7677	vm_map_entry_t old_entry = *old_entry_p;
7678	vm_map_size_t entry_size = old_entry->vme_end - old_entry->vme_start;
7679	vm_map_offset_t start = old_entry->vme_start;
7680	vm_map_copy_t copy;
7681	vm_map_entry_t last = vm_map_last_entry(new_map);
7682
7683	vm_map_unlock(old_map);
7684	/*
7685	 *	Use maxprot version of copyin because we
7686	 *	care about whether this memory can ever
7687	 *	be accessed, not just whether it's accessible
7688	 *	right now.
7689	 */
7690	if (vm_map_copyin_maxprot(old_map, start, entry_size, FALSE, &copy)
7691	    != KERN_SUCCESS) {
7692		/*
7693		 *	The map might have changed while it
7694		 *	was unlocked; check it again.  Skip
7695		 *	any blank space or permanently
7696		 *	unreadable region.
7697		 */
7698		vm_map_lock(old_map);
7699		if (!vm_map_lookup_entry(old_map, start, &last) ||
7700		    (last->max_protection & VM_PROT_READ) == VM_PROT_NONE) {
7701			last = last->vme_next;
7702		}
7703		*old_entry_p = last;
7704
7705		/*
7706		 * XXX	For some error returns, we want to
7707		 * XXX	skip to the next element.  Note
7708		 *	that INVALID_ADDRESS and
7709		 *	PROTECTION_FAILURE are handled above.
7710		 */
7711
7712		return FALSE;
7713	}
7714
7715	/*
7716	 *	Insert the copy into the new map
7717	 */
7718
7719	vm_map_copy_insert(new_map, last, copy);
7720
7721	/*
7722	 *	Pick up the traversal at the end of
7723	 *	the copied region.
7724	 */
7725
7726	vm_map_lock(old_map);
7727	start += entry_size;
7728	if (! vm_map_lookup_entry(old_map, start, &last)) {
7729		last = last->vme_next;
7730	} else {
7731		if (last->vme_start == start) {
7732			/*
7733			 * No need to clip here and we don't
7734			 * want to cause any unnecessary
7735			 * unnesting...
7736			 */
7737		} else {
7738			vm_map_clip_start(old_map, last, start);
7739		}
7740	}
7741	*old_entry_p = last;
7742
7743	return TRUE;
7744}
7745
7746/*
7747 *	vm_map_fork:
7748 *
7749 *	Create and return a new map based on the old
7750 *	map, according to the inheritance values on the
7751 *	regions in that map.
7752 *
7753 *	The source map must not be locked.
7754 */
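/*
 *	Per-entry behavior, as implemented in the loop below:
 *	  VM_INHERIT_NONE  - the range is simply absent from the child.
 *	  VM_INHERIT_SHARE - vm_map_fork_share() makes both maps reference
 *			     the same object (creating a shadow first when
 *			     needed to protect earlier symmetric copies).
 *	  VM_INHERIT_COPY  - the child gets a copy-on-write copy, via the
 *			     inlined vm_object_copy_quickly() path when
 *			     possible and vm_map_fork_copy() otherwise.
 */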
7755vm_map_t
7756vm_map_fork(
7757	vm_map_t	old_map)
7758{
7759	pmap_t		new_pmap;
7760	vm_map_t	new_map;
7761	vm_map_entry_t	old_entry;
7762	vm_map_size_t	new_size = 0, entry_size;
7763	vm_map_entry_t	new_entry;
7764	boolean_t	src_needs_copy;
7765	boolean_t	new_entry_needs_copy;
7766
7767#ifdef __i386__
7768	new_pmap = pmap_create((vm_map_size_t) 0,
7769			       old_map->pmap->pm_task_map != TASK_MAP_32BIT);
7770	if (old_map->pmap->pm_task_map == TASK_MAP_64BIT_SHARED)
7771		pmap_set_4GB_pagezero(new_pmap);
7772#else
7773	new_pmap = pmap_create((vm_map_size_t) 0, 0);
7774#endif
7775
7776	vm_map_reference_swap(old_map);
7777	vm_map_lock(old_map);
7778
7779	new_map = vm_map_create(new_pmap,
7780				old_map->min_offset,
7781				old_map->max_offset,
7782				old_map->hdr.entries_pageable);
7783
7784	for (
7785		old_entry = vm_map_first_entry(old_map);
7786		old_entry != vm_map_to_entry(old_map);
7787		) {
7788
7789		entry_size = old_entry->vme_end - old_entry->vme_start;
7790
7791		switch (old_entry->inheritance) {
7792		case VM_INHERIT_NONE:
7793			break;
7794
7795		case VM_INHERIT_SHARE:
7796			vm_map_fork_share(old_map, old_entry, new_map);
7797			new_size += entry_size;
7798			break;
7799
7800		case VM_INHERIT_COPY:
7801
7802			/*
7803			 *	Inline the copy_quickly case;
7804			 *	upon failure, fall back on a call
7805			 *	to vm_map_fork_copy.
7806			 */
7807
7808			if(old_entry->is_sub_map)
7809				break;
7810			if ((old_entry->wired_count != 0) ||
7811			    ((old_entry->object.vm_object != NULL) &&
7812			     (old_entry->object.vm_object->true_share))) {
7813				goto slow_vm_map_fork_copy;
7814			}
7815
7816			new_entry = vm_map_entry_create(new_map);
7817			vm_map_entry_copy(new_entry, old_entry);
7818			/* clear address space specifics */
7819			new_entry->use_pmap = FALSE;
7820
7821			if (! vm_object_copy_quickly(
7822				    &new_entry->object.vm_object,
7823				    old_entry->offset,
7824				    (old_entry->vme_end -
7825				     old_entry->vme_start),
7826				    &src_needs_copy,
7827				    &new_entry_needs_copy)) {
7828				vm_map_entry_dispose(new_map, new_entry);
7829				goto slow_vm_map_fork_copy;
7830			}
7831
7832			/*
7833			 *	Handle copy-on-write obligations
7834			 */
7835
7836			if (src_needs_copy && !old_entry->needs_copy) {
7837			        vm_prot_t prot;
7838
7839				prot = old_entry->protection & ~VM_PROT_WRITE;
7840
7841				if (override_nx(old_map, old_entry->alias) && prot)
7842				        prot |= VM_PROT_EXECUTE;
7843
7844				vm_object_pmap_protect(
7845					old_entry->object.vm_object,
7846					old_entry->offset,
7847					(old_entry->vme_end -
7848					 old_entry->vme_start),
7849					((old_entry->is_shared
7850					  || old_map->mapped)
7851					 ? PMAP_NULL :
7852					 old_map->pmap),
7853					old_entry->vme_start,
7854					prot);
7855
7856				old_entry->needs_copy = TRUE;
7857			}
7858			new_entry->needs_copy = new_entry_needs_copy;
7859
7860			/*
7861			 *	Insert the entry at the end
7862			 *	of the map.
7863			 */
7864
7865			vm_map_entry_link(new_map, vm_map_last_entry(new_map),
7866					  new_entry);
7867			new_size += entry_size;
7868			break;
7869
7870		slow_vm_map_fork_copy:
7871			if (vm_map_fork_copy(old_map, &old_entry, new_map)) {
7872				new_size += entry_size;
7873			}
7874			continue;
7875		}
7876		old_entry = old_entry->vme_next;
7877	}
7878
7879	new_map->size = new_size;
7880	vm_map_unlock(old_map);
7881	vm_map_deallocate(old_map);
7882
7883	return(new_map);
7884}
7885
7886/*
7887 * vm_map_exec:
7888 *
7889 * 	Setup the "new_map" with the proper execution environment according
7890 *	to the type of executable (platform, 64bit, chroot environment).
7891 *	Map the comm page and shared region, etc...
7892 */
7893kern_return_t
7894vm_map_exec(
7895	vm_map_t	new_map,
7896	task_t		task,
7897	void		*fsroot,
7898	cpu_type_t	cpu)
7899{
7900	SHARED_REGION_TRACE_DEBUG(
7901		("shared_region: task %p: vm_map_exec(%p,%p,%p,0x%x): ->\n",
7902		 current_task(), new_map, task, fsroot, cpu));
7903	(void) vm_commpage_enter(new_map, task);
7904	(void) vm_shared_region_enter(new_map, task, fsroot, cpu);
7905	SHARED_REGION_TRACE_DEBUG(
7906		("shared_region: task %p: vm_map_exec(%p,%p,%p,0x%x): <-\n",
7907		 current_task(), new_map, task, fsroot, cpu));
7908	return KERN_SUCCESS;
7909}
7910
7911/*
7912 *	vm_map_lookup_locked:
7913 *
7914 *	Finds the VM object, offset, and
7915 *	protection for a given virtual address in the
7916 *	specified map, assuming a page fault of the
7917 *	type specified.
7918 *
7919 *	Returns the (object, offset, protection) for
7920 *	this address, whether it is wired down, and whether
7921 *	this map has the only reference to the data in question.
7922 *	In order to later verify this lookup, a "version"
7923 *	is returned.
7924 *
7925 *	The map MUST be locked by the caller and WILL be
7926 *	locked on exit.  In order to guarantee the
7927 *	existence of the returned object, it is returned
7928 *	locked.
7929 *
7930 *	If a lookup is requested with "write protection"
7931 *	specified, the map may be changed to perform virtual
7932 *	copying operations, although the data referenced will
7933 *	remain the same.
7934 */
7935kern_return_t
7936vm_map_lookup_locked(
7937	vm_map_t		*var_map,	/* IN/OUT */
7938	vm_map_offset_t		vaddr,
7939	vm_prot_t		fault_type,
7940	int			object_lock_type,
7941	vm_map_version_t	*out_version,	/* OUT */
7942	vm_object_t		*object,	/* OUT */
7943	vm_object_offset_t	*offset,	/* OUT */
7944	vm_prot_t		*out_prot,	/* OUT */
7945	boolean_t		*wired,		/* OUT */
7946	vm_object_fault_info_t	fault_info,	/* OUT */
7947	vm_map_t		*real_map)
7948{
7949	vm_map_entry_t			entry;
7950	register vm_map_t		map = *var_map;
7951	vm_map_t			old_map = *var_map;
7952	vm_map_t			cow_sub_map_parent = VM_MAP_NULL;
7953	vm_map_offset_t			cow_parent_vaddr = 0;
7954	vm_map_offset_t			old_start = 0;
7955	vm_map_offset_t			old_end = 0;
7956	register vm_prot_t		prot;
7957
7958	*real_map = map;
7959RetryLookup: ;
7960
7961	/*
7962	 *	If the map has an interesting hint, try it before calling
7963	 *	full blown lookup routine.
7964	 */
7965	entry = map->hint;
7966
7967	if ((entry == vm_map_to_entry(map)) ||
7968	    (vaddr < entry->vme_start) || (vaddr >= entry->vme_end)) {
7969		vm_map_entry_t	tmp_entry;
7970
7971		/*
7972		 *	Entry was either not a valid hint, or the vaddr
7973		 *	was not contained in the entry, so do a full lookup.
7974		 */
7975		if (!vm_map_lookup_entry(map, vaddr, &tmp_entry)) {
7976			if((cow_sub_map_parent) && (cow_sub_map_parent != map))
7977				vm_map_unlock(cow_sub_map_parent);
7978			if((*real_map != map)
7979			   && (*real_map != cow_sub_map_parent))
7980				vm_map_unlock(*real_map);
7981			return KERN_INVALID_ADDRESS;
7982		}
7983
7984		entry = tmp_entry;
7985	}
7986	if(map == old_map) {
7987		old_start = entry->vme_start;
7988		old_end = entry->vme_end;
7989	}
7990
7991	/*
7992	 *	Handle submaps.  Drop lock on upper map, submap is
7993	 *	returned locked.
7994	 */
7995
7996submap_recurse:
7997	if (entry->is_sub_map) {
7998		vm_map_offset_t		local_vaddr;
7999		vm_map_offset_t		end_delta;
8000		vm_map_offset_t		start_delta;
8001		vm_map_entry_t		submap_entry;
8002		boolean_t		mapped_needs_copy=FALSE;
8003
8004		local_vaddr = vaddr;
8005
8006		if ((entry->use_pmap && !(fault_type & VM_PROT_WRITE))) {
8007			/* if real_map equals map we unlock below */
8008			if ((*real_map != map) &&
8009			    (*real_map != cow_sub_map_parent))
8010				vm_map_unlock(*real_map);
8011			*real_map = entry->object.sub_map;
8012		}
8013
8014		if(entry->needs_copy && (fault_type & VM_PROT_WRITE)) {
8015			if (!mapped_needs_copy) {
8016				if (vm_map_lock_read_to_write(map)) {
8017					vm_map_lock_read(map);
8018					/* XXX FBDP: entry still valid ? */
8019					if(*real_map == entry->object.sub_map)
8020						*real_map = map;
8021					goto RetryLookup;
8022				}
8023				vm_map_lock_read(entry->object.sub_map);
8024				cow_sub_map_parent = map;
8025				/* reset base to map before cow object */
8026				/* this is the map which will accept   */
8027				/* the new cow object */
8028				old_start = entry->vme_start;
8029				old_end = entry->vme_end;
8030				cow_parent_vaddr = vaddr;
8031				mapped_needs_copy = TRUE;
8032			} else {
8033				vm_map_lock_read(entry->object.sub_map);
8034				if((cow_sub_map_parent != map) &&
8035				   (*real_map != map))
8036					vm_map_unlock(map);
8037			}
8038		} else {
8039			vm_map_lock_read(entry->object.sub_map);
8040			/* leave the map locked if it is the  */
8041			/* target cow sub_map above; otherwise*/
8042			/* just follow the maps down to the   */
8043			/* object.  Here we unlock, knowing   */
8044			/* we are not revisiting the map.     */
8045			if((*real_map != map) && (map != cow_sub_map_parent))
8046				vm_map_unlock_read(map);
8047		}
8048
8049		/* XXX FBDP: map has been unlocked, what protects "entry" !? */
8050		*var_map = map = entry->object.sub_map;
8051
8052		/* calculate the offset in the submap for vaddr */
8053		local_vaddr = (local_vaddr - entry->vme_start) + entry->offset;
8054
8055	RetrySubMap:
8056		if(!vm_map_lookup_entry(map, local_vaddr, &submap_entry)) {
8057			if((cow_sub_map_parent) && (cow_sub_map_parent != map)){
8058				vm_map_unlock(cow_sub_map_parent);
8059			}
8060			if((*real_map != map)
8061			   && (*real_map != cow_sub_map_parent)) {
8062				vm_map_unlock(*real_map);
8063			}
8064			*real_map = map;
8065			return KERN_INVALID_ADDRESS;
8066		}
8067
8068		/* find the attenuated shadow of the underlying object */
8069		/* on our target map */
8070
8071		/* In English: the submap object may extend beyond the    */
8072		/* region mapped by the entry, or may fill only a portion */
8073		/* of it.  For our purposes, we only care if the object   */
8074		/* doesn't fill.  In this case the area which will        */
8075		/* ultimately be clipped in the top map will only need    */
8076		/* to be as big as the portion of the underlying entry    */
8077		/* which is mapped */
8078		start_delta = submap_entry->vme_start > entry->offset ?
8079			submap_entry->vme_start - entry->offset : 0;
8080
8081		end_delta =
8082			(entry->offset + start_delta + (old_end - old_start)) <=
8083			submap_entry->vme_end ?
8084			0 : (entry->offset +
8085			     (old_end - old_start))
8086			- submap_entry->vme_end;
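		/*
		 * Illustrative numbers only: if this entry maps the submap
		 * starting at submap offset 0x1000, the faulting window
		 * [old_start, old_end) is 0x5000 bytes, and submap_entry
		 * covers submap addresses [0x2000, 0x4000), then
		 * start_delta is 0x1000 and end_delta is 0x2000, shrinking
		 * the window below to the 0x2000 bytes actually backed by
		 * submap_entry.
		 */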
8087
8088		old_start += start_delta;
8089		old_end -= end_delta;
8090
8091		if(submap_entry->is_sub_map) {
8092			entry = submap_entry;
8093			vaddr = local_vaddr;
8094			goto submap_recurse;
8095		}
8096
8097		if(((fault_type & VM_PROT_WRITE) && cow_sub_map_parent)) {
8098
8099			vm_object_t	sub_object, copy_object;
8100			vm_object_offset_t copy_offset;
8101			vm_map_offset_t	local_start;
8102			vm_map_offset_t	local_end;
8103			boolean_t		copied_slowly = FALSE;
8104
8105			if (vm_map_lock_read_to_write(map)) {
8106				vm_map_lock_read(map);
8107				old_start -= start_delta;
8108				old_end += end_delta;
8109				goto RetrySubMap;
8110			}
8111
8112
8113			sub_object = submap_entry->object.vm_object;
8114			if (sub_object == VM_OBJECT_NULL) {
8115				sub_object =
8116					vm_object_allocate(
8117						(vm_map_size_t)
8118						(submap_entry->vme_end -
8119						 submap_entry->vme_start));
8120				submap_entry->object.vm_object = sub_object;
8121				submap_entry->offset = 0;
8122			}
8123			local_start =  local_vaddr -
8124				(cow_parent_vaddr - old_start);
8125			local_end = local_vaddr +
8126				(old_end - cow_parent_vaddr);
8127			vm_map_clip_start(map, submap_entry, local_start);
8128			vm_map_clip_end(map, submap_entry, local_end);
8129			/* unnesting was done in vm_map_clip_start/end() */
8130			assert(!submap_entry->use_pmap);
8131
8132			/* This is the COW case; let's connect */
8133			/* an entry in our space to the underlying */
8134			/* object in the submap, bypassing the  */
8135			/* submap. */
8136
8137
8138			if(submap_entry->wired_count != 0 ||
8139			   (sub_object->copy_strategy ==
8140			    MEMORY_OBJECT_COPY_NONE)) {
8141				vm_object_lock(sub_object);
8142				vm_object_copy_slowly(sub_object,
8143						      submap_entry->offset,
8144						      (submap_entry->vme_end -
8145						       submap_entry->vme_start),
8146						      FALSE,
8147						      &copy_object);
8148				copied_slowly = TRUE;
8149			} else {
8150
8151				/* set up shadow object */
8152				copy_object = sub_object;
8153				vm_object_reference(copy_object);
8154				sub_object->shadowed = TRUE;
8155				submap_entry->needs_copy = TRUE;
8156
8157				prot = submap_entry->protection & ~VM_PROT_WRITE;
8158
8159				if (override_nx(map, submap_entry->alias) && prot)
8160				        prot |= VM_PROT_EXECUTE;
8161
8162				vm_object_pmap_protect(
8163					sub_object,
8164					submap_entry->offset,
8165					submap_entry->vme_end -
8166					submap_entry->vme_start,
8167					(submap_entry->is_shared
8168					 || map->mapped) ?
8169					PMAP_NULL : map->pmap,
8170					submap_entry->vme_start,
8171					prot);
8172			}
8173
8174			/*
8175			 * Adjust the fault offset to the submap entry.
8176			 */
8177			copy_offset = (local_vaddr -
8178				       submap_entry->vme_start +
8179				       submap_entry->offset);
8180
8181			/* This works differently from the */
8182			/* normal submap case.  We go back */
8183			/* to the parent of the cow map and*/
8184			/* clip out the target portion of  */
8185			/* the sub_map, substituting the   */
8186			/* new copy object.                */
8187
8188			vm_map_unlock(map);
8189			local_start = old_start;
8190			local_end = old_end;
8191			map = cow_sub_map_parent;
8192			*var_map = cow_sub_map_parent;
8193			vaddr = cow_parent_vaddr;
8194			cow_sub_map_parent = NULL;
8195
8196			if(!vm_map_lookup_entry(map,
8197						vaddr, &entry)) {
8198				vm_object_deallocate(
8199					copy_object);
8200				vm_map_lock_write_to_read(map);
8201				return KERN_INVALID_ADDRESS;
8202			}
8203
8204			/* clip out the portion of space */
8205			/* mapped by the sub map which   */
8206			/* corresponds to the underlying */
8207			/* object */
8208
8209			/*
8210			 * Clip (and unnest) the smallest nested chunk
8211			 * possible around the faulting address...
8212			 */
8213			local_start = vaddr & ~(pmap_nesting_size_min - 1);
8214			local_end = local_start + pmap_nesting_size_min;
8215			/*
8216			 * ... but don't go beyond the "old_start" to "old_end"
8217			 * range, to avoid spanning over another VM region
8218			 * with a possibly different VM object and/or offset.
8219			 */
8220			if (local_start < old_start) {
8221				local_start = old_start;
8222			}
8223			if (local_end > old_end) {
8224				local_end = old_end;
8225			}
8226			/*
8227			 * Adjust copy_offset to the start of the range.
8228			 */
8229			copy_offset -= (vaddr - local_start);
8230
8231			vm_map_clip_start(map, entry, local_start);
8232			vm_map_clip_end(map, entry, local_end);
8233			/* unnesting was done in vm_map_clip_start/end() */
8234			assert(!entry->use_pmap);
8235
8236			/* substitute copy object for */
8237			/* shared map entry           */
8238			vm_map_deallocate(entry->object.sub_map);
8239			entry->is_sub_map = FALSE;
8240			entry->object.vm_object = copy_object;
8241
8242			/* propagate the submap entry's protections */
8243			entry->protection |= submap_entry->protection;
8244			entry->max_protection |= submap_entry->max_protection;
8245
8246			if(copied_slowly) {
8247				entry->offset = local_start - old_start;
8248				entry->needs_copy = FALSE;
8249				entry->is_shared = FALSE;
8250			} else {
8251				entry->offset = copy_offset;
8252				entry->needs_copy = TRUE;
8253				if(entry->inheritance == VM_INHERIT_SHARE)
8254					entry->inheritance = VM_INHERIT_COPY;
8255				if (map != old_map)
8256					entry->is_shared = TRUE;
8257			}
8258			if(entry->inheritance == VM_INHERIT_SHARE)
8259				entry->inheritance = VM_INHERIT_COPY;
8260
8261			vm_map_lock_write_to_read(map);
8262		} else {
8263			if((cow_sub_map_parent)
8264			   && (cow_sub_map_parent != *real_map)
8265			   && (cow_sub_map_parent != map)) {
8266				vm_map_unlock(cow_sub_map_parent);
8267			}
8268			entry = submap_entry;
8269			vaddr = local_vaddr;
8270		}
8271	}
8272
8273	/*
8274	 *	Check whether this task is allowed to have
8275	 *	this page.
8276	 */
8277
8278	prot = entry->protection;
8279
8280	if (override_nx(map, entry->alias) && prot) {
8281	        /*
8282		 * HACK -- if not a stack, then allow execution
8283		 */
8284	        prot |= VM_PROT_EXECUTE;
8285	}
8286
8287	if ((fault_type & (prot)) != fault_type) {
8288		if (*real_map != map) {
8289			vm_map_unlock(*real_map);
8290		}
8291		*real_map = map;
8292
8293		if ((fault_type & VM_PROT_EXECUTE) && prot)
8294		        log_stack_execution_failure((addr64_t)vaddr, prot);
8295
8296		DTRACE_VM2(prot_fault, int, 1, (uint64_t *), NULL);
8297		return KERN_PROTECTION_FAILURE;
8298	}
8299
8300	/*
8301	 *	If this page is not pageable, we have to get
8302	 *	it for all possible accesses.
8303	 */
8304
8305	*wired = (entry->wired_count != 0);
8306	if (*wired)
8307	        fault_type = prot;
8308
8309	/*
8310	 *	If the entry was copy-on-write, we either ...
8311	 */
8312
8313	if (entry->needs_copy) {
8314	    	/*
8315		 *	If we want to write the page, we may as well
8316		 *	handle that now since we've got the map locked.
8317		 *
8318		 *	If we don't need to write the page, we just
8319		 *	demote the permissions allowed.
8320		 */
8321
8322		if ((fault_type & VM_PROT_WRITE) || *wired) {
8323			/*
8324			 *	Make a new object, and place it in the
8325			 *	object chain.  Note that no new references
8326			 *	have appeared -- one just moved from the
8327			 *	map to the new object.
8328			 */
8329
8330			if (vm_map_lock_read_to_write(map)) {
8331				vm_map_lock_read(map);
8332				goto RetryLookup;
8333			}
8334			vm_object_shadow(&entry->object.vm_object,
8335					 &entry->offset,
8336					 (vm_map_size_t) (entry->vme_end -
8337							  entry->vme_start));
8338
8339			entry->object.vm_object->shadowed = TRUE;
8340			entry->needs_copy = FALSE;
8341			vm_map_lock_write_to_read(map);
8342		}
8343		else {
8344			/*
8345			 *	We're attempting to read a copy-on-write
8346			 *	page -- don't allow writes.
8347			 */
8348
8349			prot &= (~VM_PROT_WRITE);
8350		}
8351	}
8352
8353	/*
8354	 *	Create an object if necessary.
8355	 */
8356	if (entry->object.vm_object == VM_OBJECT_NULL) {
8357
8358		if (vm_map_lock_read_to_write(map)) {
8359			vm_map_lock_read(map);
8360			goto RetryLookup;
8361		}
8362
8363		entry->object.vm_object = vm_object_allocate(
8364			(vm_map_size_t)(entry->vme_end - entry->vme_start));
8365		entry->offset = 0;
8366		vm_map_lock_write_to_read(map);
8367	}
8368
8369	/*
8370	 *	Return the object/offset from this entry.  If the entry
8371	 *	was copy-on-write or empty, it has been fixed up.  Also
8372	 *	return the protection.
8373	 */
8374
8375        *offset = (vaddr - entry->vme_start) + entry->offset;
8376        *object = entry->object.vm_object;
8377	*out_prot = prot;
8378
8379	if (fault_info) {
8380		fault_info->interruptible = THREAD_UNINT; /* for now... */
8381		/* ... the caller will change "interruptible" if needed */
8382	        fault_info->cluster_size = 0;
8383		fault_info->user_tag = entry->alias;
8384	        fault_info->behavior = entry->behavior;
8385		fault_info->lo_offset = entry->offset;
8386		fault_info->hi_offset = (entry->vme_end - entry->vme_start) + entry->offset;
8387		fault_info->no_cache  = entry->no_cache;
8388	}
8389
8390	/*
8391	 *	Lock the object to prevent it from disappearing
8392	 */
8393	if (object_lock_type == OBJECT_LOCK_EXCLUSIVE)
8394	        vm_object_lock(*object);
8395	else
8396	        vm_object_lock_shared(*object);
8397
8398	/*
8399	 *	Save the version number
8400	 */
8401
8402	out_version->main_timestamp = map->timestamp;
8403
8404	return KERN_SUCCESS;
8405}
8406
8407
8408/*
8409 *	vm_map_verify:
8410 *
8411 *	Verifies that the map in question has not changed
8412 *	since the given version.  If successful, the map
8413 *	will not change until vm_map_verify_done() is called.
8414 */
8415boolean_t
8416vm_map_verify(
8417	register vm_map_t		map,
8418	register vm_map_version_t	*version)	/* REF */
8419{
8420	boolean_t	result;
8421
8422	vm_map_lock_read(map);
8423	result = (map->timestamp == version->main_timestamp);
8424
8425	if (!result)
8426		vm_map_unlock_read(map);
8427
8428	return(result);
8429}
8430
8431/*
8432 *	vm_map_verify_done:
8433 *
8434 *	Releases locks acquired by a vm_map_verify.
8435 *
8436 *	This is now a macro in vm/vm_map.h.  It does a
8437 *	vm_map_unlock_read on the map.
8438 */
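/*
 *	A rough sketch of the intended lookup/verify pattern (vm_fault is
 *	the canonical caller; locking details and error handling elided):
 *
 *		vm_map_lock_read(map);
 *		kr = vm_map_lookup_locked(&map, vaddr, fault_type, ...,
 *					  &version, &object, ...);
 *		vm_map_unlock_read(map);	-- map may now change
 *		... resolve the fault against "object" ...
 *		if (!vm_map_verify(map, &version))
 *			... map changed underneath us: redo the lookup ...
 *		... complete the operation ...
 *		vm_map_verify_done(map, &version);  -- unlocks the map again
 */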
8439
8440
8441/*
8442 *	TEMPORARYTEMPORARYTEMPORARYTEMPORARYTEMPORARYTEMPORARY
8443 *	Goes away after regular vm_region_recurse function migrates to
8444 *	64 bits
8445 *	vm_region_recurse: A form of vm_region which follows the
8446 *	submaps in a target map
8447 *
8448 */
8449
8450kern_return_t
8451vm_map_region_recurse_64(
8452	vm_map_t		 map,
8453	vm_map_offset_t	*address,		/* IN/OUT */
8454	vm_map_size_t		*size,			/* OUT */
8455	natural_t	 	*nesting_depth,	/* IN/OUT */
8456	vm_region_submap_info_64_t	submap_info,	/* IN/OUT */
8457	mach_msg_type_number_t	*count)	/* IN/OUT */
8458{
8459	vm_region_extended_info_data_t	extended;
8460	vm_map_entry_t			tmp_entry;
8461	vm_map_offset_t			user_address;
8462	unsigned int			user_max_depth;
8463
8464	/*
8465	 * "curr_entry" is the VM map entry preceding or including the
8466	 * address we're looking for.
8467	 * "curr_map" is the map or sub-map containing "curr_entry".
8468	 * "curr_offset" is the accumulated offset of "curr_map" in the
8469	 * target task's address space.
8470	 * "curr_depth" is the depth of "curr_map" in the chain of
8471	 * sub-maps.
8472	 * "curr_max_offset" is the maximum offset we should take into
8473	 * account in the current map.  It may be smaller than the current
8474	 * map's "max_offset" because we might not have mapped it all in
8475	 * the upper level map.
8476	 */
8477	vm_map_entry_t			curr_entry;
8478	vm_map_offset_t			curr_offset;
8479	vm_map_t			curr_map;
8480	unsigned int			curr_depth;
8481	vm_map_offset_t			curr_max_offset;
8482
8483	/*
8484	 * "next_" is the same as "curr_" but for the VM region immediately
8485	 * after the address we're looking for.  We need to keep track of this
8486	 * too because we want to return info about that region if the
8487	 * address we're looking for is not mapped.
8488	 */
8489	vm_map_entry_t			next_entry;
8490	vm_map_offset_t			next_offset;
8491	vm_map_t			next_map;
8492	unsigned int			next_depth;
8493	vm_map_offset_t			next_max_offset;
8494
8495	boolean_t			look_for_pages;
8496	vm_region_submap_short_info_64_t short_info;
8497
8498	if (map == VM_MAP_NULL) {
8499		/* no address space to work on */
8500		return KERN_INVALID_ARGUMENT;
8501	}
8502
8503	if (*count < VM_REGION_SUBMAP_INFO_COUNT_64) {
8504		if (*count < VM_REGION_SUBMAP_SHORT_INFO_COUNT_64) {
8505			/*
8506			 * "info" structure is not big enough and
8507			 * would overflow
8508			 */
8509			return KERN_INVALID_ARGUMENT;
8510		} else {
8511			look_for_pages = FALSE;
8512			*count = VM_REGION_SUBMAP_SHORT_INFO_COUNT_64;
8513			short_info = (vm_region_submap_short_info_64_t) submap_info;
8514			submap_info = NULL;
8515		}
8516	} else {
8517		look_for_pages = TRUE;
8518		*count = VM_REGION_SUBMAP_INFO_COUNT_64;
8519		short_info = NULL;
8520	}
8521
8522
8523	user_address = *address;
8524	user_max_depth = *nesting_depth;
8525
8526	curr_entry = NULL;
8527	curr_map = map;
8528	curr_offset = 0;
8529	curr_depth = 0;
8530	curr_max_offset = curr_map->max_offset;
8531
8532	next_entry = NULL;
8533	next_map = NULL;
8534	next_offset = 0;
8535	next_depth = 0;
8536	next_max_offset = curr_max_offset;
8537
8538	if (not_in_kdp) {
8539		vm_map_lock_read(curr_map);
8540	}
8541
8542	for (;;) {
8543		if (vm_map_lookup_entry(curr_map,
8544					user_address - curr_offset,
8545					&tmp_entry)) {
8546			/* tmp_entry contains the address we're looking for */
8547			curr_entry = tmp_entry;
8548		} else {
8549			/*
8550			 * The address is not mapped.  "tmp_entry" is the
8551			 * map entry preceding the address.  We want the next
8552			 * one, if it exists.
8553			 */
8554			curr_entry = tmp_entry->vme_next;
8555			if (curr_entry == vm_map_to_entry(curr_map) ||
8556			    curr_entry->vme_start >= curr_max_offset) {
8557				/* no next entry at this level: stop looking */
8558				if (not_in_kdp) {
8559					vm_map_unlock_read(curr_map);
8560				}
8561				curr_entry = NULL;
8562				curr_map = NULL;
8563				curr_offset = 0;
8564				curr_depth = 0;
8565				curr_max_offset = 0;
8566				break;
8567			}
8568		}
8569
8570		/*
8571		 * Is the next entry at this level closer to the address (or
8572		 * deeper in the submap chain) than the one we had
8573		 * so far ?
8574		 * so far?
8575		tmp_entry = curr_entry->vme_next;
8576		if (tmp_entry == vm_map_to_entry(curr_map)) {
8577			/* no next entry at this level */
8578		} else if (tmp_entry->vme_start >= curr_max_offset) {
8579			/*
8580			 * tmp_entry is beyond the scope of what we mapped of
8581			 * this submap in the upper level: ignore it.
8582			 */
8583		} else if ((next_entry == NULL) ||
8584			   (tmp_entry->vme_start + curr_offset <=
8585			    next_entry->vme_start + next_offset)) {
8586			/*
8587			 * We didn't have a "next_entry" or this one is
8588			 * closer to the address we're looking for:
8589			 * use this "tmp_entry" as the new "next_entry".
8590			 */
8591			if (next_entry != NULL) {
8592				/* unlock the last "next_map" */
8593				if (next_map != curr_map && not_in_kdp) {
8594					vm_map_unlock_read(next_map);
8595				}
8596			}
8597			next_entry = tmp_entry;
8598			next_map = curr_map;
8599			next_offset = curr_offset;
8600			next_depth = curr_depth;
8601			next_max_offset = curr_max_offset;
8602		}
8603
8604		if (!curr_entry->is_sub_map ||
8605		    curr_depth >= user_max_depth) {
8606			/*
8607			 * We hit a leaf map or we reached the maximum depth
8608			 * we could, so stop looking.  Keep the current map
8609			 * locked.
8610			 */
8611			break;
8612		}
8613
8614		/*
8615		 * Get down to the next submap level.
8616		 */
8617
8618		/*
8619		 * Lock the next level and unlock the current level,
8620		 * unless we need to keep it locked to access the "next_entry"
8621		 * later.
8622		 */
8623		if (not_in_kdp) {
8624			vm_map_lock_read(curr_entry->object.sub_map);
8625		}
8626		if (curr_map == next_map) {
8627			/* keep "next_map" locked in case we need it */
8628		} else {
8629			/* release this map */
8630			vm_map_unlock_read(curr_map);
8631		}
8632
8633		/*
8634		 * Adjust the offset.  "curr_entry" maps the submap
8635		 * at relative address "curr_entry->vme_start" in the
8636		 * curr_map but skips the first "curr_entry->offset"
8637		 * bytes of the submap.
8638		 * "curr_offset" always represents the offset of a virtual
8639		 * address in the curr_map relative to the absolute address
8640		 * space (i.e. the top-level VM map).
8641		 */
8642		curr_offset +=
8643			(curr_entry->vme_start - curr_entry->offset);
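		/*
		 * Illustrative numbers: if curr_entry maps the submap at
		 * addresses [0x90000000, 0x98000000) of the current map,
		 * starting at submap offset 0x1000, curr_offset grows by
		 * 0x90000000 - 0x1000, so that "user_address - curr_offset"
		 * (used by the lookup at the top of this loop) yields the
		 * corresponding address within the submap.
		 */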
8644		/* switch to the submap */
8645		curr_map = curr_entry->object.sub_map;
8646		curr_depth++;
8647		/*
8648		 * "curr_max_offset" allows us to keep track of the
8649		 * portion of the submap that is actually mapped at this level:
8650		 * the rest of that submap is irrelevant to us, since it's not
8651		 * mapped here.
8652		 * The relevant portion of the map starts at
8653		 * "curr_entry->offset" up to the size of "curr_entry".
8654		 */
8655		curr_max_offset =
8656			curr_entry->vme_end - curr_entry->vme_start +
8657			curr_entry->offset;
8658		curr_entry = NULL;
8659	}
8660
8661	if (curr_entry == NULL) {
8662		/* no VM region contains the address... */
8663		if (next_entry == NULL) {
8664			/* ... and no VM region follows it either */
8665			return KERN_INVALID_ADDRESS;
8666		}
8667		/* ... gather info about the next VM region */
8668		curr_entry = next_entry;
8669		curr_map = next_map;	/* still locked ... */
8670		curr_offset = next_offset;
8671		curr_depth = next_depth;
8672		curr_max_offset = next_max_offset;
8673	} else {
8674		/* we won't need "next_entry" after all */
8675		if (next_entry != NULL) {
8676			/* release "next_map" */
8677			if (next_map != curr_map && not_in_kdp) {
8678				vm_map_unlock_read(next_map);
8679			}
8680		}
8681	}
8682	next_entry = NULL;
8683	next_map = NULL;
8684	next_offset = 0;
8685	next_depth = 0;
8686	next_max_offset = 0;
8687
8688	*nesting_depth = curr_depth;
8689	*size = curr_entry->vme_end - curr_entry->vme_start;
8690	*address = curr_entry->vme_start + curr_offset;
8691
8692	if (look_for_pages) {
8693		submap_info->user_tag = curr_entry->alias;
8694		submap_info->offset = curr_entry->offset;
8695		submap_info->protection = curr_entry->protection;
8696		submap_info->inheritance = curr_entry->inheritance;
8697		submap_info->max_protection = curr_entry->max_protection;
8698		submap_info->behavior = curr_entry->behavior;
8699		submap_info->user_wired_count = curr_entry->user_wired_count;
8700		submap_info->is_submap = curr_entry->is_sub_map;
8701		submap_info->object_id = (uint32_t) curr_entry->object.vm_object;
8702	} else {
8703		short_info->user_tag = curr_entry->alias;
8704		short_info->offset = curr_entry->offset;
8705		short_info->protection = curr_entry->protection;
8706		short_info->inheritance = curr_entry->inheritance;
8707		short_info->max_protection = curr_entry->max_protection;
8708		short_info->behavior = curr_entry->behavior;
8709		short_info->user_wired_count = curr_entry->user_wired_count;
8710		short_info->is_submap = curr_entry->is_sub_map;
8711		short_info->object_id = (uint32_t) curr_entry->object.vm_object;
8712	}
8713
8714	extended.pages_resident = 0;
8715	extended.pages_swapped_out = 0;
8716	extended.pages_shared_now_private = 0;
8717	extended.pages_dirtied = 0;
8718	extended.external_pager = 0;
8719	extended.shadow_depth = 0;
8720
8721	if (not_in_kdp) {
8722		if (!curr_entry->is_sub_map) {
8723			vm_map_region_walk(curr_map,
8724					   curr_entry->vme_start,
8725					   curr_entry,
8726					   curr_entry->offset,
8727					   (curr_entry->vme_end -
8728					    curr_entry->vme_start),
8729					   &extended,
8730					   look_for_pages);
8731			if (extended.external_pager &&
8732			    extended.ref_count == 2 &&
8733			    extended.share_mode == SM_SHARED) {
8734				extended.share_mode = SM_PRIVATE;
8735			}
8736		} else {
8737			if (curr_entry->use_pmap) {
8738				extended.share_mode = SM_TRUESHARED;
8739			} else {
8740				extended.share_mode = SM_PRIVATE;
8741			}
8742			extended.ref_count =
8743				curr_entry->object.sub_map->ref_count;
8744		}
8745	}
8746
8747	if (look_for_pages) {
8748		submap_info->pages_resident = extended.pages_resident;
8749		submap_info->pages_swapped_out = extended.pages_swapped_out;
8750		submap_info->pages_shared_now_private =
8751			extended.pages_shared_now_private;
8752		submap_info->pages_dirtied = extended.pages_dirtied;
8753		submap_info->external_pager = extended.external_pager;
8754		submap_info->shadow_depth = extended.shadow_depth;
8755		submap_info->share_mode = extended.share_mode;
8756		submap_info->ref_count = extended.ref_count;
8757	} else {
8758		short_info->external_pager = extended.external_pager;
8759		short_info->shadow_depth = extended.shadow_depth;
8760		short_info->share_mode = extended.share_mode;
8761		short_info->ref_count = extended.ref_count;
8762	}
8763
8764	if (not_in_kdp) {
8765		vm_map_unlock_read(curr_map);
8766	}
8767
8768	return KERN_SUCCESS;
8769}
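/*
 *	Rough user-level sketch of how the above is reached, via the
 *	mach_vm_region_recurse() MIG call (prototypes in <mach/mach_vm.h>;
 *	error handling omitted):
 *
 *		vm_region_submap_info_data_64_t	info;
 *		mach_msg_type_number_t	count = VM_REGION_SUBMAP_INFO_COUNT_64;
 *		mach_vm_address_t	addr = 0;
 *		mach_vm_size_t		size;
 *		natural_t		depth = 0;  -- 0: stay at the top level
 *
 *		kr = mach_vm_region_recurse(task, &addr, &size, &depth,
 *					    (vm_region_recurse_info_t)&info,
 *					    &count);
 */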
8770
8771/*
8772 *	vm_region:
8773 *
8774 *	User call to obtain information about a region in
8775 *	a task's address map. Currently, only one flavor is
8776 *	supported.
8777 *
8778 *	XXX The reserved and behavior fields cannot be filled
8779 *	    in until the vm merge from the IK is completed, and
8780 *	    vm_reserve is implemented.
8781 */
8782
8783kern_return_t
8784vm_map_region(
8785	vm_map_t		 map,
8786	vm_map_offset_t	*address,		/* IN/OUT */
8787	vm_map_size_t		*size,			/* OUT */
8788	vm_region_flavor_t	 flavor,		/* IN */
8789	vm_region_info_t	 info,			/* OUT */
8790	mach_msg_type_number_t	*count,	/* IN/OUT */
8791	mach_port_t		*object_name)		/* OUT */
8792{
8793	vm_map_entry_t		tmp_entry;
8794	vm_map_entry_t		entry;
8795	vm_map_offset_t		start;
8796
8797	if (map == VM_MAP_NULL)
8798		return(KERN_INVALID_ARGUMENT);
8799
8800	switch (flavor) {
8801
8802	case VM_REGION_BASIC_INFO:
8803		/* legacy for old 32-bit objects info */
8804	{
8805		vm_region_basic_info_t	basic;
8806
8807		if (*count < VM_REGION_BASIC_INFO_COUNT)
8808			return(KERN_INVALID_ARGUMENT);
8809
8810		basic = (vm_region_basic_info_t) info;
8811		*count = VM_REGION_BASIC_INFO_COUNT;
8812
8813		vm_map_lock_read(map);
8814
8815		start = *address;
8816		if (!vm_map_lookup_entry(map, start, &tmp_entry)) {
8817			if ((entry = tmp_entry->vme_next) == vm_map_to_entry(map)) {
8818				vm_map_unlock_read(map);
8819				return(KERN_INVALID_ADDRESS);
8820			}
8821		} else {
8822			entry = tmp_entry;
8823		}
8824
8825		start = entry->vme_start;
8826
8827		basic->offset = (uint32_t)entry->offset;
8828		basic->protection = entry->protection;
8829		basic->inheritance = entry->inheritance;
8830		basic->max_protection = entry->max_protection;
8831		basic->behavior = entry->behavior;
8832		basic->user_wired_count = entry->user_wired_count;
8833		basic->reserved = entry->is_sub_map;
8834		*address = start;
8835		*size = (entry->vme_end - start);
8836
8837		if (object_name) *object_name = IP_NULL;
8838		if (entry->is_sub_map) {
8839			basic->shared = FALSE;
8840		} else {
8841			basic->shared = entry->is_shared;
8842		}
8843
8844		vm_map_unlock_read(map);
8845		return(KERN_SUCCESS);
8846	}
8847
8848	case VM_REGION_BASIC_INFO_64:
8849	{
8850		vm_region_basic_info_64_t	basic;
8851
8852		if (*count < VM_REGION_BASIC_INFO_COUNT_64)
8853			return(KERN_INVALID_ARGUMENT);
8854
8855		basic = (vm_region_basic_info_64_t) info;
8856		*count = VM_REGION_BASIC_INFO_COUNT_64;
8857
8858		vm_map_lock_read(map);
8859
8860		start = *address;
8861		if (!vm_map_lookup_entry(map, start, &tmp_entry)) {
8862			if ((entry = tmp_entry->vme_next) == vm_map_to_entry(map)) {
8863				vm_map_unlock_read(map);
8864				return(KERN_INVALID_ADDRESS);
8865			}
8866		} else {
8867			entry = tmp_entry;
8868		}
8869
8870		start = entry->vme_start;
8871
8872		basic->offset = entry->offset;
8873		basic->protection = entry->protection;
8874		basic->inheritance = entry->inheritance;
8875		basic->max_protection = entry->max_protection;
8876		basic->behavior = entry->behavior;
8877		basic->user_wired_count = entry->user_wired_count;
8878		basic->reserved = entry->is_sub_map;
8879		*address = start;
8880		*size = (entry->vme_end - start);
8881
8882		if (object_name) *object_name = IP_NULL;
8883		if (entry->is_sub_map) {
8884			basic->shared = FALSE;
8885		} else {
8886			basic->shared = entry->is_shared;
8887		}
8888
8889		vm_map_unlock_read(map);
8890		return(KERN_SUCCESS);
8891	}
8892	case VM_REGION_EXTENDED_INFO:
8893	{
8894		vm_region_extended_info_t	extended;
8895
8896		if (*count < VM_REGION_EXTENDED_INFO_COUNT)
8897			return(KERN_INVALID_ARGUMENT);
8898
8899		extended = (vm_region_extended_info_t) info;
8900		*count = VM_REGION_EXTENDED_INFO_COUNT;
8901
8902		vm_map_lock_read(map);
8903
8904		start = *address;
8905		if (!vm_map_lookup_entry(map, start, &tmp_entry)) {
8906			if ((entry = tmp_entry->vme_next) == vm_map_to_entry(map)) {
8907				vm_map_unlock_read(map);
8908				return(KERN_INVALID_ADDRESS);
8909			}
8910		} else {
8911			entry = tmp_entry;
8912		}
8913		start = entry->vme_start;
8914
8915		extended->protection = entry->protection;
8916		extended->user_tag = entry->alias;
8917		extended->pages_resident = 0;
8918		extended->pages_swapped_out = 0;
8919		extended->pages_shared_now_private = 0;
8920		extended->pages_dirtied = 0;
8921		extended->external_pager = 0;
8922		extended->shadow_depth = 0;
8923
8924		vm_map_region_walk(map, start, entry, entry->offset, entry->vme_end - start, extended, TRUE);
8925
8926		if (extended->external_pager && extended->ref_count == 2 && extended->share_mode == SM_SHARED)
8927			extended->share_mode = SM_PRIVATE;
8928
8929		if (object_name)
8930			*object_name = IP_NULL;
8931		*address = start;
8932		*size = (entry->vme_end - start);
8933
8934		vm_map_unlock_read(map);
8935		return(KERN_SUCCESS);
8936	}
8937	case VM_REGION_TOP_INFO:
8938	{
8939		vm_region_top_info_t	top;
8940
8941		if (*count < VM_REGION_TOP_INFO_COUNT)
8942			return(KERN_INVALID_ARGUMENT);
8943
8944		top = (vm_region_top_info_t) info;
8945		*count = VM_REGION_TOP_INFO_COUNT;
8946
8947		vm_map_lock_read(map);
8948
8949		start = *address;
8950		if (!vm_map_lookup_entry(map, start, &tmp_entry)) {
8951			if ((entry = tmp_entry->vme_next) == vm_map_to_entry(map)) {
8952				vm_map_unlock_read(map);
8953				return(KERN_INVALID_ADDRESS);
8954			}
8955		} else {
8956			entry = tmp_entry;
8957
8958		}
8959		start = entry->vme_start;
8960
8961		top->private_pages_resident = 0;
8962		top->shared_pages_resident = 0;
8963
8964		vm_map_region_top_walk(entry, top);
8965
8966		if (object_name)
8967			*object_name = IP_NULL;
8968		*address = start;
8969		*size = (entry->vme_end - start);
8970
8971		vm_map_unlock_read(map);
8972		return(KERN_SUCCESS);
8973	}
8974	default:
8975		return(KERN_INVALID_ARGUMENT);
8976	}
8977}
8978
8979#define min(a, b)  (((a) < (b)) ? (a) : (b))
8980
8981void
8982vm_map_region_top_walk(
8983        vm_map_entry_t		   entry,
8984	vm_region_top_info_t       top)
8985{
8986
8987	if (entry->object.vm_object == 0 || entry->is_sub_map) {
8988		top->share_mode = SM_EMPTY;
8989		top->ref_count = 0;
8990		top->obj_id = 0;
8991		return;
8992	}
8993
8994	{
8995	        struct	vm_object *obj, *tmp_obj;
8996		int		ref_count;
8997		uint32_t	entry_size;
8998
8999		entry_size = (entry->vme_end - entry->vme_start) / PAGE_SIZE;
9000
9001		obj = entry->object.vm_object;
9002
9003		vm_object_lock(obj);
9004
9005		if ((ref_count = obj->ref_count) > 1 && obj->paging_in_progress)
9006			ref_count--;
9007
9008		if (obj->shadow) {
9009			if (ref_count == 1)
9010				top->private_pages_resident = min(obj->resident_page_count, entry_size);
9011			else
9012				top->shared_pages_resident = min(obj->resident_page_count, entry_size);
9013			top->ref_count  = ref_count;
9014			top->share_mode = SM_COW;
9015
9016			while ((tmp_obj = obj->shadow)) {
9017				vm_object_lock(tmp_obj);
9018				vm_object_unlock(obj);
9019				obj = tmp_obj;
9020
9021				if ((ref_count = obj->ref_count) > 1 && obj->paging_in_progress)
9022					ref_count--;
9023
9024				top->shared_pages_resident += min(obj->resident_page_count, entry_size);
9025				top->ref_count += ref_count - 1;
9026			}
9027		} else {
9028			if (entry->needs_copy) {
9029				top->share_mode = SM_COW;
9030				top->shared_pages_resident = min(obj->resident_page_count, entry_size);
9031			} else {
9032				if (ref_count == 1 ||
9033				    (ref_count == 2 && !(obj->pager_trusted) && !(obj->internal))) {
9034					top->share_mode = SM_PRIVATE;
9035					top->private_pages_resident = min(obj->resident_page_count, entry_size);
9036				} else {
9037					top->share_mode = SM_SHARED;
9038					top->shared_pages_resident = min(obj->resident_page_count, entry_size);
9039				}
9040			}
9041			top->ref_count = ref_count;
9042		}
9043		top->obj_id = (int)obj;
9044
9045		vm_object_unlock(obj);
9046	}
9047}
9048
9049void
9050vm_map_region_walk(
9051	vm_map_t		   	map,
9052	vm_map_offset_t			va,
9053	vm_map_entry_t			entry,
9054	vm_object_offset_t		offset,
9055	vm_object_size_t		range,
9056	vm_region_extended_info_t	extended,
9057	boolean_t			look_for_pages)
9058{
9059        register struct vm_object *obj, *tmp_obj;
9060	register vm_map_offset_t       last_offset;
9061	register int               i;
9062	register int               ref_count;
9063	struct vm_object	*shadow_object;
9064	int			shadow_depth;
9065
9066	if ((entry->object.vm_object == 0) ||
9067	    (entry->is_sub_map) ||
9068	    (entry->object.vm_object->phys_contiguous)) {
9069		extended->share_mode = SM_EMPTY;
9070		extended->ref_count = 0;
9071		return;
9072	}
9073	{
9074		obj = entry->object.vm_object;
9075
9076		vm_object_lock(obj);
9077
9078		if ((ref_count = obj->ref_count) > 1 && obj->paging_in_progress)
9079			ref_count--;
9080
9081		if (look_for_pages) {
9082			for (last_offset = offset + range;
9083			     offset < last_offset;
9084			     offset += PAGE_SIZE_64, va += PAGE_SIZE)
9085				vm_map_region_look_for_page(map, va, obj,
9086							    offset, ref_count,
9087							    0, extended);
9088		}
9089
9090		shadow_object = obj->shadow;
9091		shadow_depth = 0;
9092		if (shadow_object != VM_OBJECT_NULL) {
9093			vm_object_lock(shadow_object);
9094			for (;
9095			     shadow_object != VM_OBJECT_NULL;
9096			     shadow_depth++) {
9097				vm_object_t	next_shadow;
9098
9099				next_shadow = shadow_object->shadow;
9100				if (next_shadow) {
9101					vm_object_lock(next_shadow);
9102				}
9103				vm_object_unlock(shadow_object);
9104				shadow_object = next_shadow;
9105			}
9106		}
9107		extended->shadow_depth = shadow_depth;
9108
9109		if (extended->shadow_depth || entry->needs_copy)
9110			extended->share_mode = SM_COW;
9111		else {
9112			if (ref_count == 1)
9113				extended->share_mode = SM_PRIVATE;
9114			else {
9115				if (obj->true_share)
9116					extended->share_mode = SM_TRUESHARED;
9117				else
9118					extended->share_mode = SM_SHARED;
9119			}
9120		}
9121		extended->ref_count = ref_count - extended->shadow_depth;
9122
9123		for (i = 0; i < extended->shadow_depth; i++) {
9124			if ((tmp_obj = obj->shadow) == 0)
9125				break;
9126			vm_object_lock(tmp_obj);
9127			vm_object_unlock(obj);
9128
9129			if ((ref_count = tmp_obj->ref_count) > 1 && tmp_obj->paging_in_progress)
9130				ref_count--;
9131
9132			extended->ref_count += ref_count;
9133			obj = tmp_obj;
9134		}
9135		vm_object_unlock(obj);
9136
9137		if (extended->share_mode == SM_SHARED) {
9138			register vm_map_entry_t	     cur;
9139			register vm_map_entry_t	     last;
9140			int      my_refs;
9141
9142			obj = entry->object.vm_object;
9143			last = vm_map_to_entry(map);
9144			my_refs = 0;
9145
9146			if ((ref_count = obj->ref_count) > 1 && obj->paging_in_progress)
9147				ref_count--;
9148			for (cur = vm_map_first_entry(map); cur != last; cur = cur->vme_next)
9149				my_refs += vm_map_region_count_obj_refs(cur, obj);
9150
9151			if (my_refs == ref_count)
9152				extended->share_mode = SM_PRIVATE_ALIASED;
9153			else if (my_refs > 1)
9154				extended->share_mode = SM_SHARED_ALIASED;
9155		}
9156	}
9157}
9158
9159
9160/* object is locked on entry and locked on return */
9161
9162
9163static void
9164vm_map_region_look_for_page(
9165	__unused vm_map_t		map,
9166	__unused vm_map_offset_t	va,
9167	vm_object_t			object,
9168	vm_object_offset_t		offset,
9169	int				max_refcnt,
9170	int				depth,
9171	vm_region_extended_info_t	extended)
9172{
9173        register vm_page_t	p;
9174        register vm_object_t	shadow;
9175	register int            ref_count;
9176	vm_object_t		caller_object;
9177#if	MACH_PAGEMAP
9178	kern_return_t		kr;
9179#endif
9180	shadow = object->shadow;
9181	caller_object = object;
9182
9183
9184	while (TRUE) {
9185
9186		if ( !(object->pager_trusted) && !(object->internal))
9187			extended->external_pager = 1;
9188
9189		if ((p = vm_page_lookup(object, offset)) != VM_PAGE_NULL) {
9190	        	if (shadow && (max_refcnt == 1))
9191		    		extended->pages_shared_now_private++;
9192
9193			if (!p->fictitious &&
9194			    (p->dirty || pmap_is_modified(p->phys_page)))
9195		    		extended->pages_dirtied++;
9196
9197	        	extended->pages_resident++;
9198
9199			if(object != caller_object)
9200				vm_object_unlock(object);
9201
9202			return;
9203		}
9204#if	MACH_PAGEMAP
9205		if (object->existence_map) {
9206	    		if (vm_external_state_get(object->existence_map, offset) == VM_EXTERNAL_STATE_EXISTS) {
9207
9208	        		extended->pages_swapped_out++;
9209
9210				if(object != caller_object)
9211					vm_object_unlock(object);
9212
9213				return;
9214	    		}
9215		} else if (object->internal &&
9216			   object->alive &&
9217			   !object->terminating &&
9218			   object->pager_ready) {
9219
9220			memory_object_t pager;
9221
9222			vm_object_paging_begin(object);
9223			pager = object->pager;
9224			vm_object_unlock(object);
9225
9226			kr = memory_object_data_request(
9227				pager,
9228				offset + object->paging_offset,
9229				0, /* just poke the pager */
9230				VM_PROT_READ,
9231				NULL);
9232
9233			vm_object_lock(object);
9234			vm_object_paging_end(object);
9235
9236			if (kr == KERN_SUCCESS) {
9237				/* the pager has that page */
9238				extended->pages_swapped_out++;
9239				if (object != caller_object)
9240					vm_object_unlock(object);
9241				return;
9242			}
9243		}
9244#endif /* MACH_PAGEMAP */
9245
9246		if (shadow) {
9247			vm_object_lock(shadow);
9248
9249			if ((ref_count = shadow->ref_count) > 1 && shadow->paging_in_progress)
9250			        ref_count--;
9251
9252	    		if (++depth > extended->shadow_depth)
9253	        		extended->shadow_depth = depth;
9254
9255	    		if (ref_count > max_refcnt)
9256	        		max_refcnt = ref_count;
9257
9258			if(object != caller_object)
9259				vm_object_unlock(object);
9260
9261			offset = offset + object->shadow_offset;
9262			object = shadow;
9263			shadow = object->shadow;
9264			continue;
9265		}
9266		if(object != caller_object)
9267			vm_object_unlock(object);
9268		break;
9269	}
9270}
9271
9272static int
9273vm_map_region_count_obj_refs(
9274        vm_map_entry_t    entry,
9275	vm_object_t       object)
9276{
9277        register int ref_count;
9278	register vm_object_t chk_obj;
9279	register vm_object_t tmp_obj;
9280
9281	if (entry->object.vm_object == 0)
9282		return(0);
9283
9284        if (entry->is_sub_map)
9285		return(0);
9286	else {
9287		ref_count = 0;
9288
9289		chk_obj = entry->object.vm_object;
9290		vm_object_lock(chk_obj);
9291
9292		while (chk_obj) {
9293			if (chk_obj == object)
9294				ref_count++;
9295			tmp_obj = chk_obj->shadow;
9296			if (tmp_obj)
9297				vm_object_lock(tmp_obj);
9298			vm_object_unlock(chk_obj);
9299
9300			chk_obj = tmp_obj;
9301		}
9302	}
9303	return(ref_count);
9304}
9305
9306
9307/*
9308 *	Routine:	vm_map_simplify
9309 *
9310 *	Description:
9311 *		Attempt to simplify the map representation in
9312 *		the vicinity of the given starting address.
9313 *	Note:
9314 *		This routine is intended primarily to keep the
9315 *		kernel maps more compact -- they generally don't
9316 *		benefit from the "expand a map entry" technology
9317 *		at allocation time because the adjacent entry
9318 *		is often wired down.
9319 */
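/*
 *	vm_map_simplify_entry() coalesces "this_entry" with the entry
 *	immediately preceding it, but only when the two are contiguous in
 *	both address space and object offset and agree on every attribute
 *	checked below (object, protections, inheritance, wiring, etc.).
 */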
9320void
9321vm_map_simplify_entry(
9322	vm_map_t	map,
9323	vm_map_entry_t	this_entry)
9324{
9325	vm_map_entry_t	prev_entry;
9326
9327	counter(c_vm_map_simplify_entry_called++);
9328
9329	prev_entry = this_entry->vme_prev;
9330
9331	if ((this_entry != vm_map_to_entry(map)) &&
9332	    (prev_entry != vm_map_to_entry(map)) &&
9333
9334	    (prev_entry->vme_end == this_entry->vme_start) &&
9335
9336	    (prev_entry->is_sub_map == this_entry->is_sub_map) &&
9337
9338	    (prev_entry->object.vm_object == this_entry->object.vm_object) &&
9339	    ((prev_entry->offset + (prev_entry->vme_end -
9340				    prev_entry->vme_start))
9341	     == this_entry->offset) &&
9342
9343	    (prev_entry->inheritance == this_entry->inheritance) &&
9344	    (prev_entry->protection == this_entry->protection) &&
9345	    (prev_entry->max_protection == this_entry->max_protection) &&
9346	    (prev_entry->behavior == this_entry->behavior) &&
9347	    (prev_entry->alias == this_entry->alias) &&
9348	    (prev_entry->no_cache == this_entry->no_cache) &&
9349	    (prev_entry->wired_count == this_entry->wired_count) &&
9350	    (prev_entry->user_wired_count == this_entry->user_wired_count) &&
9351
9352	    (prev_entry->needs_copy == this_entry->needs_copy) &&
9353
9354	    (prev_entry->use_pmap == FALSE) &&
9355	    (this_entry->use_pmap == FALSE) &&
9356	    (prev_entry->in_transition == FALSE) &&
9357	    (this_entry->in_transition == FALSE) &&
9358	    (prev_entry->needs_wakeup == FALSE) &&
9359	    (this_entry->needs_wakeup == FALSE) &&
9360	    (prev_entry->is_shared == FALSE) &&
9361	    (this_entry->is_shared == FALSE)
9362		) {
9363		_vm_map_entry_unlink(&map->hdr, prev_entry);
9364		this_entry->vme_start = prev_entry->vme_start;
9365		this_entry->offset = prev_entry->offset;
9366		if (prev_entry->is_sub_map) {
9367			vm_map_deallocate(prev_entry->object.sub_map);
9368		} else {
9369			vm_object_deallocate(prev_entry->object.vm_object);
9370		}
9371		vm_map_entry_dispose(map, prev_entry);
9372		SAVE_HINT_MAP_WRITE(map, this_entry);
9373		counter(c_vm_map_simplified++);
9374	}
9375}
9376
9377void
9378vm_map_simplify(
9379	vm_map_t	map,
9380	vm_map_offset_t	start)
9381{
9382	vm_map_entry_t	this_entry;
9383
9384	vm_map_lock(map);
9385	if (vm_map_lookup_entry(map, start, &this_entry)) {
9386		vm_map_simplify_entry(map, this_entry);
9387		vm_map_simplify_entry(map, this_entry->vme_next);
9388	}
9389	counter(c_vm_map_simplify_called++);
9390	vm_map_unlock(map);
9391}
9392
9393static void
9394vm_map_simplify_range(
9395	vm_map_t	map,
9396	vm_map_offset_t	start,
9397	vm_map_offset_t	end)
9398{
9399	vm_map_entry_t	entry;
9400
9401	/*
9402	 * The map should be locked (for "write") by the caller.
9403	 */
9404
9405	if (start >= end) {
9406		/* invalid address range */
9407		return;
9408	}
9409
9410	start = vm_map_trunc_page(start);
9411	end = vm_map_round_page(end);
9412
9413	if (!vm_map_lookup_entry(map, start, &entry)) {
9414		/* "start" is not mapped and "entry" ends before "start" */
9415		if (entry == vm_map_to_entry(map)) {
9416			/* start with first entry in the map */
9417			entry = vm_map_first_entry(map);
9418		} else {
9419			/* start with next entry */
9420			entry = entry->vme_next;
9421		}
9422	}
9423
9424	while (entry != vm_map_to_entry(map) &&
9425	       entry->vme_start <= end) {
9426		/* try to coalesce "entry" with its previous entry */
9427		vm_map_simplify_entry(map, entry);
9428		entry = entry->vme_next;
9429	}
9430}
9431
9432
9433/*
9434 *	Routine:	vm_map_machine_attribute
9435 *	Purpose:
9436 *		Provide machine-specific attributes to mappings,
9437 *		such as cacheability, etc., for machines that provide
9438 *		them.  NUMA architectures and machines with big/strange
9439 *		caches will use this.
9440 *	Note:
9441 *		Responsibilities for locking and checking are handled here;
9442 *		everything else is handled in the pmap module. If any non-volatile
9443 *		information must be kept, the pmap module should handle
9444 *		it itself. [This assumes that attributes do not
9445 *		need to be inherited, which seems ok to me]
9446 */
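/*
 *	For MATTR_CACHE the code below walks the affected entries and
 *	resolves each page to its physical page (following shadow chains
 *	as needed) so that pmap_attribute_cache_sync() can be applied per
 *	page; every other attribute is passed straight through to
 *	pmap_attribute().
 */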
9447kern_return_t
9448vm_map_machine_attribute(
9449	vm_map_t			map,
9450	vm_map_offset_t		start,
9451	vm_map_offset_t		end,
9452	vm_machine_attribute_t	attribute,
9453	vm_machine_attribute_val_t* value)		/* IN/OUT */
9454{
9455	kern_return_t	ret;
9456	vm_map_size_t sync_size;
9457	vm_map_entry_t entry;
9458
9459	if (start < vm_map_min(map) || end > vm_map_max(map))
9460		return KERN_INVALID_ADDRESS;
9461
9462	/* Figure how much memory we need to flush (in page increments) */
9463	sync_size = end - start;
9464
9465	vm_map_lock(map);
9466
9467	if (attribute != MATTR_CACHE) {
9468		/* If we don't have to find physical addresses, we */
9469		/* don't have to do an explicit traversal here.    */
9470		ret = pmap_attribute(map->pmap, start, end-start,
9471				     attribute, value);
9472		vm_map_unlock(map);
9473		return ret;
9474	}
9475
9476	ret = KERN_SUCCESS;		/* Assume it all worked */
9477
9478	while(sync_size) {
9479		if (vm_map_lookup_entry(map, start, &entry)) {
9480			vm_map_size_t	sub_size;
9481			if((entry->vme_end - start) > sync_size) {
9482				sub_size = sync_size;
9483				sync_size = 0;
9484			} else {
9485				sub_size = entry->vme_end - start;
9486				sync_size -= sub_size;
9487			}
9488			if(entry->is_sub_map) {
9489				vm_map_offset_t sub_start;
9490				vm_map_offset_t sub_end;
9491
9492				sub_start = (start - entry->vme_start)
9493					+ entry->offset;
9494				sub_end = sub_start + sub_size;
9495				vm_map_machine_attribute(
9496					entry->object.sub_map,
9497					sub_start,
9498					sub_end,
9499					attribute, value);
9500			} else {
9501				if(entry->object.vm_object) {
9502					vm_page_t		m;
9503					vm_object_t		object;
9504					vm_object_t		base_object;
9505					vm_object_t		last_object;
9506					vm_object_offset_t	offset;
9507					vm_object_offset_t	base_offset;
9508					vm_map_size_t		range;
9509					range = sub_size;
9510					offset = (start - entry->vme_start)
9511						+ entry->offset;
9512					base_offset = offset;
9513					object = entry->object.vm_object;
9514					base_object = object;
9515					last_object = NULL;
9516
9517					vm_object_lock(object);
9518
9519					while (range) {
9520						m = vm_page_lookup(
9521							object, offset);
9522
9523						if (m && !m->fictitious) {
9524						        ret =
9525								pmap_attribute_cache_sync(
9526									m->phys_page,
9527									PAGE_SIZE,
9528									attribute, value);
9529
9530						} else if (object->shadow) {
9531						        offset = offset + object->shadow_offset;
9532							last_object = object;
9533							object = object->shadow;
9534							vm_object_lock(last_object->shadow);
9535							vm_object_unlock(last_object);
9536							continue;
9537						}
9538						range -= PAGE_SIZE;
9539
9540						if (base_object != object) {
9541						        vm_object_unlock(object);
9542							vm_object_lock(base_object);
9543							object = base_object;
9544						}
9545						/* Bump to the next page */
9546						base_offset += PAGE_SIZE;
9547						offset = base_offset;
9548					}
9549					vm_object_unlock(object);
9550				}
9551			}
9552			start += sub_size;
9553		} else {
9554			vm_map_unlock(map);
9555			return KERN_FAILURE;
9556		}
9557
9558	}
9559
9560	vm_map_unlock(map);
9561
9562	return ret;
9563}
9564
9565/*
9566 *	vm_map_behavior_set:
9567 *
9568 *	Sets the paging reference behavior of the specified address
9569 *	range in the target map.  Paging reference behavior affects
9570 *	how pagein operations resulting from faults on the map will be
9571 *	clustered.
9572 */
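/*
 * Usage sketch (illustrative only): advise the VM system that the pages
 * backing [start, end) will be touched sequentially, so pageins can be
 * clustered accordingly.  "map", "start" and "end" are hypothetical.
 *
 *	kern_return_t	kr;
 *
 *	kr = vm_map_behavior_set(map, start, end, VM_BEHAVIOR_SEQUENTIAL);
 *
 * A non-KERN_SUCCESS return (e.g. KERN_INVALID_ADDRESS) means part of
 * the range was not mapped.
 */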
9573kern_return_t
9574vm_map_behavior_set(
9575	vm_map_t	map,
9576	vm_map_offset_t	start,
9577	vm_map_offset_t	end,
9578	vm_behavior_t	new_behavior)
9579{
9580	register vm_map_entry_t	entry;
9581	vm_map_entry_t	temp_entry;
9582
9583	XPR(XPR_VM_MAP,
9584	    "vm_map_behavior_set, 0x%X start 0x%X end 0x%X behavior %d",
9585	    (integer_t)map, start, end, new_behavior, 0);
9586
9587	switch (new_behavior) {
9588	case VM_BEHAVIOR_DEFAULT:
9589	case VM_BEHAVIOR_RANDOM:
9590	case VM_BEHAVIOR_SEQUENTIAL:
9591	case VM_BEHAVIOR_RSEQNTL:
9592		break;
9593	case VM_BEHAVIOR_WILLNEED:
9594	case VM_BEHAVIOR_DONTNEED:
9595		new_behavior = VM_BEHAVIOR_DEFAULT;
9596		break;
9597	default:
9598		return(KERN_INVALID_ARGUMENT);
9599	}
9600
9601	vm_map_lock(map);
9602
9603	/*
9604	 *	The entire address range must be valid for the map.
9605	 * 	Note that vm_map_range_check() does a
9606	 *	vm_map_lookup_entry() internally and returns the
9607	 *	entry containing the start of the address range if
9608	 *	the entire range is valid.
9609	 */
9610	if (vm_map_range_check(map, start, end, &temp_entry)) {
9611		entry = temp_entry;
9612		vm_map_clip_start(map, entry, start);
9613	}
9614	else {
9615		vm_map_unlock(map);
9616		return(KERN_INVALID_ADDRESS);
9617	}
9618
9619	while ((entry != vm_map_to_entry(map)) && (entry->vme_start < end)) {
9620		vm_map_clip_end(map, entry, end);
9621		assert(!entry->use_pmap);
9622
9623		entry->behavior = new_behavior;
9624
9625		entry = entry->vme_next;
9626	}
9627
9628	vm_map_unlock(map);
9629	return(KERN_SUCCESS);
9630}
9631
9632
9633#include <mach_kdb.h>
9634#if	MACH_KDB
9635#include <ddb/db_output.h>
9636#include <vm/vm_print.h>
9637
9638#define	printf	db_printf
9639
9640/*
9641 * Forward declarations for internal functions.
9642 */
9643extern void vm_map_links_print(
9644	struct vm_map_links	*links);
9645
9646extern void vm_map_header_print(
9647	struct vm_map_header	*header);
9648
9649extern void vm_map_entry_print(
9650	vm_map_entry_t		entry);
9651
9652extern void vm_follow_entry(
9653	vm_map_entry_t		entry);
9654
9655extern void vm_follow_map(
9656	vm_map_t		map);
9657
9658/*
9659 *	vm_map_links_print:	[ debug ]
9660 */
9661void
9662vm_map_links_print(
9663	struct vm_map_links	*links)
9664{
9665	iprintf("prev = %08X  next = %08X  start = %016llX  end = %016llX\n",
9666		links->prev,
9667		links->next,
9668		(unsigned long long)links->start,
9669		(unsigned long long)links->end);
9670}
9671
9672/*
9673 *	vm_map_header_print:	[ debug ]
9674 */
9675void
9676vm_map_header_print(
9677	struct vm_map_header	*header)
9678{
9679	vm_map_links_print(&header->links);
9680	iprintf("nentries = %08X, %sentries_pageable\n",
9681		header->nentries,
9682		(header->entries_pageable ? "" : "!"));
9683}
9684
9685/*
9686 *	vm_follow_entry:	[ debug ]
9687 */
9688void
9689vm_follow_entry(
9690	vm_map_entry_t entry)
9691{
9692	int shadows;
9693
9694	iprintf("map entry %08X\n", entry);
9695
9696	db_indent += 2;
9697
9698	shadows = vm_follow_object(entry->object.vm_object);
9699	iprintf("Total objects : %d\n",shadows);
9700
9701	db_indent -= 2;
9702}
9703
9704/*
9705 *	vm_map_entry_print:	[ debug ]
9706 */
9707void
9708vm_map_entry_print(
9709	register vm_map_entry_t	entry)
9710{
9711	static const char *inheritance_name[4] =
9712		{ "share", "copy", "none", "?"};
9713	static const char *behavior_name[4] =
9714		{ "dflt", "rand", "seqtl", "rseqntl" };
9715
9716	iprintf("map entry %08X - prev = %08X  next = %08X\n", entry, entry->vme_prev, entry->vme_next);
9717
9718	db_indent += 2;
9719
9720	vm_map_links_print(&entry->links);
9721
9722	iprintf("start = %016llX  end = %016llX - prot=%x/%x/%s\n",
9723		(unsigned long long)entry->vme_start,
9724		(unsigned long long)entry->vme_end,
9725		entry->protection,
9726		entry->max_protection,
9727		inheritance_name[(entry->inheritance & 0x3)]);
9728
9729	iprintf("behavior = %s, wired_count = %d, user_wired_count = %d\n",
9730		behavior_name[(entry->behavior & 0x3)],
9731		entry->wired_count,
9732		entry->user_wired_count);
9733	iprintf("%sin_transition, %sneeds_wakeup\n",
9734		(entry->in_transition ? "" : "!"),
9735		(entry->needs_wakeup ? "" : "!"));
9736
9737	if (entry->is_sub_map) {
9738		iprintf("submap = %08X - offset = %016llX\n",
9739			entry->object.sub_map,
9740			(unsigned long long)entry->offset);
9741	} else {
9742		iprintf("object = %08X  offset = %016llX - ",
9743			entry->object.vm_object,
9744			(unsigned long long)entry->offset);
9745		printf("%sis_shared, %sneeds_copy\n",
9746		       (entry->is_shared ? "" : "!"),
9747		       (entry->needs_copy ? "" : "!"));
9748	}
9749
9750	db_indent -= 2;
9751}
9752
9753/*
9754 *	vm_follow_map:	[ debug ]
9755 */
9756void
9757vm_follow_map(
9758	vm_map_t map)
9759{
9760	register vm_map_entry_t	entry;
9761
9762	iprintf("task map %08X\n", map);
9763
9764	db_indent += 2;
9765
9766	for (entry = vm_map_first_entry(map);
9767	     entry && entry != vm_map_to_entry(map);
9768	     entry = entry->vme_next) {
9769		vm_follow_entry(entry);
9770	}
9771
9772	db_indent -= 2;
9773}
9774
9775/*
9776 *	vm_map_print:	[ debug ]
9777 */
9778void
9779vm_map_print(
9780	db_addr_t inmap)
9781{
9782	register vm_map_entry_t	entry;
9783	vm_map_t map;
9784#if TASK_SWAPPER
9785	char *swstate;
9786#endif /* TASK_SWAPPER */
9787
9788	map = (vm_map_t)(long)
9789		inmap;	/* Make sure we have the right type */
9790
9791	iprintf("task map %08X\n", map);
9792
9793	db_indent += 2;
9794
9795	vm_map_header_print(&map->hdr);
9796
9797	iprintf("pmap = %08X  size = %08X  ref = %d  hint = %08X  first_free = %08X\n",
9798		map->pmap,
9799		map->size,
9800		map->ref_count,
9801		map->hint,
9802		map->first_free);
9803
9804	iprintf("%swait_for_space, %swiring_required, timestamp = %d\n",
9805		(map->wait_for_space ? "" : "!"),
9806		(map->wiring_required ? "" : "!"),
9807		map->timestamp);
9808
9809#if	TASK_SWAPPER
9810	switch (map->sw_state) {
9811	case MAP_SW_IN:
9812		swstate = "SW_IN";
9813		break;
9814	case MAP_SW_OUT:
9815		swstate = "SW_OUT";
9816		break;
9817	default:
9818		swstate = "????";
9819		break;
9820	}
9821	iprintf("res = %d, sw_state = %s\n", map->res_count, swstate);
9822#endif	/* TASK_SWAPPER */
9823
9824	for (entry = vm_map_first_entry(map);
9825	     entry && entry != vm_map_to_entry(map);
9826	     entry = entry->vme_next) {
9827		vm_map_entry_print(entry);
9828	}
9829
9830	db_indent -= 2;
9831}
9832
9833/*
9834 *	Routine:	vm_map_copy_print
9835 *	Purpose:
9836 *		Pretty-print a copy object for ddb.
9837 */
9838
9839void
9840vm_map_copy_print(
9841	db_addr_t	incopy)
9842{
9843	vm_map_copy_t copy;
9844	vm_map_entry_t entry;
9845
9846	copy = (vm_map_copy_t)(long)
9847		incopy;	/* Make sure we have the right type */
9848
9849	printf("copy object 0x%x\n", copy);
9850
9851	db_indent += 2;
9852
9853	iprintf("type=%d", copy->type);
9854	switch (copy->type) {
9855	case VM_MAP_COPY_ENTRY_LIST:
9856		printf("[entry_list]");
9857		break;
9858
9859	case VM_MAP_COPY_OBJECT:
9860		printf("[object]");
9861		break;
9862
9863	case VM_MAP_COPY_KERNEL_BUFFER:
9864		printf("[kernel_buffer]");
9865		break;
9866
9867	default:
9868		printf("[bad type]");
9869		break;
9870	}
9871	printf(", offset=0x%llx", (unsigned long long)copy->offset);
9872	printf(", size=0x%x\n", copy->size);
9873
9874	switch (copy->type) {
9875	case VM_MAP_COPY_ENTRY_LIST:
9876		vm_map_header_print(&copy->cpy_hdr);
9877		for (entry = vm_map_copy_first_entry(copy);
9878		     entry && entry != vm_map_copy_to_entry(copy);
9879		     entry = entry->vme_next) {
9880			vm_map_entry_print(entry);
9881		}
9882		break;
9883
9884	case VM_MAP_COPY_OBJECT:
9885		iprintf("object=0x%x\n", copy->cpy_object);
9886		break;
9887
9888	case VM_MAP_COPY_KERNEL_BUFFER:
9889		iprintf("kernel buffer=0x%x", copy->cpy_kdata);
9890		printf(", kalloc_size=0x%x\n", copy->cpy_kalloc_size);
9891		break;
9892
9893	}
9894
9895	db_indent -=2;
9896}
9897
9898/*
9899 *	db_vm_map_total_size(map)	[ debug ]
9900 *
9901 *	return the total virtual size (in bytes) of the map
9902 */
9903vm_map_size_t
9904db_vm_map_total_size(
9905	db_addr_t	inmap)
9906{
9907	vm_map_entry_t	entry;
9908	vm_map_size_t	total;
9909	vm_map_t map;
9910
9911	map = (vm_map_t)(long)
9912		inmap;	/* Make sure we have the right type */
9913
9914	total = 0;
9915	for (entry = vm_map_first_entry(map);
9916	     entry != vm_map_to_entry(map);
9917	     entry = entry->vme_next) {
9918		total += entry->vme_end - entry->vme_start;
9919	}
9920
9921	return total;
9922}
9923
9924#endif	/* MACH_KDB */
9925
9926/*
9927 *	Routine:	vm_map_entry_insert
9928 *
9929 *	Description:	This routine inserts a new vm_map_entry into a locked map.
9930 */
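/*
 * Illustrative call sketch (not from the original source).  The caller
 * must hold the map lock for writing, "insp_entry" is the entry the new
 * one is linked after, and the object/offset/protection values shown
 * are hypothetical:
 *
 *	new_entry = vm_map_entry_insert(map, insp_entry,
 *					start, end,
 *					object, offset,
 *					FALSE, FALSE, FALSE,
 *					VM_PROT_DEFAULT, VM_PROT_ALL,
 *					VM_BEHAVIOR_DEFAULT,
 *					VM_INHERIT_DEFAULT,
 *					0, FALSE);
 *
 * The three booleans are needs_copy, is_shared and in_transition; the
 * trailing 0 and FALSE are wired_count and no_cache.
 */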
9931vm_map_entry_t
9932vm_map_entry_insert(
9933	vm_map_t		map,
9934	vm_map_entry_t		insp_entry,
9935	vm_map_offset_t		start,
9936	vm_map_offset_t		end,
9937	vm_object_t		object,
9938	vm_object_offset_t	offset,
9939	boolean_t		needs_copy,
9940	boolean_t		is_shared,
9941	boolean_t		in_transition,
9942	vm_prot_t		cur_protection,
9943	vm_prot_t		max_protection,
9944	vm_behavior_t		behavior,
9945	vm_inherit_t		inheritance,
9946	unsigned		wired_count,
9947	boolean_t		no_cache)
9948{
9949	vm_map_entry_t	new_entry;
9950
9951	assert(insp_entry != (vm_map_entry_t)0);
9952
9953	new_entry = vm_map_entry_create(map);
9954
9955	new_entry->vme_start = start;
9956	new_entry->vme_end = end;
9957	assert(page_aligned(new_entry->vme_start));
9958	assert(page_aligned(new_entry->vme_end));
9959
9960	new_entry->object.vm_object = object;
9961	new_entry->offset = offset;
9962	new_entry->is_shared = is_shared;
9963	new_entry->is_sub_map = FALSE;
9964	new_entry->needs_copy = needs_copy;
9965	new_entry->in_transition = in_transition;
9966	new_entry->needs_wakeup = FALSE;
9967	new_entry->inheritance = inheritance;
9968	new_entry->protection = cur_protection;
9969	new_entry->max_protection = max_protection;
9970	new_entry->behavior = behavior;
9971	new_entry->wired_count = wired_count;
9972	new_entry->user_wired_count = 0;
9973	new_entry->use_pmap = FALSE;
9974	new_entry->alias = 0;
9975	new_entry->no_cache = no_cache;
9976
9977	/*
9978	 *	Insert the new entry into the list.
9979	 */
9980
9981	vm_map_entry_link(map, insp_entry, new_entry);
9982	map->size += end - start;
9983
9984	/*
9985	 *	Update the free space hint and the lookup hint.
9986	 */
9987
9988	SAVE_HINT_MAP_WRITE(map, new_entry);
9989	return new_entry;
9990}
9991
9992/*
9993 *	Routine:	vm_map_remap_extract
9994 *
9995 *	Description:	This routine extracts a list of vm_map_entries from a map.
9996 */
9997static kern_return_t
9998vm_map_remap_extract(
9999	vm_map_t		map,
10000	vm_map_offset_t		addr,
10001	vm_map_size_t		size,
10002	boolean_t		copy,
10003	struct vm_map_header	*map_header,
10004	vm_prot_t		*cur_protection,
10005	vm_prot_t		*max_protection,
10006	/* What, no behavior? */
10007	vm_inherit_t		inheritance,
10008	boolean_t		pageable)
10009{
10010	kern_return_t		result;
10011	vm_map_size_t		mapped_size;
10012	vm_map_size_t		tmp_size;
10013	vm_map_entry_t		src_entry;     /* result of last map lookup */
10014	vm_map_entry_t		new_entry;
10015	vm_object_offset_t	offset;
10016	vm_map_offset_t		map_address;
10017	vm_map_offset_t		src_start;     /* start of entry to map */
10018	vm_map_offset_t		src_end;       /* end of region to be mapped */
10019	vm_object_t		object;
10020	vm_map_version_t	version;
10021	boolean_t		src_needs_copy;
10022	boolean_t		new_entry_needs_copy;
10023
10024	assert(map != VM_MAP_NULL);
10025	assert(size != 0 && size == vm_map_round_page(size));
10026	assert(inheritance == VM_INHERIT_NONE ||
10027	       inheritance == VM_INHERIT_COPY ||
10028	       inheritance == VM_INHERIT_SHARE);
10029
10030	/*
10031	 *	Compute start and end of region.
10032	 */
10033	src_start = vm_map_trunc_page(addr);
10034	src_end = vm_map_round_page(src_start + size);
10035
10036	/*
10037	 *	Initialize map_header.
10038	 */
10039	map_header->links.next = (struct vm_map_entry *)&map_header->links;
10040	map_header->links.prev = (struct vm_map_entry *)&map_header->links;
10041	map_header->nentries = 0;
10042	map_header->entries_pageable = pageable;
10043
10044	*cur_protection = VM_PROT_ALL;
10045	*max_protection = VM_PROT_ALL;
10046
10047	map_address = 0;
10048	mapped_size = 0;
10049	result = KERN_SUCCESS;
10050
10051	/*
10052	 *	The specified source virtual space might correspond to
10053	 *	multiple map entries, so we need to loop over them.
10054	 */
10055	vm_map_lock(map);
10056	while (mapped_size != size) {
10057		vm_map_size_t	entry_size;
10058
10059		/*
10060		 *	Find the beginning of the region.
10061		 */
10062		if (! vm_map_lookup_entry(map, src_start, &src_entry)) {
10063			result = KERN_INVALID_ADDRESS;
10064			break;
10065		}
10066
10067		if (src_start < src_entry->vme_start ||
10068		    (mapped_size && src_start != src_entry->vme_start)) {
10069			result = KERN_INVALID_ADDRESS;
10070			break;
10071		}
10072
10073		if(src_entry->is_sub_map) {
10074			result = KERN_INVALID_ADDRESS;
10075			break;
10076		}
10077
10078		tmp_size = size - mapped_size;
10079		if (src_end > src_entry->vme_end)
10080			tmp_size -= (src_end - src_entry->vme_end);
10081
10082		entry_size = (vm_map_size_t)(src_entry->vme_end -
10083					     src_entry->vme_start);
10084
10085		if(src_entry->is_sub_map) {
10086			vm_map_reference(src_entry->object.sub_map);
10087			object = VM_OBJECT_NULL;
10088		} else {
10089			object = src_entry->object.vm_object;
10090
10091			if (object == VM_OBJECT_NULL) {
10092				object = vm_object_allocate(entry_size);
10093				src_entry->offset = 0;
10094				src_entry->object.vm_object = object;
10095			} else if (object->copy_strategy !=
10096				   MEMORY_OBJECT_COPY_SYMMETRIC) {
10097				/*
10098				 *	We are already using an asymmetric
10099				 *	copy, and therefore we already have
10100				 *	the right object.
10101				 */
10102				assert(!src_entry->needs_copy);
10103			} else if (src_entry->needs_copy || object->shadowed ||
10104				   (object->internal && !object->true_share &&
10105				    !src_entry->is_shared &&
10106				    object->size > entry_size)) {
10107
10108				vm_object_shadow(&src_entry->object.vm_object,
10109						 &src_entry->offset,
10110						 entry_size);
10111
10112				if (!src_entry->needs_copy &&
10113				    (src_entry->protection & VM_PROT_WRITE)) {
10114				        vm_prot_t prot;
10115
10116				        prot = src_entry->protection & ~VM_PROT_WRITE;
10117
10118					if (override_nx(map, src_entry->alias) && prot)
10119					        prot |= VM_PROT_EXECUTE;
10120
10121					if(map->mapped) {
10122						vm_object_pmap_protect(
10123							src_entry->object.vm_object,
10124							src_entry->offset,
10125							entry_size,
10126							PMAP_NULL,
10127							src_entry->vme_start,
10128							prot);
10129					} else {
10130						pmap_protect(vm_map_pmap(map),
10131							     src_entry->vme_start,
10132							     src_entry->vme_end,
10133							     prot);
10134					}
10135				}
10136
10137				object = src_entry->object.vm_object;
10138				src_entry->needs_copy = FALSE;
10139			}
10140
10141
10142			vm_object_lock(object);
10143			vm_object_reference_locked(object); /* object ref. for new entry */
10144			if (object->copy_strategy ==
10145			    MEMORY_OBJECT_COPY_SYMMETRIC) {
10146				object->copy_strategy =
10147					MEMORY_OBJECT_COPY_DELAY;
10148			}
10149			vm_object_unlock(object);
10150		}
10151
10152		offset = src_entry->offset + (src_start - src_entry->vme_start);
10153
10154		new_entry = _vm_map_entry_create(map_header);
10155		vm_map_entry_copy(new_entry, src_entry);
10156		new_entry->use_pmap = FALSE; /* clr address space specifics */
10157
10158		new_entry->vme_start = map_address;
10159		new_entry->vme_end = map_address + tmp_size;
10160		new_entry->inheritance = inheritance;
10161		new_entry->offset = offset;
10162
10163		/*
10164		 * The new region has to be copied now if required.
10165		 */
10166	RestartCopy:
10167		if (!copy) {
10168			src_entry->is_shared = TRUE;
10169			new_entry->is_shared = TRUE;
10170			if (!(new_entry->is_sub_map))
10171				new_entry->needs_copy = FALSE;
10172
10173		} else if (src_entry->is_sub_map) {
10174			/* make this a COW sub_map if not already */
10175			new_entry->needs_copy = TRUE;
10176			object = VM_OBJECT_NULL;
10177		} else if (src_entry->wired_count == 0 &&
10178			   vm_object_copy_quickly(&new_entry->object.vm_object,
10179						  new_entry->offset,
10180						  (new_entry->vme_end -
10181						   new_entry->vme_start),
10182						  &src_needs_copy,
10183						  &new_entry_needs_copy)) {
10184
10185			new_entry->needs_copy = new_entry_needs_copy;
10186			new_entry->is_shared = FALSE;
10187
10188			/*
10189			 * Handle copy_on_write semantics.
10190			 */
10191			if (src_needs_copy && !src_entry->needs_copy) {
10192			        vm_prot_t prot;
10193
10194				prot = src_entry->protection & ~VM_PROT_WRITE;
10195
10196				if (override_nx(map, src_entry->alias) && prot)
10197				        prot |= VM_PROT_EXECUTE;
10198
10199				vm_object_pmap_protect(object,
10200						       offset,
10201						       entry_size,
10202						       ((src_entry->is_shared
10203							 || map->mapped) ?
10204							PMAP_NULL : map->pmap),
10205						       src_entry->vme_start,
10206						       prot);
10207
10208				src_entry->needs_copy = TRUE;
10209			}
10210			/*
10211			 * Throw away the old object reference of the new entry.
10212			 */
10213			vm_object_deallocate(object);
10214
10215		} else {
10216			new_entry->is_shared = FALSE;
10217
10218			/*
10219			 * The map can be safely unlocked since we
10220			 * already hold a reference on the object.
10221			 *
10222			 * Record the timestamp of the map for later
10223			 * verification, and unlock the map.
10224			 */
10225			version.main_timestamp = map->timestamp;
10226			vm_map_unlock(map); 	/* Increments timestamp once! */
10227
10228			/*
10229			 * Perform the copy.
10230			 */
10231			if (src_entry->wired_count > 0) {
10232				vm_object_lock(object);
10233				result = vm_object_copy_slowly(
10234					object,
10235					offset,
10236					entry_size,
10237					THREAD_UNINT,
10238					&new_entry->object.vm_object);
10239
10240				new_entry->offset = 0;
10241				new_entry->needs_copy = FALSE;
10242			} else {
10243				result = vm_object_copy_strategically(
10244					object,
10245					offset,
10246					entry_size,
10247					&new_entry->object.vm_object,
10248					&new_entry->offset,
10249					&new_entry_needs_copy);
10250
10251				new_entry->needs_copy = new_entry_needs_copy;
10252			}
10253
10254			/*
10255			 * Throw away the old object reference of the new entry.
10256			 */
10257			vm_object_deallocate(object);
10258
10259			if (result != KERN_SUCCESS &&
10260			    result != KERN_MEMORY_RESTART_COPY) {
10261				_vm_map_entry_dispose(map_header, new_entry);
10262				break;
10263			}
10264
10265			/*
10266			 * Verify that the map has not substantially
10267			 * changed while the copy was being made.
10268			 */
10269
10270			vm_map_lock(map);
10271			if (version.main_timestamp + 1 != map->timestamp) {
10272				/*
10273				 * Simple version comparison failed.
10274				 *
10275				 * Retry the lookup and verify that the
10276				 * same object/offset are still present.
10277				 */
10278				vm_object_deallocate(new_entry->
10279						     object.vm_object);
10280				_vm_map_entry_dispose(map_header, new_entry);
10281				if (result == KERN_MEMORY_RESTART_COPY)
10282					result = KERN_SUCCESS;
10283				continue;
10284			}
10285
10286			if (result == KERN_MEMORY_RESTART_COPY) {
10287				vm_object_reference(object);
10288				goto RestartCopy;
10289			}
10290		}
10291
10292		_vm_map_entry_link(map_header,
10293				   map_header->links.prev, new_entry);
10294
10295		*cur_protection &= src_entry->protection;
10296		*max_protection &= src_entry->max_protection;
10297
10298		map_address += tmp_size;
10299		mapped_size += tmp_size;
10300		src_start += tmp_size;
10301
10302	} /* end while */
10303
10304	vm_map_unlock(map);
10305	if (result != KERN_SUCCESS) {
10306		/*
10307		 * Free all allocated elements.
10308		 */
10309		for (src_entry = map_header->links.next;
10310		     src_entry != (struct vm_map_entry *)&map_header->links;
10311		     src_entry = new_entry) {
10312			new_entry = src_entry->vme_next;
10313			_vm_map_entry_unlink(map_header, src_entry);
10314			vm_object_deallocate(src_entry->object.vm_object);
10315			_vm_map_entry_dispose(map_header, src_entry);
10316		}
10317	}
10318	return result;
10319}
10320
10321/*
10322 *	Routine:	vm_remap
10323 *
10324 *			Map a portion of a task's address space.
10325 *			The mapped region must not overlap more than
10326 *			one VM memory object. Protections and
10327 *			inheritance attributes remain the same as in
10328 *			the original task and are returned as out parameters.
10329 *			Source and target tasks can be identical.
10330 *			Other attributes are identical to those for vm_map().
10331 */
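/*
 * Usage sketch (illustrative; "target_map", "src_map", "src_addr" and
 * "size" are hypothetical).  Share "size" bytes of src_map into
 * target_map at a kernel-chosen address ("anywhere" TRUE, no alignment
 * mask), without copying:
 *
 *	vm_map_address_t	target_addr = 0;
 *	vm_prot_t		cur_prot, max_prot;
 *	kern_return_t		kr;
 *
 *	kr = vm_map_remap(target_map, &target_addr, size,
 *			  (vm_map_offset_t)0, TRUE,
 *			  src_map, src_addr, FALSE,
 *			  &cur_prot, &max_prot, VM_INHERIT_SHARE);
 *
 * On success, cur_prot/max_prot report the protections common to all
 * of the source entries that were remapped.
 */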
10332kern_return_t
10333vm_map_remap(
10334	vm_map_t		target_map,
10335	vm_map_address_t	*address,
10336	vm_map_size_t		size,
10337	vm_map_offset_t		mask,
10338	boolean_t		anywhere,
10339	vm_map_t		src_map,
10340	vm_map_offset_t		memory_address,
10341	boolean_t		copy,
10342	vm_prot_t		*cur_protection,
10343	vm_prot_t		*max_protection,
10344	vm_inherit_t		inheritance)
10345{
10346	kern_return_t		result;
10347	vm_map_entry_t		entry;
10348	vm_map_entry_t		insp_entry = VM_MAP_ENTRY_NULL;
10349	vm_map_entry_t		new_entry;
10350	struct vm_map_header	map_header;
10351
10352	if (target_map == VM_MAP_NULL)
10353		return KERN_INVALID_ARGUMENT;
10354
10355	switch (inheritance) {
10356	case VM_INHERIT_NONE:
10357	case VM_INHERIT_COPY:
10358	case VM_INHERIT_SHARE:
10359		if (size != 0 && src_map != VM_MAP_NULL)
10360			break;
10361		/*FALL THRU*/
10362	default:
10363		return KERN_INVALID_ARGUMENT;
10364	}
10365
10366	size = vm_map_round_page(size);
10367
10368	result = vm_map_remap_extract(src_map, memory_address,
10369				      size, copy, &map_header,
10370				      cur_protection,
10371				      max_protection,
10372				      inheritance,
10373				      target_map->hdr.entries_pageable);
10375
10376	if (result != KERN_SUCCESS) {
10377		return result;
10378	}
10379
10380	/*
10381	 * Allocate/check a range of free virtual address
10382	 * space for the target
10383	 */
10384	*address = vm_map_trunc_page(*address);
10385	vm_map_lock(target_map);
10386	result = vm_map_remap_range_allocate(target_map, address, size,
10387					     mask, anywhere, &insp_entry);
10388
10389	for (entry = map_header.links.next;
10390	     entry != (struct vm_map_entry *)&map_header.links;
10391	     entry = new_entry) {
10392		new_entry = entry->vme_next;
10393		_vm_map_entry_unlink(&map_header, entry);
10394		if (result == KERN_SUCCESS) {
10395			entry->vme_start += *address;
10396			entry->vme_end += *address;
10397			vm_map_entry_link(target_map, insp_entry, entry);
10398			insp_entry = entry;
10399		} else {
10400			if (!entry->is_sub_map) {
10401				vm_object_deallocate(entry->object.vm_object);
10402			} else {
10403				vm_map_deallocate(entry->object.sub_map);
10404			}
10405			_vm_map_entry_dispose(&map_header, entry);
10406		}
10407	}
10408
10409	if (result == KERN_SUCCESS) {
10410		target_map->size += size;
10411		SAVE_HINT_MAP_WRITE(target_map, insp_entry);
10412	}
10413	vm_map_unlock(target_map);
10414
10415	if (result == KERN_SUCCESS && target_map->wiring_required)
10416		result = vm_map_wire(target_map, *address,
10417				     *address + size, *cur_protection, TRUE);
10418	return result;
10419}
10420
10421/*
10422 *	Routine:	vm_map_remap_range_allocate
10423 *
10424 *	Description:
10425 *		Allocate a range in the specified virtual address map.
10426 *		Returns the address and the map entry just before the
10427 *		allocated range.
10428 *
10429 *	Map must be locked.
10430 */
10431
10432static kern_return_t
10433vm_map_remap_range_allocate(
10434	vm_map_t		map,
10435	vm_map_address_t	*address,	/* IN/OUT */
10436	vm_map_size_t		size,
10437	vm_map_offset_t		mask,
10438	boolean_t		anywhere,
10439	vm_map_entry_t		*map_entry)	/* OUT */
10440{
10441	register vm_map_entry_t	entry;
10442	register vm_map_offset_t	start;
10443	register vm_map_offset_t	end;
10444
10445StartAgain: ;
10446
10447	start = *address;
10448
10449	if (anywhere)
10450	{
10451		/*
10452		 *	Calculate the first possible address.
10453		 */
10454
10455		if (start < map->min_offset)
10456			start = map->min_offset;
10457		if (start > map->max_offset)
10458			return(KERN_NO_SPACE);
10459
10460		/*
10461		 *	Look for the first possible address;
10462		 *	if there's already something at this
10463		 *	address, we have to start after it.
10464		 */
10465
10466		assert(first_free_is_valid(map));
10467		if (start == map->min_offset) {
10468			if ((entry = map->first_free) != vm_map_to_entry(map))
10469				start = entry->vme_end;
10470		} else {
10471			vm_map_entry_t	tmp_entry;
10472			if (vm_map_lookup_entry(map, start, &tmp_entry))
10473				start = tmp_entry->vme_end;
10474			entry = tmp_entry;
10475		}
10476
10477		/*
10478		 *	In any case, the "entry" always precedes
10479		 *	the proposed new region throughout the
10480		 *	loop:
10481		 */
10482
10483		while (TRUE) {
10484			register vm_map_entry_t	next;
10485
10486			/*
10487			 *	Find the end of the proposed new region.
10488			 *	Be sure we didn't go beyond the end, or
10489			 *	wrap around the address.
10490			 */
10491
10492			end = ((start + mask) & ~mask);
10493			if (end < start)
10494				return(KERN_NO_SPACE);
10495			start = end;
10496			end += size;
10497
10498			if ((end > map->max_offset) || (end < start)) {
10499				if (map->wait_for_space) {
10500					if (size <= (map->max_offset -
10501						     map->min_offset)) {
10502						assert_wait((event_t) map, THREAD_INTERRUPTIBLE);
10503						vm_map_unlock(map);
10504						thread_block(THREAD_CONTINUE_NULL);
10505						vm_map_lock(map);
10506						goto StartAgain;
10507					}
10508				}
10509
10510				return(KERN_NO_SPACE);
10511			}
10512
10513			/*
10514			 *	If there are no more entries, we must win.
10515			 */
10516
10517			next = entry->vme_next;
10518			if (next == vm_map_to_entry(map))
10519				break;
10520
10521			/*
10522			 *	If there is another entry, it must be
10523			 *	after the end of the potential new region.
10524			 */
10525
10526			if (next->vme_start >= end)
10527				break;
10528
10529			/*
10530			 *	Didn't fit -- move to the next entry.
10531			 */
10532
10533			entry = next;
10534			start = entry->vme_end;
10535		}
10536		*address = start;
10537	} else {
10538		vm_map_entry_t		temp_entry;
10539
10540		/*
10541		 *	Verify that:
10542		 *		the address doesn't itself violate
10543		 *		the mask requirement.
10544		 */
10545
10546		if ((start & mask) != 0)
10547			return(KERN_NO_SPACE);
10548
10549
10550		/*
10551		 *	...	the address is within bounds
10552		 */
10553
10554		end = start + size;
10555
10556		if ((start < map->min_offset) ||
10557		    (end > map->max_offset) ||
10558		    (start >= end)) {
10559			return(KERN_INVALID_ADDRESS);
10560		}
10561
10562		/*
10563		 *	...	the starting address isn't allocated
10564		 */
10565
10566		if (vm_map_lookup_entry(map, start, &temp_entry))
10567			return(KERN_NO_SPACE);
10568
10569		entry = temp_entry;
10570
10571		/*
10572		 *	...	the next region doesn't overlap the
10573		 *		end point.
10574		 */
10575
10576		if ((entry->vme_next != vm_map_to_entry(map)) &&
10577		    (entry->vme_next->vme_start < end))
10578			return(KERN_NO_SPACE);
10579	}
10580	*map_entry = entry;
10581	return(KERN_SUCCESS);
10582}
10583
10584/*
10585 *	vm_map_switch:
10586 *
10587 *	Set the address map for the current thread to the specified map
10588 */
10589
10590vm_map_t
10591vm_map_switch(
10592	vm_map_t	map)
10593{
10594	int		mycpu;
10595	thread_t	thread = current_thread();
10596	vm_map_t	oldmap = thread->map;
10597
10598	mp_disable_preemption();
10599	mycpu = cpu_number();
10600
10601	/*
10602	 *	Deactivate the current map and activate the requested map
10603	 */
10604	PMAP_SWITCH_USER(thread, map, mycpu);
10605
10606	mp_enable_preemption();
10607	return(oldmap);
10608}
10609
10610
10611/*
10612 *	Routine:	vm_map_write_user
10613 *
10614 *	Description:
10615 *		Copy out data from a kernel space into space in the
10616 *		destination map. The space must already exist in the
10617 *		destination map.
10618 *		NOTE:  This routine should only be called by threads
10619 *		which can block on a page fault, i.e. kernel-mode user
10620 *		threads.
10621 *
10622 */
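/*
 * Usage sketch (illustrative): push a small kernel buffer out to a user
 * address in another task's map.  "umap" and "uaddr" are hypothetical
 * and must describe already-mapped, writable user memory.
 *
 *	char		kbuf[64];
 *	kern_return_t	kr;
 *
 *	kr = vm_map_write_user(umap, kbuf, uaddr, sizeof (kbuf));
 *
 * KERN_INVALID_ADDRESS means the destination was not mapped or not
 * writable in "umap".
 */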
10623kern_return_t
10624vm_map_write_user(
10625	vm_map_t		map,
10626	void			*src_p,
10627	vm_map_address_t	dst_addr,
10628	vm_size_t		size)
10629{
10630	kern_return_t	kr = KERN_SUCCESS;
10631
10632	if(current_map() == map) {
10633		if (copyout(src_p, dst_addr, size)) {
10634			kr = KERN_INVALID_ADDRESS;
10635		}
10636	} else {
10637		vm_map_t	oldmap;
10638
10639		/* take on the identity of the target map while doing */
10640		/* the transfer */
10641
10642		vm_map_reference(map);
10643		oldmap = vm_map_switch(map);
10644		if (copyout(src_p, dst_addr, size)) {
10645			kr = KERN_INVALID_ADDRESS;
10646		}
10647		vm_map_switch(oldmap);
10648		vm_map_deallocate(map);
10649	}
10650	return kr;
10651}
10652
10653/*
10654 *	Routine:	vm_map_read_user
10655 *
10656 *	Description:
10657 *		Copy in data from a user space source map into the
10658 *		kernel map. The space must already exist in the
10659 *		kernel map.
10660 *		NOTE:  This routine should only be called by threads
10661 *		which can block on a page fault, i.e. kernel-mode user
10662 *		threads.
10663 *
10664 */
10665kern_return_t
10666vm_map_read_user(
10667	vm_map_t		map,
10668	vm_map_address_t	src_addr,
10669	void			*dst_p,
10670	vm_size_t		size)
10671{
10672	kern_return_t	kr = KERN_SUCCESS;
10673
10674	if(current_map() == map) {
10675		if (copyin(src_addr, dst_p, size)) {
10676			kr = KERN_INVALID_ADDRESS;
10677		}
10678	} else {
10679		vm_map_t	oldmap;
10680
10681		/* take on the identity of the target map while doing */
10682		/* the transfer */
10683
10684		vm_map_reference(map);
10685		oldmap = vm_map_switch(map);
10686		if (copyin(src_addr, dst_p, size)) {
10687			kr = KERN_INVALID_ADDRESS;
10688		}
10689		vm_map_switch(oldmap);
10690		vm_map_deallocate(map);
10691	}
10692	return kr;
10693}
10694
10695
10696/*
10697 *	vm_map_check_protection:
10698 *
10699 *	Assert that the target map allows the specified
10700 *	privilege on the entire address region given.
10701 *	The entire region must be allocated.
10702 */
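/*
 * Usage sketch (illustrative; "map", "start" and "end" are hypothetical):
 * verify that an entire user range is readable and writable before
 * operating on it.
 *
 *	if (!vm_map_check_protection(map, start, end,
 *				     VM_PROT_READ | VM_PROT_WRITE))
 *		return KERN_PROTECTION_FAILURE;
 */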
10703boolean_t
10704vm_map_check_protection(vm_map_t map, vm_map_offset_t start,
10705			vm_map_offset_t end, vm_prot_t protection)
10706{
10707	vm_map_entry_t entry;
10708	vm_map_entry_t tmp_entry;
10709
10710	vm_map_lock(map);
10711
10712	if (start < vm_map_min(map) || end > vm_map_max(map) || start > end)
10713	{
10714		vm_map_unlock(map);
10715		return (FALSE);
10716	}
10717
10718	if (!vm_map_lookup_entry(map, start, &tmp_entry)) {
10719		vm_map_unlock(map);
10720		return(FALSE);
10721	}
10722
10723	entry = tmp_entry;
10724
10725	while (start < end) {
10726		if (entry == vm_map_to_entry(map)) {
10727			vm_map_unlock(map);
10728			return(FALSE);
10729		}
10730
10731		/*
10732		 *	No holes allowed!
10733		 */
10734
10735		if (start < entry->vme_start) {
10736			vm_map_unlock(map);
10737			return(FALSE);
10738		}
10739
10740		/*
10741		 * Check protection associated with entry.
10742		 */
10743
10744		if ((entry->protection & protection) != protection) {
10745			vm_map_unlock(map);
10746			return(FALSE);
10747		}
10748
10749		/* go to next entry */
10750
10751		start = entry->vme_end;
10752		entry = entry->vme_next;
10753	}
10754	vm_map_unlock(map);
10755	return(TRUE);
10756}
10757
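/*
 * Usage sketch for vm_map_purgable_control (illustrative; "map" and
 * "address" are hypothetical).  Mark the purgeable object mapped at
 * "address" volatile so its pages may be reclaimed under memory
 * pressure; the entry must cover the entire object and be writable.
 *
 *	int		state = VM_PURGABLE_VOLATILE;
 *	kern_return_t	kr;
 *
 *	kr = vm_map_purgable_control(map, address,
 *				     VM_PURGABLE_SET_STATE, &state);
 */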
10758kern_return_t
10759vm_map_purgable_control(
10760	vm_map_t		map,
10761	vm_map_offset_t		address,
10762	vm_purgable_t		control,
10763	int			*state)
10764{
10765	vm_map_entry_t		entry;
10766	vm_object_t		object;
10767	kern_return_t		kr;
10768
10769	/*
10770	 * Vet all the input parameters and current type and state of the
10771	 * underlying object.  Return with an error if anything is amiss.
10772	 */
10773	if (map == VM_MAP_NULL)
10774		return(KERN_INVALID_ARGUMENT);
10775
10776	if (control != VM_PURGABLE_SET_STATE &&
10777	    control != VM_PURGABLE_GET_STATE)
10778		return(KERN_INVALID_ARGUMENT);
10779
10780	if (control == VM_PURGABLE_SET_STATE &&
10781	    (((*state & ~(VM_PURGABLE_STATE_MASK|VM_VOLATILE_ORDER_MASK|VM_PURGABLE_ORDERING_MASK|VM_PURGABLE_BEHAVIOR_MASK|VM_VOLATILE_GROUP_MASK)) != 0) ||
10782	     ((*state & VM_PURGABLE_STATE_MASK) > VM_PURGABLE_STATE_MASK)))
10783		return(KERN_INVALID_ARGUMENT);
10784
10785	vm_map_lock(map);
10786
10787	if (!vm_map_lookup_entry(map, address, &entry) || entry->is_sub_map) {
10788
10789		/*
10790		 * Must pass a valid non-submap address.
10791		 */
10792		vm_map_unlock(map);
10793		return(KERN_INVALID_ADDRESS);
10794	}
10795
10796	if ((entry->protection & VM_PROT_WRITE) == 0) {
10797		/*
10798		 * Can't apply purgable controls to something you can't write.
10799		 */
10800		vm_map_unlock(map);
10801		return(KERN_PROTECTION_FAILURE);
10802	}
10803
10804	object = entry->object.vm_object;
10805	if (object == VM_OBJECT_NULL) {
10806		/*
10807		 * Object must already be present or it can't be purgable.
10808		 */
10809		vm_map_unlock(map);
10810		return KERN_INVALID_ARGUMENT;
10811	}
10812
10813	vm_object_lock(object);
10814
10815	if (entry->offset != 0 ||
10816	    entry->vme_end - entry->vme_start != object->size) {
10817		/*
10818		 * Can only apply purgable controls to the whole (existing)
10819		 * object at once.
10820		 */
10821		vm_map_unlock(map);
10822		vm_object_unlock(object);
10823		return KERN_INVALID_ARGUMENT;
10824	}
10825
10826	vm_map_unlock(map);
10827
10828	kr = vm_object_purgable_control(object, control, state);
10829
10830	vm_object_unlock(object);
10831
10832	return kr;
10833}
10834
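/*
 * Usage sketch for vm_map_page_info (illustrative; "map" and "addr" are
 * hypothetical).  Query whether the page backing "addr" is resident:
 *
 *	int		disposition, ref_count;
 *	boolean_t	resident;
 *
 *	if (vm_map_page_info(map, vm_map_trunc_page(addr),
 *			     &disposition, &ref_count) != KERN_SUCCESS)
 *		return KERN_FAILURE;
 *	resident = (disposition & VM_PAGE_QUERY_PAGE_PRESENT) != 0;
 */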
10835kern_return_t
10836vm_map_page_info(
10837	vm_map_t	target_map,
10838	vm_map_offset_t	offset,
10839	int		*disposition,
10840	int		*ref_count)
10841{
10842	vm_map_entry_t	map_entry;
10843	vm_object_t	object;
10844	vm_page_t	m;
10845	kern_return_t	kr;
10846	kern_return_t	retval = KERN_SUCCESS;
10847	boolean_t	top_object = TRUE;
10848
10849	*disposition = 0;
10850	*ref_count = 0;
10851
10852	vm_map_lock_read(target_map);
10853
10854restart_page_query:
10855	if (!vm_map_lookup_entry(target_map, offset, &map_entry)) {
10856		vm_map_unlock_read(target_map);
10857		return KERN_FAILURE;
10858	}
10859	offset -= map_entry->vme_start;  /* adjust to offset within entry */
10860	offset += map_entry->offset;	 /* adjust to target object offset */
10861
10862	if (map_entry->object.vm_object != VM_OBJECT_NULL) {
10863		if (!map_entry->is_sub_map) {
10864			object = map_entry->object.vm_object;
10865		} else {
10866		        vm_map_t sub_map;
10867
10868			sub_map = map_entry->object.sub_map;
10869			vm_map_lock_read(sub_map);
10870			vm_map_unlock_read(target_map);
10871
10872			target_map = sub_map;
10873			goto restart_page_query;
10874		}
10875	} else {
10876		vm_map_unlock_read(target_map);
10877		return KERN_SUCCESS;
10878	}
10879	vm_object_lock(object);
10880	vm_map_unlock_read(target_map);
10881
10882	while (TRUE) {
10883		m = vm_page_lookup(object, offset);
10884
10885		if (m != VM_PAGE_NULL) {
10886			*disposition |= VM_PAGE_QUERY_PAGE_PRESENT;
10887			break;
10888		} else {
10889#if MACH_PAGEMAP
10890			if (object->existence_map) {
10891				if (vm_external_state_get(object->existence_map, offset)
10892				    == VM_EXTERNAL_STATE_EXISTS) {
10893					/*
10894					 * this page has been paged out
10895					 */
10896				        *disposition |= VM_PAGE_QUERY_PAGE_PAGED_OUT;
10897					break;
10898				}
10899			} else
10900#endif
10901				if (object->internal &&
10902				   object->alive &&
10903				   !object->terminating &&
10904				   object->pager_ready) {
10905
10906				memory_object_t pager;
10907
10908				vm_object_paging_begin(object);
10909				pager = object->pager;
10910				vm_object_unlock(object);
10911
10912				kr = memory_object_data_request(
10913					pager,
10914					offset + object->paging_offset,
10915					0,	/* just poke the pager */
10916					VM_PROT_READ,
10917					NULL);
10918
10919				vm_object_lock(object);
10920				vm_object_paging_end(object);
10921
10922				if (kr == KERN_SUCCESS) {
10923					/*
10924					 * the pager has this page
10925					 */
10926				        *disposition |= VM_PAGE_QUERY_PAGE_PAGED_OUT;
10927					break;
10928				}
10929			}
10930			if (object->shadow != VM_OBJECT_NULL) {
10931			        vm_object_t shadow;
10932
10933				offset += object->shadow_offset;
10934				shadow = object->shadow;
10935
10936				vm_object_lock(shadow);
10937				vm_object_unlock(object);
10938
10939				object = shadow;
10940				top_object = FALSE;
10941			} else {
10942			        if (!object->internal)
10943				        break;
10944
10945				retval = KERN_FAILURE;
10946				goto page_query_done;
10947			}
10948		}
10949	}
10950	/* The ref_count is not strictly accurate; it measures the number    */
10951	/* of entities holding a ref on the object.  They may not be mapping */
10952	/* the object, or may not be mapping the section holding the         */
10953	/* target page, but it's still a ballpark number and, though an      */
10954	/* overcount, it picks up the copy-on-write cases.                   */
10955
10956	/* We could also get a picture of page sharing from pmap_attributes, */
10957	/* but this would undercount, as only faulted-in mappings would      */
10958	/* show up.							      */
10959
10960	*ref_count = object->ref_count;
10961
10962	if (top_object == TRUE && object->shadow)
10963	        *disposition |= VM_PAGE_QUERY_PAGE_COPIED;
10964
10965	if (m == VM_PAGE_NULL)
10966	        goto page_query_done;
10967
10968	if (m->fictitious) {
10969		*disposition |= VM_PAGE_QUERY_PAGE_FICTITIOUS;
10970		goto page_query_done;
10971	}
10972	if (m->dirty || pmap_is_modified(m->phys_page))
10973		*disposition |= VM_PAGE_QUERY_PAGE_DIRTY;
10974
10975	if (m->reference || pmap_is_referenced(m->phys_page))
10976		*disposition |= VM_PAGE_QUERY_PAGE_REF;
10977
10978	if (m->speculative)
10979		*disposition |= VM_PAGE_QUERY_PAGE_SPECULATIVE;
10980
10981	if (m->cs_validated)
10982		*disposition |= VM_PAGE_QUERY_PAGE_CS_VALIDATED;
10983	if (m->cs_tainted)
10984		*disposition |= VM_PAGE_QUERY_PAGE_CS_TAINTED;
10985
10986page_query_done:
10987	vm_object_unlock(object);
10988
10989	return retval;
10990}
10991
10992/*
10993 *	vm_map_msync
10994 *
10995 *	Synchronises the specified memory range with its backing store
10996 *	image by either flushing or cleaning the contents to the appropriate
10997 *	memory manager, engaging in a memory object synchronize dialog with
10998 *	the manager.  The client doesn't return until the manager issues
10999 *	an m_o_s_completed message.  MIG magically converts the user task
11000 *	parameter to the task's address map.
11001 *
11002 *	interpretation of sync_flags
11003 *	VM_SYNC_INVALIDATE	- discard pages, only return precious
11004 *				  pages to manager.
11005 *
11006 *	VM_SYNC_INVALIDATE & (VM_SYNC_SYNCHRONOUS | VM_SYNC_ASYNCHRONOUS)
11007 *				- discard pages, write dirty or precious
11008 *				  pages back to memory manager.
11009 *
11010 *	VM_SYNC_SYNCHRONOUS | VM_SYNC_ASYNCHRONOUS
11011 *				- write dirty or precious pages back to
11012 *				  the memory manager.
11013 *
11014 *	VM_SYNC_CONTIGUOUS	- does everything normally, but if there
11015 *				  is a hole in the region, and we would
11016 *				  have returned KERN_SUCCESS, return
11017 *				  KERN_INVALID_ADDRESS instead.
11018 *
11019 *	NOTE
11020 *	The memory object attributes have not yet been implemented; this
11021 *	function will have to deal with the invalidate attribute.
11022 *
11023 *	RETURNS
11024 *	KERN_INVALID_TASK		Bad task parameter
11025 *	KERN_INVALID_ARGUMENT		both sync and async were specified.
11026 *	KERN_SUCCESS			The usual.
11027 *	KERN_INVALID_ADDRESS		There was a hole in the region.
11028 */
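/*
 * Usage sketch (illustrative; "map", "addr" and "len" are hypothetical):
 * synchronously flush dirty pages in the range back to their pager, and
 * fail if the range contains a hole.
 *
 *	kern_return_t	kr;
 *
 *	kr = vm_map_msync(map, (vm_map_address_t)addr, len,
 *			  VM_SYNC_SYNCHRONOUS | VM_SYNC_CONTIGUOUS);
 *
 * KERN_INVALID_ADDRESS here means the flush itself proceeded but the
 * region had a hole.
 */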
11029
11030kern_return_t
11031vm_map_msync(
11032	vm_map_t		map,
11033	vm_map_address_t	address,
11034	vm_map_size_t		size,
11035	vm_sync_t		sync_flags)
11036{
11037	msync_req_t		msr;
11038	msync_req_t		new_msr;
11039	queue_chain_t		req_q;	/* queue of requests for this msync */
11040	vm_map_entry_t		entry;
11041	vm_map_size_t		amount_left;
11042	vm_object_offset_t	offset;
11043	boolean_t		do_sync_req;
11044	boolean_t		modifiable;
11045	boolean_t		had_hole = FALSE;
11046	memory_object_t		pager;
11047
11048	if ((sync_flags & VM_SYNC_ASYNCHRONOUS) &&
11049	    (sync_flags & VM_SYNC_SYNCHRONOUS))
11050		return(KERN_INVALID_ARGUMENT);
11051
11052	/*
11053	 * align address and size on page boundaries
11054	 */
11055	size = vm_map_round_page(address + size) - vm_map_trunc_page(address);
11056	address = vm_map_trunc_page(address);
11057
11058        if (map == VM_MAP_NULL)
11059                return(KERN_INVALID_TASK);
11060
11061	if (size == 0)
11062		return(KERN_SUCCESS);
11063
11064	queue_init(&req_q);
11065	amount_left = size;
11066
11067	while (amount_left > 0) {
11068		vm_object_size_t	flush_size;
11069		vm_object_t		object;
11070
11071		vm_map_lock(map);
11072		if (!vm_map_lookup_entry(map,
11073					 vm_map_trunc_page(address), &entry)) {
11074
11075			vm_map_size_t	skip;
11076
11077			/*
11078			 * hole in the address map.
11079			 */
11080			had_hole = TRUE;
11081
11082			/*
11083			 * Check for empty map.
11084			 */
11085			if (entry == vm_map_to_entry(map) &&
11086			    entry->vme_next == entry) {
11087				vm_map_unlock(map);
11088				break;
11089			}
11090			/*
11091			 * Check that we don't wrap and that
11092			 * we have at least one real map entry.
11093			 */
11094			if ((map->hdr.nentries == 0) ||
11095			    (entry->vme_next->vme_start < address)) {
11096				vm_map_unlock(map);
11097				break;
11098			}
11099			/*
11100			 * Move up to the next entry if needed
11101			 */
11102			skip = (entry->vme_next->vme_start - address);
11103			if (skip >= amount_left)
11104				amount_left = 0;
11105			else
11106				amount_left -= skip;
11107			address = entry->vme_next->vme_start;
11108			vm_map_unlock(map);
11109			continue;
11110		}
11111
11112		offset = address - entry->vme_start;
11113
11114		/*
11115		 * do we have more to flush than is contained in this
11116		 * entry ?
11117		 */
11118		if (amount_left + entry->vme_start + offset > entry->vme_end) {
11119			flush_size = entry->vme_end -
11120				(entry->vme_start + offset);
11121		} else {
11122			flush_size = amount_left;
11123		}
11124		amount_left -= flush_size;
11125		address += flush_size;
11126
11127		if (entry->is_sub_map == TRUE) {
11128			vm_map_t	local_map;
11129			vm_map_offset_t	local_offset;
11130
11131			local_map = entry->object.sub_map;
11132			local_offset = entry->offset;
11133			vm_map_unlock(map);
11134			if (vm_map_msync(
11135				    local_map,
11136				    local_offset,
11137				    flush_size,
11138				    sync_flags) == KERN_INVALID_ADDRESS) {
11139				had_hole = TRUE;
11140			}
11141			continue;
11142		}
11143		object = entry->object.vm_object;
11144
11145		/*
11146		 * We can't sync this object if the object has not been
11147		 * created yet
11148		 */
11149		if (object == VM_OBJECT_NULL) {
11150			vm_map_unlock(map);
11151			continue;
11152		}
11153		offset += entry->offset;
11154		modifiable = (entry->protection & VM_PROT_WRITE)
11155			!= VM_PROT_NONE;
11156
11157                vm_object_lock(object);
11158
11159		if (sync_flags & (VM_SYNC_KILLPAGES | VM_SYNC_DEACTIVATE)) {
11160		        boolean_t kill_pages = 0;
11161
11162			if (sync_flags & VM_SYNC_KILLPAGES) {
11163			        if (object->ref_count == 1 && !entry->needs_copy && !object->shadow)
11164				        kill_pages = 1;
11165				else
11166				        kill_pages = -1;
11167			}
11168			if (kill_pages != -1)
11169			        vm_object_deactivate_pages(object, offset,
11170							   (vm_object_size_t)flush_size, kill_pages);
11171			vm_object_unlock(object);
11172			vm_map_unlock(map);
11173			continue;
11174		}
11175		/*
11176		 * We can't sync this object if there isn't a pager.
11177		 * Don't bother to sync internal objects, since there can't
11178		 * be any "permanent" storage for these objects anyway.
11179		 */
11180		if ((object->pager == MEMORY_OBJECT_NULL) ||
11181		    (object->internal) || (object->private)) {
11182			vm_object_unlock(object);
11183			vm_map_unlock(map);
11184			continue;
11185		}
11186		/*
11187		 * keep reference on the object until syncing is done
11188		 */
11189		vm_object_reference_locked(object);
11190		vm_object_unlock(object);
11191
11192		vm_map_unlock(map);
11193
11194		do_sync_req = vm_object_sync(object,
11195					     offset,
11196					     flush_size,
11197					     sync_flags & VM_SYNC_INVALIDATE,
11198					     (modifiable &&
11199					      (sync_flags & VM_SYNC_SYNCHRONOUS ||
11200					       sync_flags & VM_SYNC_ASYNCHRONOUS)),
11201					     sync_flags & VM_SYNC_SYNCHRONOUS);
11202		/*
11203		 * only send an m_o_s if we returned pages or if the entry
11204		 * is writable (i.e. dirty pages may have already been sent back)
11205		 */
11206		if (!do_sync_req && !modifiable) {
11207			if ((sync_flags & VM_SYNC_INVALIDATE) && object->resident_page_count == 0) {
11208				/*
11209				 * clear out the clustering and read-ahead hints
11210				 */
11211				vm_object_lock(object);
11212
11213				object->pages_created = 0;
11214				object->pages_used = 0;
11215				object->sequential = 0;
11216				object->last_alloc = 0;
11217
11218				vm_object_unlock(object);
11219			}
11220			vm_object_deallocate(object);
11221			continue;
11222		}
11223		msync_req_alloc(new_msr);
11224
11225                vm_object_lock(object);
11226		offset += object->paging_offset;
11227
11228		new_msr->offset = offset;
11229		new_msr->length = flush_size;
11230		new_msr->object = object;
11231		new_msr->flag = VM_MSYNC_SYNCHRONIZING;
11232	re_iterate:
11233
11234		/*
11235		 * We can't sync this object if there isn't a pager.  The
11236		 * pager can disappear anytime we're not holding the object
11237		 * lock.  So this has to be checked anytime we goto re_iterate.
11238		 */
11239
11240		pager = object->pager;
11241
11242		if (pager == MEMORY_OBJECT_NULL) {
11243			vm_object_unlock(object);
11244			vm_object_deallocate(object);
11245			continue;
11246		}
11247
11248		queue_iterate(&object->msr_q, msr, msync_req_t, msr_q) {
11249			/*
11250			 * need to check for overlapping entry, if found, wait
11251			 * on overlapping msr to be done, then reiterate
11252			 */
11253			msr_lock(msr);
11254			if (msr->flag == VM_MSYNC_SYNCHRONIZING &&
11255			    ((offset >= msr->offset &&
11256			      offset < (msr->offset + msr->length)) ||
11257			     (msr->offset >= offset &&
11258			      msr->offset < (offset + flush_size))))
11259			{
11260				assert_wait((event_t) msr,THREAD_INTERRUPTIBLE);
11261				msr_unlock(msr);
11262				vm_object_unlock(object);
11263				thread_block(THREAD_CONTINUE_NULL);
11264				vm_object_lock(object);
11265				goto re_iterate;
11266			}
11267			msr_unlock(msr);
11268		}/* queue_iterate */
11269
11270		queue_enter(&object->msr_q, new_msr, msync_req_t, msr_q);
11271
11272		vm_object_paging_begin(object);
11273		vm_object_unlock(object);
11274
11275		queue_enter(&req_q, new_msr, msync_req_t, req_q);
11276
11277		(void) memory_object_synchronize(
11278			pager,
11279			offset,
11280			flush_size,
11281			sync_flags & ~VM_SYNC_CONTIGUOUS);
11282
11283		vm_object_lock(object);
11284		vm_object_paging_end(object);
11285		vm_object_unlock(object);
11286	}/* while */
11287
11288	/*
11289	 * wait for memory_object_synchronize_completed messages from pager(s)
11290	 */
11291
11292	while (!queue_empty(&req_q)) {
11293		msr = (msync_req_t)queue_first(&req_q);
11294		msr_lock(msr);
11295		while(msr->flag != VM_MSYNC_DONE) {
11296			assert_wait((event_t) msr, THREAD_INTERRUPTIBLE);
11297			msr_unlock(msr);
11298			thread_block(THREAD_CONTINUE_NULL);
11299			msr_lock(msr);
11300		}/* while */
11301		queue_remove(&req_q, msr, msync_req_t, req_q);
11302		msr_unlock(msr);
11303		vm_object_deallocate(msr->object);
11304		msync_req_free(msr);
11305	}/* queue_iterate */
11306
11307	/* for proper msync() behaviour */
11308	if (had_hole == TRUE && (sync_flags & VM_SYNC_CONTIGUOUS))
11309		return(KERN_INVALID_ADDRESS);
11310
11311	return(KERN_SUCCESS);
11312}/* vm_msync */
11313
11314/*
11315 *	Routine:	convert_port_entry_to_map
11316 *	Purpose:
11317 *		Convert from a port specifying an entry or a task
11318 *		to a map. Doesn't consume the port ref; produces a map ref,
11319 *		which may be null.  Unlike convert_port_to_map, the
11320 *		port may be backed by either a task or a named entry.
11321 *	Conditions:
11322 *		Nothing locked.
11323 */
11324
11325
11326vm_map_t
11327convert_port_entry_to_map(
11328	ipc_port_t	port)
11329{
11330	vm_map_t map;
11331	vm_named_entry_t	named_entry;
11332	uint32_t	try_failed_count = 0;
11333
11334	if(IP_VALID(port) && (ip_kotype(port) == IKOT_NAMED_ENTRY)) {
11335		while(TRUE) {
11336			ip_lock(port);
11337			if(ip_active(port) && (ip_kotype(port)
11338					       == IKOT_NAMED_ENTRY)) {
11339				named_entry =
11340					(vm_named_entry_t)port->ip_kobject;
11341				if (!(mutex_try(&(named_entry)->Lock))) {
11342                       			ip_unlock(port);
11343
11344					try_failed_count++;
11345                       			mutex_pause(try_failed_count);
11346                       			continue;
11347                		}
11348				named_entry->ref_count++;
11349				mutex_unlock(&(named_entry)->Lock);
11350				ip_unlock(port);
11351				if ((named_entry->is_sub_map) &&
11352				    (named_entry->protection
11353				     & VM_PROT_WRITE)) {
11354					map = named_entry->backing.map;
11355				} else {
11356					mach_destroy_memory_entry(port);
11357					return VM_MAP_NULL;
11358				}
11359				vm_map_reference_swap(map);
11360				mach_destroy_memory_entry(port);
11361				break;
11362			}
11363			else
11364				return VM_MAP_NULL;
11365		}
11366	}
11367	else
11368		map = convert_port_to_map(port);
11369
11370	return map;
11371}
11372
11373/*
11374 *	Routine:	convert_port_entry_to_object
11375 *	Purpose:
11376 *		Convert from a port specifying a named entry to an
11377 *		object. Doesn't consume the port ref; produces an object ref,
11378 *		which may be null.
11379 *	Conditions:
11380 *		Nothing locked.
11381 */
11382
11383
11384vm_object_t
11385convert_port_entry_to_object(
11386	ipc_port_t	port)
11387{
11388	vm_object_t object;
11389	vm_named_entry_t	named_entry;
11390	uint32_t	try_failed_count = 0;
11391
11392	if(IP_VALID(port) && (ip_kotype(port) == IKOT_NAMED_ENTRY)) {
11393		while(TRUE) {
11394			ip_lock(port);
11395			if(ip_active(port) && (ip_kotype(port)
11396					       == IKOT_NAMED_ENTRY)) {
11397				named_entry =
11398					(vm_named_entry_t)port->ip_kobject;
11399				if (!(mutex_try(&(named_entry)->Lock))) {
11400                       			ip_unlock(port);
11401
11402					try_failed_count++;
11403                       			mutex_pause(try_failed_count);
11404                       			continue;
11405                		}
11406				named_entry->ref_count++;
11407				mutex_unlock(&(named_entry)->Lock);
11408				ip_unlock(port);
11409				if ((!named_entry->is_sub_map) &&
11410				    (!named_entry->is_pager) &&
11411				    (named_entry->protection
11412				     & VM_PROT_WRITE)) {
11413					object = named_entry->backing.object;
11414				} else {
11415					mach_destroy_memory_entry(port);
11416					return (vm_object_t)NULL;
11417				}
11418				vm_object_reference(named_entry->backing.object);
11419				mach_destroy_memory_entry(port);
11420				break;
11421			}
11422			else
11423				return (vm_object_t)NULL;
11424		}
11425	} else {
11426		return (vm_object_t)NULL;
11427	}
11428
11429	return object;
11430}
11431
11432/*
11433 * Export routines to other components for the things we access locally through
11434 * macros.
11435 */
11436#undef current_map
11437vm_map_t
11438current_map(void)
11439{
11440	return (current_map_fast());
11441}
11442
11443/*
11444 *	vm_map_reference:
11445 *
11446 *	Most code internal to the osfmk will go through a
11447 *	macro defining this.  This is always here for the
11448 *	use of other kernel components.
11449 */
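/*
 * Illustrative sketch of the usual pairing with vm_map_deallocate()
 * ("map" is a hypothetical vm_map_t the caller wants to keep alive
 * across a blocking operation):
 *
 *	vm_map_reference(map);
 *	kr = do_something_that_may_block(map);
 *	vm_map_deallocate(map);
 *
 * "do_something_that_may_block" is a made-up placeholder; the point is
 * that the extra reference keeps the map from being destroyed while it
 * is in use.
 */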
11450#undef vm_map_reference
11451void
11452vm_map_reference(
11453	register vm_map_t	map)
11454{
11455	if (map == VM_MAP_NULL)
11456		return;
11457
11458	mutex_lock(&map->s_lock);
11459#if	TASK_SWAPPER
11460	assert(map->res_count > 0);
11461	assert(map->ref_count >= map->res_count);
11462	map->res_count++;
11463#endif
11464	map->ref_count++;
11465	mutex_unlock(&map->s_lock);
11466}
11467
11468/*
11469 *	vm_map_deallocate:
11470 *
11471 *	Removes a reference from the specified map,
11472 *	destroying it if no references remain.
11473 *	The map should not be locked.
11474 */
11475void
11476vm_map_deallocate(
11477	register vm_map_t	map)
11478{
11479	unsigned int		ref;
11480
11481	if (map == VM_MAP_NULL)
11482		return;
11483
11484	mutex_lock(&map->s_lock);
11485	ref = --map->ref_count;
11486	if (ref > 0) {
11487		vm_map_res_deallocate(map);
11488		mutex_unlock(&map->s_lock);
11489		return;
11490	}
11491	assert(map->ref_count == 0);
11492	mutex_unlock(&map->s_lock);
11493
11494#if	TASK_SWAPPER
11495	/*
11496	 * The map residence count isn't decremented here because
11497	 * the vm_map_delete below will traverse the entire map,
11498	 * deleting entries, and the residence counts on objects
11499	 * and sharing maps will go away then.
11500	 */
11501#endif
11502
11503	vm_map_destroy(map, VM_MAP_NO_FLAGS);
11504}
11505
11506
11507void
11508vm_map_disable_NX(vm_map_t map)
11509{
11510        if (map == NULL)
11511	        return;
11512        if (map->pmap == NULL)
11513	        return;
11514
11515        pmap_disable_NX(map->pmap);
11516}
11517
11518/* XXX Consider making these constants (VM_MAX_ADDRESS and MACH_VM_MAX_ADDRESS)
11519 * more descriptive.
11520 */
11521void
11522vm_map_set_32bit(vm_map_t map)
11523{
11524	map->max_offset = (vm_map_offset_t)VM_MAX_ADDRESS;
11525}
11526
11527
11528void
11529vm_map_set_64bit(vm_map_t map)
11530{
11531	map->max_offset = (vm_map_offset_t)MACH_VM_MAX_ADDRESS;
11532}
11533
11534vm_map_offset_t
11535vm_compute_max_offset(unsigned is64)
11536{
11537	return (is64 ? (vm_map_offset_t)MACH_VM_MAX_ADDRESS : (vm_map_offset_t)VM_MAX_ADDRESS);
11538}
11539
11540boolean_t
11541vm_map_is_64bit(
11542		vm_map_t map)
11543{
11544	return map->max_offset > ((vm_map_offset_t)VM_MAX_ADDRESS);
11545}
11546
11547boolean_t
11548vm_map_has_4GB_pagezero(
11549		vm_map_t map)
11550{
11551	/*
11552	 * XXX FBDP
11553	 * We should lock the VM map (for read) here but we can get away
11554	 * with it for now because there can't really be any race condition:
11555	 * the VM map's min_offset is changed only when the VM map is created
11556	 * and when the zero page is established (when the binary gets loaded),
11557	 * and this routine gets called only when the task terminates and the
11558	 * VM map is being torn down, and when a new map is created via
11559	 * load_machfile()/execve().
11560	 */
11561	return (map->min_offset >= 0x100000000ULL);
11562}
11563
11564void
11565vm_map_set_4GB_pagezero(vm_map_t map)
11566{
11567	pmap_set_4GB_pagezero(map->pmap);
11568}
11569
11570void
11571vm_map_clear_4GB_pagezero(vm_map_t map)
11572{
11573	pmap_clear_4GB_pagezero(map->pmap);
11574}
11575
11576/*
11577 * Raise a VM map's minimum offset.
11578 * To strictly enforce "page zero" reservation.
11579 */
11580kern_return_t
11581vm_map_raise_min_offset(
11582	vm_map_t	map,
11583	vm_map_offset_t	new_min_offset)
11584{
11585	vm_map_entry_t	first_entry;
11586
11587	new_min_offset = vm_map_round_page(new_min_offset);
11588
11589	vm_map_lock(map);
11590
11591	if (new_min_offset < map->min_offset) {
11592		/*
11593		 * Can't move min_offset backwards, as that would expose
11594		 * a part of the address space that was previously, and for
11595		 * possibly good reasons, inaccessible.
11596		 */
11597		vm_map_unlock(map);
11598		return KERN_INVALID_ADDRESS;
11599	}
11600
11601	first_entry = vm_map_first_entry(map);
11602	if (first_entry != vm_map_to_entry(map) &&
11603	    first_entry->vme_start < new_min_offset) {
11604		/*
11605		 * Some memory was already allocated below the new
11606		 * minimum offset.  It's too late to change it now...
11607		 */
11608		vm_map_unlock(map);
11609		return KERN_NO_SPACE;
11610	}
11611
11612	map->min_offset = new_min_offset;
11613
11614	vm_map_unlock(map);
11615
11616	return KERN_SUCCESS;
11617}
11618
11619/*
11620 * Set the limit on the maximum amount of user wired memory allowed for this map.
11621 * This is basically a copy of the MEMLOCK rlimit value maintained by the BSD side of
11622 * the kernel.  The limit is checked on the Mach VM side, so we keep a copy here to
11623 * avoid having to reach over to the BSD data structures.
11624 */
11625
11626void
11627vm_map_set_user_wire_limit(vm_map_t 	map,
11628			   vm_size_t	limit)
11629{
11630	map->user_wire_limit = limit;
11631}
11632
11633void		vm_map_set_prot_copy_allow(vm_map_t		map,
11634					   boolean_t		allow)
11635{
11636	vm_map_lock(map);
11637	map->prot_copy_allow = allow;
11638	vm_map_unlock(map);
11639};
11640