1/*
2 * Copyright (c) 2000-2007 Apple Inc. All rights reserved.
3 *
4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5 *
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. The rights granted to you under the License
10 * may not be used to create, or enable the creation or redistribution of,
11 * unlawful or unlicensed copies of an Apple operating system, or to
12 * circumvent, violate, or enable the circumvention or violation of, any
13 * terms of an Apple operating system software license agreement.
14 *
15 * Please obtain a copy of the License at
16 * http://www.opensource.apple.com/apsl/ and read it before using this file.
17 *
18 * The Original Code and all software distributed under the License are
19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23 * Please see the License for the specific language governing rights and
24 * limitations under the License.
25 *
26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27 */
28/*
29 * @OSF_COPYRIGHT@
30 */
31/*
32 * Mach Operating System
33 * Copyright (c) 1991,1990,1989,1988 Carnegie Mellon University
34 * All Rights Reserved.
35 *
36 * Permission to use, copy, modify and distribute this software and its
37 * documentation is hereby granted, provided that both the copyright
38 * notice and this permission notice appear in all copies of the
39 * software, derivative works or modified versions, and any portions
40 * thereof, and that both notices appear in supporting documentation.
41 *
42 * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
43 * CONDITION.  CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND FOR
44 * ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
45 *
46 * Carnegie Mellon requests users of this software to return to
47 *
48 *  Software Distribution Coordinator  or  Software.Distribution@CS.CMU.EDU
49 *  School of Computer Science
50 *  Carnegie Mellon University
51 *  Pittsburgh PA 15213-3890
52 *
53 * any improvements or extensions that they make and grant Carnegie Mellon
54 * the rights to redistribute these changes.
55 */
56/*
57 */
58/*
59 *	File:	vm/vm_user.c
60 *	Author:	Avadis Tevanian, Jr., Michael Wayne Young
61 *
62 *	User-exported virtual memory functions.
63 */
64
65/*
66 * There are three implementations of the "XXX_allocate" functionality in
67 * the kernel: mach_vm_allocate (for any task on the platform), vm_allocate
68 * (for a task with the same address space size, especially the current task),
69 * and vm32_vm_allocate (for the specific case of a 32-bit task). vm_allocate
70 * in the kernel should only be used on the kernel_task. vm32_vm_allocate only
71 * makes sense on platforms where a user task can either be 32 or 64, or the kernel
72 * task can be 32 or 64. mach_vm_allocate makes sense everywhere, and is preferred
73 * for new code.
74 *
75 * The entrypoints into the kernel are more complex. All platforms support a
76 * mach_vm_allocate-style API (subsystem 4800) which operates with the largest
77 * size types for the platform. On platforms that only support U32/K32,
78 * subsystem 4800 is all you need. On platforms that support both U32 and U64,
79 * subsystem 3800 is used disambiguate the size of parameters, and they will
80 * always be 32-bit and call into the vm32_vm_allocate APIs. On non-U32/K32 platforms,
81 * the MIG glue should never call into vm_allocate directly, because the calling
82 * task and kernel_task are unlikely to use the same size parameters
83 *
84 * New VM call implementations should be added here and to mach_vm.defs
85 * (subsystem 4800), and use mach_vm_* "wide" types.
86 */
87
88#include <debug.h>
89
90#include <vm_cpm.h>
91#include <mach/boolean.h>
92#include <mach/kern_return.h>
93#include <mach/mach_types.h>	/* to get vm_address_t */
94#include <mach/memory_object.h>
95#include <mach/std_types.h>	/* to get pointer_t */
96#include <mach/upl.h>
97#include <mach/vm_attributes.h>
98#include <mach/vm_param.h>
99#include <mach/vm_statistics.h>
100#include <mach/mach_syscalls.h>
101
102#include <mach/host_priv_server.h>
103#include <mach/mach_vm_server.h>
104#include <mach/vm_map_server.h>
105
106#include <kern/host.h>
107#include <kern/kalloc.h>
108#include <kern/task.h>
109#include <kern/misc_protos.h>
110#include <vm/vm_fault.h>
111#include <vm/vm_map.h>
112#include <vm/vm_object.h>
113#include <vm/vm_page.h>
114#include <vm/memory_object.h>
115#include <vm/vm_pageout.h>
116#include <vm/vm_protos.h>
117
118vm_size_t        upl_offset_to_pagelist = 0;
119
120#if	VM_CPM
121#include <vm/cpm.h>
122#endif	/* VM_CPM */
123
124ipc_port_t	dynamic_pager_control_port=NULL;
125
126/*
127 *	mach_vm_allocate allocates "zero fill" memory in the specfied
128 *	map.
129 */
130kern_return_t
131mach_vm_allocate(
132	vm_map_t		map,
133	mach_vm_offset_t	*addr,
134	mach_vm_size_t	size,
135	int			flags)
136{
137	vm_map_offset_t map_addr;
138	vm_map_size_t	map_size;
139	kern_return_t	result;
140	boolean_t	anywhere;
141
142	/* filter out any kernel-only flags */
143   	if (flags & ~VM_FLAGS_USER_ALLOCATE)
144		return KERN_INVALID_ARGUMENT;
145
146	if (map == VM_MAP_NULL)
147		return(KERN_INVALID_ARGUMENT);
148
149	if (size == 0) {
150		*addr = 0;
151		return(KERN_SUCCESS);
152	}
153
154	anywhere = ((VM_FLAGS_ANYWHERE & flags) != 0);
155	if (anywhere) {
156		/*
157		 * No specific address requested, so start candidate address
158		 * search at the minimum address in the map.  However, if that
159		 * minimum is 0, bump it up by PAGE_SIZE.  We want to limit
160		 * allocations of PAGEZERO to explicit requests since its
161		 * normal use is to catch dereferences of NULL and many
162		 * applications also treat pointers with a value of 0 as
163		 * special and suddenly having address 0 contain useable
164		 * memory would tend to confuse those applications.
165		 */
166		map_addr = vm_map_min(map);
167		if (map_addr == 0)
168			map_addr += PAGE_SIZE;
169	} else
170		map_addr = vm_map_trunc_page(*addr);
171	map_size = vm_map_round_page(size);
172	if (map_size == 0) {
173	  return(KERN_INVALID_ARGUMENT);
174	}
175
176	result = vm_map_enter(
177			map,
178			&map_addr,
179			map_size,
180			(vm_map_offset_t)0,
181			flags,
182			VM_OBJECT_NULL,
183			(vm_object_offset_t)0,
184			FALSE,
185			VM_PROT_DEFAULT,
186			VM_PROT_ALL,
187			VM_INHERIT_DEFAULT);
188
189	*addr = map_addr;
190	return(result);
191}
192
193/*
194 *	vm_allocate
195 *	Legacy routine that allocates "zero fill" memory in the specfied
196 *	map (which is limited to the same size as the kernel).
197 */
198kern_return_t
199vm_allocate(
200	vm_map_t	map,
201	vm_offset_t	*addr,
202	vm_size_t	size,
203	int		flags)
204{
205	vm_map_offset_t map_addr;
206	vm_map_size_t	map_size;
207	kern_return_t	result;
208	boolean_t	anywhere;
209
210	/* filter out any kernel-only flags */
211	if (flags & ~VM_FLAGS_USER_ALLOCATE)
212		return KERN_INVALID_ARGUMENT;
213
214	if (map == VM_MAP_NULL)
215		return(KERN_INVALID_ARGUMENT);
216	if (size == 0) {
217		*addr = 0;
218		return(KERN_SUCCESS);
219	}
220
221	anywhere = ((VM_FLAGS_ANYWHERE & flags) != 0);
222	if (anywhere) {
223		/*
224		 * No specific address requested, so start candidate address
225		 * search at the minimum address in the map.  However, if that
226		 * minimum is 0, bump it up by PAGE_SIZE.  We want to limit
227		 * allocations of PAGEZERO to explicit requests since its
228		 * normal use is to catch dereferences of NULL and many
229		 * applications also treat pointers with a value of 0 as
230		 * special and suddenly having address 0 contain useable
231		 * memory would tend to confuse those applications.
232		 */
233		map_addr = vm_map_min(map);
234		if (map_addr == 0)
235			map_addr += PAGE_SIZE;
236	} else
237		map_addr = vm_map_trunc_page(*addr);
238	map_size = vm_map_round_page(size);
239	if (map_size == 0) {
240	  return(KERN_INVALID_ARGUMENT);
241	}
242
243	result = vm_map_enter(
244			map,
245			&map_addr,
246			map_size,
247			(vm_map_offset_t)0,
248			flags,
249			VM_OBJECT_NULL,
250			(vm_object_offset_t)0,
251			FALSE,
252			VM_PROT_DEFAULT,
253			VM_PROT_ALL,
254			VM_INHERIT_DEFAULT);
255
256	*addr = CAST_DOWN(vm_offset_t, map_addr);
257	return(result);
258}
259
260/*
261 *	mach_vm_deallocate -
262 *	deallocates the specified range of addresses in the
263 *	specified address map.
264 */
265kern_return_t
266mach_vm_deallocate(
267	vm_map_t		map,
268	mach_vm_offset_t	start,
269	mach_vm_size_t	size)
270{
271	if ((map == VM_MAP_NULL) || (start + size < start))
272		return(KERN_INVALID_ARGUMENT);
273
274	if (size == (mach_vm_offset_t) 0)
275		return(KERN_SUCCESS);
276
277	return(vm_map_remove(map, vm_map_trunc_page(start),
278			     vm_map_round_page(start+size), VM_MAP_NO_FLAGS));
279}
280
281/*
282 *	vm_deallocate -
283 *	deallocates the specified range of addresses in the
284 *	specified address map (limited to addresses the same
285 *	size as the kernel).
286 */
287kern_return_t
288vm_deallocate(
289	register vm_map_t	map,
290	vm_offset_t		start,
291	vm_size_t		size)
292{
293	if ((map == VM_MAP_NULL) || (start + size < start))
294		return(KERN_INVALID_ARGUMENT);
295
296	if (size == (vm_offset_t) 0)
297		return(KERN_SUCCESS);
298
299	return(vm_map_remove(map, vm_map_trunc_page(start),
300			     vm_map_round_page(start+size), VM_MAP_NO_FLAGS));
301}
302
303/*
304 *	mach_vm_inherit -
305 *	Sets the inheritance of the specified range in the
306 *	specified map.
307 */
308kern_return_t
309mach_vm_inherit(
310	vm_map_t		map,
311	mach_vm_offset_t	start,
312	mach_vm_size_t	size,
313	vm_inherit_t		new_inheritance)
314{
315	if ((map == VM_MAP_NULL) || (start + size < start) ||
316	    (new_inheritance > VM_INHERIT_LAST_VALID))
317                return(KERN_INVALID_ARGUMENT);
318
319	if (size == 0)
320		return KERN_SUCCESS;
321
322	return(vm_map_inherit(map,
323			      vm_map_trunc_page(start),
324			      vm_map_round_page(start+size),
325			      new_inheritance));
326}
327
328/*
329 *	vm_inherit -
330 *	Sets the inheritance of the specified range in the
331 *	specified map (range limited to addresses
332 */
333kern_return_t
334vm_inherit(
335	register vm_map_t	map,
336	vm_offset_t		start,
337	vm_size_t		size,
338	vm_inherit_t		new_inheritance)
339{
340	if ((map == VM_MAP_NULL) || (start + size < start) ||
341	    (new_inheritance > VM_INHERIT_LAST_VALID))
342                return(KERN_INVALID_ARGUMENT);
343
344	if (size == 0)
345		return KERN_SUCCESS;
346
347	return(vm_map_inherit(map,
348			      vm_map_trunc_page(start),
349			      vm_map_round_page(start+size),
350			      new_inheritance));
351}
352
353/*
354 *	mach_vm_protect -
355 *	Sets the protection of the specified range in the
356 *	specified map.
357 */
358
359kern_return_t
360mach_vm_protect(
361	vm_map_t		map,
362	mach_vm_offset_t	start,
363	mach_vm_size_t	size,
364	boolean_t		set_maximum,
365	vm_prot_t		new_protection)
366{
367	if ((map == VM_MAP_NULL) || (start + size < start) ||
368	    (new_protection & ~(VM_PROT_ALL | VM_PROT_COPY)))
369		return(KERN_INVALID_ARGUMENT);
370
371	if (size == 0)
372		return KERN_SUCCESS;
373
374	return(vm_map_protect(map,
375			      vm_map_trunc_page(start),
376			      vm_map_round_page(start+size),
377			      new_protection,
378			      set_maximum));
379}
380
381/*
382 *	vm_protect -
383 *	Sets the protection of the specified range in the
384 *	specified map. Addressability of the range limited
385 *	to the same size as the kernel.
386 */
387
388kern_return_t
389vm_protect(
390	vm_map_t		map,
391	vm_offset_t		start,
392	vm_size_t		size,
393	boolean_t		set_maximum,
394	vm_prot_t		new_protection)
395{
396	if ((map == VM_MAP_NULL) || (start + size < start) ||
397	    (new_protection & ~(VM_PROT_ALL | VM_PROT_COPY)))
398		return(KERN_INVALID_ARGUMENT);
399
400	if (size == 0)
401		return KERN_SUCCESS;
402
403	return(vm_map_protect(map,
404			      vm_map_trunc_page(start),
405			      vm_map_round_page(start+size),
406			      new_protection,
407			      set_maximum));
408}
409
410/*
411 * mach_vm_machine_attributes -
412 * Handle machine-specific attributes for a mapping, such
413 * as cachability, migrability, etc.
414 */
415kern_return_t
416mach_vm_machine_attribute(
417	vm_map_t			map,
418	mach_vm_address_t		addr,
419	mach_vm_size_t		size,
420	vm_machine_attribute_t	attribute,
421	vm_machine_attribute_val_t* value)		/* IN/OUT */
422{
423	if ((map == VM_MAP_NULL) || (addr + size < addr))
424		return(KERN_INVALID_ARGUMENT);
425
426	if (size == 0)
427		return KERN_SUCCESS;
428
429	return vm_map_machine_attribute(map,
430				vm_map_trunc_page(addr),
431				vm_map_round_page(addr+size),
432				attribute,
433				value);
434}
435
436/*
437 * vm_machine_attribute -
438 * Handle machine-specific attributes for a mapping, such
439 * as cachability, migrability, etc. Limited addressability
440 * (same range limits as for the native kernel map).
441 */
442kern_return_t
443vm_machine_attribute(
444	vm_map_t	map,
445	vm_address_t	addr,
446	vm_size_t	size,
447	vm_machine_attribute_t	attribute,
448	vm_machine_attribute_val_t* value)		/* IN/OUT */
449{
450	if ((map == VM_MAP_NULL) || (addr + size < addr))
451		return(KERN_INVALID_ARGUMENT);
452
453	if (size == 0)
454		return KERN_SUCCESS;
455
456	return vm_map_machine_attribute(map,
457				vm_map_trunc_page(addr),
458				vm_map_round_page(addr+size),
459				attribute,
460				value);
461}
462
463/*
464 * mach_vm_read -
465 * Read/copy a range from one address space and return it to the caller.
466 *
467 * It is assumed that the address for the returned memory is selected by
468 * the IPC implementation as part of receiving the reply to this call.
469 * If IPC isn't used, the caller must deal with the vm_map_copy_t object
470 * that gets returned.
471 *
472 * JMM - because of mach_msg_type_number_t, this call is limited to a
473 * single 4GB region at this time.
474 *
475 */
476kern_return_t
477mach_vm_read(
478	vm_map_t		map,
479	mach_vm_address_t	addr,
480	mach_vm_size_t	size,
481	pointer_t		*data,
482	mach_msg_type_number_t	*data_size)
483{
484	kern_return_t	error;
485	vm_map_copy_t	ipc_address;
486
487	if (map == VM_MAP_NULL)
488		return(KERN_INVALID_ARGUMENT);
489
490	if ((mach_msg_type_number_t) size != size)
491		return KERN_INVALID_ARGUMENT;
492
493	error = vm_map_copyin(map,
494			(vm_map_address_t)addr,
495			(vm_map_size_t)size,
496			FALSE,	/* src_destroy */
497			&ipc_address);
498
499	if (KERN_SUCCESS == error) {
500		*data = (pointer_t) ipc_address;
501		*data_size = (mach_msg_type_number_t) size;
502		assert(*data_size == size);
503	}
504	return(error);
505}
506
507/*
508 * vm_read -
509 * Read/copy a range from one address space and return it to the caller.
510 * Limited addressability (same range limits as for the native kernel map).
511 *
512 * It is assumed that the address for the returned memory is selected by
513 * the IPC implementation as part of receiving the reply to this call.
514 * If IPC isn't used, the caller must deal with the vm_map_copy_t object
515 * that gets returned.
516 */
517kern_return_t
518vm_read(
519	vm_map_t		map,
520	vm_address_t		addr,
521	vm_size_t		size,
522	pointer_t		*data,
523	mach_msg_type_number_t	*data_size)
524{
525	kern_return_t	error;
526	vm_map_copy_t	ipc_address;
527
528	if (map == VM_MAP_NULL)
529		return(KERN_INVALID_ARGUMENT);
530
531	if (size > (unsigned)(mach_msg_type_number_t) -1) {
532		/*
533		 * The kernel could handle a 64-bit "size" value, but
534		 * it could not return the size of the data in "*data_size"
535		 * without overflowing.
536		 * Let's reject this "size" as invalid.
537		 */
538		return KERN_INVALID_ARGUMENT;
539	}
540
541	error = vm_map_copyin(map,
542			(vm_map_address_t)addr,
543			(vm_map_size_t)size,
544			FALSE,	/* src_destroy */
545			&ipc_address);
546
547	if (KERN_SUCCESS == error) {
548		*data = (pointer_t) ipc_address;
549		*data_size = (mach_msg_type_number_t) size;
550		assert(*data_size == size);
551	}
552	return(error);
553}
554
555/*
556 * mach_vm_read_list -
557 * Read/copy a list of address ranges from specified map.
558 *
559 * MIG does not know how to deal with a returned array of
560 * vm_map_copy_t structures, so we have to do the copyout
561 * manually here.
562 */
563kern_return_t
564mach_vm_read_list(
565	vm_map_t			map,
566	mach_vm_read_entry_t		data_list,
567	natural_t			count)
568{
569	mach_msg_type_number_t	i;
570	kern_return_t	error;
571	vm_map_copy_t	copy;
572
573	if (map == VM_MAP_NULL ||
574	    count > VM_MAP_ENTRY_MAX)
575		return(KERN_INVALID_ARGUMENT);
576
577	error = KERN_SUCCESS;
578	for(i=0; i<count; i++) {
579		vm_map_address_t map_addr;
580		vm_map_size_t map_size;
581
582		map_addr = (vm_map_address_t)(data_list[i].address);
583		map_size = (vm_map_size_t)(data_list[i].size);
584
585		if(map_size != 0) {
586			error = vm_map_copyin(map,
587					map_addr,
588					map_size,
589					FALSE,	/* src_destroy */
590					&copy);
591			if (KERN_SUCCESS == error) {
592				error = vm_map_copyout(
593						current_task()->map,
594						&map_addr,
595						copy);
596				if (KERN_SUCCESS == error) {
597					data_list[i].address = map_addr;
598					continue;
599				}
600				vm_map_copy_discard(copy);
601			}
602		}
603		data_list[i].address = (mach_vm_address_t)0;
604		data_list[i].size = (mach_vm_size_t)0;
605	}
606	return(error);
607}
608
609/*
610 * vm_read_list -
611 * Read/copy a list of address ranges from specified map.
612 *
613 * MIG does not know how to deal with a returned array of
614 * vm_map_copy_t structures, so we have to do the copyout
615 * manually here.
616 *
617 * The source and destination ranges are limited to those
618 * that can be described with a vm_address_t (i.e. same
619 * size map as the kernel).
620 *
621 * JMM - If the result of the copyout is an address range
622 * that cannot be described with a vm_address_t (i.e. the
623 * caller had a larger address space but used this call
624 * anyway), it will result in a truncated address being
625 * returned (and a likely confused caller).
626 */
627
628kern_return_t
629vm_read_list(
630	vm_map_t		map,
631	vm_read_entry_t	data_list,
632	natural_t		count)
633{
634	mach_msg_type_number_t	i;
635	kern_return_t	error;
636	vm_map_copy_t	copy;
637
638	if (map == VM_MAP_NULL ||
639	    count > VM_MAP_ENTRY_MAX)
640		return(KERN_INVALID_ARGUMENT);
641
642	error = KERN_SUCCESS;
643	for(i=0; i<count; i++) {
644		vm_map_address_t map_addr;
645		vm_map_size_t map_size;
646
647		map_addr = (vm_map_address_t)(data_list[i].address);
648		map_size = (vm_map_size_t)(data_list[i].size);
649
650		if(map_size != 0) {
651			error = vm_map_copyin(map,
652					map_addr,
653					map_size,
654					FALSE,	/* src_destroy */
655					&copy);
656			if (KERN_SUCCESS == error) {
657				error = vm_map_copyout(current_task()->map,
658						&map_addr,
659						copy);
660				if (KERN_SUCCESS == error) {
661					data_list[i].address =
662						CAST_DOWN(vm_offset_t, map_addr);
663					continue;
664				}
665				vm_map_copy_discard(copy);
666			}
667		}
668		data_list[i].address = (mach_vm_address_t)0;
669		data_list[i].size = (mach_vm_size_t)0;
670	}
671	return(error);
672}
673
674/*
675 * mach_vm_read_overwrite -
676 * Overwrite a range of the current map with data from the specified
677 * map/address range.
678 *
679 * In making an assumption that the current thread is local, it is
680 * no longer cluster-safe without a fully supportive local proxy
681 * thread/task (but we don't support cluster's anymore so this is moot).
682 */
683
684kern_return_t
685mach_vm_read_overwrite(
686	vm_map_t		map,
687	mach_vm_address_t	address,
688	mach_vm_size_t	size,
689	mach_vm_address_t	data,
690	mach_vm_size_t	*data_size)
691{
692	kern_return_t	error;
693	vm_map_copy_t	copy;
694
695	if (map == VM_MAP_NULL)
696		return(KERN_INVALID_ARGUMENT);
697
698	error = vm_map_copyin(map, (vm_map_address_t)address,
699				(vm_map_size_t)size, FALSE, &copy);
700
701	if (KERN_SUCCESS == error) {
702		error = vm_map_copy_overwrite(current_thread()->map,
703 					(vm_map_address_t)data,
704					copy, FALSE);
705		if (KERN_SUCCESS == error) {
706			*data_size = size;
707			return error;
708		}
709		vm_map_copy_discard(copy);
710	}
711	return(error);
712}
713
714/*
715 * vm_read_overwrite -
716 * Overwrite a range of the current map with data from the specified
717 * map/address range.
718 *
719 * This routine adds the additional limitation that the source and
720 * destination ranges must be describable with vm_address_t values
721 * (i.e. the same size address spaces as the kernel, or at least the
722 * the ranges are in that first portion of the respective address
723 * spaces).
724 */
725
726kern_return_t
727vm_read_overwrite(
728	vm_map_t	map,
729	vm_address_t	address,
730	vm_size_t	size,
731	vm_address_t	data,
732	vm_size_t	*data_size)
733{
734	kern_return_t	error;
735	vm_map_copy_t	copy;
736
737	if (map == VM_MAP_NULL)
738		return(KERN_INVALID_ARGUMENT);
739
740	error = vm_map_copyin(map, (vm_map_address_t)address,
741				(vm_map_size_t)size, FALSE, &copy);
742
743	if (KERN_SUCCESS == error) {
744		error = vm_map_copy_overwrite(current_thread()->map,
745 					(vm_map_address_t)data,
746					copy, FALSE);
747		if (KERN_SUCCESS == error) {
748			*data_size = size;
749			return error;
750		}
751		vm_map_copy_discard(copy);
752	}
753	return(error);
754}
755
756
757/*
758 * mach_vm_write -
759 * Overwrite the specified address range with the data provided
760 * (from the current map).
761 */
762kern_return_t
763mach_vm_write(
764	vm_map_t			map,
765	mach_vm_address_t		address,
766	pointer_t			data,
767	__unused mach_msg_type_number_t	size)
768{
769	if (map == VM_MAP_NULL)
770		return KERN_INVALID_ARGUMENT;
771
772	return vm_map_copy_overwrite(map, (vm_map_address_t)address,
773		(vm_map_copy_t) data, FALSE /* interruptible XXX */);
774}
775
776/*
777 * vm_write -
778 * Overwrite the specified address range with the data provided
779 * (from the current map).
780 *
781 * The addressability of the range of addresses to overwrite is
782 * limited bu the use of a vm_address_t (same size as kernel map).
783 * Either the target map is also small, or the range is in the
784 * low addresses within it.
785 */
786kern_return_t
787vm_write(
788	vm_map_t			map,
789	vm_address_t			address,
790	pointer_t			data,
791	__unused mach_msg_type_number_t	size)
792{
793	if (map == VM_MAP_NULL)
794		return KERN_INVALID_ARGUMENT;
795
796	return vm_map_copy_overwrite(map, (vm_map_address_t)address,
797		(vm_map_copy_t) data, FALSE /* interruptible XXX */);
798}
799
800/*
801 * mach_vm_copy -
802 * Overwrite one range of the specified map with the contents of
803 * another range within that same map (i.e. both address ranges
804 * are "over there").
805 */
806kern_return_t
807mach_vm_copy(
808	vm_map_t		map,
809	mach_vm_address_t	source_address,
810	mach_vm_size_t	size,
811	mach_vm_address_t	dest_address)
812{
813	vm_map_copy_t copy;
814	kern_return_t kr;
815
816	if (map == VM_MAP_NULL)
817		return KERN_INVALID_ARGUMENT;
818
819	kr = vm_map_copyin(map, (vm_map_address_t)source_address,
820			   (vm_map_size_t)size, FALSE, &copy);
821
822	if (KERN_SUCCESS == kr) {
823		kr = vm_map_copy_overwrite(map,
824				(vm_map_address_t)dest_address,
825				copy, FALSE /* interruptible XXX */);
826
827		if (KERN_SUCCESS != kr)
828			vm_map_copy_discard(copy);
829	}
830	return kr;
831}
832
833kern_return_t
834vm_copy(
835	vm_map_t	map,
836	vm_address_t	source_address,
837	vm_size_t	size,
838	vm_address_t	dest_address)
839{
840	vm_map_copy_t copy;
841	kern_return_t kr;
842
843	if (map == VM_MAP_NULL)
844		return KERN_INVALID_ARGUMENT;
845
846	kr = vm_map_copyin(map, (vm_map_address_t)source_address,
847			   (vm_map_size_t)size, FALSE, &copy);
848
849	if (KERN_SUCCESS == kr) {
850		kr = vm_map_copy_overwrite(map,
851				(vm_map_address_t)dest_address,
852				copy, FALSE /* interruptible XXX */);
853
854		if (KERN_SUCCESS != kr)
855			vm_map_copy_discard(copy);
856	}
857	return kr;
858}
859
860/*
861 * mach_vm_map -
862 * Map some range of an object into an address space.
863 *
864 * The object can be one of several types of objects:
865 *	NULL - anonymous memory
866 *	a named entry - a range within another address space
867 *	                or a range within a memory object
868 *	a whole memory object
869 *
870 */
871kern_return_t
872mach_vm_map(
873	vm_map_t		target_map,
874	mach_vm_offset_t	*address,
875	mach_vm_size_t	initial_size,
876	mach_vm_offset_t	mask,
877	int			flags,
878	ipc_port_t		port,
879	vm_object_offset_t	offset,
880	boolean_t		copy,
881	vm_prot_t		cur_protection,
882	vm_prot_t		max_protection,
883	vm_inherit_t		inheritance)
884{
885	kern_return_t		kr;
886	vm_map_offset_t 	vmmaddr;
887
888	vmmaddr = (vm_map_offset_t) *address;
889
890	/* filter out any kernel-only flags */
891	if (flags & ~VM_FLAGS_USER_MAP)
892		return KERN_INVALID_ARGUMENT;
893
894	kr = vm_map_enter_mem_object(target_map,
895				       &vmmaddr,
896				       initial_size,
897				       mask,
898				       flags,
899				       port,
900				       offset,
901				       copy,
902				       cur_protection,
903				       max_protection,
904				       inheritance);
905
906	*address = vmmaddr;
907	return kr;
908}
909
910
911/* legacy interface */
912kern_return_t
913vm_map_64(
914	vm_map_t		target_map,
915	vm_offset_t		*address,
916	vm_size_t		size,
917	vm_offset_t		mask,
918	int			flags,
919	ipc_port_t		port,
920	vm_object_offset_t	offset,
921	boolean_t		copy,
922	vm_prot_t		cur_protection,
923	vm_prot_t		max_protection,
924	vm_inherit_t		inheritance)
925{
926	mach_vm_address_t map_addr;
927	mach_vm_size_t map_size;
928	mach_vm_offset_t map_mask;
929	kern_return_t kr;
930
931	map_addr = (mach_vm_address_t)*address;
932	map_size = (mach_vm_size_t)size;
933	map_mask = (mach_vm_offset_t)mask;
934
935	kr = mach_vm_map(target_map, &map_addr, map_size, map_mask, flags,
936			 port, offset, copy,
937			 cur_protection, max_protection, inheritance);
938	*address = CAST_DOWN(vm_offset_t, map_addr);
939	return kr;
940}
941
942/* temporary, until world build */
943kern_return_t
944vm_map(
945	vm_map_t		target_map,
946	vm_offset_t		*address,
947	vm_size_t		size,
948	vm_offset_t		mask,
949	int			flags,
950	ipc_port_t		port,
951	vm_offset_t		offset,
952	boolean_t		copy,
953	vm_prot_t		cur_protection,
954	vm_prot_t		max_protection,
955	vm_inherit_t		inheritance)
956{
957	mach_vm_address_t map_addr;
958	mach_vm_size_t map_size;
959	mach_vm_offset_t map_mask;
960	vm_object_offset_t obj_offset;
961	kern_return_t kr;
962
963	map_addr = (mach_vm_address_t)*address;
964	map_size = (mach_vm_size_t)size;
965	map_mask = (mach_vm_offset_t)mask;
966	obj_offset = (vm_object_offset_t)offset;
967
968	kr = mach_vm_map(target_map, &map_addr, map_size, map_mask, flags,
969			 port, obj_offset, copy,
970			 cur_protection, max_protection, inheritance);
971	*address = CAST_DOWN(vm_offset_t, map_addr);
972	return kr;
973}
974
975/*
976 * mach_vm_remap -
977 * Remap a range of memory from one task into another,
978 * to another address range within the same task, or
979 * over top of itself (with altered permissions and/or
980 * as an in-place copy of itself).
981 */
982
983kern_return_t
984mach_vm_remap(
985	vm_map_t		target_map,
986	mach_vm_offset_t	*address,
987	mach_vm_size_t	size,
988	mach_vm_offset_t	mask,
989	int			flags,
990	vm_map_t		src_map,
991	mach_vm_offset_t	memory_address,
992	boolean_t		copy,
993	vm_prot_t		*cur_protection,
994	vm_prot_t		*max_protection,
995	vm_inherit_t		inheritance)
996{
997	vm_map_offset_t		map_addr;
998	kern_return_t		kr;
999
1000	if (VM_MAP_NULL == target_map || VM_MAP_NULL == src_map)
1001		return KERN_INVALID_ARGUMENT;
1002
1003	/* filter out any kernel-only flags */
1004	if (flags & ~VM_FLAGS_USER_REMAP)
1005		return KERN_INVALID_ARGUMENT;
1006
1007	map_addr = (vm_map_offset_t)*address;
1008
1009	kr = vm_map_remap(target_map,
1010			  &map_addr,
1011			  size,
1012			  mask,
1013			  flags,
1014			  src_map,
1015			  memory_address,
1016			  copy,
1017			  cur_protection,
1018			  max_protection,
1019			  inheritance);
1020	*address = map_addr;
1021	return kr;
1022}
1023
1024/*
1025 * vm_remap -
1026 * Remap a range of memory from one task into another,
1027 * to another address range within the same task, or
1028 * over top of itself (with altered permissions and/or
1029 * as an in-place copy of itself).
1030 *
1031 * The addressability of the source and target address
1032 * range is limited by the size of vm_address_t (in the
1033 * kernel context).
1034 */
1035kern_return_t
1036vm_remap(
1037	vm_map_t		target_map,
1038	vm_offset_t		*address,
1039	vm_size_t		size,
1040	vm_offset_t		mask,
1041	int			flags,
1042	vm_map_t		src_map,
1043	vm_offset_t		memory_address,
1044	boolean_t		copy,
1045	vm_prot_t		*cur_protection,
1046	vm_prot_t		*max_protection,
1047	vm_inherit_t		inheritance)
1048{
1049	vm_map_offset_t		map_addr;
1050	kern_return_t		kr;
1051
1052	if (VM_MAP_NULL == target_map || VM_MAP_NULL == src_map)
1053		return KERN_INVALID_ARGUMENT;
1054
1055	/* filter out any kernel-only flags */
1056	if (flags & ~VM_FLAGS_USER_REMAP)
1057		return KERN_INVALID_ARGUMENT;
1058
1059	map_addr = (vm_map_offset_t)*address;
1060
1061	kr = vm_map_remap(target_map,
1062			  &map_addr,
1063			  size,
1064			  mask,
1065			  flags,
1066			  src_map,
1067			  memory_address,
1068			  copy,
1069			  cur_protection,
1070			  max_protection,
1071			  inheritance);
1072	*address = CAST_DOWN(vm_offset_t, map_addr);
1073	return kr;
1074}
1075
1076/*
1077 * NOTE: these routine (and this file) will no longer require mach_host_server.h
1078 * when mach_vm_wire and vm_wire are changed to use ledgers.
1079 */
1080#include <mach/mach_host_server.h>
1081/*
1082 *	mach_vm_wire
1083 *	Specify that the range of the virtual address space
1084 *	of the target task must not cause page faults for
1085 *	the indicated accesses.
1086 *
1087 *	[ To unwire the pages, specify VM_PROT_NONE. ]
1088 */
1089kern_return_t
1090mach_vm_wire(
1091	host_priv_t		host_priv,
1092	vm_map_t		map,
1093	mach_vm_offset_t	start,
1094	mach_vm_size_t	size,
1095	vm_prot_t		access)
1096{
1097	kern_return_t		rc;
1098
1099	if (host_priv == HOST_PRIV_NULL)
1100		return KERN_INVALID_HOST;
1101
1102	assert(host_priv == &realhost);
1103
1104	if (map == VM_MAP_NULL)
1105		return KERN_INVALID_TASK;
1106
1107	if (access & ~VM_PROT_ALL || (start + size < start))
1108		return KERN_INVALID_ARGUMENT;
1109
1110	if (access != VM_PROT_NONE) {
1111		rc = vm_map_wire(map, vm_map_trunc_page(start),
1112				 vm_map_round_page(start+size), access, TRUE);
1113	} else {
1114		rc = vm_map_unwire(map, vm_map_trunc_page(start),
1115				   vm_map_round_page(start+size), TRUE);
1116	}
1117	return rc;
1118}
1119
1120/*
1121 *	vm_wire -
1122 *	Specify that the range of the virtual address space
1123 *	of the target task must not cause page faults for
1124 *	the indicated accesses.
1125 *
1126 *	[ To unwire the pages, specify VM_PROT_NONE. ]
1127 */
1128kern_return_t
1129vm_wire(
1130	host_priv_t		host_priv,
1131	register vm_map_t	map,
1132	vm_offset_t		start,
1133	vm_size_t		size,
1134	vm_prot_t		access)
1135{
1136	kern_return_t		rc;
1137
1138	if (host_priv == HOST_PRIV_NULL)
1139		return KERN_INVALID_HOST;
1140
1141	assert(host_priv == &realhost);
1142
1143	if (map == VM_MAP_NULL)
1144		return KERN_INVALID_TASK;
1145
1146	if ((access & ~VM_PROT_ALL) || (start + size < start))
1147		return KERN_INVALID_ARGUMENT;
1148
1149	if (size == 0) {
1150		rc = KERN_SUCCESS;
1151	} else if (access != VM_PROT_NONE) {
1152		rc = vm_map_wire(map, vm_map_trunc_page(start),
1153				 vm_map_round_page(start+size), access, TRUE);
1154	} else {
1155		rc = vm_map_unwire(map, vm_map_trunc_page(start),
1156				   vm_map_round_page(start+size), TRUE);
1157	}
1158	return rc;
1159}
1160
1161/*
1162 *	vm_msync
1163 *
1164 *	Synchronises the memory range specified with its backing store
1165 *	image by either flushing or cleaning the contents to the appropriate
1166 *	memory manager.
1167 *
1168 *	interpretation of sync_flags
1169 *	VM_SYNC_INVALIDATE	- discard pages, only return precious
1170 *				  pages to manager.
1171 *
1172 *	VM_SYNC_INVALIDATE & (VM_SYNC_SYNCHRONOUS | VM_SYNC_ASYNCHRONOUS)
1173 *				- discard pages, write dirty or precious
1174 *				  pages back to memory manager.
1175 *
1176 *	VM_SYNC_SYNCHRONOUS | VM_SYNC_ASYNCHRONOUS
1177 *				- write dirty or precious pages back to
1178 *				  the memory manager.
1179 *
1180 *	VM_SYNC_CONTIGUOUS	- does everything normally, but if there
1181 *				  is a hole in the region, and we would
1182 *				  have returned KERN_SUCCESS, return
1183 *				  KERN_INVALID_ADDRESS instead.
1184 *
1185 *	RETURNS
1186 *	KERN_INVALID_TASK		Bad task parameter
1187 *	KERN_INVALID_ARGUMENT		both sync and async were specified.
1188 *	KERN_SUCCESS			The usual.
1189 *	KERN_INVALID_ADDRESS		There was a hole in the region.
1190 */
1191
1192kern_return_t
1193mach_vm_msync(
1194	vm_map_t		map,
1195	mach_vm_address_t	address,
1196	mach_vm_size_t	size,
1197	vm_sync_t		sync_flags)
1198{
1199
1200	if (map == VM_MAP_NULL)
1201		return(KERN_INVALID_TASK);
1202
1203	return vm_map_msync(map, (vm_map_address_t)address,
1204			(vm_map_size_t)size, sync_flags);
1205}
1206
1207/*
1208 *	vm_msync
1209 *
1210 *	Synchronises the memory range specified with its backing store
1211 *	image by either flushing or cleaning the contents to the appropriate
1212 *	memory manager.
1213 *
1214 *	interpretation of sync_flags
1215 *	VM_SYNC_INVALIDATE	- discard pages, only return precious
1216 *				  pages to manager.
1217 *
1218 *	VM_SYNC_INVALIDATE & (VM_SYNC_SYNCHRONOUS | VM_SYNC_ASYNCHRONOUS)
1219 *				- discard pages, write dirty or precious
1220 *				  pages back to memory manager.
1221 *
1222 *	VM_SYNC_SYNCHRONOUS | VM_SYNC_ASYNCHRONOUS
1223 *				- write dirty or precious pages back to
1224 *				  the memory manager.
1225 *
1226 *	VM_SYNC_CONTIGUOUS	- does everything normally, but if there
1227 *				  is a hole in the region, and we would
1228 *				  have returned KERN_SUCCESS, return
1229 *				  KERN_INVALID_ADDRESS instead.
1230 *
1231 *	The addressability of the range is limited to that which can
1232 *	be described by a vm_address_t.
1233 *
1234 *	RETURNS
1235 *	KERN_INVALID_TASK		Bad task parameter
1236 *	KERN_INVALID_ARGUMENT		both sync and async were specified.
1237 *	KERN_SUCCESS			The usual.
1238 *	KERN_INVALID_ADDRESS		There was a hole in the region.
1239 */
1240
1241kern_return_t
1242vm_msync(
1243	vm_map_t	map,
1244	vm_address_t	address,
1245	vm_size_t	size,
1246	vm_sync_t	sync_flags)
1247{
1248
1249	if (map == VM_MAP_NULL)
1250		return(KERN_INVALID_TASK);
1251
1252	return vm_map_msync(map, (vm_map_address_t)address,
1253			(vm_map_size_t)size, sync_flags);
1254}
1255
1256
1257int
1258vm_toggle_entry_reuse(int toggle, int *old_value)
1259{
1260	vm_map_t map = current_map();
1261
1262	if(toggle == VM_TOGGLE_GETVALUE && old_value != NULL){
1263		*old_value = map->disable_vmentry_reuse;
1264	} else if(toggle == VM_TOGGLE_SET){
1265		vm_map_lock(map);
1266		map->disable_vmentry_reuse = TRUE;
1267		if (map->first_free == vm_map_to_entry(map)) {
1268			map->highest_entry_end = vm_map_min(map);
1269		} else {
1270			map->highest_entry_end = map->first_free->vme_end;
1271		}
1272		vm_map_unlock(map);
1273	} else if (toggle == VM_TOGGLE_CLEAR){
1274		vm_map_lock(map);
1275		map->disable_vmentry_reuse = FALSE;
1276		vm_map_unlock(map);
1277	} else
1278		return KERN_INVALID_ARGUMENT;
1279
1280	return KERN_SUCCESS;
1281}
1282
1283/*
1284 *	mach_vm_behavior_set
1285 *
1286 *	Sets the paging behavior attribute for the  specified range
1287 *	in the specified map.
1288 *
1289 *	This routine will fail with KERN_INVALID_ADDRESS if any address
1290 *	in [start,start+size) is not a valid allocated memory region.
1291 */
1292kern_return_t
1293mach_vm_behavior_set(
1294	vm_map_t		map,
1295	mach_vm_offset_t	start,
1296	mach_vm_size_t	size,
1297	vm_behavior_t		new_behavior)
1298{
1299	if ((map == VM_MAP_NULL) || (start + size < start))
1300		return(KERN_INVALID_ARGUMENT);
1301
1302	if (size == 0)
1303		return KERN_SUCCESS;
1304
1305	return(vm_map_behavior_set(map, vm_map_trunc_page(start),
1306				   vm_map_round_page(start+size), new_behavior));
1307}
1308
1309/*
1310 *	vm_behavior_set
1311 *
1312 *	Sets the paging behavior attribute for the  specified range
1313 *	in the specified map.
1314 *
1315 *	This routine will fail with KERN_INVALID_ADDRESS if any address
1316 *	in [start,start+size) is not a valid allocated memory region.
1317 *
1318 *	This routine is potentially limited in addressibility by the
1319 *	use of vm_offset_t (if the map provided is larger than the
1320 *	kernel's).
1321 */
1322kern_return_t
1323vm_behavior_set(
1324	vm_map_t		map,
1325	vm_offset_t		start,
1326	vm_size_t		size,
1327	vm_behavior_t		new_behavior)
1328{
1329	if ((map == VM_MAP_NULL) || (start + size < start))
1330		return(KERN_INVALID_ARGUMENT);
1331
1332	if (size == 0)
1333		return KERN_SUCCESS;
1334
1335	return(vm_map_behavior_set(map, vm_map_trunc_page(start),
1336				   vm_map_round_page(start+size), new_behavior));
1337}
1338
1339/*
1340 *	mach_vm_region:
1341 *
1342 *	User call to obtain information about a region in
1343 *	a task's address map. Currently, only one flavor is
1344 *	supported.
1345 *
1346 *	XXX The reserved and behavior fields cannot be filled
1347 *	    in until the vm merge from the IK is completed, and
1348 *	    vm_reserve is implemented.
1349 *
1350 *	XXX Dependency: syscall_vm_region() also supports only one flavor.
1351 */
1352
1353kern_return_t
1354mach_vm_region(
1355	vm_map_t		 map,
1356	mach_vm_offset_t	*address,		/* IN/OUT */
1357	mach_vm_size_t	*size,			/* OUT */
1358	vm_region_flavor_t	 flavor,		/* IN */
1359	vm_region_info_t	 info,			/* OUT */
1360	mach_msg_type_number_t	*count,			/* IN/OUT */
1361	mach_port_t		*object_name)		/* OUT */
1362{
1363	vm_map_offset_t 	map_addr;
1364	vm_map_size_t 		map_size;
1365	kern_return_t		kr;
1366
1367	if (VM_MAP_NULL == map)
1368		return KERN_INVALID_ARGUMENT;
1369
1370	map_addr = (vm_map_offset_t)*address;
1371	map_size = (vm_map_size_t)*size;
1372
1373	/* legacy conversion */
1374	if (VM_REGION_BASIC_INFO == flavor)
1375		flavor = VM_REGION_BASIC_INFO_64;
1376
1377	kr = vm_map_region(map,
1378			   &map_addr, &map_size,
1379			   flavor, info, count,
1380			   object_name);
1381
1382	*address = map_addr;
1383	*size = map_size;
1384	return kr;
1385}
1386
1387/*
1388 *	vm_region_64 and vm_region:
1389 *
1390 *	User call to obtain information about a region in
1391 *	a task's address map. Currently, only one flavor is
1392 *	supported.
1393 *
1394 *	XXX The reserved and behavior fields cannot be filled
1395 *	    in until the vm merge from the IK is completed, and
1396 *	    vm_reserve is implemented.
1397 *
1398 *	XXX Dependency: syscall_vm_region() also supports only one flavor.
1399 */
1400
1401kern_return_t
1402vm_region_64(
1403	vm_map_t		 map,
1404	vm_offset_t	        *address,		/* IN/OUT */
1405	vm_size_t		*size,			/* OUT */
1406	vm_region_flavor_t	 flavor,		/* IN */
1407	vm_region_info_t	 info,			/* OUT */
1408	mach_msg_type_number_t	*count,			/* IN/OUT */
1409	mach_port_t		*object_name)		/* OUT */
1410{
1411	vm_map_offset_t 	map_addr;
1412	vm_map_size_t 		map_size;
1413	kern_return_t		kr;
1414
1415	if (VM_MAP_NULL == map)
1416		return KERN_INVALID_ARGUMENT;
1417
1418	map_addr = (vm_map_offset_t)*address;
1419	map_size = (vm_map_size_t)*size;
1420
1421	/* legacy conversion */
1422	if (VM_REGION_BASIC_INFO == flavor)
1423		flavor = VM_REGION_BASIC_INFO_64;
1424
1425	kr = vm_map_region(map,
1426			   &map_addr, &map_size,
1427			   flavor, info, count,
1428			   object_name);
1429
1430	*address = CAST_DOWN(vm_offset_t, map_addr);
1431	*size = CAST_DOWN(vm_size_t, map_size);
1432
1433	if (KERN_SUCCESS == kr && map_addr + map_size > VM_MAX_ADDRESS)
1434		return KERN_INVALID_ADDRESS;
1435	return kr;
1436}
1437
1438kern_return_t
1439vm_region(
1440	vm_map_t			map,
1441	vm_address_t	      		*address,	/* IN/OUT */
1442	vm_size_t			*size,		/* OUT */
1443	vm_region_flavor_t	 	flavor,	/* IN */
1444	vm_region_info_t	 	info,		/* OUT */
1445	mach_msg_type_number_t	*count,	/* IN/OUT */
1446	mach_port_t			*object_name)	/* OUT */
1447{
1448	vm_map_address_t 	map_addr;
1449	vm_map_size_t 		map_size;
1450	kern_return_t		kr;
1451
1452	if (VM_MAP_NULL == map)
1453		return KERN_INVALID_ARGUMENT;
1454
1455	map_addr = (vm_map_address_t)*address;
1456	map_size = (vm_map_size_t)*size;
1457
1458	kr = vm_map_region(map,
1459			   &map_addr, &map_size,
1460			   flavor, info, count,
1461			   object_name);
1462
1463	*address = CAST_DOWN(vm_address_t, map_addr);
1464	*size = CAST_DOWN(vm_size_t, map_size);
1465
1466	if (KERN_SUCCESS == kr && map_addr + map_size > VM_MAX_ADDRESS)
1467		return KERN_INVALID_ADDRESS;
1468	return kr;
1469}
1470
1471/*
1472 *	vm_region_recurse: A form of vm_region which follows the
1473 *	submaps in a target map
1474 *
1475 */
1476kern_return_t
1477mach_vm_region_recurse(
1478	vm_map_t			map,
1479	mach_vm_address_t		*address,
1480	mach_vm_size_t		*size,
1481	uint32_t			*depth,
1482	vm_region_recurse_info_t	info,
1483	mach_msg_type_number_t 	*infoCnt)
1484{
1485	vm_map_address_t	map_addr;
1486	vm_map_size_t		map_size;
1487	kern_return_t		kr;
1488
1489	if (VM_MAP_NULL == map)
1490		return KERN_INVALID_ARGUMENT;
1491
1492	map_addr = (vm_map_address_t)*address;
1493	map_size = (vm_map_size_t)*size;
1494
1495	kr = vm_map_region_recurse_64(
1496			map,
1497			&map_addr,
1498			&map_size,
1499			depth,
1500			(vm_region_submap_info_64_t)info,
1501			infoCnt);
1502
1503	*address = map_addr;
1504	*size = map_size;
1505	return kr;
1506}
1507
1508/*
1509 *	vm_region_recurse: A form of vm_region which follows the
1510 *	submaps in a target map
1511 *
1512 */
1513kern_return_t
1514vm_region_recurse_64(
1515	vm_map_t			map,
1516	vm_address_t			*address,
1517	vm_size_t			*size,
1518	uint32_t			*depth,
1519	vm_region_recurse_info_64_t	info,
1520	mach_msg_type_number_t 	*infoCnt)
1521{
1522	vm_map_address_t	map_addr;
1523	vm_map_size_t		map_size;
1524	kern_return_t		kr;
1525
1526	if (VM_MAP_NULL == map)
1527		return KERN_INVALID_ARGUMENT;
1528
1529	map_addr = (vm_map_address_t)*address;
1530	map_size = (vm_map_size_t)*size;
1531
1532	kr = vm_map_region_recurse_64(
1533			map,
1534			&map_addr,
1535			&map_size,
1536			depth,
1537			(vm_region_submap_info_64_t)info,
1538			infoCnt);
1539
1540	*address = CAST_DOWN(vm_address_t, map_addr);
1541	*size = CAST_DOWN(vm_size_t, map_size);
1542
1543	if (KERN_SUCCESS == kr && map_addr + map_size > VM_MAX_ADDRESS)
1544		return KERN_INVALID_ADDRESS;
1545	return kr;
1546}
1547
1548kern_return_t
1549vm_region_recurse(
1550	vm_map_t			map,
1551	vm_offset_t	       	*address,	/* IN/OUT */
1552	vm_size_t			*size,		/* OUT */
1553	natural_t	 		*depth,	/* IN/OUT */
1554	vm_region_recurse_info_t	info32,	/* IN/OUT */
1555	mach_msg_type_number_t	*infoCnt)	/* IN/OUT */
1556{
1557	vm_region_submap_info_data_64_t info64;
1558	vm_region_submap_info_t info;
1559	vm_map_address_t	map_addr;
1560	vm_map_size_t		map_size;
1561	kern_return_t		kr;
1562
1563	if (VM_MAP_NULL == map || *infoCnt < VM_REGION_SUBMAP_INFO_COUNT)
1564		return KERN_INVALID_ARGUMENT;
1565
1566
1567	map_addr = (vm_map_address_t)*address;
1568	map_size = (vm_map_size_t)*size;
1569	info = (vm_region_submap_info_t)info32;
1570	*infoCnt = VM_REGION_SUBMAP_INFO_COUNT_64;
1571
1572	kr = vm_map_region_recurse_64(map, &map_addr,&map_size,
1573				      depth, &info64, infoCnt);
1574
1575	info->protection = info64.protection;
1576	info->max_protection = info64.max_protection;
1577	info->inheritance = info64.inheritance;
1578	info->offset = (uint32_t)info64.offset; /* trouble-maker */
1579        info->user_tag = info64.user_tag;
1580        info->pages_resident = info64.pages_resident;
1581        info->pages_shared_now_private = info64.pages_shared_now_private;
1582        info->pages_swapped_out = info64.pages_swapped_out;
1583        info->pages_dirtied = info64.pages_dirtied;
1584        info->ref_count = info64.ref_count;
1585        info->shadow_depth = info64.shadow_depth;
1586        info->external_pager = info64.external_pager;
1587        info->share_mode = info64.share_mode;
1588	info->is_submap = info64.is_submap;
1589	info->behavior = info64.behavior;
1590	info->object_id = info64.object_id;
1591	info->user_wired_count = info64.user_wired_count;
1592
1593	*address = CAST_DOWN(vm_address_t, map_addr);
1594	*size = CAST_DOWN(vm_size_t, map_size);
1595	*infoCnt = VM_REGION_SUBMAP_INFO_COUNT;
1596
1597	if (KERN_SUCCESS == kr && map_addr + map_size > VM_MAX_ADDRESS)
1598		return KERN_INVALID_ADDRESS;
1599	return kr;
1600}
1601
1602kern_return_t
1603mach_vm_purgable_control(
1604	vm_map_t		map,
1605	mach_vm_offset_t	address,
1606	vm_purgable_t		control,
1607	int			*state)
1608{
1609	if (VM_MAP_NULL == map)
1610		return KERN_INVALID_ARGUMENT;
1611
1612	return vm_map_purgable_control(map,
1613				       vm_map_trunc_page(address),
1614				       control,
1615				       state);
1616}
1617
1618kern_return_t
1619vm_purgable_control(
1620	vm_map_t		map,
1621	vm_offset_t		address,
1622	vm_purgable_t		control,
1623	int			*state)
1624{
1625	if (VM_MAP_NULL == map)
1626		return KERN_INVALID_ARGUMENT;
1627
1628	return vm_map_purgable_control(map,
1629				       vm_map_trunc_page(address),
1630				       control,
1631				       state);
1632}
1633
1634
1635/*
1636 *	Ordinarily, the right to allocate CPM is restricted
1637 *	to privileged applications (those that can gain access
1638 *	to the host priv port).  Set this variable to zero if
1639 *	you want to let any application allocate CPM.
1640 */
1641unsigned int	vm_allocate_cpm_privileged = 0;
1642
1643/*
1644 *	Allocate memory in the specified map, with the caveat that
1645 *	the memory is physically contiguous.  This call may fail
1646 *	if the system can't find sufficient contiguous memory.
1647 *	This call may cause or lead to heart-stopping amounts of
1648 *	paging activity.
1649 *
1650 *	Memory obtained from this call should be freed in the
1651 *	normal way, viz., via vm_deallocate.
1652 */
1653kern_return_t
1654vm_allocate_cpm(
1655	host_priv_t		host_priv,
1656	vm_map_t		map,
1657	vm_address_t		*addr,
1658	vm_size_t		size,
1659	int			flags)
1660{
1661	vm_map_address_t	map_addr;
1662	vm_map_size_t		map_size;
1663	kern_return_t		kr;
1664
1665	if (vm_allocate_cpm_privileged && HOST_PRIV_NULL == host_priv)
1666		return KERN_INVALID_HOST;
1667
1668	if (VM_MAP_NULL == map)
1669		return KERN_INVALID_ARGUMENT;
1670
1671	map_addr = (vm_map_address_t)*addr;
1672	map_size = (vm_map_size_t)size;
1673
1674	kr = vm_map_enter_cpm(map,
1675			      &map_addr,
1676			      map_size,
1677			      flags);
1678
1679	*addr = CAST_DOWN(vm_address_t, map_addr);
1680	return kr;
1681}
1682
1683
1684kern_return_t
1685mach_vm_page_query(
1686	vm_map_t		map,
1687	mach_vm_offset_t	offset,
1688	int			*disposition,
1689	int			*ref_count)
1690{
1691	if (VM_MAP_NULL == map)
1692		return KERN_INVALID_ARGUMENT;
1693
1694	return vm_map_page_query_internal(map,
1695					  vm_map_trunc_page(offset),
1696					  disposition, ref_count);
1697}
1698
1699kern_return_t
1700vm_map_page_query(
1701	vm_map_t		map,
1702	vm_offset_t		offset,
1703	int			*disposition,
1704	int			*ref_count)
1705{
1706	if (VM_MAP_NULL == map)
1707		return KERN_INVALID_ARGUMENT;
1708
1709	return vm_map_page_query_internal(map,
1710					  vm_map_trunc_page(offset),
1711					  disposition, ref_count);
1712}
1713
1714kern_return_t
1715mach_vm_page_info(
1716	vm_map_t		map,
1717	mach_vm_address_t	address,
1718	vm_page_info_flavor_t	flavor,
1719	vm_page_info_t		info,
1720	mach_msg_type_number_t	*count)
1721{
1722	kern_return_t	kr;
1723
1724	if (map == VM_MAP_NULL) {
1725		return KERN_INVALID_ARGUMENT;
1726	}
1727
1728	kr = vm_map_page_info(map, address, flavor, info, count);
1729	return kr;
1730}
1731
1732/* map a (whole) upl into an address space */
1733kern_return_t
1734vm_upl_map(
1735	vm_map_t		map,
1736	upl_t			upl,
1737	vm_address_t		*dst_addr)
1738{
1739	vm_map_offset_t		map_addr;
1740	kern_return_t		kr;
1741
1742	if (VM_MAP_NULL == map)
1743		return KERN_INVALID_ARGUMENT;
1744
1745	kr = vm_map_enter_upl(map, upl, &map_addr);
1746	*dst_addr = CAST_DOWN(vm_address_t, map_addr);
1747	return kr;
1748}
1749
1750kern_return_t
1751vm_upl_unmap(
1752	vm_map_t		map,
1753	upl_t 			upl)
1754{
1755	if (VM_MAP_NULL == map)
1756		return KERN_INVALID_ARGUMENT;
1757
1758	return (vm_map_remove_upl(map, upl));
1759}
1760
1761/* Retrieve a upl for an object underlying an address range in a map */
1762
1763kern_return_t
1764vm_map_get_upl(
1765	vm_map_t		map,
1766	vm_map_offset_t		map_offset,
1767	upl_size_t		*upl_size,
1768	upl_t			*upl,
1769	upl_page_info_array_t	page_list,
1770	unsigned int		*count,
1771	int			*flags,
1772	int             	force_data_sync)
1773{
1774	int 		map_flags;
1775	kern_return_t	kr;
1776
1777	if (VM_MAP_NULL == map)
1778		return KERN_INVALID_ARGUMENT;
1779
1780	map_flags = *flags & ~UPL_NOZEROFILL;
1781	if (force_data_sync)
1782		map_flags |= UPL_FORCE_DATA_SYNC;
1783
1784	kr = vm_map_create_upl(map,
1785			       map_offset,
1786			       upl_size,
1787			       upl,
1788			       page_list,
1789			       count,
1790			       &map_flags);
1791
1792	*flags = (map_flags & ~UPL_FORCE_DATA_SYNC);
1793	return kr;
1794}
1795
1796/*
1797 * mach_make_memory_entry_64
1798 *
1799 * Think of it as a two-stage vm_remap() operation.  First
1800 * you get a handle.  Second, you get map that handle in
1801 * somewhere else. Rather than doing it all at once (and
1802 * without needing access to the other whole map).
1803 */
1804
1805kern_return_t
1806mach_make_memory_entry_64(
1807	vm_map_t		target_map,
1808	memory_object_size_t	*size,
1809	memory_object_offset_t offset,
1810	vm_prot_t		permission,
1811	ipc_port_t		*object_handle,
1812	ipc_port_t		parent_handle)
1813{
1814	vm_map_version_t	version;
1815	vm_named_entry_t	parent_entry;
1816	vm_named_entry_t	user_entry;
1817	ipc_port_t		user_handle;
1818	kern_return_t		kr;
1819	vm_map_t		real_map;
1820
1821	/* needed for call to vm_map_lookup_locked */
1822	boolean_t		wired;
1823	vm_object_offset_t	obj_off;
1824	vm_prot_t		prot;
1825	struct vm_object_fault_info	fault_info;
1826	vm_object_t		object;
1827	vm_object_t		shadow_object;
1828
1829	/* needed for direct map entry manipulation */
1830	vm_map_entry_t		map_entry;
1831	vm_map_entry_t		next_entry;
1832	vm_map_t		local_map;
1833	vm_map_t		original_map = target_map;
1834	vm_map_size_t		total_size;
1835	vm_map_size_t		map_size;
1836	vm_map_offset_t		map_offset;
1837	vm_map_offset_t		local_offset;
1838	vm_object_size_t	mappable_size;
1839
1840	unsigned int		access;
1841	vm_prot_t		protections;
1842	vm_prot_t		original_protections, mask_protections;
1843	unsigned int		wimg_mode;
1844
1845	boolean_t		force_shadow = FALSE;
1846
1847	if (((permission & 0x00FF0000) &
1848	     ~(MAP_MEM_ONLY |
1849	       MAP_MEM_NAMED_CREATE |
1850	       MAP_MEM_PURGABLE |
1851	       MAP_MEM_NAMED_REUSE))) {
1852		/*
1853		 * Unknown flag: reject for forward compatibility.
1854		 */
1855		return KERN_INVALID_VALUE;
1856	}
1857
1858	if (parent_handle != IP_NULL &&
1859	    ip_kotype(parent_handle) == IKOT_NAMED_ENTRY) {
1860		parent_entry = (vm_named_entry_t) parent_handle->ip_kobject;
1861	} else {
1862		parent_entry = NULL;
1863	}
1864
1865	original_protections = permission & VM_PROT_ALL;
1866	protections = original_protections;
1867	mask_protections = permission & VM_PROT_IS_MASK;
1868	access = GET_MAP_MEM(permission);
1869
1870	user_handle = IP_NULL;
1871	user_entry = NULL;
1872
1873	map_offset = vm_map_trunc_page(offset);
1874	map_size = vm_map_round_page(*size);
1875
1876	if (permission & MAP_MEM_ONLY) {
1877		boolean_t		parent_is_object;
1878
1879		if (parent_entry == NULL) {
1880			return KERN_INVALID_ARGUMENT;
1881		}
1882
1883		parent_is_object = !(parent_entry->is_sub_map || parent_entry->is_pager);
1884		object = parent_entry->backing.object;
1885		if(parent_is_object && object != VM_OBJECT_NULL)
1886			wimg_mode = object->wimg_bits;
1887		else
1888			wimg_mode = VM_WIMG_USE_DEFAULT;
1889		if((access != GET_MAP_MEM(parent_entry->protection)) &&
1890				!(parent_entry->protection & VM_PROT_WRITE)) {
1891			return KERN_INVALID_RIGHT;
1892		}
1893		if(access == MAP_MEM_IO) {
1894		   SET_MAP_MEM(access, parent_entry->protection);
1895		   wimg_mode = VM_WIMG_IO;
1896		} else if (access == MAP_MEM_COPYBACK) {
1897		   SET_MAP_MEM(access, parent_entry->protection);
1898		   wimg_mode = VM_WIMG_USE_DEFAULT;
1899		} else if (access == MAP_MEM_INNERWBACK) {
1900		   SET_MAP_MEM(access, parent_entry->protection);
1901		   wimg_mode = VM_WIMG_INNERWBACK;
1902		} else if (access == MAP_MEM_WTHRU) {
1903		   SET_MAP_MEM(access, parent_entry->protection);
1904		   wimg_mode = VM_WIMG_WTHRU;
1905		} else if (access == MAP_MEM_WCOMB) {
1906		   SET_MAP_MEM(access, parent_entry->protection);
1907		   wimg_mode = VM_WIMG_WCOMB;
1908		}
1909		if (parent_is_object && object &&
1910			(access != MAP_MEM_NOOP) &&
1911			(!(object->nophyscache))) {
1912
1913			if (object->wimg_bits != wimg_mode) {
1914				vm_object_lock(object);
1915				vm_object_change_wimg_mode(object, wimg_mode);
1916				vm_object_unlock(object);
1917			}
1918		}
1919		if (object_handle)
1920			*object_handle = IP_NULL;
1921		return KERN_SUCCESS;
1922	}
1923
1924	if(permission & MAP_MEM_NAMED_CREATE) {
1925		kr = mach_memory_entry_allocate(&user_entry, &user_handle);
1926		if (kr != KERN_SUCCESS) {
1927			return KERN_FAILURE;
1928		}
1929
1930		/*
1931		 * Force the creation of the VM object now.
1932		 */
1933		if (map_size > (vm_map_size_t) ANON_MAX_SIZE) {
1934			/*
1935			 * LP64todo - for now, we can only allocate 4GB-4096
1936			 * internal objects because the default pager can't
1937			 * page bigger ones.  Remove this when it can.
1938			 */
1939			kr = KERN_FAILURE;
1940			goto make_mem_done;
1941		}
1942
1943		object = vm_object_allocate(map_size);
1944		assert(object != VM_OBJECT_NULL);
1945
1946		if (permission & MAP_MEM_PURGABLE) {
1947			if (! (permission & VM_PROT_WRITE)) {
1948				/* if we can't write, we can't purge */
1949				vm_object_deallocate(object);
1950				kr = KERN_INVALID_ARGUMENT;
1951				goto make_mem_done;
1952			}
1953			object->purgable = VM_PURGABLE_NONVOLATILE;
1954		}
1955
1956		/*
1957		 * The VM object is brand new and nobody else knows about it,
1958		 * so we don't need to lock it.
1959		 */
1960
1961		wimg_mode = object->wimg_bits;
1962		if (access == MAP_MEM_IO) {
1963			wimg_mode = VM_WIMG_IO;
1964		} else if (access == MAP_MEM_COPYBACK) {
1965			wimg_mode = VM_WIMG_USE_DEFAULT;
1966		} else if (access == MAP_MEM_INNERWBACK) {
1967			wimg_mode = VM_WIMG_INNERWBACK;
1968		} else if (access == MAP_MEM_WTHRU) {
1969			wimg_mode = VM_WIMG_WTHRU;
1970		} else if (access == MAP_MEM_WCOMB) {
1971			wimg_mode = VM_WIMG_WCOMB;
1972		}
1973		if (access != MAP_MEM_NOOP) {
1974			object->wimg_bits = wimg_mode;
1975		}
1976		/* the object has no pages, so no WIMG bits to update here */
1977
1978		/*
1979		 * XXX
1980		 * We use this path when we want to make sure that
1981		 * nobody messes with the object (coalesce, for
1982		 * example) before we map it.
1983		 * We might want to use these objects for transposition via
1984		 * vm_object_transpose() too, so we don't want any copy or
1985		 * shadow objects either...
1986		 */
1987		object->copy_strategy = MEMORY_OBJECT_COPY_NONE;
1988
1989		user_entry->backing.object = object;
1990		user_entry->internal = TRUE;
1991		user_entry->is_sub_map = FALSE;
1992		user_entry->is_pager = FALSE;
1993		user_entry->offset = 0;
1994		user_entry->protection = protections;
1995		SET_MAP_MEM(access, user_entry->protection);
1996		user_entry->size = map_size;
1997
1998		/* user_object pager and internal fields are not used */
1999		/* when the object field is filled in.		      */
2000
2001		*size = CAST_DOWN(vm_size_t, map_size);
2002		*object_handle = user_handle;
2003		return KERN_SUCCESS;
2004	}
2005
2006	if (parent_entry == NULL ||
2007	    (permission & MAP_MEM_NAMED_REUSE)) {
2008
2009		/* Create a named object based on address range within the task map */
2010		/* Go find the object at given address */
2011
2012		if (target_map == VM_MAP_NULL) {
2013			return KERN_INVALID_TASK;
2014		}
2015
2016redo_lookup:
2017		protections = original_protections;
2018		vm_map_lock_read(target_map);
2019
2020		/* get the object associated with the target address */
2021		/* note we check the permission of the range against */
2022		/* that requested by the caller */
2023
2024		kr = vm_map_lookup_locked(&target_map, map_offset,
2025					  protections | mask_protections,
2026					  OBJECT_LOCK_EXCLUSIVE, &version,
2027					  &object, &obj_off, &prot, &wired,
2028					  &fault_info,
2029					  &real_map);
2030		if (kr != KERN_SUCCESS) {
2031			vm_map_unlock_read(target_map);
2032			goto make_mem_done;
2033		}
2034		if (mask_protections) {
2035			/*
2036			 * The caller asked us to use the "protections" as
2037			 * a mask, so restrict "protections" to what this
2038			 * mapping actually allows.
2039			 */
2040			protections &= prot;
2041		}
2042		if (((prot & protections) != protections)
2043					|| (object == kernel_object)) {
2044			kr = KERN_INVALID_RIGHT;
2045			vm_object_unlock(object);
2046			vm_map_unlock_read(target_map);
2047			if(real_map != target_map)
2048				vm_map_unlock_read(real_map);
2049			if(object == kernel_object) {
2050				printf("Warning: Attempt to create a named"
2051					" entry from the kernel_object\n");
2052			}
2053			goto make_mem_done;
2054		}
2055
2056		/* We have an object, now check to see if this object */
2057		/* is suitable.  If not, create a shadow and share that */
2058
2059		/*
2060		 * We have to unlock the VM object to avoid deadlocking with
2061		 * a VM map lock (the lock ordering is map, the object), if we
2062		 * need to modify the VM map to create a shadow object.  Since
2063		 * we might release the VM map lock below anyway, we have
2064		 * to release the VM map lock now.
2065		 * XXX FBDP There must be a way to avoid this double lookup...
2066		 *
2067		 * Take an extra reference on the VM object to make sure it's
2068		 * not going to disappear.
2069		 */
2070		vm_object_reference_locked(object); /* extra ref to hold obj */
2071		vm_object_unlock(object);
2072
2073		local_map = original_map;
2074		local_offset = map_offset;
2075		if(target_map != local_map) {
2076			vm_map_unlock_read(target_map);
2077			if(real_map != target_map)
2078				vm_map_unlock_read(real_map);
2079			vm_map_lock_read(local_map);
2080			target_map = local_map;
2081			real_map = local_map;
2082		}
2083		while(TRUE) {
2084		   if(!vm_map_lookup_entry(local_map,
2085						local_offset, &map_entry)) {
2086			kr = KERN_INVALID_ARGUMENT;
2087                        vm_map_unlock_read(target_map);
2088			if(real_map != target_map)
2089				vm_map_unlock_read(real_map);
2090                        vm_object_deallocate(object); /* release extra ref */
2091			object = VM_OBJECT_NULL;
2092                        goto make_mem_done;
2093		   }
2094		   if(!(map_entry->is_sub_map)) {
2095		      if(map_entry->object.vm_object != object) {
2096			 kr = KERN_INVALID_ARGUMENT;
2097                         vm_map_unlock_read(target_map);
2098			 if(real_map != target_map)
2099				vm_map_unlock_read(real_map);
2100                         vm_object_deallocate(object); /* release extra ref */
2101			 object = VM_OBJECT_NULL;
2102                         goto make_mem_done;
2103	              }
2104		      break;
2105		   } else {
2106			vm_map_t	tmap;
2107			tmap = local_map;
2108			local_map = map_entry->object.sub_map;
2109
2110			vm_map_lock_read(local_map);
2111			vm_map_unlock_read(tmap);
2112			target_map = local_map;
2113			real_map = local_map;
2114			local_offset = local_offset - map_entry->vme_start;
2115			local_offset += map_entry->offset;
2116		   }
2117		}
2118
2119		/*
2120		 * We found the VM map entry, lock the VM object again.
2121		 */
2122		vm_object_lock(object);
2123		if(map_entry->wired_count) {
2124			 /* JMM - The check below should be reworked instead. */
2125			 object->true_share = TRUE;
2126		      }
2127		if (mask_protections) {
2128			/*
2129			 * The caller asked us to use the "protections" as
2130			 * a mask, so restrict "protections" to what this
2131			 * mapping actually allows.
2132			 */
2133			protections &= map_entry->max_protection;
2134		}
2135		if(((map_entry->max_protection) & protections) != protections) {
2136			 kr = KERN_INVALID_RIGHT;
2137                         vm_object_unlock(object);
2138                         vm_map_unlock_read(target_map);
2139			 if(real_map != target_map)
2140				vm_map_unlock_read(real_map);
2141			 vm_object_deallocate(object);
2142			 object = VM_OBJECT_NULL;
2143                         goto make_mem_done;
2144		}
2145
2146		mappable_size = fault_info.hi_offset - obj_off;
2147		total_size = map_entry->vme_end - map_entry->vme_start;
2148		if(map_size > mappable_size) {
2149			/* try to extend mappable size if the entries */
2150			/* following are from the same object and are */
2151			/* compatible */
2152			next_entry = map_entry->vme_next;
2153			/* lets see if the next map entry is still   */
2154			/* pointing at this object and is contiguous */
2155			while(map_size > mappable_size) {
2156				if((next_entry->object.vm_object == object) &&
2157					(next_entry->vme_start ==
2158						next_entry->vme_prev->vme_end) &&
2159					(next_entry->offset ==
2160					   next_entry->vme_prev->offset +
2161					   (next_entry->vme_prev->vme_end -
2162				 	   next_entry->vme_prev->vme_start))) {
2163					if (mask_protections) {
2164						/*
2165						 * The caller asked us to use
2166						 * the "protections" as a mask,
2167						 * so restrict "protections" to
2168						 * what this mapping actually
2169						 * allows.
2170						 */
2171						protections &= next_entry->max_protection;
2172					}
2173					if ((next_entry->wired_count) &&
2174					    (map_entry->wired_count == 0)) {
2175						break;
2176					}
2177					if(((next_entry->max_protection)
2178						& protections) != protections) {
2179			 			break;
2180					}
2181					if (next_entry->needs_copy !=
2182					    map_entry->needs_copy)
2183						break;
2184					mappable_size += next_entry->vme_end
2185						- next_entry->vme_start;
2186					total_size += next_entry->vme_end
2187						- next_entry->vme_start;
2188					next_entry = next_entry->vme_next;
2189				} else {
2190					break;
2191				}
2192
2193			}
2194		}
2195
2196#if !CONFIG_EMBEDDED
2197		if (vm_map_entry_should_cow_for_true_share(map_entry) &&
2198		    object->vo_size > map_size &&
2199		    map_size != 0) {
2200			/*
2201			 * Set up the targeted range for copy-on-write to
2202			 * limit the impact of "true_share"/"copy_delay" to
2203			 * that range instead of the entire VM object...
2204			 */
2205
2206			vm_object_unlock(object);
2207			if (vm_map_lock_read_to_write(target_map)) {
2208				vm_object_deallocate(object);
2209				target_map = original_map;
2210				goto redo_lookup;
2211			}
2212
2213			vm_map_clip_start(target_map, map_entry, vm_map_trunc_page(offset));
2214			vm_map_clip_end(target_map, map_entry, vm_map_round_page(offset) + map_size);
2215			force_shadow = TRUE;
2216
2217			map_size = map_entry->vme_end - map_entry->vme_start;
2218			total_size = map_size;
2219
2220			vm_map_lock_write_to_read(target_map);
2221			vm_object_lock(object);
2222		}
2223#endif /* !CONFIG_EMBEDDED */
2224
2225		if(object->internal) {
2226	   		/* vm_map_lookup_locked will create a shadow if   */
2227		 	/* needs_copy is set but does not check for the   */
2228			/* other two conditions shown. It is important to */
2229			/* set up an object which will not be pulled from */
2230			/* under us.  */
2231
2232	      		if (force_shadow ||
2233			    ((map_entry->needs_copy  ||
2234			      object->shadowed ||
2235			      (object->vo_size > total_size)) &&
2236			     !object->true_share)) {
2237				/*
2238				 * We have to unlock the VM object before
2239				 * trying to upgrade the VM map lock, to
2240				 * honor lock ordering (map then object).
2241				 * Otherwise, we would deadlock if another
2242				 * thread holds a read lock on the VM map and
2243				 * is trying to acquire the VM object's lock.
2244				 * We still hold an extra reference on the
2245				 * VM object, guaranteeing that it won't
2246				 * disappear.
2247				 */
2248				vm_object_unlock(object);
2249
2250		   		if (vm_map_lock_read_to_write(target_map)) {
2251					/*
2252					 * We couldn't upgrade our VM map lock
2253					 * from "read" to "write" and we lost
2254					 * our "read" lock.
2255					 * Start all over again...
2256					 */
2257					vm_object_deallocate(object); /* extra ref */
2258					target_map = original_map;
2259		            		goto redo_lookup;
2260		   		}
2261				vm_object_lock(object);
2262
2263				/*
2264				 * JMM - We need to avoid coming here when the object
2265				 * is wired by anybody, not just the current map.  Why
2266				 * couldn't we use the standard vm_object_copy_quickly()
2267				 * approach here?
2268				 */
2269
2270		   		/* create a shadow object */
2271				vm_object_shadow(&map_entry->object.vm_object,
2272						 &map_entry->offset, total_size);
2273				shadow_object = map_entry->object.vm_object;
2274				vm_object_unlock(object);
2275
2276				prot = map_entry->protection & ~VM_PROT_WRITE;
2277
2278				if (override_nx(target_map, map_entry->alias) && prot)
2279				        prot |= VM_PROT_EXECUTE;
2280
2281				vm_object_pmap_protect(
2282					object, map_entry->offset,
2283					total_size,
2284					((map_entry->is_shared
2285					  || target_map->mapped_in_other_pmaps)
2286							? PMAP_NULL :
2287							target_map->pmap),
2288					map_entry->vme_start,
2289					prot);
2290				total_size -= (map_entry->vme_end
2291						- map_entry->vme_start);
2292				next_entry = map_entry->vme_next;
2293				map_entry->needs_copy = FALSE;
2294
2295				vm_object_lock(shadow_object);
2296				while (total_size) {
2297				    assert((next_entry->wired_count == 0) ||
2298					   (map_entry->wired_count));
2299
2300				   if(next_entry->object.vm_object == object) {
2301					vm_object_reference_locked(shadow_object);
2302					next_entry->object.vm_object
2303							= shadow_object;
2304					vm_object_deallocate(object);
2305					next_entry->offset
2306						= next_entry->vme_prev->offset +
2307						(next_entry->vme_prev->vme_end
2308						- next_entry->vme_prev->vme_start);
2309						next_entry->needs_copy = FALSE;
2310					} else {
2311						panic("mach_make_memory_entry_64:"
2312						  " map entries out of sync\n");
2313					}
2314					total_size -=
2315						next_entry->vme_end
2316							- next_entry->vme_start;
2317					next_entry = next_entry->vme_next;
2318				}
2319
2320				/*
2321				 * Transfer our extra reference to the
2322				 * shadow object.
2323				 */
2324				vm_object_reference_locked(shadow_object);
2325				vm_object_deallocate(object); /* extra ref */
2326				object = shadow_object;
2327
2328				obj_off = (local_offset - map_entry->vme_start)
2329							 + map_entry->offset;
2330
2331				vm_map_lock_write_to_read(target_map);
2332	        	}
2333	   	}
2334
2335		/* note: in the future we can (if necessary) allow for  */
2336		/* memory object lists, this will better support        */
2337		/* fragmentation, but is it necessary?  The user should */
2338		/* be encouraged to create address space oriented       */
2339		/* shared objects from CLEAN memory regions which have  */
2340		/* a known and defined history.  i.e. no inheritence    */
2341		/* share, make this call before making the region the   */
2342		/* target of ipc's, etc.  The code above, protecting    */
2343		/* against delayed copy, etc. is mostly defensive.      */
2344
2345		wimg_mode = object->wimg_bits;
2346		if(!(object->nophyscache)) {
2347			if(access == MAP_MEM_IO) {
2348				wimg_mode = VM_WIMG_IO;
2349			} else if (access == MAP_MEM_COPYBACK) {
2350				wimg_mode = VM_WIMG_USE_DEFAULT;
2351			} else if (access == MAP_MEM_INNERWBACK) {
2352				wimg_mode = VM_WIMG_INNERWBACK;
2353			} else if (access == MAP_MEM_WTHRU) {
2354				wimg_mode = VM_WIMG_WTHRU;
2355			} else if (access == MAP_MEM_WCOMB) {
2356				wimg_mode = VM_WIMG_WCOMB;
2357			}
2358		}
2359
2360		object->true_share = TRUE;
2361		if (object->copy_strategy == MEMORY_OBJECT_COPY_SYMMETRIC)
2362			object->copy_strategy = MEMORY_OBJECT_COPY_DELAY;
2363
2364		/*
2365		 * The memory entry now points to this VM object and we
2366		 * need to hold a reference on the VM object.  Use the extra
2367		 * reference we took earlier to keep the object alive when we
2368		 * had to unlock it.
2369		 */
2370
2371		vm_map_unlock_read(target_map);
2372		if(real_map != target_map)
2373			vm_map_unlock_read(real_map);
2374
2375		if (object->wimg_bits != wimg_mode)
2376			vm_object_change_wimg_mode(object, wimg_mode);
2377
2378		/* the size of mapped entry that overlaps with our region */
2379		/* which is targeted for share.                           */
2380		/* (entry_end - entry_start) -                            */
2381		/*                   offset of our beg addr within entry  */
2382		/* it corresponds to this:                                */
2383
2384		if(map_size > mappable_size)
2385			map_size = mappable_size;
2386
2387		if (permission & MAP_MEM_NAMED_REUSE) {
2388			/*
2389			 * Compare what we got with the "parent_entry".
2390			 * If they match, re-use the "parent_entry" instead
2391			 * of creating a new one.
2392			 */
2393			if (parent_entry != NULL &&
2394			    parent_entry->backing.object == object &&
2395			    parent_entry->internal == object->internal &&
2396			    parent_entry->is_sub_map == FALSE &&
2397			    parent_entry->is_pager == FALSE &&
2398			    parent_entry->offset == obj_off &&
2399			    parent_entry->protection == protections &&
2400			    parent_entry->size == map_size) {
2401				/*
2402				 * We have a match: re-use "parent_entry".
2403				 */
2404				/* release our extra reference on object */
2405				vm_object_unlock(object);
2406				vm_object_deallocate(object);
2407				/* parent_entry->ref_count++; XXX ? */
2408				/* Get an extra send-right on handle */
2409				ipc_port_copy_send(parent_handle);
2410				*object_handle = parent_handle;
2411				return KERN_SUCCESS;
2412			} else {
2413				/*
2414				 * No match: we need to create a new entry.
2415				 * fall through...
2416				 */
2417			}
2418		}
2419
2420		vm_object_unlock(object);
2421		if (mach_memory_entry_allocate(&user_entry, &user_handle)
2422		    != KERN_SUCCESS) {
2423			/* release our unused reference on the object */
2424			vm_object_deallocate(object);
2425			return KERN_FAILURE;
2426		}
2427
2428		user_entry->backing.object = object;
2429		user_entry->internal = object->internal;
2430		user_entry->is_sub_map = FALSE;
2431		user_entry->is_pager = FALSE;
2432		user_entry->offset = obj_off;
2433		user_entry->protection = protections;
2434		SET_MAP_MEM(GET_MAP_MEM(permission), user_entry->protection);
2435		user_entry->size = map_size;
2436
2437		/* user_object pager and internal fields are not used */
2438		/* when the object field is filled in.		      */
2439
2440		*size = CAST_DOWN(vm_size_t, map_size);
2441		*object_handle = user_handle;
2442		return KERN_SUCCESS;
2443
2444	} else {
2445		/* The new object will be base on an existing named object */
2446
2447		if (parent_entry == NULL) {
2448			kr = KERN_INVALID_ARGUMENT;
2449			goto make_mem_done;
2450		}
2451		if((offset + map_size) > parent_entry->size) {
2452			kr = KERN_INVALID_ARGUMENT;
2453			goto make_mem_done;
2454		}
2455
2456		if (mask_protections) {
2457			/*
2458			 * The caller asked us to use the "protections" as
2459			 * a mask, so restrict "protections" to what this
2460			 * mapping actually allows.
2461			 */
2462			protections &= parent_entry->protection;
2463		}
2464		if((protections & parent_entry->protection) != protections) {
2465			kr = KERN_PROTECTION_FAILURE;
2466			goto make_mem_done;
2467		}
2468
2469		if (mach_memory_entry_allocate(&user_entry, &user_handle)
2470		    != KERN_SUCCESS) {
2471			kr = KERN_FAILURE;
2472			goto make_mem_done;
2473		}
2474
2475		user_entry->size = map_size;
2476		user_entry->offset = parent_entry->offset + map_offset;
2477		user_entry->is_sub_map = parent_entry->is_sub_map;
2478		user_entry->is_pager = parent_entry->is_pager;
2479		user_entry->internal = parent_entry->internal;
2480		user_entry->protection = protections;
2481
2482		if(access != MAP_MEM_NOOP) {
2483		   SET_MAP_MEM(access, user_entry->protection);
2484		}
2485
2486		if(parent_entry->is_sub_map) {
2487		   user_entry->backing.map = parent_entry->backing.map;
2488		   vm_map_lock(user_entry->backing.map);
2489		   user_entry->backing.map->ref_count++;
2490		   vm_map_unlock(user_entry->backing.map);
2491		}
2492		else if (parent_entry->is_pager) {
2493		   user_entry->backing.pager = parent_entry->backing.pager;
2494		   /* JMM - don't we need a reference here? */
2495		} else {
2496		   object = parent_entry->backing.object;
2497		   assert(object != VM_OBJECT_NULL);
2498		   user_entry->backing.object = object;
2499		   /* we now point to this object, hold on */
2500		   vm_object_reference(object);
2501		   vm_object_lock(object);
2502		   object->true_share = TRUE;
2503		   if (object->copy_strategy == MEMORY_OBJECT_COPY_SYMMETRIC)
2504			object->copy_strategy = MEMORY_OBJECT_COPY_DELAY;
2505		   vm_object_unlock(object);
2506		}
2507		*size = CAST_DOWN(vm_size_t, map_size);
2508		*object_handle = user_handle;
2509		return KERN_SUCCESS;
2510	}
2511
2512make_mem_done:
2513	if (user_handle != IP_NULL) {
2514		/*
2515		 * Releasing "user_handle" causes the kernel object
2516		 * associated with it ("user_entry" here) to also be
2517		 * released and freed.
2518		 */
2519		mach_memory_entry_port_release(user_handle);
2520	}
2521	return kr;
2522}
2523
2524kern_return_t
2525_mach_make_memory_entry(
2526	vm_map_t		target_map,
2527	memory_object_size_t	*size,
2528	memory_object_offset_t	offset,
2529	vm_prot_t		permission,
2530	ipc_port_t		*object_handle,
2531	ipc_port_t		parent_entry)
2532{
2533	memory_object_size_t 	mo_size;
2534	kern_return_t		kr;
2535
2536	mo_size = (memory_object_size_t)*size;
2537	kr = mach_make_memory_entry_64(target_map, &mo_size,
2538			(memory_object_offset_t)offset, permission, object_handle,
2539			parent_entry);
2540	*size = mo_size;
2541	return kr;
2542}
2543
2544kern_return_t
2545mach_make_memory_entry(
2546	vm_map_t		target_map,
2547	vm_size_t		*size,
2548	vm_offset_t		offset,
2549	vm_prot_t		permission,
2550	ipc_port_t		*object_handle,
2551	ipc_port_t		parent_entry)
2552{
2553	memory_object_size_t 	mo_size;
2554	kern_return_t		kr;
2555
2556	mo_size = (memory_object_size_t)*size;
2557	kr = mach_make_memory_entry_64(target_map, &mo_size,
2558			(memory_object_offset_t)offset, permission, object_handle,
2559			parent_entry);
2560	*size = CAST_DOWN(vm_size_t, mo_size);
2561	return kr;
2562}
2563
2564/*
2565 *	task_wire
2566 *
2567 *	Set or clear the map's wiring_required flag.  This flag, if set,
2568 *	will cause all future virtual memory allocation to allocate
2569 *	user wired memory.  Unwiring pages wired down as a result of
2570 *	this routine is done with the vm_wire interface.
2571 */
2572kern_return_t
2573task_wire(
2574	vm_map_t	map,
2575	boolean_t	must_wire)
2576{
2577	if (map == VM_MAP_NULL)
2578		return(KERN_INVALID_ARGUMENT);
2579
2580	if (must_wire)
2581		map->wiring_required = TRUE;
2582	else
2583		map->wiring_required = FALSE;
2584
2585	return(KERN_SUCCESS);
2586}
2587
2588__private_extern__ kern_return_t
2589mach_memory_entry_allocate(
2590	vm_named_entry_t	*user_entry_p,
2591	ipc_port_t		*user_handle_p)
2592{
2593	vm_named_entry_t	user_entry;
2594	ipc_port_t		user_handle;
2595	ipc_port_t		previous;
2596
2597	user_entry = (vm_named_entry_t) kalloc(sizeof *user_entry);
2598	if (user_entry == NULL)
2599		return KERN_FAILURE;
2600
2601	named_entry_lock_init(user_entry);
2602
2603	user_handle = ipc_port_alloc_kernel();
2604	if (user_handle == IP_NULL) {
2605		kfree(user_entry, sizeof *user_entry);
2606		return KERN_FAILURE;
2607	}
2608	ip_lock(user_handle);
2609
2610	/* make a sonce right */
2611	user_handle->ip_sorights++;
2612	ip_reference(user_handle);
2613
2614	user_handle->ip_destination = IP_NULL;
2615	user_handle->ip_receiver_name = MACH_PORT_NULL;
2616	user_handle->ip_receiver = ipc_space_kernel;
2617
2618	/* make a send right */
2619        user_handle->ip_mscount++;
2620        user_handle->ip_srights++;
2621        ip_reference(user_handle);
2622
2623	ipc_port_nsrequest(user_handle, 1, user_handle, &previous);
2624	/* nsrequest unlocks user_handle */
2625
2626	user_entry->backing.pager = NULL;
2627	user_entry->is_sub_map = FALSE;
2628	user_entry->is_pager = FALSE;
2629	user_entry->internal = FALSE;
2630	user_entry->size = 0;
2631	user_entry->offset = 0;
2632	user_entry->protection = VM_PROT_NONE;
2633	user_entry->ref_count = 1;
2634
2635	ipc_kobject_set(user_handle, (ipc_kobject_t) user_entry,
2636			IKOT_NAMED_ENTRY);
2637
2638	*user_entry_p = user_entry;
2639	*user_handle_p = user_handle;
2640
2641	return KERN_SUCCESS;
2642}
2643
2644/*
2645 *	mach_memory_object_memory_entry_64
2646 *
2647 *	Create a named entry backed by the provided pager.
2648 *
2649 *	JMM - we need to hold a reference on the pager -
2650 *	and release it when the named entry is destroyed.
2651 */
2652kern_return_t
2653mach_memory_object_memory_entry_64(
2654	host_t			host,
2655	boolean_t		internal,
2656	vm_object_offset_t	size,
2657	vm_prot_t		permission,
2658 	memory_object_t		pager,
2659	ipc_port_t		*entry_handle)
2660{
2661	unsigned int		access;
2662	vm_named_entry_t	user_entry;
2663	ipc_port_t		user_handle;
2664
2665        if (host == HOST_NULL)
2666                return(KERN_INVALID_HOST);
2667
2668	if (mach_memory_entry_allocate(&user_entry, &user_handle)
2669	    != KERN_SUCCESS) {
2670		return KERN_FAILURE;
2671	}
2672
2673	user_entry->backing.pager = pager;
2674	user_entry->size = size;
2675	user_entry->offset = 0;
2676	user_entry->protection = permission & VM_PROT_ALL;
2677	access = GET_MAP_MEM(permission);
2678	SET_MAP_MEM(access, user_entry->protection);
2679	user_entry->internal = internal;
2680	user_entry->is_sub_map = FALSE;
2681	user_entry->is_pager = TRUE;
2682	assert(user_entry->ref_count == 1);
2683
2684	*entry_handle = user_handle;
2685	return KERN_SUCCESS;
2686}
2687
2688kern_return_t
2689mach_memory_object_memory_entry(
2690	host_t		host,
2691	boolean_t	internal,
2692	vm_size_t	size,
2693	vm_prot_t	permission,
2694 	memory_object_t	pager,
2695	ipc_port_t	*entry_handle)
2696{
2697	return mach_memory_object_memory_entry_64( host, internal,
2698		(vm_object_offset_t)size, permission, pager, entry_handle);
2699}
2700
2701
2702kern_return_t
2703mach_memory_entry_purgable_control(
2704	ipc_port_t	entry_port,
2705	vm_purgable_t	control,
2706	int		*state)
2707{
2708	kern_return_t		kr;
2709	vm_named_entry_t	mem_entry;
2710	vm_object_t		object;
2711
2712	if (entry_port == IP_NULL ||
2713	    ip_kotype(entry_port) != IKOT_NAMED_ENTRY) {
2714		return KERN_INVALID_ARGUMENT;
2715	}
2716	if (control != VM_PURGABLE_SET_STATE &&
2717	    control != VM_PURGABLE_GET_STATE)
2718		return(KERN_INVALID_ARGUMENT);
2719
2720	if (control == VM_PURGABLE_SET_STATE &&
2721	    (((*state & ~(VM_PURGABLE_ALL_MASKS)) != 0) ||
2722	     ((*state & VM_PURGABLE_STATE_MASK) > VM_PURGABLE_STATE_MASK)))
2723		return(KERN_INVALID_ARGUMENT);
2724
2725	mem_entry = (vm_named_entry_t) entry_port->ip_kobject;
2726
2727	named_entry_lock(mem_entry);
2728
2729	if (mem_entry->is_sub_map || mem_entry->is_pager) {
2730		named_entry_unlock(mem_entry);
2731		return KERN_INVALID_ARGUMENT;
2732	}
2733
2734	object = mem_entry->backing.object;
2735	if (object == VM_OBJECT_NULL) {
2736		named_entry_unlock(mem_entry);
2737		return KERN_INVALID_ARGUMENT;
2738	}
2739
2740	vm_object_lock(object);
2741
2742	/* check that named entry covers entire object ? */
2743	if (mem_entry->offset != 0 || object->vo_size != mem_entry->size) {
2744		vm_object_unlock(object);
2745		named_entry_unlock(mem_entry);
2746		return KERN_INVALID_ARGUMENT;
2747	}
2748
2749	named_entry_unlock(mem_entry);
2750
2751	kr = vm_object_purgable_control(object, control, state);
2752
2753	vm_object_unlock(object);
2754
2755	return kr;
2756}
2757
2758/*
2759 * mach_memory_entry_port_release:
2760 *
2761 * Release a send right on a named entry port.  This is the correct
2762 * way to destroy a named entry.  When the last right on the port is
2763 * released, ipc_kobject_destroy() will call mach_destroy_memory_entry().
2764 */
2765void
2766mach_memory_entry_port_release(
2767	ipc_port_t	port)
2768{
2769	assert(ip_kotype(port) == IKOT_NAMED_ENTRY);
2770	ipc_port_release_send(port);
2771}
2772
2773/*
2774 * mach_destroy_memory_entry:
2775 *
2776 * Drops a reference on a memory entry and destroys the memory entry if
2777 * there are no more references on it.
2778 * NOTE: This routine should not be called to destroy a memory entry from the
2779 * kernel, as it will not release the Mach port associated with the memory
2780 * entry.  The proper way to destroy a memory entry in the kernel is to
2781 * call mach_memort_entry_port_release() to release the kernel's send-right on
2782 * the memory entry's port.  When the last send right is released, the memory
2783 * entry will be destroyed via ipc_kobject_destroy().
2784 */
2785void
2786mach_destroy_memory_entry(
2787	ipc_port_t	port)
2788{
2789	vm_named_entry_t	named_entry;
2790#if MACH_ASSERT
2791	assert(ip_kotype(port) == IKOT_NAMED_ENTRY);
2792#endif /* MACH_ASSERT */
2793	named_entry = (vm_named_entry_t)port->ip_kobject;
2794
2795	named_entry_lock(named_entry);
2796	named_entry->ref_count -= 1;
2797
2798	if(named_entry->ref_count == 0) {
2799		if (named_entry->is_sub_map) {
2800			vm_map_deallocate(named_entry->backing.map);
2801		} else if (!named_entry->is_pager) {
2802			/* release the memory object we've been pointing to */
2803			vm_object_deallocate(named_entry->backing.object);
2804		} /* else JMM - need to drop reference on pager in that case */
2805
2806		named_entry_unlock(named_entry);
2807		named_entry_lock_destroy(named_entry);
2808
2809		kfree((void *) port->ip_kobject,
2810		      sizeof (struct vm_named_entry));
2811	} else
2812		named_entry_unlock(named_entry);
2813}
2814
2815/* Allow manipulation of individual page state.  This is actually part of */
2816/* the UPL regimen but takes place on the memory entry rather than on a UPL */
2817
2818kern_return_t
2819mach_memory_entry_page_op(
2820	ipc_port_t		entry_port,
2821	vm_object_offset_t	offset,
2822	int			ops,
2823	ppnum_t			*phys_entry,
2824	int			*flags)
2825{
2826	vm_named_entry_t	mem_entry;
2827	vm_object_t		object;
2828	kern_return_t		kr;
2829
2830	if (entry_port == IP_NULL ||
2831	    ip_kotype(entry_port) != IKOT_NAMED_ENTRY) {
2832		return KERN_INVALID_ARGUMENT;
2833	}
2834
2835	mem_entry = (vm_named_entry_t) entry_port->ip_kobject;
2836
2837	named_entry_lock(mem_entry);
2838
2839	if (mem_entry->is_sub_map || mem_entry->is_pager) {
2840		named_entry_unlock(mem_entry);
2841		return KERN_INVALID_ARGUMENT;
2842	}
2843
2844	object = mem_entry->backing.object;
2845	if (object == VM_OBJECT_NULL) {
2846		named_entry_unlock(mem_entry);
2847		return KERN_INVALID_ARGUMENT;
2848	}
2849
2850	vm_object_reference(object);
2851	named_entry_unlock(mem_entry);
2852
2853	kr = vm_object_page_op(object, offset, ops, phys_entry, flags);
2854
2855	vm_object_deallocate(object);
2856
2857	return kr;
2858}
2859
2860/*
2861 * mach_memory_entry_range_op offers performance enhancement over
2862 * mach_memory_entry_page_op for page_op functions which do not require page
2863 * level state to be returned from the call.  Page_op was created to provide
2864 * a low-cost alternative to page manipulation via UPLs when only a single
2865 * page was involved.  The range_op call establishes the ability in the _op
2866 * family of functions to work on multiple pages where the lack of page level
2867 * state handling allows the caller to avoid the overhead of the upl structures.
2868 */
2869
2870kern_return_t
2871mach_memory_entry_range_op(
2872	ipc_port_t		entry_port,
2873	vm_object_offset_t	offset_beg,
2874	vm_object_offset_t	offset_end,
2875	int                     ops,
2876	int                     *range)
2877{
2878	vm_named_entry_t	mem_entry;
2879	vm_object_t		object;
2880	kern_return_t		kr;
2881
2882	if (entry_port == IP_NULL ||
2883	    ip_kotype(entry_port) != IKOT_NAMED_ENTRY) {
2884		return KERN_INVALID_ARGUMENT;
2885	}
2886
2887	mem_entry = (vm_named_entry_t) entry_port->ip_kobject;
2888
2889	named_entry_lock(mem_entry);
2890
2891	if (mem_entry->is_sub_map || mem_entry->is_pager) {
2892		named_entry_unlock(mem_entry);
2893		return KERN_INVALID_ARGUMENT;
2894	}
2895
2896	object = mem_entry->backing.object;
2897	if (object == VM_OBJECT_NULL) {
2898		named_entry_unlock(mem_entry);
2899		return KERN_INVALID_ARGUMENT;
2900	}
2901
2902	vm_object_reference(object);
2903	named_entry_unlock(mem_entry);
2904
2905	kr = vm_object_range_op(object,
2906				offset_beg,
2907				offset_end,
2908				ops,
2909				(uint32_t *) range);
2910
2911	vm_object_deallocate(object);
2912
2913	return kr;
2914}
2915
2916
2917kern_return_t
2918set_dp_control_port(
2919	host_priv_t	host_priv,
2920	ipc_port_t	control_port)
2921{
2922        if (host_priv == HOST_PRIV_NULL)
2923                return (KERN_INVALID_HOST);
2924
2925	if (IP_VALID(dynamic_pager_control_port))
2926		ipc_port_release_send(dynamic_pager_control_port);
2927
2928	dynamic_pager_control_port = control_port;
2929	return KERN_SUCCESS;
2930}
2931
2932kern_return_t
2933get_dp_control_port(
2934	host_priv_t	host_priv,
2935	ipc_port_t	*control_port)
2936{
2937        if (host_priv == HOST_PRIV_NULL)
2938                return (KERN_INVALID_HOST);
2939
2940	*control_port = ipc_port_copy_send(dynamic_pager_control_port);
2941	return KERN_SUCCESS;
2942
2943}
2944
2945/* ******* Temporary Internal calls to UPL for BSD ***** */
2946
2947extern int kernel_upl_map(
2948	vm_map_t        map,
2949	upl_t           upl,
2950	vm_offset_t     *dst_addr);
2951
2952extern int kernel_upl_unmap(
2953	vm_map_t        map,
2954	upl_t           upl);
2955
2956extern int kernel_upl_commit(
2957	upl_t                   upl,
2958	upl_page_info_t         *pl,
2959	mach_msg_type_number_t	 count);
2960
2961extern int kernel_upl_commit_range(
2962	upl_t                   upl,
2963	upl_offset_t             offset,
2964	upl_size_t		size,
2965	int			flags,
2966	upl_page_info_array_t	pl,
2967	mach_msg_type_number_t	count);
2968
2969extern int kernel_upl_abort(
2970	upl_t                   upl,
2971	int                     abort_type);
2972
2973extern int kernel_upl_abort_range(
2974	upl_t                   upl,
2975	upl_offset_t             offset,
2976	upl_size_t               size,
2977	int                     abort_flags);
2978
2979
2980kern_return_t
2981kernel_upl_map(
2982	vm_map_t	map,
2983	upl_t		upl,
2984	vm_offset_t	*dst_addr)
2985{
2986	return vm_upl_map(map, upl, dst_addr);
2987}
2988
2989
2990kern_return_t
2991kernel_upl_unmap(
2992	vm_map_t	map,
2993	upl_t		upl)
2994{
2995	return vm_upl_unmap(map, upl);
2996}
2997
2998kern_return_t
2999kernel_upl_commit(
3000	upl_t                   upl,
3001	upl_page_info_t        *pl,
3002	mach_msg_type_number_t  count)
3003{
3004	kern_return_t 	kr;
3005
3006	kr = upl_commit(upl, pl, count);
3007	upl_deallocate(upl);
3008	return kr;
3009}
3010
3011
3012kern_return_t
3013kernel_upl_commit_range(
3014	upl_t 			upl,
3015	upl_offset_t		offset,
3016	upl_size_t		size,
3017	int			flags,
3018	upl_page_info_array_t   pl,
3019	mach_msg_type_number_t  count)
3020{
3021	boolean_t		finished = FALSE;
3022	kern_return_t 		kr;
3023
3024	if (flags & UPL_COMMIT_FREE_ON_EMPTY)
3025		flags |= UPL_COMMIT_NOTIFY_EMPTY;
3026
3027	if (flags & UPL_COMMIT_KERNEL_ONLY_FLAGS) {
3028		return KERN_INVALID_ARGUMENT;
3029	}
3030
3031	kr = upl_commit_range(upl, offset, size, flags, pl, count, &finished);
3032
3033	if ((flags & UPL_COMMIT_NOTIFY_EMPTY) && finished)
3034		upl_deallocate(upl);
3035
3036	return kr;
3037}
3038
3039kern_return_t
3040kernel_upl_abort_range(
3041	upl_t			upl,
3042	upl_offset_t		offset,
3043	upl_size_t		size,
3044	int			abort_flags)
3045{
3046	kern_return_t 		kr;
3047	boolean_t		finished = FALSE;
3048
3049	if (abort_flags & UPL_COMMIT_FREE_ON_EMPTY)
3050		abort_flags |= UPL_COMMIT_NOTIFY_EMPTY;
3051
3052	kr = upl_abort_range(upl, offset, size, abort_flags, &finished);
3053
3054	if ((abort_flags & UPL_COMMIT_FREE_ON_EMPTY) && finished)
3055		upl_deallocate(upl);
3056
3057	return kr;
3058}
3059
3060kern_return_t
3061kernel_upl_abort(
3062	upl_t			upl,
3063	int			abort_type)
3064{
3065	kern_return_t	kr;
3066
3067	kr = upl_abort(upl, abort_type);
3068	upl_deallocate(upl);
3069	return kr;
3070}
3071
3072/*
3073 * Now a kernel-private interface (for BootCache
3074 * use only).  Need a cleaner way to create an
3075 * empty vm_map() and return a handle to it.
3076 */
3077
3078kern_return_t
3079vm_region_object_create(
3080	__unused vm_map_t	target_map,
3081	vm_size_t		size,
3082	ipc_port_t		*object_handle)
3083{
3084	vm_named_entry_t	user_entry;
3085	ipc_port_t		user_handle;
3086
3087	vm_map_t	new_map;
3088
3089	if (mach_memory_entry_allocate(&user_entry, &user_handle)
3090	    != KERN_SUCCESS) {
3091		return KERN_FAILURE;
3092	}
3093
3094	/* Create a named object based on a submap of specified size */
3095
3096	new_map = vm_map_create(PMAP_NULL, VM_MAP_MIN_ADDRESS,
3097				vm_map_round_page(size), TRUE);
3098
3099	user_entry->backing.map = new_map;
3100	user_entry->internal = TRUE;
3101	user_entry->is_sub_map = TRUE;
3102	user_entry->offset = 0;
3103	user_entry->protection = VM_PROT_ALL;
3104	user_entry->size = size;
3105	assert(user_entry->ref_count == 1);
3106
3107	*object_handle = user_handle;
3108	return KERN_SUCCESS;
3109
3110}
3111
3112ppnum_t vm_map_get_phys_page(		/* forward */
3113	vm_map_t	map,
3114	vm_offset_t	offset);
3115
3116ppnum_t
3117vm_map_get_phys_page(
3118	vm_map_t		map,
3119	vm_offset_t		addr)
3120{
3121	vm_object_offset_t	offset;
3122	vm_object_t		object;
3123	vm_map_offset_t 	map_offset;
3124	vm_map_entry_t		entry;
3125	ppnum_t			phys_page = 0;
3126
3127	map_offset = vm_map_trunc_page(addr);
3128
3129	vm_map_lock(map);
3130	while (vm_map_lookup_entry(map, map_offset, &entry)) {
3131
3132		if (entry->object.vm_object == VM_OBJECT_NULL) {
3133			vm_map_unlock(map);
3134			return (ppnum_t) 0;
3135		}
3136		if (entry->is_sub_map) {
3137			vm_map_t	old_map;
3138			vm_map_lock(entry->object.sub_map);
3139			old_map = map;
3140			map = entry->object.sub_map;
3141			map_offset = entry->offset + (map_offset - entry->vme_start);
3142			vm_map_unlock(old_map);
3143			continue;
3144		}
3145		if (entry->object.vm_object->phys_contiguous) {
3146			/* These are  not standard pageable memory mappings */
3147			/* If they are not present in the object they will  */
3148			/* have to be picked up from the pager through the  */
3149			/* fault mechanism.  */
3150			if(entry->object.vm_object->vo_shadow_offset == 0) {
3151				/* need to call vm_fault */
3152				vm_map_unlock(map);
3153				vm_fault(map, map_offset, VM_PROT_NONE,
3154					FALSE, THREAD_UNINT, NULL, 0);
3155				vm_map_lock(map);
3156				continue;
3157			}
3158			offset = entry->offset + (map_offset - entry->vme_start);
3159			phys_page = (ppnum_t)
3160				((entry->object.vm_object->vo_shadow_offset
3161							+ offset) >> 12);
3162			break;
3163
3164		}
3165		offset = entry->offset + (map_offset - entry->vme_start);
3166		object = entry->object.vm_object;
3167		vm_object_lock(object);
3168		while (TRUE) {
3169			vm_page_t dst_page = vm_page_lookup(object,offset);
3170	                if(dst_page == VM_PAGE_NULL) {
3171				if(object->shadow) {
3172					vm_object_t old_object;
3173					vm_object_lock(object->shadow);
3174					old_object = object;
3175					offset = offset + object->vo_shadow_offset;
3176					object = object->shadow;
3177					vm_object_unlock(old_object);
3178				} else {
3179					vm_object_unlock(object);
3180					break;
3181				}
3182			} else {
3183				phys_page = (ppnum_t)(dst_page->phys_page);
3184				vm_object_unlock(object);
3185				break;
3186			}
3187		}
3188		break;
3189
3190	}
3191
3192	vm_map_unlock(map);
3193	return phys_page;
3194}
3195
3196
3197
3198kern_return_t kernel_object_iopl_request(	/* forward */
3199	vm_named_entry_t	named_entry,
3200	memory_object_offset_t	offset,
3201	upl_size_t		*upl_size,
3202	upl_t			*upl_ptr,
3203	upl_page_info_array_t	user_page_list,
3204	unsigned int		*page_list_count,
3205	int			*flags);
3206
3207kern_return_t
3208kernel_object_iopl_request(
3209	vm_named_entry_t	named_entry,
3210	memory_object_offset_t	offset,
3211	upl_size_t		*upl_size,
3212	upl_t			*upl_ptr,
3213	upl_page_info_array_t	user_page_list,
3214	unsigned int		*page_list_count,
3215	int			*flags)
3216{
3217	vm_object_t		object;
3218	kern_return_t		ret;
3219
3220	int			caller_flags;
3221
3222	caller_flags = *flags;
3223
3224	if (caller_flags & ~UPL_VALID_FLAGS) {
3225		/*
3226		 * For forward compatibility's sake,
3227		 * reject any unknown flag.
3228		 */
3229		return KERN_INVALID_VALUE;
3230	}
3231
3232	/* a few checks to make sure user is obeying rules */
3233	if(*upl_size == 0) {
3234		if(offset >= named_entry->size)
3235			return(KERN_INVALID_RIGHT);
3236		*upl_size = (upl_size_t) (named_entry->size - offset);
3237		if (*upl_size != named_entry->size - offset)
3238			return KERN_INVALID_ARGUMENT;
3239	}
3240	if(caller_flags & UPL_COPYOUT_FROM) {
3241		if((named_entry->protection & VM_PROT_READ)
3242					!= VM_PROT_READ) {
3243			return(KERN_INVALID_RIGHT);
3244		}
3245	} else {
3246		if((named_entry->protection &
3247			(VM_PROT_READ | VM_PROT_WRITE))
3248			!= (VM_PROT_READ | VM_PROT_WRITE)) {
3249			return(KERN_INVALID_RIGHT);
3250		}
3251	}
3252	if(named_entry->size < (offset + *upl_size))
3253		return(KERN_INVALID_ARGUMENT);
3254
3255	/* the callers parameter offset is defined to be the */
3256	/* offset from beginning of named entry offset in object */
3257	offset = offset + named_entry->offset;
3258
3259	if(named_entry->is_sub_map)
3260		return (KERN_INVALID_ARGUMENT);
3261
3262	named_entry_lock(named_entry);
3263
3264	if (named_entry->is_pager) {
3265		object = vm_object_enter(named_entry->backing.pager,
3266				named_entry->offset + named_entry->size,
3267				named_entry->internal,
3268				FALSE,
3269				FALSE);
3270		if (object == VM_OBJECT_NULL) {
3271			named_entry_unlock(named_entry);
3272			return(KERN_INVALID_OBJECT);
3273		}
3274
3275		/* JMM - drop reference on the pager here? */
3276
3277		/* create an extra reference for the object */
3278		vm_object_lock(object);
3279		vm_object_reference_locked(object);
3280		named_entry->backing.object = object;
3281		named_entry->is_pager = FALSE;
3282		named_entry_unlock(named_entry);
3283
3284		/* wait for object (if any) to be ready */
3285		if (!named_entry->internal) {
3286			while (!object->pager_ready) {
3287				vm_object_wait(object,
3288					       VM_OBJECT_EVENT_PAGER_READY,
3289					       THREAD_UNINT);
3290				vm_object_lock(object);
3291			}
3292		}
3293		vm_object_unlock(object);
3294
3295	} else {
3296		/* This is the case where we are going to operate */
3297		/* an an already known object.  If the object is */
3298		/* not ready it is internal.  An external     */
3299		/* object cannot be mapped until it is ready  */
3300		/* we can therefore avoid the ready check     */
3301		/* in this case.  */
3302		object = named_entry->backing.object;
3303		vm_object_reference(object);
3304		named_entry_unlock(named_entry);
3305	}
3306
3307	if (!object->private) {
3308		if (*upl_size > (MAX_UPL_TRANSFER*PAGE_SIZE))
3309			*upl_size = (MAX_UPL_TRANSFER*PAGE_SIZE);
3310		if (object->phys_contiguous) {
3311			*flags = UPL_PHYS_CONTIG;
3312		} else {
3313			*flags = 0;
3314		}
3315	} else {
3316		*flags = UPL_DEV_MEMORY | UPL_PHYS_CONTIG;
3317	}
3318
3319	ret = vm_object_iopl_request(object,
3320				     offset,
3321				     *upl_size,
3322				     upl_ptr,
3323				     user_page_list,
3324				     page_list_count,
3325				     caller_flags);
3326	vm_object_deallocate(object);
3327	return ret;
3328}
3329