1/*
2 * Copyright (c) 2000-2007 Apple Inc. All rights reserved.
3 *
4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5 *
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. The rights granted to you under the License
10 * may not be used to create, or enable the creation or redistribution of,
11 * unlawful or unlicensed copies of an Apple operating system, or to
12 * circumvent, violate, or enable the circumvention or violation of, any
13 * terms of an Apple operating system software license agreement.
14 *
15 * Please obtain a copy of the License at
16 * http://www.opensource.apple.com/apsl/ and read it before using this file.
17 *
18 * The Original Code and all software distributed under the License are
19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23 * Please see the License for the specific language governing rights and
24 * limitations under the License.
25 *
26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27 */
28/*
29 * @OSF_COPYRIGHT@
30 */
31/*
32 * Mach Operating System
33 * Copyright (c) 1991,1990,1989,1988 Carnegie Mellon University
34 * All Rights Reserved.
35 *
36 * Permission to use, copy, modify and distribute this software and its
37 * documentation is hereby granted, provided that both the copyright
38 * notice and this permission notice appear in all copies of the
39 * software, derivative works or modified versions, and any portions
40 * thereof, and that both notices appear in supporting documentation.
41 *
42 * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
43 * CONDITION.  CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND FOR
44 * ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
45 *
46 * Carnegie Mellon requests users of this software to return to
47 *
48 *  Software Distribution Coordinator  or  Software.Distribution@CS.CMU.EDU
49 *  School of Computer Science
50 *  Carnegie Mellon University
51 *  Pittsburgh PA 15213-3890
52 *
53 * any improvements or extensions that they make and grant Carnegie Mellon
54 * the rights to redistribute these changes.
55 */
56/*
57 */
58/*
59 *	File:	vm/vm_user.c
60 *	Author:	Avadis Tevanian, Jr., Michael Wayne Young
61 *
62 *	User-exported virtual memory functions.
63 */
64
65/*
66 * There are three implementations of the "XXX_allocate" functionality in
67 * the kernel: mach_vm_allocate (for any task on the platform), vm_allocate
68 * (for a task with the same address space size, especially the current task),
69 * and vm32_vm_allocate (for the specific case of a 32-bit task). vm_allocate
70 * in the kernel should only be used on the kernel_task. vm32_vm_allocate only
71 * makes sense on platforms where a user task can either be 32 or 64, or the kernel
72 * task can be 32 or 64. mach_vm_allocate makes sense everywhere, and is preferred
73 * for new code.
74 *
75 * The entrypoints into the kernel are more complex. All platforms support a
76 * mach_vm_allocate-style API (subsystem 4800) which operates with the largest
77 * size types for the platform. On platforms that only support U32/K32,
78 * subsystem 4800 is all you need. On platforms that support both U32 and U64,
 * subsystem 3800 is used to disambiguate the size of parameters, and they will
 * always be 32-bit and call into the vm32_vm_allocate APIs. On non-U32/K32 platforms,
 * the MIG glue should never call into vm_allocate directly, because the calling
 * task and kernel_task are unlikely to use the same size parameters.
83 *
84 * New VM call implementations should be added here and to mach_vm.defs
85 * (subsystem 4800), and use mach_vm_* "wide" types.
86 */
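
/*
 * Illustrative sketch (not part of this file's build): how a user-space
 * caller exercises the preferred "wide" subsystem 4800 interface.  The
 * task port comes from mach_task_self(); error handling is elided.
 *
 *	#include <mach/mach.h>
 *	#include <mach/mach_vm.h>
 *
 *	mach_vm_address_t	addr = 0;
 *	mach_vm_size_t		size = 4096;
 *	kern_return_t		kr;
 *
 *	// Let the kernel choose the address (VM_FLAGS_ANYWHERE);
 *	// the new range is zero-filled.
 *	kr = mach_vm_allocate(mach_task_self(), &addr, size,
 *			      VM_FLAGS_ANYWHERE);
 *	if (kr == KERN_SUCCESS)
 *		kr = mach_vm_deallocate(mach_task_self(), addr, size);
 */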
87
88#include <debug.h>
89
90#include <vm_cpm.h>
91#include <mach/boolean.h>
92#include <mach/kern_return.h>
93#include <mach/mach_types.h>	/* to get vm_address_t */
94#include <mach/memory_object.h>
95#include <mach/std_types.h>	/* to get pointer_t */
96#include <mach/upl.h>
97#include <mach/vm_attributes.h>
98#include <mach/vm_param.h>
99#include <mach/vm_statistics.h>
100#include <mach/mach_syscalls.h>
101
102#include <mach/host_priv_server.h>
103#include <mach/mach_vm_server.h>
104#include <mach/vm_map_server.h>
105
106#include <kern/host.h>
107#include <kern/kalloc.h>
108#include <kern/task.h>
109#include <kern/misc_protos.h>
110#include <vm/vm_fault.h>
111#include <vm/vm_map.h>
112#include <vm/vm_object.h>
113#include <vm/vm_page.h>
114#include <vm/memory_object.h>
115#include <vm/vm_pageout.h>
116#include <vm/vm_protos.h>
117
118vm_size_t        upl_offset_to_pagelist = 0;
119
120#if	VM_CPM
121#include <vm/cpm.h>
122#endif	/* VM_CPM */
123
124ipc_port_t	dynamic_pager_control_port=NULL;
125
126/*
 *	mach_vm_allocate allocates "zero fill" memory in the specified
128 *	map.
129 */
130kern_return_t
131mach_vm_allocate(
132	vm_map_t		map,
133	mach_vm_offset_t	*addr,
134	mach_vm_size_t	size,
135	int			flags)
136{
137	vm_map_offset_t map_addr;
138	vm_map_size_t	map_size;
139	kern_return_t	result;
140	boolean_t	anywhere;
141
142	/* filter out any kernel-only flags */
143	if (flags & ~VM_FLAGS_USER_ALLOCATE)
144		return KERN_INVALID_ARGUMENT;
145
146	if (map == VM_MAP_NULL)
147		return(KERN_INVALID_ARGUMENT);
148	if (size == 0) {
149		*addr = 0;
150		return(KERN_SUCCESS);
151	}
152
153	anywhere = ((VM_FLAGS_ANYWHERE & flags) != 0);
154	if (anywhere) {
155		/*
156		 * No specific address requested, so start candidate address
157		 * search at the minimum address in the map.  However, if that
158		 * minimum is 0, bump it up by PAGE_SIZE.  We want to limit
159		 * allocations of PAGEZERO to explicit requests since its
160		 * normal use is to catch dereferences of NULL and many
161		 * applications also treat pointers with a value of 0 as
		 * special and suddenly having address 0 contain usable
163		 * memory would tend to confuse those applications.
164		 */
165		map_addr = vm_map_min(map);
166		if (map_addr == 0)
167			map_addr += VM_MAP_PAGE_SIZE(map);
168	} else
169		map_addr = vm_map_trunc_page(*addr,
170					     VM_MAP_PAGE_MASK(map));
171	map_size = vm_map_round_page(size,
172				     VM_MAP_PAGE_MASK(map));
173	if (map_size == 0) {
174	  return(KERN_INVALID_ARGUMENT);
175	}
176
177	result = vm_map_enter(
178			map,
179			&map_addr,
180			map_size,
181			(vm_map_offset_t)0,
182			flags,
183			VM_OBJECT_NULL,
184			(vm_object_offset_t)0,
185			FALSE,
186			VM_PROT_DEFAULT,
187			VM_PROT_ALL,
188			VM_INHERIT_DEFAULT);
189
190	*addr = map_addr;
191	return(result);
192}
193
194/*
195 *	vm_allocate
 *	Legacy routine that allocates "zero fill" memory in the specified
197 *	map (which is limited to the same size as the kernel).
198 */
199kern_return_t
200vm_allocate(
201	vm_map_t	map,
202	vm_offset_t	*addr,
203	vm_size_t	size,
204	int		flags)
205{
206	vm_map_offset_t map_addr;
207	vm_map_size_t	map_size;
208	kern_return_t	result;
209	boolean_t	anywhere;
210
211	/* filter out any kernel-only flags */
212	if (flags & ~VM_FLAGS_USER_ALLOCATE)
213		return KERN_INVALID_ARGUMENT;
214
215	if (map == VM_MAP_NULL)
216		return(KERN_INVALID_ARGUMENT);
217	if (size == 0) {
218		*addr = 0;
219		return(KERN_SUCCESS);
220	}
221
222	anywhere = ((VM_FLAGS_ANYWHERE & flags) != 0);
223	if (anywhere) {
224		/*
225		 * No specific address requested, so start candidate address
226		 * search at the minimum address in the map.  However, if that
227		 * minimum is 0, bump it up by PAGE_SIZE.  We want to limit
228		 * allocations of PAGEZERO to explicit requests since its
229		 * normal use is to catch dereferences of NULL and many
230		 * applications also treat pointers with a value of 0 as
		 * special and suddenly having address 0 contain usable
232		 * memory would tend to confuse those applications.
233		 */
234		map_addr = vm_map_min(map);
235		if (map_addr == 0)
236			map_addr += VM_MAP_PAGE_SIZE(map);
237	} else
238		map_addr = vm_map_trunc_page(*addr,
239					     VM_MAP_PAGE_MASK(map));
240	map_size = vm_map_round_page(size,
241				     VM_MAP_PAGE_MASK(map));
242	if (map_size == 0) {
243	  return(KERN_INVALID_ARGUMENT);
244	}
245
246	result = vm_map_enter(
247			map,
248			&map_addr,
249			map_size,
250			(vm_map_offset_t)0,
251			flags,
252			VM_OBJECT_NULL,
253			(vm_object_offset_t)0,
254			FALSE,
255			VM_PROT_DEFAULT,
256			VM_PROT_ALL,
257			VM_INHERIT_DEFAULT);
258
259	*addr = CAST_DOWN(vm_offset_t, map_addr);
260	return(result);
261}
262
263/*
264 *	mach_vm_deallocate -
265 *	deallocates the specified range of addresses in the
266 *	specified address map.
267 */
268kern_return_t
269mach_vm_deallocate(
270	vm_map_t		map,
271	mach_vm_offset_t	start,
272	mach_vm_size_t	size)
273{
274	if ((map == VM_MAP_NULL) || (start + size < start))
275		return(KERN_INVALID_ARGUMENT);
276
277	if (size == (mach_vm_offset_t) 0)
278		return(KERN_SUCCESS);
279
280	return(vm_map_remove(map,
281			     vm_map_trunc_page(start,
282					       VM_MAP_PAGE_MASK(map)),
283			     vm_map_round_page(start+size,
284					       VM_MAP_PAGE_MASK(map)),
285			     VM_MAP_NO_FLAGS));
286}
287
288/*
289 *	vm_deallocate -
290 *	deallocates the specified range of addresses in the
291 *	specified address map (limited to addresses the same
292 *	size as the kernel).
293 */
294kern_return_t
295vm_deallocate(
296	register vm_map_t	map,
297	vm_offset_t		start,
298	vm_size_t		size)
299{
300	if ((map == VM_MAP_NULL) || (start + size < start))
301		return(KERN_INVALID_ARGUMENT);
302
303	if (size == (vm_offset_t) 0)
304		return(KERN_SUCCESS);
305
306	return(vm_map_remove(map,
307			     vm_map_trunc_page(start,
308					       VM_MAP_PAGE_MASK(map)),
309			     vm_map_round_page(start+size,
310					       VM_MAP_PAGE_MASK(map)),
311			     VM_MAP_NO_FLAGS));
312}
313
314/*
315 *	mach_vm_inherit -
316 *	Sets the inheritance of the specified range in the
317 *	specified map.
318 */
319kern_return_t
320mach_vm_inherit(
321	vm_map_t		map,
322	mach_vm_offset_t	start,
323	mach_vm_size_t	size,
324	vm_inherit_t		new_inheritance)
325{
326	if ((map == VM_MAP_NULL) || (start + size < start) ||
327	    (new_inheritance > VM_INHERIT_LAST_VALID))
328                return(KERN_INVALID_ARGUMENT);
329
330	if (size == 0)
331		return KERN_SUCCESS;
332
333	return(vm_map_inherit(map,
334			      vm_map_trunc_page(start,
335						VM_MAP_PAGE_MASK(map)),
336			      vm_map_round_page(start+size,
337						VM_MAP_PAGE_MASK(map)),
338			      new_inheritance));
339}
340
341/*
342 *	vm_inherit -
343 *	Sets the inheritance of the specified range in the
 *	specified map (range limited to addresses the same size as the kernel).
345 */
346kern_return_t
347vm_inherit(
348	register vm_map_t	map,
349	vm_offset_t		start,
350	vm_size_t		size,
351	vm_inherit_t		new_inheritance)
352{
353	if ((map == VM_MAP_NULL) || (start + size < start) ||
354	    (new_inheritance > VM_INHERIT_LAST_VALID))
355                return(KERN_INVALID_ARGUMENT);
356
357	if (size == 0)
358		return KERN_SUCCESS;
359
360	return(vm_map_inherit(map,
361			      vm_map_trunc_page(start,
362						VM_MAP_PAGE_MASK(map)),
363			      vm_map_round_page(start+size,
364						VM_MAP_PAGE_MASK(map)),
365			      new_inheritance));
366}
367
368/*
369 *	mach_vm_protect -
370 *	Sets the protection of the specified range in the
371 *	specified map.
372 */
373
374kern_return_t
375mach_vm_protect(
376	vm_map_t		map,
377	mach_vm_offset_t	start,
378	mach_vm_size_t	size,
379	boolean_t		set_maximum,
380	vm_prot_t		new_protection)
381{
382	if ((map == VM_MAP_NULL) || (start + size < start) ||
383	    (new_protection & ~(VM_PROT_ALL | VM_PROT_COPY)))
384		return(KERN_INVALID_ARGUMENT);
385
386	if (size == 0)
387		return KERN_SUCCESS;
388
389	return(vm_map_protect(map,
390			      vm_map_trunc_page(start,
391						VM_MAP_PAGE_MASK(map)),
392			      vm_map_round_page(start+size,
393						VM_MAP_PAGE_MASK(map)),
394			      new_protection,
395			      set_maximum));
396}
397
398/*
399 *	vm_protect -
400 *	Sets the protection of the specified range in the
401 *	specified map. Addressability of the range limited
402 *	to the same size as the kernel.
403 */
404
405kern_return_t
406vm_protect(
407	vm_map_t		map,
408	vm_offset_t		start,
409	vm_size_t		size,
410	boolean_t		set_maximum,
411	vm_prot_t		new_protection)
412{
413	if ((map == VM_MAP_NULL) || (start + size < start) ||
414	    (new_protection & ~(VM_PROT_ALL | VM_PROT_COPY)))
415		return(KERN_INVALID_ARGUMENT);
416
417	if (size == 0)
418		return KERN_SUCCESS;
419
420	return(vm_map_protect(map,
421			      vm_map_trunc_page(start,
422						VM_MAP_PAGE_MASK(map)),
423			      vm_map_round_page(start+size,
424						VM_MAP_PAGE_MASK(map)),
425			      new_protection,
426			      set_maximum));
427}
428
429/*
 * mach_vm_machine_attribute -
431 * Handle machine-specific attributes for a mapping, such
432 * as cachability, migrability, etc.
433 */
434kern_return_t
435mach_vm_machine_attribute(
436	vm_map_t			map,
437	mach_vm_address_t		addr,
438	mach_vm_size_t		size,
439	vm_machine_attribute_t	attribute,
440	vm_machine_attribute_val_t* value)		/* IN/OUT */
441{
442	if ((map == VM_MAP_NULL) || (addr + size < addr))
443		return(KERN_INVALID_ARGUMENT);
444
445	if (size == 0)
446		return KERN_SUCCESS;
447
448	return vm_map_machine_attribute(
449		map,
450		vm_map_trunc_page(addr,
451				  VM_MAP_PAGE_MASK(map)),
452		vm_map_round_page(addr+size,
453				  VM_MAP_PAGE_MASK(map)),
454		attribute,
455		value);
456}
457
458/*
459 * vm_machine_attribute -
460 * Handle machine-specific attributes for a mapping, such
461 * as cachability, migrability, etc. Limited addressability
462 * (same range limits as for the native kernel map).
463 */
464kern_return_t
465vm_machine_attribute(
466	vm_map_t	map,
467	vm_address_t	addr,
468	vm_size_t	size,
469	vm_machine_attribute_t	attribute,
470	vm_machine_attribute_val_t* value)		/* IN/OUT */
471{
472	if ((map == VM_MAP_NULL) || (addr + size < addr))
473		return(KERN_INVALID_ARGUMENT);
474
475	if (size == 0)
476		return KERN_SUCCESS;
477
478	return vm_map_machine_attribute(
479		map,
480		vm_map_trunc_page(addr,
481				  VM_MAP_PAGE_MASK(map)),
482		vm_map_round_page(addr+size,
483				  VM_MAP_PAGE_MASK(map)),
484		attribute,
485		value);
486}
487
488/*
489 * mach_vm_read -
490 * Read/copy a range from one address space and return it to the caller.
491 *
492 * It is assumed that the address for the returned memory is selected by
493 * the IPC implementation as part of receiving the reply to this call.
494 * If IPC isn't used, the caller must deal with the vm_map_copy_t object
495 * that gets returned.
496 *
497 * JMM - because of mach_msg_type_number_t, this call is limited to a
498 * single 4GB region at this time.
499 *
500 */
501kern_return_t
502mach_vm_read(
503	vm_map_t		map,
504	mach_vm_address_t	addr,
505	mach_vm_size_t	size,
506	pointer_t		*data,
507	mach_msg_type_number_t	*data_size)
508{
509	kern_return_t	error;
510	vm_map_copy_t	ipc_address;
511
512	if (map == VM_MAP_NULL)
513		return(KERN_INVALID_ARGUMENT);
514
515	if ((mach_msg_type_number_t) size != size)
516		return KERN_INVALID_ARGUMENT;
517
518	error = vm_map_copyin(map,
519			(vm_map_address_t)addr,
520			(vm_map_size_t)size,
521			FALSE,	/* src_destroy */
522			&ipc_address);
523
524	if (KERN_SUCCESS == error) {
525		*data = (pointer_t) ipc_address;
526		*data_size = (mach_msg_type_number_t) size;
527		assert(*data_size == size);
528	}
529	return(error);
530}
531
532/*
533 * vm_read -
534 * Read/copy a range from one address space and return it to the caller.
535 * Limited addressability (same range limits as for the native kernel map).
536 *
537 * It is assumed that the address for the returned memory is selected by
538 * the IPC implementation as part of receiving the reply to this call.
539 * If IPC isn't used, the caller must deal with the vm_map_copy_t object
540 * that gets returned.
541 */
542kern_return_t
543vm_read(
544	vm_map_t		map,
545	vm_address_t		addr,
546	vm_size_t		size,
547	pointer_t		*data,
548	mach_msg_type_number_t	*data_size)
549{
550	kern_return_t	error;
551	vm_map_copy_t	ipc_address;
552
553	if (map == VM_MAP_NULL)
554		return(KERN_INVALID_ARGUMENT);
555
556	if (size > (unsigned)(mach_msg_type_number_t) -1) {
557		/*
558		 * The kernel could handle a 64-bit "size" value, but
559		 * it could not return the size of the data in "*data_size"
560		 * without overflowing.
561		 * Let's reject this "size" as invalid.
562		 */
563		return KERN_INVALID_ARGUMENT;
564	}
565
566	error = vm_map_copyin(map,
567			(vm_map_address_t)addr,
568			(vm_map_size_t)size,
569			FALSE,	/* src_destroy */
570			&ipc_address);
571
572	if (KERN_SUCCESS == error) {
573		*data = (pointer_t) ipc_address;
574		*data_size = (mach_msg_type_number_t) size;
575		assert(*data_size == size);
576	}
577	return(error);
578}
579
580/*
581 * mach_vm_read_list -
582 * Read/copy a list of address ranges from specified map.
583 *
584 * MIG does not know how to deal with a returned array of
585 * vm_map_copy_t structures, so we have to do the copyout
586 * manually here.
587 */
588kern_return_t
589mach_vm_read_list(
590	vm_map_t			map,
591	mach_vm_read_entry_t		data_list,
592	natural_t			count)
593{
594	mach_msg_type_number_t	i;
595	kern_return_t	error;
596	vm_map_copy_t	copy;
597
598	if (map == VM_MAP_NULL ||
599	    count > VM_MAP_ENTRY_MAX)
600		return(KERN_INVALID_ARGUMENT);
601
602	error = KERN_SUCCESS;
603	for(i=0; i<count; i++) {
604		vm_map_address_t map_addr;
605		vm_map_size_t map_size;
606
607		map_addr = (vm_map_address_t)(data_list[i].address);
608		map_size = (vm_map_size_t)(data_list[i].size);
609
610		if(map_size != 0) {
611			error = vm_map_copyin(map,
612					map_addr,
613					map_size,
614					FALSE,	/* src_destroy */
615					&copy);
616			if (KERN_SUCCESS == error) {
617				error = vm_map_copyout(
618						current_task()->map,
619						&map_addr,
620						copy);
621				if (KERN_SUCCESS == error) {
622					data_list[i].address = map_addr;
623					continue;
624				}
625				vm_map_copy_discard(copy);
626			}
627		}
628		data_list[i].address = (mach_vm_address_t)0;
629		data_list[i].size = (mach_vm_size_t)0;
630	}
631	return(error);
632}
633
634/*
635 * vm_read_list -
636 * Read/copy a list of address ranges from specified map.
637 *
638 * MIG does not know how to deal with a returned array of
639 * vm_map_copy_t structures, so we have to do the copyout
640 * manually here.
641 *
642 * The source and destination ranges are limited to those
643 * that can be described with a vm_address_t (i.e. same
644 * size map as the kernel).
645 *
646 * JMM - If the result of the copyout is an address range
647 * that cannot be described with a vm_address_t (i.e. the
648 * caller had a larger address space but used this call
649 * anyway), it will result in a truncated address being
650 * returned (and a likely confused caller).
651 */
652
653kern_return_t
654vm_read_list(
655	vm_map_t		map,
656	vm_read_entry_t	data_list,
657	natural_t		count)
658{
659	mach_msg_type_number_t	i;
660	kern_return_t	error;
661	vm_map_copy_t	copy;
662
663	if (map == VM_MAP_NULL ||
664	    count > VM_MAP_ENTRY_MAX)
665		return(KERN_INVALID_ARGUMENT);
666
667	error = KERN_SUCCESS;
668	for(i=0; i<count; i++) {
669		vm_map_address_t map_addr;
670		vm_map_size_t map_size;
671
672		map_addr = (vm_map_address_t)(data_list[i].address);
673		map_size = (vm_map_size_t)(data_list[i].size);
674
675		if(map_size != 0) {
676			error = vm_map_copyin(map,
677					map_addr,
678					map_size,
679					FALSE,	/* src_destroy */
680					&copy);
681			if (KERN_SUCCESS == error) {
682				error = vm_map_copyout(current_task()->map,
683						&map_addr,
684						copy);
685				if (KERN_SUCCESS == error) {
686					data_list[i].address =
687						CAST_DOWN(vm_offset_t, map_addr);
688					continue;
689				}
690				vm_map_copy_discard(copy);
691			}
692		}
693		data_list[i].address = (mach_vm_address_t)0;
694		data_list[i].size = (mach_vm_size_t)0;
695	}
696	return(error);
697}
698
699/*
700 * mach_vm_read_overwrite -
701 * Overwrite a range of the current map with data from the specified
702 * map/address range.
703 *
 * Because this assumes the current thread is local, it is
 * no longer cluster-safe without a fully supportive local proxy
 * thread/task (but we don't support clusters anymore, so this is moot).
707 */
708
709kern_return_t
710mach_vm_read_overwrite(
711	vm_map_t		map,
712	mach_vm_address_t	address,
713	mach_vm_size_t	size,
714	mach_vm_address_t	data,
715	mach_vm_size_t	*data_size)
716{
717	kern_return_t	error;
718	vm_map_copy_t	copy;
719
720	if (map == VM_MAP_NULL)
721		return(KERN_INVALID_ARGUMENT);
722
723	error = vm_map_copyin(map, (vm_map_address_t)address,
724				(vm_map_size_t)size, FALSE, &copy);
725
726	if (KERN_SUCCESS == error) {
727		error = vm_map_copy_overwrite(current_thread()->map,
728 					(vm_map_address_t)data,
729					copy, FALSE);
730		if (KERN_SUCCESS == error) {
731			*data_size = size;
732			return error;
733		}
734		vm_map_copy_discard(copy);
735	}
736	return(error);
737}
738
739/*
740 * vm_read_overwrite -
741 * Overwrite a range of the current map with data from the specified
742 * map/address range.
743 *
744 * This routine adds the additional limitation that the source and
745 * destination ranges must be describable with vm_address_t values
 * (i.e. the same size address spaces as the kernel, or at least
 * the ranges are in that first portion of the respective address
748 * spaces).
749 */
750
751kern_return_t
752vm_read_overwrite(
753	vm_map_t	map,
754	vm_address_t	address,
755	vm_size_t	size,
756	vm_address_t	data,
757	vm_size_t	*data_size)
758{
759	kern_return_t	error;
760	vm_map_copy_t	copy;
761
762	if (map == VM_MAP_NULL)
763		return(KERN_INVALID_ARGUMENT);
764
765	error = vm_map_copyin(map, (vm_map_address_t)address,
766				(vm_map_size_t)size, FALSE, &copy);
767
768	if (KERN_SUCCESS == error) {
769		error = vm_map_copy_overwrite(current_thread()->map,
770 					(vm_map_address_t)data,
771					copy, FALSE);
772		if (KERN_SUCCESS == error) {
773			*data_size = size;
774			return error;
775		}
776		vm_map_copy_discard(copy);
777	}
778	return(error);
779}
780
781
782/*
783 * mach_vm_write -
784 * Overwrite the specified address range with the data provided
785 * (from the current map).
786 */
787kern_return_t
788mach_vm_write(
789	vm_map_t			map,
790	mach_vm_address_t		address,
791	pointer_t			data,
792	__unused mach_msg_type_number_t	size)
793{
794	if (map == VM_MAP_NULL)
795		return KERN_INVALID_ARGUMENT;
796
797	return vm_map_copy_overwrite(map, (vm_map_address_t)address,
798		(vm_map_copy_t) data, FALSE /* interruptible XXX */);
799}
800
801/*
802 * vm_write -
803 * Overwrite the specified address range with the data provided
804 * (from the current map).
805 *
806 * The addressability of the range of addresses to overwrite is
 * limited by the use of a vm_address_t (same size as kernel map).
808 * Either the target map is also small, or the range is in the
809 * low addresses within it.
810 */
811kern_return_t
812vm_write(
813	vm_map_t			map,
814	vm_address_t			address,
815	pointer_t			data,
816	__unused mach_msg_type_number_t	size)
817{
818	if (map == VM_MAP_NULL)
819		return KERN_INVALID_ARGUMENT;
820
821	return vm_map_copy_overwrite(map, (vm_map_address_t)address,
822		(vm_map_copy_t) data, FALSE /* interruptible XXX */);
823}
824
825/*
826 * mach_vm_copy -
827 * Overwrite one range of the specified map with the contents of
828 * another range within that same map (i.e. both address ranges
829 * are "over there").
830 */
831kern_return_t
832mach_vm_copy(
833	vm_map_t		map,
834	mach_vm_address_t	source_address,
835	mach_vm_size_t	size,
836	mach_vm_address_t	dest_address)
837{
838	vm_map_copy_t copy;
839	kern_return_t kr;
840
841	if (map == VM_MAP_NULL)
842		return KERN_INVALID_ARGUMENT;
843
844	kr = vm_map_copyin(map, (vm_map_address_t)source_address,
845			   (vm_map_size_t)size, FALSE, &copy);
846
847	if (KERN_SUCCESS == kr) {
848		kr = vm_map_copy_overwrite(map,
849				(vm_map_address_t)dest_address,
850				copy, FALSE /* interruptible XXX */);
851
852		if (KERN_SUCCESS != kr)
853			vm_map_copy_discard(copy);
854	}
855	return kr;
856}
857
858kern_return_t
859vm_copy(
860	vm_map_t	map,
861	vm_address_t	source_address,
862	vm_size_t	size,
863	vm_address_t	dest_address)
864{
865	vm_map_copy_t copy;
866	kern_return_t kr;
867
868	if (map == VM_MAP_NULL)
869		return KERN_INVALID_ARGUMENT;
870
871	kr = vm_map_copyin(map, (vm_map_address_t)source_address,
872			   (vm_map_size_t)size, FALSE, &copy);
873
874	if (KERN_SUCCESS == kr) {
875		kr = vm_map_copy_overwrite(map,
876				(vm_map_address_t)dest_address,
877				copy, FALSE /* interruptible XXX */);
878
879		if (KERN_SUCCESS != kr)
880			vm_map_copy_discard(copy);
881	}
882	return kr;
883}
884
885/*
886 * mach_vm_map -
887 * Map some range of an object into an address space.
888 *
889 * The object can be one of several types of objects:
890 *	NULL - anonymous memory
891 *	a named entry - a range within another address space
892 *	                or a range within a memory object
893 *	a whole memory object
894 *
895 */
896kern_return_t
897mach_vm_map(
898	vm_map_t		target_map,
899	mach_vm_offset_t	*address,
900	mach_vm_size_t	initial_size,
901	mach_vm_offset_t	mask,
902	int			flags,
903	ipc_port_t		port,
904	vm_object_offset_t	offset,
905	boolean_t		copy,
906	vm_prot_t		cur_protection,
907	vm_prot_t		max_protection,
908	vm_inherit_t		inheritance)
909{
910	kern_return_t		kr;
911	vm_map_offset_t 	vmmaddr;
912
913	vmmaddr = (vm_map_offset_t) *address;
914
915	/* filter out any kernel-only flags */
916	if (flags & ~VM_FLAGS_USER_MAP)
917		return KERN_INVALID_ARGUMENT;
918
919	kr = vm_map_enter_mem_object(target_map,
920				       &vmmaddr,
921				       initial_size,
922				       mask,
923				       flags,
924				       port,
925				       offset,
926				       copy,
927				       cur_protection,
928				       max_protection,
929				       inheritance);
930
931	*address = vmmaddr;
932	return kr;
933}
934
935
936/* legacy interface */
937kern_return_t
938vm_map_64(
939	vm_map_t		target_map,
940	vm_offset_t		*address,
941	vm_size_t		size,
942	vm_offset_t		mask,
943	int			flags,
944	ipc_port_t		port,
945	vm_object_offset_t	offset,
946	boolean_t		copy,
947	vm_prot_t		cur_protection,
948	vm_prot_t		max_protection,
949	vm_inherit_t		inheritance)
950{
951	mach_vm_address_t map_addr;
952	mach_vm_size_t map_size;
953	mach_vm_offset_t map_mask;
954	kern_return_t kr;
955
956	map_addr = (mach_vm_address_t)*address;
957	map_size = (mach_vm_size_t)size;
958	map_mask = (mach_vm_offset_t)mask;
959
960	kr = mach_vm_map(target_map, &map_addr, map_size, map_mask, flags,
961			 port, offset, copy,
962			 cur_protection, max_protection, inheritance);
963	*address = CAST_DOWN(vm_offset_t, map_addr);
964	return kr;
965}
966
967/* temporary, until world build */
968kern_return_t
969vm_map(
970	vm_map_t		target_map,
971	vm_offset_t		*address,
972	vm_size_t		size,
973	vm_offset_t		mask,
974	int			flags,
975	ipc_port_t		port,
976	vm_offset_t		offset,
977	boolean_t		copy,
978	vm_prot_t		cur_protection,
979	vm_prot_t		max_protection,
980	vm_inherit_t		inheritance)
981{
982	mach_vm_address_t map_addr;
983	mach_vm_size_t map_size;
984	mach_vm_offset_t map_mask;
985	vm_object_offset_t obj_offset;
986	kern_return_t kr;
987
988	map_addr = (mach_vm_address_t)*address;
989	map_size = (mach_vm_size_t)size;
990	map_mask = (mach_vm_offset_t)mask;
991	obj_offset = (vm_object_offset_t)offset;
992
993	kr = mach_vm_map(target_map, &map_addr, map_size, map_mask, flags,
994			 port, obj_offset, copy,
995			 cur_protection, max_protection, inheritance);
996	*address = CAST_DOWN(vm_offset_t, map_addr);
997	return kr;
998}
999
1000/*
1001 * mach_vm_remap -
1002 * Remap a range of memory from one task into another,
1003 * to another address range within the same task, or
1004 * over top of itself (with altered permissions and/or
1005 * as an in-place copy of itself).
1006 */
1007
1008kern_return_t
1009mach_vm_remap(
1010	vm_map_t		target_map,
1011	mach_vm_offset_t	*address,
1012	mach_vm_size_t	size,
1013	mach_vm_offset_t	mask,
1014	int			flags,
1015	vm_map_t		src_map,
1016	mach_vm_offset_t	memory_address,
1017	boolean_t		copy,
1018	vm_prot_t		*cur_protection,
1019	vm_prot_t		*max_protection,
1020	vm_inherit_t		inheritance)
1021{
1022	vm_map_offset_t		map_addr;
1023	kern_return_t		kr;
1024
1025	if (VM_MAP_NULL == target_map || VM_MAP_NULL == src_map)
1026		return KERN_INVALID_ARGUMENT;
1027
1028	/* filter out any kernel-only flags */
1029	if (flags & ~VM_FLAGS_USER_REMAP)
1030		return KERN_INVALID_ARGUMENT;
1031
1032	map_addr = (vm_map_offset_t)*address;
1033
1034	kr = vm_map_remap(target_map,
1035			  &map_addr,
1036			  size,
1037			  mask,
1038			  flags,
1039			  src_map,
1040			  memory_address,
1041			  copy,
1042			  cur_protection,
1043			  max_protection,
1044			  inheritance);
1045	*address = map_addr;
1046	return kr;
1047}
1048
1049/*
1050 * vm_remap -
1051 * Remap a range of memory from one task into another,
1052 * to another address range within the same task, or
1053 * over top of itself (with altered permissions and/or
1054 * as an in-place copy of itself).
1055 *
1056 * The addressability of the source and target address
1057 * range is limited by the size of vm_address_t (in the
1058 * kernel context).
1059 */
1060kern_return_t
1061vm_remap(
1062	vm_map_t		target_map,
1063	vm_offset_t		*address,
1064	vm_size_t		size,
1065	vm_offset_t		mask,
1066	int			flags,
1067	vm_map_t		src_map,
1068	vm_offset_t		memory_address,
1069	boolean_t		copy,
1070	vm_prot_t		*cur_protection,
1071	vm_prot_t		*max_protection,
1072	vm_inherit_t		inheritance)
1073{
1074	vm_map_offset_t		map_addr;
1075	kern_return_t		kr;
1076
1077	if (VM_MAP_NULL == target_map || VM_MAP_NULL == src_map)
1078		return KERN_INVALID_ARGUMENT;
1079
1080	/* filter out any kernel-only flags */
1081	if (flags & ~VM_FLAGS_USER_REMAP)
1082		return KERN_INVALID_ARGUMENT;
1083
1084	map_addr = (vm_map_offset_t)*address;
1085
1086	kr = vm_map_remap(target_map,
1087			  &map_addr,
1088			  size,
1089			  mask,
1090			  flags,
1091			  src_map,
1092			  memory_address,
1093			  copy,
1094			  cur_protection,
1095			  max_protection,
1096			  inheritance);
1097	*address = CAST_DOWN(vm_offset_t, map_addr);
1098	return kr;
1099}
1100
1101/*
 * NOTE: these routines (and this file) will no longer require mach_host_server.h
1103 * when mach_vm_wire and vm_wire are changed to use ledgers.
1104 */
1105#include <mach/mach_host_server.h>
1106/*
1107 *	mach_vm_wire
1108 *	Specify that the range of the virtual address space
1109 *	of the target task must not cause page faults for
1110 *	the indicated accesses.
1111 *
1112 *	[ To unwire the pages, specify VM_PROT_NONE. ]
1113 */
1114kern_return_t
1115mach_vm_wire(
1116	host_priv_t		host_priv,
1117	vm_map_t		map,
1118	mach_vm_offset_t	start,
1119	mach_vm_size_t	size,
1120	vm_prot_t		access)
1121{
1122	kern_return_t		rc;
1123
1124	if (host_priv == HOST_PRIV_NULL)
1125		return KERN_INVALID_HOST;
1126
1127	assert(host_priv == &realhost);
1128
1129	if (map == VM_MAP_NULL)
1130		return KERN_INVALID_TASK;
1131
1132	if (access & ~VM_PROT_ALL || (start + size < start))
1133		return KERN_INVALID_ARGUMENT;
1134
1135	if (access != VM_PROT_NONE) {
1136		rc = vm_map_wire(map,
1137				 vm_map_trunc_page(start,
1138						   VM_MAP_PAGE_MASK(map)),
1139				 vm_map_round_page(start+size,
1140						   VM_MAP_PAGE_MASK(map)),
1141				 access,
1142				 TRUE);
1143	} else {
1144		rc = vm_map_unwire(map,
1145				   vm_map_trunc_page(start,
1146						     VM_MAP_PAGE_MASK(map)),
1147				   vm_map_round_page(start+size,
1148						     VM_MAP_PAGE_MASK(map)),
1149				   TRUE);
1150	}
1151	return rc;
1152}
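
/*
 * Illustrative sketch (not part of this file's build): wiring and then
 * unwiring a range from user space.  "host_priv" is assumed to be the
 * privileged host port, which only suitably privileged callers can
 * obtain; "addr" and "size" describe an existing allocation.
 *
 *	kr = mach_vm_wire(host_priv, mach_task_self(), addr, size,
 *			  VM_PROT_READ | VM_PROT_WRITE);
 *
 *	// ... later, VM_PROT_NONE unwires the same range.
 *	kr = mach_vm_wire(host_priv, mach_task_self(), addr, size,
 *			  VM_PROT_NONE);
 */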
1153
1154/*
1155 *	vm_wire -
1156 *	Specify that the range of the virtual address space
1157 *	of the target task must not cause page faults for
1158 *	the indicated accesses.
1159 *
1160 *	[ To unwire the pages, specify VM_PROT_NONE. ]
1161 */
1162kern_return_t
1163vm_wire(
1164	host_priv_t		host_priv,
1165	register vm_map_t	map,
1166	vm_offset_t		start,
1167	vm_size_t		size,
1168	vm_prot_t		access)
1169{
1170	kern_return_t		rc;
1171
1172	if (host_priv == HOST_PRIV_NULL)
1173		return KERN_INVALID_HOST;
1174
1175	assert(host_priv == &realhost);
1176
1177	if (map == VM_MAP_NULL)
1178		return KERN_INVALID_TASK;
1179
1180	if ((access & ~VM_PROT_ALL) || (start + size < start))
1181		return KERN_INVALID_ARGUMENT;
1182
1183	if (size == 0) {
1184		rc = KERN_SUCCESS;
1185	} else if (access != VM_PROT_NONE) {
1186		rc = vm_map_wire(map,
1187				 vm_map_trunc_page(start,
1188						   VM_MAP_PAGE_MASK(map)),
1189				 vm_map_round_page(start+size,
1190						   VM_MAP_PAGE_MASK(map)),
1191				 access,
1192				 TRUE);
1193	} else {
1194		rc = vm_map_unwire(map,
1195				   vm_map_trunc_page(start,
1196						     VM_MAP_PAGE_MASK(map)),
1197				   vm_map_round_page(start+size,
1198						     VM_MAP_PAGE_MASK(map)),
1199				   TRUE);
1200	}
1201	return rc;
1202}
1203
1204/*
 *	mach_vm_msync
1206 *
1207 *	Synchronises the memory range specified with its backing store
1208 *	image by either flushing or cleaning the contents to the appropriate
1209 *	memory manager.
1210 *
1211 *	interpretation of sync_flags
1212 *	VM_SYNC_INVALIDATE	- discard pages, only return precious
1213 *				  pages to manager.
1214 *
1215 *	VM_SYNC_INVALIDATE & (VM_SYNC_SYNCHRONOUS | VM_SYNC_ASYNCHRONOUS)
1216 *				- discard pages, write dirty or precious
1217 *				  pages back to memory manager.
1218 *
1219 *	VM_SYNC_SYNCHRONOUS | VM_SYNC_ASYNCHRONOUS
1220 *				- write dirty or precious pages back to
1221 *				  the memory manager.
1222 *
1223 *	VM_SYNC_CONTIGUOUS	- does everything normally, but if there
1224 *				  is a hole in the region, and we would
1225 *				  have returned KERN_SUCCESS, return
1226 *				  KERN_INVALID_ADDRESS instead.
1227 *
1228 *	RETURNS
1229 *	KERN_INVALID_TASK		Bad task parameter
1230 *	KERN_INVALID_ARGUMENT		both sync and async were specified.
1231 *	KERN_SUCCESS			The usual.
1232 *	KERN_INVALID_ADDRESS		There was a hole in the region.
1233 */
1234
1235kern_return_t
1236mach_vm_msync(
1237	vm_map_t		map,
1238	mach_vm_address_t	address,
1239	mach_vm_size_t	size,
1240	vm_sync_t		sync_flags)
1241{
1242
1243	if (map == VM_MAP_NULL)
1244		return(KERN_INVALID_TASK);
1245
1246	return vm_map_msync(map, (vm_map_address_t)address,
1247			(vm_map_size_t)size, sync_flags);
1248}
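
/*
 * Illustrative sketch (not part of this file's build): pushing dirty
 * pages in [addr, addr+size) back to their pager and waiting for the
 * I/O to finish.  "addr" and "size" are assumed to describe a mapped,
 * pager-backed range.
 *
 *	kr = mach_vm_msync(mach_task_self(), addr, size,
 *			   VM_SYNC_SYNCHRONOUS);
 *
 *	// Adding VM_SYNC_INVALIDATE would also discard the cached pages,
 *	// and VM_SYNC_CONTIGUOUS turns a hole in the range into a
 *	// KERN_INVALID_ADDRESS return, as described above.
 */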
1249
1250/*
1251 *	vm_msync
1252 *
1253 *	Synchronises the memory range specified with its backing store
1254 *	image by either flushing or cleaning the contents to the appropriate
1255 *	memory manager.
1256 *
1257 *	interpretation of sync_flags
1258 *	VM_SYNC_INVALIDATE	- discard pages, only return precious
1259 *				  pages to manager.
1260 *
1261 *	VM_SYNC_INVALIDATE & (VM_SYNC_SYNCHRONOUS | VM_SYNC_ASYNCHRONOUS)
1262 *				- discard pages, write dirty or precious
1263 *				  pages back to memory manager.
1264 *
1265 *	VM_SYNC_SYNCHRONOUS | VM_SYNC_ASYNCHRONOUS
1266 *				- write dirty or precious pages back to
1267 *				  the memory manager.
1268 *
1269 *	VM_SYNC_CONTIGUOUS	- does everything normally, but if there
1270 *				  is a hole in the region, and we would
1271 *				  have returned KERN_SUCCESS, return
1272 *				  KERN_INVALID_ADDRESS instead.
1273 *
1274 *	The addressability of the range is limited to that which can
1275 *	be described by a vm_address_t.
1276 *
1277 *	RETURNS
1278 *	KERN_INVALID_TASK		Bad task parameter
1279 *	KERN_INVALID_ARGUMENT		both sync and async were specified.
1280 *	KERN_SUCCESS			The usual.
1281 *	KERN_INVALID_ADDRESS		There was a hole in the region.
1282 */
1283
1284kern_return_t
1285vm_msync(
1286	vm_map_t	map,
1287	vm_address_t	address,
1288	vm_size_t	size,
1289	vm_sync_t	sync_flags)
1290{
1291
1292	if (map == VM_MAP_NULL)
1293		return(KERN_INVALID_TASK);
1294
1295	return vm_map_msync(map, (vm_map_address_t)address,
1296			(vm_map_size_t)size, sync_flags);
1297}
1298
1299
1300int
1301vm_toggle_entry_reuse(int toggle, int *old_value)
1302{
1303	vm_map_t map = current_map();
1304
1305	if(toggle == VM_TOGGLE_GETVALUE && old_value != NULL){
1306		*old_value = map->disable_vmentry_reuse;
1307	} else if(toggle == VM_TOGGLE_SET){
1308		vm_map_lock(map);
1309		map->disable_vmentry_reuse = TRUE;
1310		if (map->first_free == vm_map_to_entry(map)) {
1311			map->highest_entry_end = vm_map_min(map);
1312		} else {
1313			map->highest_entry_end = map->first_free->vme_end;
1314		}
1315		vm_map_unlock(map);
1316	} else if (toggle == VM_TOGGLE_CLEAR){
1317		vm_map_lock(map);
1318		map->disable_vmentry_reuse = FALSE;
1319		vm_map_unlock(map);
1320	} else
1321		return KERN_INVALID_ARGUMENT;
1322
1323	return KERN_SUCCESS;
1324}
1325
1326/*
1327 *	mach_vm_behavior_set
1328 *
 *	Sets the paging behavior attribute for the specified range
1330 *	in the specified map.
1331 *
1332 *	This routine will fail with KERN_INVALID_ADDRESS if any address
1333 *	in [start,start+size) is not a valid allocated memory region.
1334 */
1335kern_return_t
1336mach_vm_behavior_set(
1337	vm_map_t		map,
1338	mach_vm_offset_t	start,
1339	mach_vm_size_t	size,
1340	vm_behavior_t		new_behavior)
1341{
1342	if ((map == VM_MAP_NULL) || (start + size < start))
1343		return(KERN_INVALID_ARGUMENT);
1344
1345	if (size == 0)
1346		return KERN_SUCCESS;
1347
1348	return(vm_map_behavior_set(map,
1349				   vm_map_trunc_page(start,
1350						     VM_MAP_PAGE_MASK(map)),
1351				   vm_map_round_page(start+size,
1352						     VM_MAP_PAGE_MASK(map)),
1353				   new_behavior));
1354}
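
/*
 * Illustrative sketch (not part of this file's build): advising the VM
 * system that an existing mapping at "addr"/"size" will be touched
 * sequentially, so it can cluster faults ahead of the scan.
 *
 *	kr = mach_vm_behavior_set(mach_task_self(), addr, size,
 *				  VM_BEHAVIOR_SEQUENTIAL);
 */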
1355
1356/*
1357 *	vm_behavior_set
1358 *
 *	Sets the paging behavior attribute for the specified range
1360 *	in the specified map.
1361 *
1362 *	This routine will fail with KERN_INVALID_ADDRESS if any address
1363 *	in [start,start+size) is not a valid allocated memory region.
1364 *
 *	This routine is potentially limited in addressability by the
1366 *	use of vm_offset_t (if the map provided is larger than the
1367 *	kernel's).
1368 */
1369kern_return_t
1370vm_behavior_set(
1371	vm_map_t		map,
1372	vm_offset_t		start,
1373	vm_size_t		size,
1374	vm_behavior_t		new_behavior)
1375{
1376	if ((map == VM_MAP_NULL) || (start + size < start))
1377		return(KERN_INVALID_ARGUMENT);
1378
1379	if (size == 0)
1380		return KERN_SUCCESS;
1381
1382	return(vm_map_behavior_set(map,
1383				   vm_map_trunc_page(start,
1384						     VM_MAP_PAGE_MASK(map)),
1385				   vm_map_round_page(start+size,
1386						     VM_MAP_PAGE_MASK(map)),
1387				   new_behavior));
1388}
1389
1390/*
1391 *	mach_vm_region:
1392 *
1393 *	User call to obtain information about a region in
1394 *	a task's address map. Currently, only one flavor is
1395 *	supported.
1396 *
1397 *	XXX The reserved and behavior fields cannot be filled
1398 *	    in until the vm merge from the IK is completed, and
1399 *	    vm_reserve is implemented.
1400 *
1401 *	XXX Dependency: syscall_vm_region() also supports only one flavor.
1402 */
1403
1404kern_return_t
1405mach_vm_region(
1406	vm_map_t		 map,
1407	mach_vm_offset_t	*address,		/* IN/OUT */
1408	mach_vm_size_t	*size,			/* OUT */
1409	vm_region_flavor_t	 flavor,		/* IN */
1410	vm_region_info_t	 info,			/* OUT */
1411	mach_msg_type_number_t	*count,			/* IN/OUT */
1412	mach_port_t		*object_name)		/* OUT */
1413{
1414	vm_map_offset_t 	map_addr;
1415	vm_map_size_t 		map_size;
1416	kern_return_t		kr;
1417
1418	if (VM_MAP_NULL == map)
1419		return KERN_INVALID_ARGUMENT;
1420
1421	map_addr = (vm_map_offset_t)*address;
1422	map_size = (vm_map_size_t)*size;
1423
1424	/* legacy conversion */
1425	if (VM_REGION_BASIC_INFO == flavor)
1426		flavor = VM_REGION_BASIC_INFO_64;
1427
1428	kr = vm_map_region(map,
1429			   &map_addr, &map_size,
1430			   flavor, info, count,
1431			   object_name);
1432
1433	*address = map_addr;
1434	*size = map_size;
1435	return kr;
1436}
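
/*
 * Illustrative sketch (not part of this file's build): querying the
 * region that contains (or follows) "addr" using the basic-info flavor.
 * On return, "addr" and "size" describe the region found and
 * info.protection holds its current protection.
 *
 *	vm_region_basic_info_data_64_t	info;
 *	mach_msg_type_number_t		count = VM_REGION_BASIC_INFO_COUNT_64;
 *	mach_port_t			object_name = MACH_PORT_NULL;
 *	mach_vm_size_t			size = 0;
 *
 *	kr = mach_vm_region(mach_task_self(), &addr, &size,
 *			    VM_REGION_BASIC_INFO_64,
 *			    (vm_region_info_t)&info, &count, &object_name);
 */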
1437
1438/*
1439 *	vm_region_64 and vm_region:
1440 *
1441 *	User call to obtain information about a region in
1442 *	a task's address map. Currently, only one flavor is
1443 *	supported.
1444 *
1445 *	XXX The reserved and behavior fields cannot be filled
1446 *	    in until the vm merge from the IK is completed, and
1447 *	    vm_reserve is implemented.
1448 *
1449 *	XXX Dependency: syscall_vm_region() also supports only one flavor.
1450 */
1451
1452kern_return_t
1453vm_region_64(
1454	vm_map_t		 map,
1455	vm_offset_t	        *address,		/* IN/OUT */
1456	vm_size_t		*size,			/* OUT */
1457	vm_region_flavor_t	 flavor,		/* IN */
1458	vm_region_info_t	 info,			/* OUT */
1459	mach_msg_type_number_t	*count,			/* IN/OUT */
1460	mach_port_t		*object_name)		/* OUT */
1461{
1462	vm_map_offset_t 	map_addr;
1463	vm_map_size_t 		map_size;
1464	kern_return_t		kr;
1465
1466	if (VM_MAP_NULL == map)
1467		return KERN_INVALID_ARGUMENT;
1468
1469	map_addr = (vm_map_offset_t)*address;
1470	map_size = (vm_map_size_t)*size;
1471
1472	/* legacy conversion */
1473	if (VM_REGION_BASIC_INFO == flavor)
1474		flavor = VM_REGION_BASIC_INFO_64;
1475
1476	kr = vm_map_region(map,
1477			   &map_addr, &map_size,
1478			   flavor, info, count,
1479			   object_name);
1480
1481	*address = CAST_DOWN(vm_offset_t, map_addr);
1482	*size = CAST_DOWN(vm_size_t, map_size);
1483
1484	if (KERN_SUCCESS == kr && map_addr + map_size > VM_MAX_ADDRESS)
1485		return KERN_INVALID_ADDRESS;
1486	return kr;
1487}
1488
1489kern_return_t
1490vm_region(
1491	vm_map_t			map,
1492	vm_address_t	      		*address,	/* IN/OUT */
1493	vm_size_t			*size,		/* OUT */
1494	vm_region_flavor_t	 	flavor,	/* IN */
1495	vm_region_info_t	 	info,		/* OUT */
1496	mach_msg_type_number_t	*count,	/* IN/OUT */
1497	mach_port_t			*object_name)	/* OUT */
1498{
1499	vm_map_address_t 	map_addr;
1500	vm_map_size_t 		map_size;
1501	kern_return_t		kr;
1502
1503	if (VM_MAP_NULL == map)
1504		return KERN_INVALID_ARGUMENT;
1505
1506	map_addr = (vm_map_address_t)*address;
1507	map_size = (vm_map_size_t)*size;
1508
1509	kr = vm_map_region(map,
1510			   &map_addr, &map_size,
1511			   flavor, info, count,
1512			   object_name);
1513
1514	*address = CAST_DOWN(vm_address_t, map_addr);
1515	*size = CAST_DOWN(vm_size_t, map_size);
1516
1517	if (KERN_SUCCESS == kr && map_addr + map_size > VM_MAX_ADDRESS)
1518		return KERN_INVALID_ADDRESS;
1519	return kr;
1520}
1521
1522/*
 *	mach_vm_region_recurse: A form of mach_vm_region which follows the
1524 *	submaps in a target map
1525 *
1526 */
1527kern_return_t
1528mach_vm_region_recurse(
1529	vm_map_t			map,
1530	mach_vm_address_t		*address,
1531	mach_vm_size_t		*size,
1532	uint32_t			*depth,
1533	vm_region_recurse_info_t	info,
1534	mach_msg_type_number_t 	*infoCnt)
1535{
1536	vm_map_address_t	map_addr;
1537	vm_map_size_t		map_size;
1538	kern_return_t		kr;
1539
1540	if (VM_MAP_NULL == map)
1541		return KERN_INVALID_ARGUMENT;
1542
1543	map_addr = (vm_map_address_t)*address;
1544	map_size = (vm_map_size_t)*size;
1545
1546	kr = vm_map_region_recurse_64(
1547			map,
1548			&map_addr,
1549			&map_size,
1550			depth,
1551			(vm_region_submap_info_64_t)info,
1552			infoCnt);
1553
1554	*address = map_addr;
1555	*size = map_size;
1556	return kr;
1557}
1558
1559/*
 *	vm_region_recurse_64: A form of vm_region which follows the
1561 *	submaps in a target map
1562 *
1563 */
1564kern_return_t
1565vm_region_recurse_64(
1566	vm_map_t			map,
1567	vm_address_t			*address,
1568	vm_size_t			*size,
1569	uint32_t			*depth,
1570	vm_region_recurse_info_64_t	info,
1571	mach_msg_type_number_t 	*infoCnt)
1572{
1573	vm_map_address_t	map_addr;
1574	vm_map_size_t		map_size;
1575	kern_return_t		kr;
1576
1577	if (VM_MAP_NULL == map)
1578		return KERN_INVALID_ARGUMENT;
1579
1580	map_addr = (vm_map_address_t)*address;
1581	map_size = (vm_map_size_t)*size;
1582
1583	kr = vm_map_region_recurse_64(
1584			map,
1585			&map_addr,
1586			&map_size,
1587			depth,
1588			(vm_region_submap_info_64_t)info,
1589			infoCnt);
1590
1591	*address = CAST_DOWN(vm_address_t, map_addr);
1592	*size = CAST_DOWN(vm_size_t, map_size);
1593
1594	if (KERN_SUCCESS == kr && map_addr + map_size > VM_MAX_ADDRESS)
1595		return KERN_INVALID_ADDRESS;
1596	return kr;
1597}
1598
1599kern_return_t
1600vm_region_recurse(
1601	vm_map_t			map,
1602	vm_offset_t	       	*address,	/* IN/OUT */
1603	vm_size_t			*size,		/* OUT */
1604	natural_t	 		*depth,	/* IN/OUT */
1605	vm_region_recurse_info_t	info32,	/* IN/OUT */
1606	mach_msg_type_number_t	*infoCnt)	/* IN/OUT */
1607{
1608	vm_region_submap_info_data_64_t info64;
1609	vm_region_submap_info_t info;
1610	vm_map_address_t	map_addr;
1611	vm_map_size_t		map_size;
1612	kern_return_t		kr;
1613
1614	if (VM_MAP_NULL == map || *infoCnt < VM_REGION_SUBMAP_INFO_COUNT)
1615		return KERN_INVALID_ARGUMENT;
1616
1617
1618	map_addr = (vm_map_address_t)*address;
1619	map_size = (vm_map_size_t)*size;
1620	info = (vm_region_submap_info_t)info32;
1621	*infoCnt = VM_REGION_SUBMAP_INFO_COUNT_64;
1622
1623	kr = vm_map_region_recurse_64(map, &map_addr,&map_size,
1624				      depth, &info64, infoCnt);
1625
1626	info->protection = info64.protection;
1627	info->max_protection = info64.max_protection;
1628	info->inheritance = info64.inheritance;
1629	info->offset = (uint32_t)info64.offset; /* trouble-maker */
1630        info->user_tag = info64.user_tag;
1631        info->pages_resident = info64.pages_resident;
1632        info->pages_shared_now_private = info64.pages_shared_now_private;
1633        info->pages_swapped_out = info64.pages_swapped_out;
1634        info->pages_dirtied = info64.pages_dirtied;
1635        info->ref_count = info64.ref_count;
1636        info->shadow_depth = info64.shadow_depth;
1637        info->external_pager = info64.external_pager;
1638        info->share_mode = info64.share_mode;
1639	info->is_submap = info64.is_submap;
1640	info->behavior = info64.behavior;
1641	info->object_id = info64.object_id;
1642	info->user_wired_count = info64.user_wired_count;
1643
1644	*address = CAST_DOWN(vm_address_t, map_addr);
1645	*size = CAST_DOWN(vm_size_t, map_size);
1646	*infoCnt = VM_REGION_SUBMAP_INFO_COUNT;
1647
1648	if (KERN_SUCCESS == kr && map_addr + map_size > VM_MAX_ADDRESS)
1649		return KERN_INVALID_ADDRESS;
1650	return kr;
1651}
1652
1653kern_return_t
1654mach_vm_purgable_control(
1655	vm_map_t		map,
1656	mach_vm_offset_t	address,
1657	vm_purgable_t		control,
1658	int			*state)
1659{
1660	if (VM_MAP_NULL == map)
1661		return KERN_INVALID_ARGUMENT;
1662
1663	return vm_map_purgable_control(map,
1664				       vm_map_trunc_page(address, PAGE_MASK),
1665				       control,
1666				       state);
1667}
1668
1669kern_return_t
1670vm_purgable_control(
1671	vm_map_t		map,
1672	vm_offset_t		address,
1673	vm_purgable_t		control,
1674	int			*state)
1675{
1676	if (VM_MAP_NULL == map)
1677		return KERN_INVALID_ARGUMENT;
1678
1679	return vm_map_purgable_control(map,
1680				       vm_map_trunc_page(address, PAGE_MASK),
1681				       control,
1682				       state);
1683}
1684
1685
1686/*
1687 *	Ordinarily, the right to allocate CPM is restricted
1688 *	to privileged applications (those that can gain access
1689 *	to the host priv port).  Set this variable to zero if
1690 *	you want to let any application allocate CPM.
1691 */
1692unsigned int	vm_allocate_cpm_privileged = 0;
1693
1694/*
1695 *	Allocate memory in the specified map, with the caveat that
1696 *	the memory is physically contiguous.  This call may fail
1697 *	if the system can't find sufficient contiguous memory.
1698 *	This call may cause or lead to heart-stopping amounts of
1699 *	paging activity.
1700 *
1701 *	Memory obtained from this call should be freed in the
1702 *	normal way, viz., via vm_deallocate.
1703 */
1704kern_return_t
1705vm_allocate_cpm(
1706	host_priv_t		host_priv,
1707	vm_map_t		map,
1708	vm_address_t		*addr,
1709	vm_size_t		size,
1710	int			flags)
1711{
1712	vm_map_address_t	map_addr;
1713	vm_map_size_t		map_size;
1714	kern_return_t		kr;
1715
1716	if (vm_allocate_cpm_privileged && HOST_PRIV_NULL == host_priv)
1717		return KERN_INVALID_HOST;
1718
1719	if (VM_MAP_NULL == map)
1720		return KERN_INVALID_ARGUMENT;
1721
1722	map_addr = (vm_map_address_t)*addr;
1723	map_size = (vm_map_size_t)size;
1724
1725	kr = vm_map_enter_cpm(map,
1726			      &map_addr,
1727			      map_size,
1728			      flags);
1729
1730	*addr = CAST_DOWN(vm_address_t, map_addr);
1731	return kr;
1732}
1733
1734
1735kern_return_t
1736mach_vm_page_query(
1737	vm_map_t		map,
1738	mach_vm_offset_t	offset,
1739	int			*disposition,
1740	int			*ref_count)
1741{
1742	if (VM_MAP_NULL == map)
1743		return KERN_INVALID_ARGUMENT;
1744
1745	return vm_map_page_query_internal(
1746		map,
1747		vm_map_trunc_page(offset, PAGE_MASK),
1748		disposition, ref_count);
1749}
1750
1751kern_return_t
1752vm_map_page_query(
1753	vm_map_t		map,
1754	vm_offset_t		offset,
1755	int			*disposition,
1756	int			*ref_count)
1757{
1758	if (VM_MAP_NULL == map)
1759		return KERN_INVALID_ARGUMENT;
1760
1761	return vm_map_page_query_internal(
1762		map,
1763		vm_map_trunc_page(offset, PAGE_MASK),
1764		disposition, ref_count);
1765}
1766
1767kern_return_t
1768mach_vm_page_info(
1769	vm_map_t		map,
1770	mach_vm_address_t	address,
1771	vm_page_info_flavor_t	flavor,
1772	vm_page_info_t		info,
1773	mach_msg_type_number_t	*count)
1774{
1775	kern_return_t	kr;
1776
1777	if (map == VM_MAP_NULL) {
1778		return KERN_INVALID_ARGUMENT;
1779	}
1780
1781	kr = vm_map_page_info(map, address, flavor, info, count);
1782	return kr;
1783}
1784
1785/* map a (whole) upl into an address space */
1786kern_return_t
1787vm_upl_map(
1788	vm_map_t		map,
1789	upl_t			upl,
1790	vm_address_t		*dst_addr)
1791{
1792	vm_map_offset_t		map_addr;
1793	kern_return_t		kr;
1794
1795	if (VM_MAP_NULL == map)
1796		return KERN_INVALID_ARGUMENT;
1797
1798	kr = vm_map_enter_upl(map, upl, &map_addr);
1799	*dst_addr = CAST_DOWN(vm_address_t, map_addr);
1800	return kr;
1801}
1802
1803kern_return_t
1804vm_upl_unmap(
1805	vm_map_t		map,
1806	upl_t 			upl)
1807{
1808	if (VM_MAP_NULL == map)
1809		return KERN_INVALID_ARGUMENT;
1810
1811	return (vm_map_remove_upl(map, upl));
1812}
1813
1814/* Retrieve a upl for an object underlying an address range in a map */
1815
1816kern_return_t
1817vm_map_get_upl(
1818	vm_map_t		map,
1819	vm_map_offset_t		map_offset,
1820	upl_size_t		*upl_size,
1821	upl_t			*upl,
1822	upl_page_info_array_t	page_list,
1823	unsigned int		*count,
1824	int			*flags,
1825	int             	force_data_sync)
1826{
1827	int 		map_flags;
1828	kern_return_t	kr;
1829
1830	if (VM_MAP_NULL == map)
1831		return KERN_INVALID_ARGUMENT;
1832
1833	map_flags = *flags & ~UPL_NOZEROFILL;
1834	if (force_data_sync)
1835		map_flags |= UPL_FORCE_DATA_SYNC;
1836
1837	kr = vm_map_create_upl(map,
1838			       map_offset,
1839			       upl_size,
1840			       upl,
1841			       page_list,
1842			       count,
1843			       &map_flags);
1844
1845	*flags = (map_flags & ~UPL_FORCE_DATA_SYNC);
1846	return kr;
1847}
1848
1849/*
1850 * mach_make_memory_entry_64
1851 *
 * Think of it as a two-stage vm_remap() operation.  First
 * you get a handle.  Second, you map that handle somewhere
 * else, rather than doing it all at once (and without
 * needing access to the other whole map).
1856 */
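
/*
 * Illustrative sketch (not part of this file's build): the two stages as
 * seen from user space.  "src_addr", "len" and "other_task" are assumed
 * to exist in the caller; error handling is elided.
 *
 *	memory_object_size_t	entry_size = len;
 *	mach_port_t		mem_entry = MACH_PORT_NULL;
 *	mach_vm_address_t	dst_addr = 0;
 *
 *	// Stage 1: wrap [src_addr, src_addr + len) in a named entry handle.
 *	kr = mach_make_memory_entry_64(mach_task_self(), &entry_size,
 *				       src_addr,
 *				       VM_PROT_READ | VM_PROT_WRITE,
 *				       &mem_entry, MACH_PORT_NULL);
 *
 *	// Stage 2: map that handle into another task's address space.
 *	kr = mach_vm_map(other_task, &dst_addr, entry_size, 0,
 *			 VM_FLAGS_ANYWHERE, mem_entry, 0, FALSE,
 *			 VM_PROT_READ | VM_PROT_WRITE,
 *			 VM_PROT_READ | VM_PROT_WRITE,
 *			 VM_INHERIT_NONE);
 */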
1857
1858kern_return_t
1859mach_make_memory_entry_64(
1860	vm_map_t		target_map,
1861	memory_object_size_t	*size,
1862	memory_object_offset_t offset,
1863	vm_prot_t		permission,
1864	ipc_port_t		*object_handle,
1865	ipc_port_t		parent_handle)
1866{
1867	vm_map_version_t	version;
1868	vm_named_entry_t	parent_entry;
1869	vm_named_entry_t	user_entry;
1870	ipc_port_t		user_handle;
1871	kern_return_t		kr;
1872	vm_map_t		real_map;
1873
1874	/* needed for call to vm_map_lookup_locked */
1875	boolean_t		wired;
1876	vm_object_offset_t	obj_off;
1877	vm_prot_t		prot;
1878	struct vm_object_fault_info	fault_info;
1879	vm_object_t		object;
1880	vm_object_t		shadow_object;
1881
1882	/* needed for direct map entry manipulation */
1883	vm_map_entry_t		map_entry;
1884	vm_map_entry_t		next_entry;
1885	vm_map_t		local_map;
1886	vm_map_t		original_map = target_map;
1887	vm_map_size_t		total_size;
1888	vm_map_size_t		map_size;
1889	vm_map_offset_t		map_offset;
1890	vm_map_offset_t		local_offset;
1891	vm_object_size_t	mappable_size;
1892
1893	/*
1894	 * Stash the offset in the page for use by vm_map_enter_mem_object()
1895	 * in the VM_FLAGS_RETURN_DATA_ADDR/MAP_MEM_USE_DATA_ADDR case.
1896	 */
1897	vm_object_offset_t	offset_in_page;
1898
1899	unsigned int		access;
1900	vm_prot_t		protections;
1901	vm_prot_t		original_protections, mask_protections;
1902	unsigned int		wimg_mode;
1903
1904	boolean_t		force_shadow = FALSE;
1905	boolean_t 		use_data_addr;
1906
1907	if (((permission & 0x00FF0000) &
1908	     ~(MAP_MEM_ONLY |
1909	       MAP_MEM_NAMED_CREATE |
1910	       MAP_MEM_PURGABLE |
1911	       MAP_MEM_NAMED_REUSE |
1912	       MAP_MEM_USE_DATA_ADDR |
1913	       MAP_MEM_VM_COPY |
1914	       MAP_MEM_VM_SHARE))) {
1915		/*
1916		 * Unknown flag: reject for forward compatibility.
1917		 */
1918		return KERN_INVALID_VALUE;
1919	}
1920
1921	if (parent_handle != IP_NULL &&
1922	    ip_kotype(parent_handle) == IKOT_NAMED_ENTRY) {
1923		parent_entry = (vm_named_entry_t) parent_handle->ip_kobject;
1924	} else {
1925		parent_entry = NULL;
1926	}
1927
1928	if (parent_entry && parent_entry->is_copy) {
1929		return KERN_INVALID_ARGUMENT;
1930	}
1931
1932	original_protections = permission & VM_PROT_ALL;
1933	protections = original_protections;
1934	mask_protections = permission & VM_PROT_IS_MASK;
1935	access = GET_MAP_MEM(permission);
1936	use_data_addr = ((permission & MAP_MEM_USE_DATA_ADDR) != 0);
1937
1938	user_handle = IP_NULL;
1939	user_entry = NULL;
1940
1941	map_offset = vm_map_trunc_page(offset, PAGE_MASK);
1942
1943	if (permission & MAP_MEM_ONLY) {
1944		boolean_t		parent_is_object;
1945
1946		map_size = vm_map_round_page(*size, PAGE_MASK);
1947
1948		if (use_data_addr || parent_entry == NULL) {
1949			return KERN_INVALID_ARGUMENT;
1950		}
1951
1952		parent_is_object = !(parent_entry->is_sub_map ||
1953				     parent_entry->is_pager);
1954		object = parent_entry->backing.object;
1955		if(parent_is_object && object != VM_OBJECT_NULL)
1956			wimg_mode = object->wimg_bits;
1957		else
1958			wimg_mode = VM_WIMG_USE_DEFAULT;
1959		if((access != GET_MAP_MEM(parent_entry->protection)) &&
1960				!(parent_entry->protection & VM_PROT_WRITE)) {
1961			return KERN_INVALID_RIGHT;
1962		}
1963		if(access == MAP_MEM_IO) {
1964		   SET_MAP_MEM(access, parent_entry->protection);
1965		   wimg_mode = VM_WIMG_IO;
1966		} else if (access == MAP_MEM_COPYBACK) {
1967		   SET_MAP_MEM(access, parent_entry->protection);
1968		   wimg_mode = VM_WIMG_USE_DEFAULT;
1969		} else if (access == MAP_MEM_INNERWBACK) {
1970		   SET_MAP_MEM(access, parent_entry->protection);
1971		   wimg_mode = VM_WIMG_INNERWBACK;
1972		} else if (access == MAP_MEM_WTHRU) {
1973		   SET_MAP_MEM(access, parent_entry->protection);
1974		   wimg_mode = VM_WIMG_WTHRU;
1975		} else if (access == MAP_MEM_WCOMB) {
1976		   SET_MAP_MEM(access, parent_entry->protection);
1977		   wimg_mode = VM_WIMG_WCOMB;
1978		}
1979		if (parent_is_object && object &&
1980			(access != MAP_MEM_NOOP) &&
1981			(!(object->nophyscache))) {
1982
1983			if (object->wimg_bits != wimg_mode) {
1984				vm_object_lock(object);
1985				vm_object_change_wimg_mode(object, wimg_mode);
1986				vm_object_unlock(object);
1987			}
1988		}
1989		if (object_handle)
1990			*object_handle = IP_NULL;
1991		return KERN_SUCCESS;
1992	} else if (permission & MAP_MEM_NAMED_CREATE) {
1993		map_size = vm_map_round_page(*size, PAGE_MASK);
1994
1995		if (use_data_addr) {
1996			return KERN_INVALID_ARGUMENT;
1997		}
1998
1999		kr = mach_memory_entry_allocate(&user_entry, &user_handle);
2000		if (kr != KERN_SUCCESS) {
2001			return KERN_FAILURE;
2002		}
2003
2004		/*
2005		 * Force the creation of the VM object now.
2006		 */
2007		if (map_size > (vm_map_size_t) ANON_MAX_SIZE) {
2008			/*
2009			 * LP64todo - for now, we can only allocate 4GB-4096
2010			 * internal objects because the default pager can't
2011			 * page bigger ones.  Remove this when it can.
2012			 */
2013			kr = KERN_FAILURE;
2014			goto make_mem_done;
2015		}
2016
2017		object = vm_object_allocate(map_size);
2018		assert(object != VM_OBJECT_NULL);
2019
2020		if (permission & MAP_MEM_PURGABLE) {
2021			if (! (permission & VM_PROT_WRITE)) {
2022				/* if we can't write, we can't purge */
2023				vm_object_deallocate(object);
2024				kr = KERN_INVALID_ARGUMENT;
2025				goto make_mem_done;
2026			}
2027			object->purgable = VM_PURGABLE_NONVOLATILE;
2028		}
2029
2030		/*
2031		 * The VM object is brand new and nobody else knows about it,
2032		 * so we don't need to lock it.
2033		 */
2034
2035		wimg_mode = object->wimg_bits;
2036		if (access == MAP_MEM_IO) {
2037			wimg_mode = VM_WIMG_IO;
2038		} else if (access == MAP_MEM_COPYBACK) {
2039			wimg_mode = VM_WIMG_USE_DEFAULT;
2040		} else if (access == MAP_MEM_INNERWBACK) {
2041			wimg_mode = VM_WIMG_INNERWBACK;
2042		} else if (access == MAP_MEM_WTHRU) {
2043			wimg_mode = VM_WIMG_WTHRU;
2044		} else if (access == MAP_MEM_WCOMB) {
2045			wimg_mode = VM_WIMG_WCOMB;
2046		}
2047		if (access != MAP_MEM_NOOP) {
2048			object->wimg_bits = wimg_mode;
2049		}
2050		/* the object has no pages, so no WIMG bits to update here */
2051
2052		/*
2053		 * XXX
2054		 * We use this path when we want to make sure that
2055		 * nobody messes with the object (coalesce, for
2056		 * example) before we map it.
2057		 * We might want to use these objects for transposition via
2058		 * vm_object_transpose() too, so we don't want any copy or
2059		 * shadow objects either...
2060		 */
2061		object->copy_strategy = MEMORY_OBJECT_COPY_NONE;
2062
2063		user_entry->backing.object = object;
2064		user_entry->internal = TRUE;
2065		user_entry->is_sub_map = FALSE;
2066		user_entry->is_pager = FALSE;
2067		user_entry->offset = 0;
2068		user_entry->data_offset = 0;
2069		user_entry->protection = protections;
2070		SET_MAP_MEM(access, user_entry->protection);
2071		user_entry->size = map_size;
2072
2073		/* user_object pager and internal fields are not used */
2074		/* when the object field is filled in.		      */
2075
2076		*size = CAST_DOWN(vm_size_t, map_size);
2077		*object_handle = user_handle;
2078		return KERN_SUCCESS;
2079	}
2080
2081	if (permission & MAP_MEM_VM_COPY) {
2082		vm_map_copy_t	copy;
2083
2084		if (target_map == VM_MAP_NULL) {
2085			return KERN_INVALID_TASK;
2086		}
2087
2088		if (use_data_addr) {
2089			map_size = (vm_map_round_page(offset + *size,
2090						      PAGE_MASK) -
2091				    map_offset);
2092			offset_in_page = offset - map_offset;
2093		} else {
2094			map_size = vm_map_round_page(*size, PAGE_MASK);
2095			offset_in_page = 0;
2096		}
2097
2098		kr = vm_map_copyin(target_map,
2099				   map_offset,
2100				   map_size,
2101				   FALSE,
2102				   &copy);
2103		if (kr != KERN_SUCCESS) {
2104			return kr;
2105		}
2106
2107		kr = mach_memory_entry_allocate(&user_entry, &user_handle);
2108		if (kr != KERN_SUCCESS) {
2109			vm_map_copy_discard(copy);
2110			return KERN_FAILURE;
2111		}
2112
2113		user_entry->backing.copy = copy;
2114		user_entry->internal = FALSE;
2115		user_entry->is_sub_map = FALSE;
2116		user_entry->is_pager = FALSE;
2117		user_entry->is_copy = TRUE;
2118		user_entry->offset = 0;
2119		user_entry->protection = protections;
2120		user_entry->size = map_size;
2121		user_entry->data_offset = offset_in_page;
2122
2123		*size = CAST_DOWN(vm_size_t, map_size);
2124		*object_handle = user_handle;
2125		return KERN_SUCCESS;
2126	}
2127
2128	if (permission & MAP_MEM_VM_SHARE) {
2129		vm_map_copy_t	copy;
2130		vm_prot_t	cur_prot, max_prot;
2131
2132		if (target_map == VM_MAP_NULL) {
2133			return KERN_INVALID_TASK;
2134		}
2135
2136		if (use_data_addr) {
2137			map_size = (vm_map_round_page(offset + *size,
2138						      PAGE_MASK) -
2139				    map_offset);
2140			offset_in_page = offset - map_offset;
2141		} else {
2142			map_size = vm_map_round_page(*size, PAGE_MASK);
2143			offset_in_page = 0;
2144		}
2145
2146		kr = vm_map_copy_extract(target_map,
2147					 map_offset,
2148					 map_size,
2149					 &copy,
2150					 &cur_prot,
2151					 &max_prot);
2152		if (kr != KERN_SUCCESS) {
2153			return kr;
2154		}
2155
2156		if (mask_protections) {
2157			/*
2158			 * We just want as much of "original_protections"
2159			 * as we can get out of the actual "cur_prot".
2160			 */
2161			protections &= cur_prot;
2162			if (protections == VM_PROT_NONE) {
2163				/* no access at all: fail */
2164				vm_map_copy_discard(copy);
2165				return KERN_PROTECTION_FAILURE;
2166			}
2167		} else {
2168			/*
2169			 * We want exactly "original_protections"
2170			 * out of "cur_prot".
2171			 */
2172			if ((cur_prot & protections) != protections) {
2173				vm_map_copy_discard(copy);
2174				return KERN_PROTECTION_FAILURE;
2175			}
2176		}
2177
2178		kr = mach_memory_entry_allocate(&user_entry, &user_handle);
2179		if (kr != KERN_SUCCESS) {
2180			vm_map_copy_discard(copy);
2181			return KERN_FAILURE;
2182		}
2183
2184		user_entry->backing.copy = copy;
2185		user_entry->internal = FALSE;
2186		user_entry->is_sub_map = FALSE;
2187		user_entry->is_pager = FALSE;
2188		user_entry->is_copy = TRUE;
2189		user_entry->offset = 0;
2190		user_entry->protection = protections;
2191		user_entry->size = map_size;
2192		user_entry->data_offset = offset_in_page;
2193
2194		*size = CAST_DOWN(vm_size_t, map_size);
2195		*object_handle = user_handle;
2196		return KERN_SUCCESS;
2197	}
2198
2199	if (parent_entry == NULL ||
2200	    (permission & MAP_MEM_NAMED_REUSE)) {
2201
2202		if (use_data_addr) {
2203			map_size = vm_map_round_page(offset + *size, PAGE_MASK) - map_offset;
2204			offset_in_page = offset - map_offset;
2205		} else {
2206			map_size = vm_map_round_page(*size, PAGE_MASK);
2207			offset_in_page = 0;
2208		}
2209
2210		/* Create a named object based on address range within the task map */
2211		/* Go find the object at given address */
2212
2213		if (target_map == VM_MAP_NULL) {
2214			return KERN_INVALID_TASK;
2215		}
2216
2217redo_lookup:
2218		protections = original_protections;
2219		vm_map_lock_read(target_map);
2220
2221		/* get the object associated with the target address */
2222		/* note we check the permission of the range against */
2223		/* that requested by the caller */
2224
2225		kr = vm_map_lookup_locked(&target_map, map_offset,
2226					  protections | mask_protections,
2227					  OBJECT_LOCK_EXCLUSIVE, &version,
2228					  &object, &obj_off, &prot, &wired,
2229					  &fault_info,
2230					  &real_map);
2231		if (kr != KERN_SUCCESS) {
2232			vm_map_unlock_read(target_map);
2233			goto make_mem_done;
2234		}
2235		if (mask_protections) {
2236			/*
2237			 * The caller asked us to use the "protections" as
2238			 * a mask, so restrict "protections" to what this
2239			 * mapping actually allows.
2240			 */
2241			protections &= prot;
2242		}
2243		if (((prot & protections) != protections)
2244					|| (object == kernel_object)) {
2245			kr = KERN_INVALID_RIGHT;
2246			vm_object_unlock(object);
2247			vm_map_unlock_read(target_map);
2248			if(real_map != target_map)
2249				vm_map_unlock_read(real_map);
2250			if(object == kernel_object) {
2251				printf("Warning: Attempt to create a named"
2252					" entry from the kernel_object\n");
2253			}
2254			goto make_mem_done;
2255		}
2256
2257		/* We have an object, now check to see if this object */
2258		/* is suitable.  If not, create a shadow and share that */
2259
2260		/*
		 * We have to unlock the VM object to avoid deadlocking with
		 * a VM map lock (the lock ordering is map, then object) if we
		 * need to modify the VM map to create a shadow object.  Since
		 * we might have to release the VM map lock below anyway, we
		 * release the VM object lock now.
2266		 * XXX FBDP There must be a way to avoid this double lookup...
2267		 *
2268		 * Take an extra reference on the VM object to make sure it's
2269		 * not going to disappear.
2270		 */
2271		vm_object_reference_locked(object); /* extra ref to hold obj */
2272		vm_object_unlock(object);
2273
2274		local_map = original_map;
2275		local_offset = map_offset;
2276		if(target_map != local_map) {
2277			vm_map_unlock_read(target_map);
2278			if(real_map != target_map)
2279				vm_map_unlock_read(real_map);
2280			vm_map_lock_read(local_map);
2281			target_map = local_map;
2282			real_map = local_map;
2283		}
		while(TRUE) {
			if(!vm_map_lookup_entry(local_map,
						local_offset, &map_entry)) {
				kr = KERN_INVALID_ARGUMENT;
				vm_map_unlock_read(target_map);
				if(real_map != target_map)
					vm_map_unlock_read(real_map);
				vm_object_deallocate(object); /* release extra ref */
				object = VM_OBJECT_NULL;
				goto make_mem_done;
			}
			if(!(map_entry->is_sub_map)) {
				if(map_entry->object.vm_object != object) {
					kr = KERN_INVALID_ARGUMENT;
					vm_map_unlock_read(target_map);
					if(real_map != target_map)
						vm_map_unlock_read(real_map);
					vm_object_deallocate(object); /* release extra ref */
					object = VM_OBJECT_NULL;
					goto make_mem_done;
				}
				break;
			} else {
				vm_map_t	tmap;
				tmap = local_map;
				local_map = map_entry->object.sub_map;

				vm_map_lock_read(local_map);
				vm_map_unlock_read(tmap);
				target_map = local_map;
				real_map = local_map;
				local_offset = local_offset - map_entry->vme_start;
				local_offset += map_entry->offset;
			}
		}
2319
2320		/*
2321		 * We found the VM map entry, lock the VM object again.
2322		 */
2323		vm_object_lock(object);
		if(map_entry->wired_count) {
			/* JMM - The check below should be reworked instead. */
			object->true_share = TRUE;
		}
2328		if (mask_protections) {
2329			/*
2330			 * The caller asked us to use the "protections" as
2331			 * a mask, so restrict "protections" to what this
2332			 * mapping actually allows.
2333			 */
2334			protections &= map_entry->max_protection;
2335		}
		if(((map_entry->max_protection) & protections) != protections) {
			kr = KERN_INVALID_RIGHT;
			vm_object_unlock(object);
			vm_map_unlock_read(target_map);
			if(real_map != target_map)
				vm_map_unlock_read(real_map);
			vm_object_deallocate(object);
			object = VM_OBJECT_NULL;
			goto make_mem_done;
		}
2346
2347		mappable_size = fault_info.hi_offset - obj_off;
2348		total_size = map_entry->vme_end - map_entry->vme_start;
2349		if(map_size > mappable_size) {
2350			/* try to extend mappable size if the entries */
2351			/* following are from the same object and are */
2352			/* compatible */
2353			next_entry = map_entry->vme_next;
			/* let's see if the next map entry is still  */
2355			/* pointing at this object and is contiguous */
2356			while(map_size > mappable_size) {
2357				if((next_entry->object.vm_object == object) &&
2358					(next_entry->vme_start ==
2359						next_entry->vme_prev->vme_end) &&
2360					(next_entry->offset ==
2361					   next_entry->vme_prev->offset +
2362					   (next_entry->vme_prev->vme_end -
2363				 	   next_entry->vme_prev->vme_start))) {
2364					if (mask_protections) {
2365						/*
2366						 * The caller asked us to use
2367						 * the "protections" as a mask,
2368						 * so restrict "protections" to
2369						 * what this mapping actually
2370						 * allows.
2371						 */
2372						protections &= next_entry->max_protection;
2373					}
2374					if ((next_entry->wired_count) &&
2375					    (map_entry->wired_count == 0)) {
2376						break;
2377					}
2378					if(((next_entry->max_protection)
2379						& protections) != protections) {
2380			 			break;
2381					}
2382					if (next_entry->needs_copy !=
2383					    map_entry->needs_copy)
2384						break;
2385					mappable_size += next_entry->vme_end
2386						- next_entry->vme_start;
2387					total_size += next_entry->vme_end
2388						- next_entry->vme_start;
2389					next_entry = next_entry->vme_next;
2390				} else {
2391					break;
2392				}
2393
2394			}
2395		}
2396
2397		if (vm_map_entry_should_cow_for_true_share(map_entry) &&
2398		    object->vo_size > map_size &&
2399		    map_size != 0) {
2400			/*
2401			 * Set up the targeted range for copy-on-write to
2402			 * limit the impact of "true_share"/"copy_delay" to
2403			 * that range instead of the entire VM object...
2404			 */
2405
2406			vm_object_unlock(object);
2407			if (vm_map_lock_read_to_write(target_map)) {
2408				vm_object_deallocate(object);
2409				target_map = original_map;
2410				goto redo_lookup;
2411			}
2412
2413			vm_map_clip_start(target_map,
2414					  map_entry,
2415					  vm_map_trunc_page(offset,
2416							    VM_MAP_PAGE_MASK(target_map)));
2417			vm_map_clip_end(target_map,
2418					map_entry,
2419					(vm_map_round_page(offset,
2420							  VM_MAP_PAGE_MASK(target_map))
2421					 + map_size));
2422			force_shadow = TRUE;
2423
2424			map_size = map_entry->vme_end - map_entry->vme_start;
2425			total_size = map_size;
2426
2427			vm_map_lock_write_to_read(target_map);
2428			vm_object_lock(object);
2429		}
2430
2431		if (object->internal) {
			/* vm_map_lookup_locked will create a shadow if   */
			/* needs_copy is set but does not check for the   */
			/* other two conditions shown. It is important to */
			/* set up an object which will not be pulled from */
			/* under us.  */
2437
			if (force_shadow ||
2439			    ((map_entry->needs_copy  ||
2440			      object->shadowed ||
2441			      (object->vo_size > total_size &&
2442			       (map_entry->offset != 0 ||
2443				object->vo_size >
2444				vm_map_round_page(total_size,
2445						  VM_MAP_PAGE_MASK(target_map)))))
2446			     && !object->true_share)) {
2447				/*
2448				 * We have to unlock the VM object before
2449				 * trying to upgrade the VM map lock, to
2450				 * honor lock ordering (map then object).
2451				 * Otherwise, we would deadlock if another
2452				 * thread holds a read lock on the VM map and
2453				 * is trying to acquire the VM object's lock.
2454				 * We still hold an extra reference on the
2455				 * VM object, guaranteeing that it won't
2456				 * disappear.
2457				 */
2458				vm_object_unlock(object);
2459
				if (vm_map_lock_read_to_write(target_map)) {
					/*
					 * We couldn't upgrade our VM map lock
					 * from "read" to "write" and we lost
					 * our "read" lock.
					 * Start all over again...
					 */
					vm_object_deallocate(object); /* extra ref */
					target_map = original_map;
					goto redo_lookup;
				}
2471				vm_object_lock(object);
2472
2473				/*
2474				 * JMM - We need to avoid coming here when the object
2475				 * is wired by anybody, not just the current map.  Why
2476				 * couldn't we use the standard vm_object_copy_quickly()
2477				 * approach here?
2478				 */
2479
				/* create a shadow object */
2481				vm_object_shadow(&map_entry->object.vm_object,
2482						 &map_entry->offset, total_size);
2483				shadow_object = map_entry->object.vm_object;
2484				vm_object_unlock(object);
2485
2486				prot = map_entry->protection & ~VM_PROT_WRITE;
2487
2488				if (override_nx(target_map, map_entry->alias) && prot)
2489				        prot |= VM_PROT_EXECUTE;
2490
2491				vm_object_pmap_protect(
2492					object, map_entry->offset,
2493					total_size,
2494					((map_entry->is_shared
2495					  || target_map->mapped_in_other_pmaps)
2496							? PMAP_NULL :
2497							target_map->pmap),
2498					map_entry->vme_start,
2499					prot);
2500				total_size -= (map_entry->vme_end
2501						- map_entry->vme_start);
2502				next_entry = map_entry->vme_next;
2503				map_entry->needs_copy = FALSE;
2504
2505				vm_object_lock(shadow_object);
				while (total_size) {
					assert((next_entry->wired_count == 0) ||
					       (map_entry->wired_count));

					if(next_entry->object.vm_object == object) {
						vm_object_reference_locked(shadow_object);
						next_entry->object.vm_object
							= shadow_object;
						vm_object_deallocate(object);
						next_entry->offset
							= next_entry->vme_prev->offset +
							  (next_entry->vme_prev->vme_end
							   - next_entry->vme_prev->vme_start);
						next_entry->needs_copy = FALSE;
					} else {
						panic("mach_make_memory_entry_64:"
						      " map entries out of sync\n");
					}
					total_size -= next_entry->vme_end
						      - next_entry->vme_start;
					next_entry = next_entry->vme_next;
				}
2529
2530				/*
2531				 * Transfer our extra reference to the
2532				 * shadow object.
2533				 */
2534				vm_object_reference_locked(shadow_object);
2535				vm_object_deallocate(object); /* extra ref */
2536				object = shadow_object;
2537
2538				obj_off = (local_offset - map_entry->vme_start)
2539							 + map_entry->offset;
2540
2541				vm_map_lock_write_to_read(target_map);
2542	        	}
2543	   	}
2544
		/*
		 * Note: in the future we could (if necessary) allow for
		 * memory object lists; this would better support
		 * fragmentation, but is it necessary?  The user should be
		 * encouraged to create address-space-oriented shared objects
		 * from CLEAN memory regions with a known and defined history,
		 * i.e. no inheritance sharing, and to make this call before
		 * making the region the target of IPCs, etc.  The code above,
		 * protecting against delayed copy and the like, is mostly
		 * defensive.
		 */
2554
2555		wimg_mode = object->wimg_bits;
2556		if(!(object->nophyscache)) {
2557			if(access == MAP_MEM_IO) {
2558				wimg_mode = VM_WIMG_IO;
2559			} else if (access == MAP_MEM_COPYBACK) {
2560				wimg_mode = VM_WIMG_USE_DEFAULT;
2561			} else if (access == MAP_MEM_INNERWBACK) {
2562				wimg_mode = VM_WIMG_INNERWBACK;
2563			} else if (access == MAP_MEM_WTHRU) {
2564				wimg_mode = VM_WIMG_WTHRU;
2565			} else if (access == MAP_MEM_WCOMB) {
2566				wimg_mode = VM_WIMG_WCOMB;
2567			}
2568		}
2569
2570		object->true_share = TRUE;
2571		if (object->copy_strategy == MEMORY_OBJECT_COPY_SYMMETRIC)
2572			object->copy_strategy = MEMORY_OBJECT_COPY_DELAY;
2573
2574		/*
2575		 * The memory entry now points to this VM object and we
2576		 * need to hold a reference on the VM object.  Use the extra
2577		 * reference we took earlier to keep the object alive when we
2578		 * had to unlock it.
2579		 */
2580
2581		vm_map_unlock_read(target_map);
2582		if(real_map != target_map)
2583			vm_map_unlock_read(real_map);
2584
2585		if (object->wimg_bits != wimg_mode)
2586			vm_object_change_wimg_mode(object, wimg_mode);
2587
		/*
		 * "mappable_size" is the size of the part of the mapped entry
		 * that overlaps the region targeted for sharing:
		 * (entry_end - entry_start) minus the offset of our starting
		 * address within the entry.  Clamp map_size to it:
		 */
2593
2594		if(map_size > mappable_size)
2595			map_size = mappable_size;
2596
2597		if (permission & MAP_MEM_NAMED_REUSE) {
2598			/*
2599			 * Compare what we got with the "parent_entry".
2600			 * If they match, re-use the "parent_entry" instead
2601			 * of creating a new one.
2602			 */
2603			if (parent_entry != NULL &&
2604			    parent_entry->backing.object == object &&
2605			    parent_entry->internal == object->internal &&
2606			    parent_entry->is_sub_map == FALSE &&
2607			    parent_entry->is_pager == FALSE &&
2608			    parent_entry->offset == obj_off &&
2609			    parent_entry->protection == protections &&
2610			    parent_entry->size == map_size &&
2611			    ((!use_data_addr && (parent_entry->data_offset == 0)) ||
2612			     (use_data_addr && (parent_entry->data_offset == offset_in_page)))) {
2613				/*
2614				 * We have a match: re-use "parent_entry".
2615				 */
2616				/* release our extra reference on object */
2617				vm_object_unlock(object);
2618				vm_object_deallocate(object);
2619				/* parent_entry->ref_count++; XXX ? */
2620				/* Get an extra send-right on handle */
2621				ipc_port_copy_send(parent_handle);
2622				*object_handle = parent_handle;
2623				return KERN_SUCCESS;
2624			} else {
2625				/*
2626				 * No match: we need to create a new entry.
2627				 * fall through...
2628				 */
2629			}
2630		}
2631
2632		vm_object_unlock(object);
2633		if (mach_memory_entry_allocate(&user_entry, &user_handle)
2634		    != KERN_SUCCESS) {
2635			/* release our unused reference on the object */
2636			vm_object_deallocate(object);
2637			return KERN_FAILURE;
2638		}
2639
2640		user_entry->backing.object = object;
2641		user_entry->internal = object->internal;
2642		user_entry->is_sub_map = FALSE;
2643		user_entry->is_pager = FALSE;
2644		user_entry->offset = obj_off;
2645		user_entry->data_offset = offset_in_page;
2646		user_entry->protection = protections;
2647		SET_MAP_MEM(GET_MAP_MEM(permission), user_entry->protection);
2648		user_entry->size = map_size;
2649
2650		/* user_object pager and internal fields are not used */
2651		/* when the object field is filled in.		      */
2652
2653		*size = CAST_DOWN(vm_size_t, map_size);
2654		*object_handle = user_handle;
2655		return KERN_SUCCESS;
2656
2657	} else {
		/* The new object will be based on an existing named object */
2659		if (parent_entry == NULL) {
2660			kr = KERN_INVALID_ARGUMENT;
2661			goto make_mem_done;
2662		}
2663
2664		if (use_data_addr) {
2665			/*
2666			 * submaps and pagers should only be accessible from within
2667			 * the kernel, which shouldn't use the data address flag, so can fail here.
2668			 */
2669			if (parent_entry->is_pager || parent_entry->is_sub_map) {
2670				panic("Shouldn't be using data address with a parent entry that is a submap or pager.");
2671			}
2672			/*
2673			 * Account for offset to data in parent entry and
2674			 * compute our own offset to data.
2675			 */
2676			if((offset + *size + parent_entry->data_offset) > parent_entry->size) {
2677				kr = KERN_INVALID_ARGUMENT;
2678				goto make_mem_done;
2679			}
2680
2681			map_offset = vm_map_trunc_page(offset + parent_entry->data_offset, PAGE_MASK);
2682			offset_in_page = (offset + parent_entry->data_offset) - map_offset;
2683			map_size = vm_map_round_page(offset + parent_entry->data_offset + *size, PAGE_MASK) - map_offset;
2684		} else {
2685			map_size = vm_map_round_page(*size, PAGE_MASK);
2686			offset_in_page = 0;
2687
2688			if((offset + map_size) > parent_entry->size) {
2689				kr = KERN_INVALID_ARGUMENT;
2690				goto make_mem_done;
2691			}
2692		}
2693
2694		if (mask_protections) {
2695			/*
2696			 * The caller asked us to use the "protections" as
2697			 * a mask, so restrict "protections" to what this
2698			 * mapping actually allows.
2699			 */
2700			protections &= parent_entry->protection;
2701		}
2702		if((protections & parent_entry->protection) != protections) {
2703			kr = KERN_PROTECTION_FAILURE;
2704			goto make_mem_done;
2705		}
2706
2707		if (mach_memory_entry_allocate(&user_entry, &user_handle)
2708		    != KERN_SUCCESS) {
2709			kr = KERN_FAILURE;
2710			goto make_mem_done;
2711		}
2712
2713		user_entry->size = map_size;
2714		user_entry->offset = parent_entry->offset + map_offset;
2715		user_entry->data_offset = offset_in_page;
2716		user_entry->is_sub_map = parent_entry->is_sub_map;
2717		user_entry->is_pager = parent_entry->is_pager;
2718		user_entry->is_copy = parent_entry->is_copy;
2719		user_entry->internal = parent_entry->internal;
2720		user_entry->protection = protections;
2721
2722		if(access != MAP_MEM_NOOP) {
2723		   SET_MAP_MEM(access, user_entry->protection);
2724		}
2725
2726		if(parent_entry->is_sub_map) {
2727		   user_entry->backing.map = parent_entry->backing.map;
2728		   vm_map_lock(user_entry->backing.map);
2729		   user_entry->backing.map->ref_count++;
2730		   vm_map_unlock(user_entry->backing.map);
2731		}
2732		else if (parent_entry->is_pager) {
2733		   user_entry->backing.pager = parent_entry->backing.pager;
2734		   /* JMM - don't we need a reference here? */
2735		} else {
2736		   object = parent_entry->backing.object;
2737		   assert(object != VM_OBJECT_NULL);
2738		   user_entry->backing.object = object;
2739		   /* we now point to this object, hold on */
2740		   vm_object_reference(object);
2741		   vm_object_lock(object);
2742		   object->true_share = TRUE;
2743		   if (object->copy_strategy == MEMORY_OBJECT_COPY_SYMMETRIC)
2744			object->copy_strategy = MEMORY_OBJECT_COPY_DELAY;
2745		   vm_object_unlock(object);
2746		}
2747		*size = CAST_DOWN(vm_size_t, map_size);
2748		*object_handle = user_handle;
2749		return KERN_SUCCESS;
2750	}
2751
2752make_mem_done:
2753	if (user_handle != IP_NULL) {
2754		/*
2755		 * Releasing "user_handle" causes the kernel object
2756		 * associated with it ("user_entry" here) to also be
2757		 * released and freed.
2758		 */
2759		mach_memory_entry_port_release(user_handle);
2760	}
2761	return kr;
2762}
2763
2764kern_return_t
2765_mach_make_memory_entry(
2766	vm_map_t		target_map,
2767	memory_object_size_t	*size,
2768	memory_object_offset_t	offset,
2769	vm_prot_t		permission,
2770	ipc_port_t		*object_handle,
2771	ipc_port_t		parent_entry)
2772{
2773	memory_object_size_t 	mo_size;
2774	kern_return_t		kr;
2775
2776	mo_size = (memory_object_size_t)*size;
2777	kr = mach_make_memory_entry_64(target_map, &mo_size,
2778			(memory_object_offset_t)offset, permission, object_handle,
2779			parent_entry);
2780	*size = mo_size;
2781	return kr;
2782}
2783
2784kern_return_t
2785mach_make_memory_entry(
2786	vm_map_t		target_map,
2787	vm_size_t		*size,
2788	vm_offset_t		offset,
2789	vm_prot_t		permission,
2790	ipc_port_t		*object_handle,
2791	ipc_port_t		parent_entry)
2792{
2793	memory_object_size_t 	mo_size;
2794	kern_return_t		kr;
2795
2796	mo_size = (memory_object_size_t)*size;
2797	kr = mach_make_memory_entry_64(target_map, &mo_size,
2798			(memory_object_offset_t)offset, permission, object_handle,
2799			parent_entry);
2800	*size = CAST_DOWN(vm_size_t, mo_size);
2801	return kr;
2802}
2803
2804/*
2805 *	task_wire
2806 *
2807 *	Set or clear the map's wiring_required flag.  This flag, if set,
2808 *	will cause all future virtual memory allocation to allocate
2809 *	user wired memory.  Unwiring pages wired down as a result of
2810 *	this routine is done with the vm_wire interface.
2811 */
2812kern_return_t
2813task_wire(
2814	vm_map_t	map,
2815	boolean_t	must_wire)
2816{
2817	if (map == VM_MAP_NULL)
2818		return(KERN_INVALID_ARGUMENT);
2819
2820	if (must_wire)
2821		map->wiring_required = TRUE;
2822	else
2823		map->wiring_required = FALSE;
2824
2825	return(KERN_SUCCESS);
2826}
2827
2828__private_extern__ kern_return_t
2829mach_memory_entry_allocate(
2830	vm_named_entry_t	*user_entry_p,
2831	ipc_port_t		*user_handle_p)
2832{
2833	vm_named_entry_t	user_entry;
2834	ipc_port_t		user_handle;
2835	ipc_port_t		previous;
2836
2837	user_entry = (vm_named_entry_t) kalloc(sizeof *user_entry);
2838	if (user_entry == NULL)
2839		return KERN_FAILURE;
2840
2841	named_entry_lock_init(user_entry);
2842
2843	user_handle = ipc_port_alloc_kernel();
2844	if (user_handle == IP_NULL) {
2845		kfree(user_entry, sizeof *user_entry);
2846		return KERN_FAILURE;
2847	}
2848	ip_lock(user_handle);
2849
2850	/* make a sonce right */
2851	user_handle->ip_sorights++;
2852	ip_reference(user_handle);
2853
2854	user_handle->ip_destination = IP_NULL;
2855	user_handle->ip_receiver_name = MACH_PORT_NULL;
2856	user_handle->ip_receiver = ipc_space_kernel;
2857
2858	/* make a send right */
	user_handle->ip_mscount++;
	user_handle->ip_srights++;
	ip_reference(user_handle);
2862
2863	ipc_port_nsrequest(user_handle, 1, user_handle, &previous);
2864	/* nsrequest unlocks user_handle */
2865
2866	user_entry->backing.pager = NULL;
2867	user_entry->is_sub_map = FALSE;
2868	user_entry->is_pager = FALSE;
2869	user_entry->is_copy = FALSE;
2870	user_entry->internal = FALSE;
2871	user_entry->size = 0;
2872	user_entry->offset = 0;
2873	user_entry->data_offset = 0;
2874	user_entry->protection = VM_PROT_NONE;
2875	user_entry->ref_count = 1;
2876
2877	ipc_kobject_set(user_handle, (ipc_kobject_t) user_entry,
2878			IKOT_NAMED_ENTRY);
2879
2880	*user_entry_p = user_entry;
2881	*user_handle_p = user_handle;
2882
2883	return KERN_SUCCESS;
2884}
2885
2886/*
2887 *	mach_memory_object_memory_entry_64
2888 *
2889 *	Create a named entry backed by the provided pager.
2890 *
2891 *	JMM - we need to hold a reference on the pager -
2892 *	and release it when the named entry is destroyed.
2893 */
2894kern_return_t
2895mach_memory_object_memory_entry_64(
2896	host_t			host,
2897	boolean_t		internal,
2898	vm_object_offset_t	size,
2899	vm_prot_t		permission,
2900 	memory_object_t		pager,
2901	ipc_port_t		*entry_handle)
2902{
2903	unsigned int		access;
2904	vm_named_entry_t	user_entry;
2905	ipc_port_t		user_handle;
2906
	if (host == HOST_NULL)
		return(KERN_INVALID_HOST);
2909
2910	if (mach_memory_entry_allocate(&user_entry, &user_handle)
2911	    != KERN_SUCCESS) {
2912		return KERN_FAILURE;
2913	}
2914
2915	user_entry->backing.pager = pager;
2916	user_entry->size = size;
2917	user_entry->offset = 0;
2918	user_entry->protection = permission & VM_PROT_ALL;
2919	access = GET_MAP_MEM(permission);
2920	SET_MAP_MEM(access, user_entry->protection);
2921	user_entry->internal = internal;
2922	user_entry->is_sub_map = FALSE;
2923	user_entry->is_pager = TRUE;
2924	assert(user_entry->ref_count == 1);
2925
2926	*entry_handle = user_handle;
2927	return KERN_SUCCESS;
2928}
2929
2930kern_return_t
2931mach_memory_object_memory_entry(
2932	host_t		host,
2933	boolean_t	internal,
2934	vm_size_t	size,
2935	vm_prot_t	permission,
2936 	memory_object_t	pager,
2937	ipc_port_t	*entry_handle)
2938{
2939	return mach_memory_object_memory_entry_64( host, internal,
2940		(vm_object_offset_t)size, permission, pager, entry_handle);
2941}
2942
2943
2944kern_return_t
2945mach_memory_entry_purgable_control(
2946	ipc_port_t	entry_port,
2947	vm_purgable_t	control,
2948	int		*state)
2949{
2950	kern_return_t		kr;
2951	vm_named_entry_t	mem_entry;
2952	vm_object_t		object;
2953
2954	if (entry_port == IP_NULL ||
2955	    ip_kotype(entry_port) != IKOT_NAMED_ENTRY) {
2956		return KERN_INVALID_ARGUMENT;
2957	}
2958	if (control != VM_PURGABLE_SET_STATE &&
2959	    control != VM_PURGABLE_GET_STATE)
2960		return(KERN_INVALID_ARGUMENT);
2961
2962	if (control == VM_PURGABLE_SET_STATE &&
2963	    (((*state & ~(VM_PURGABLE_ALL_MASKS)) != 0) ||
2964	     ((*state & VM_PURGABLE_STATE_MASK) > VM_PURGABLE_STATE_MASK)))
2965		return(KERN_INVALID_ARGUMENT);
2966
2967	mem_entry = (vm_named_entry_t) entry_port->ip_kobject;
2968
2969	named_entry_lock(mem_entry);
2970
2971	if (mem_entry->is_sub_map ||
2972	    mem_entry->is_pager ||
2973	    mem_entry->is_copy) {
2974		named_entry_unlock(mem_entry);
2975		return KERN_INVALID_ARGUMENT;
2976	}
2977
2978	object = mem_entry->backing.object;
2979	if (object == VM_OBJECT_NULL) {
2980		named_entry_unlock(mem_entry);
2981		return KERN_INVALID_ARGUMENT;
2982	}
2983
2984	vm_object_lock(object);
2985
2986	/* check that named entry covers entire object ? */
2987	if (mem_entry->offset != 0 || object->vo_size != mem_entry->size) {
2988		vm_object_unlock(object);
2989		named_entry_unlock(mem_entry);
2990		return KERN_INVALID_ARGUMENT;
2991	}
2992
2993	named_entry_unlock(mem_entry);
2994
2995	kr = vm_object_purgable_control(object, control, state);
2996
2997	vm_object_unlock(object);
2998
2999	return kr;
3000}
3001
3002kern_return_t
3003mach_memory_entry_get_page_counts(
3004	ipc_port_t	entry_port,
3005	unsigned int	*resident_page_count,
3006	unsigned int	*dirty_page_count)
3007{
3008	kern_return_t		kr;
3009	vm_named_entry_t	mem_entry;
3010	vm_object_t		object;
3011	vm_object_offset_t	offset;
3012	vm_object_size_t	size;
3013
3014	if (entry_port == IP_NULL ||
3015	    ip_kotype(entry_port) != IKOT_NAMED_ENTRY) {
3016		return KERN_INVALID_ARGUMENT;
3017	}
3018
3019	mem_entry = (vm_named_entry_t) entry_port->ip_kobject;
3020
3021	named_entry_lock(mem_entry);
3022
3023	if (mem_entry->is_sub_map ||
3024	    mem_entry->is_pager ||
3025	    mem_entry->is_copy) {
3026		named_entry_unlock(mem_entry);
3027		return KERN_INVALID_ARGUMENT;
3028	}
3029
3030	object = mem_entry->backing.object;
3031	if (object == VM_OBJECT_NULL) {
3032		named_entry_unlock(mem_entry);
3033		return KERN_INVALID_ARGUMENT;
3034	}
3035
3036	vm_object_lock(object);
3037
3038	offset = mem_entry->offset;
3039	size = mem_entry->size;
3040
3041	named_entry_unlock(mem_entry);
3042
3043	kr = vm_object_get_page_counts(object, offset, size, resident_page_count, dirty_page_count);
3044
3045	vm_object_unlock(object);
3046
3047	return kr;
3048}
3049
3050/*
3051 * mach_memory_entry_port_release:
3052 *
3053 * Release a send right on a named entry port.  This is the correct
3054 * way to destroy a named entry.  When the last right on the port is
3055 * released, ipc_kobject_destroy() will call mach_destroy_memory_entry().
3056 */
3057void
3058mach_memory_entry_port_release(
3059	ipc_port_t	port)
3060{
3061	assert(ip_kotype(port) == IKOT_NAMED_ENTRY);
3062	ipc_port_release_send(port);
3063}
3064
3065/*
3066 * mach_destroy_memory_entry:
3067 *
3068 * Drops a reference on a memory entry and destroys the memory entry if
3069 * there are no more references on it.
3070 * NOTE: This routine should not be called to destroy a memory entry from the
3071 * kernel, as it will not release the Mach port associated with the memory
3072 * entry.  The proper way to destroy a memory entry in the kernel is to
 * call mach_memory_entry_port_release() to release the kernel's send right on
3074 * the memory entry's port.  When the last send right is released, the memory
3075 * entry will be destroyed via ipc_kobject_destroy().
3076 */
3077void
3078mach_destroy_memory_entry(
3079	ipc_port_t	port)
3080{
3081	vm_named_entry_t	named_entry;
3082#if MACH_ASSERT
3083	assert(ip_kotype(port) == IKOT_NAMED_ENTRY);
3084#endif /* MACH_ASSERT */
3085	named_entry = (vm_named_entry_t)port->ip_kobject;
3086
3087	named_entry_lock(named_entry);
3088	named_entry->ref_count -= 1;
3089
3090	if(named_entry->ref_count == 0) {
3091		if (named_entry->is_sub_map) {
3092			vm_map_deallocate(named_entry->backing.map);
3093		} else if (named_entry->is_pager) {
3094			/* JMM - need to drop reference on pager in that case */
3095		} else if (named_entry->is_copy) {
3096			vm_map_copy_discard(named_entry->backing.copy);
3097		} else {
3098			/* release the VM object we've been pointing to */
3099			vm_object_deallocate(named_entry->backing.object);
3100		}
3101
3102		named_entry_unlock(named_entry);
3103		named_entry_lock_destroy(named_entry);
3104
3105		kfree((void *) port->ip_kobject,
3106		      sizeof (struct vm_named_entry));
3107	} else
3108		named_entry_unlock(named_entry);
3109}
3110
3111/* Allow manipulation of individual page state.  This is actually part of */
3112/* the UPL regimen but takes place on the memory entry rather than on a UPL */
3113
3114kern_return_t
3115mach_memory_entry_page_op(
3116	ipc_port_t		entry_port,
3117	vm_object_offset_t	offset,
3118	int			ops,
3119	ppnum_t			*phys_entry,
3120	int			*flags)
3121{
3122	vm_named_entry_t	mem_entry;
3123	vm_object_t		object;
3124	kern_return_t		kr;
3125
3126	if (entry_port == IP_NULL ||
3127	    ip_kotype(entry_port) != IKOT_NAMED_ENTRY) {
3128		return KERN_INVALID_ARGUMENT;
3129	}
3130
3131	mem_entry = (vm_named_entry_t) entry_port->ip_kobject;
3132
3133	named_entry_lock(mem_entry);
3134
3135	if (mem_entry->is_sub_map ||
3136	    mem_entry->is_pager ||
3137	    mem_entry->is_copy) {
3138		named_entry_unlock(mem_entry);
3139		return KERN_INVALID_ARGUMENT;
3140	}
3141
3142	object = mem_entry->backing.object;
3143	if (object == VM_OBJECT_NULL) {
3144		named_entry_unlock(mem_entry);
3145		return KERN_INVALID_ARGUMENT;
3146	}
3147
3148	vm_object_reference(object);
3149	named_entry_unlock(mem_entry);
3150
3151	kr = vm_object_page_op(object, offset, ops, phys_entry, flags);
3152
3153	vm_object_deallocate(object);
3154
3155	return kr;
3156}
3157
3158/*
3159 * mach_memory_entry_range_op offers performance enhancement over
3160 * mach_memory_entry_page_op for page_op functions which do not require page
3161 * level state to be returned from the call.  Page_op was created to provide
3162 * a low-cost alternative to page manipulation via UPLs when only a single
3163 * page was involved.  The range_op call establishes the ability in the _op
3164 * family of functions to work on multiple pages where the lack of page level
3165 * state handling allows the caller to avoid the overhead of the upl structures.
3166 */
3167
3168kern_return_t
3169mach_memory_entry_range_op(
3170	ipc_port_t		entry_port,
3171	vm_object_offset_t	offset_beg,
3172	vm_object_offset_t	offset_end,
3173	int                     ops,
3174	int                     *range)
3175{
3176	vm_named_entry_t	mem_entry;
3177	vm_object_t		object;
3178	kern_return_t		kr;
3179
3180	if (entry_port == IP_NULL ||
3181	    ip_kotype(entry_port) != IKOT_NAMED_ENTRY) {
3182		return KERN_INVALID_ARGUMENT;
3183	}
3184
3185	mem_entry = (vm_named_entry_t) entry_port->ip_kobject;
3186
3187	named_entry_lock(mem_entry);
3188
3189	if (mem_entry->is_sub_map ||
3190	    mem_entry->is_pager ||
3191	    mem_entry->is_copy) {
3192		named_entry_unlock(mem_entry);
3193		return KERN_INVALID_ARGUMENT;
3194	}
3195
3196	object = mem_entry->backing.object;
3197	if (object == VM_OBJECT_NULL) {
3198		named_entry_unlock(mem_entry);
3199		return KERN_INVALID_ARGUMENT;
3200	}
3201
3202	vm_object_reference(object);
3203	named_entry_unlock(mem_entry);
3204
3205	kr = vm_object_range_op(object,
3206				offset_beg,
3207				offset_end,
3208				ops,
3209				(uint32_t *) range);
3210
3211	vm_object_deallocate(object);
3212
3213	return kr;
3214}
3215
3216
3217kern_return_t
3218set_dp_control_port(
3219	host_priv_t	host_priv,
3220	ipc_port_t	control_port)
3221{
	if (host_priv == HOST_PRIV_NULL)
		return (KERN_INVALID_HOST);
3224
3225	if (IP_VALID(dynamic_pager_control_port))
3226		ipc_port_release_send(dynamic_pager_control_port);
3227
3228	dynamic_pager_control_port = control_port;
3229	return KERN_SUCCESS;
3230}
3231
3232kern_return_t
3233get_dp_control_port(
3234	host_priv_t	host_priv,
3235	ipc_port_t	*control_port)
3236{
	if (host_priv == HOST_PRIV_NULL)
		return (KERN_INVALID_HOST);
3239
3240	*control_port = ipc_port_copy_send(dynamic_pager_control_port);
3241	return KERN_SUCCESS;
3242
3243}
3244
3245/* ******* Temporary Internal calls to UPL for BSD ***** */
3246
3247extern int kernel_upl_map(
3248	vm_map_t        map,
3249	upl_t           upl,
3250	vm_offset_t     *dst_addr);
3251
3252extern int kernel_upl_unmap(
3253	vm_map_t        map,
3254	upl_t           upl);
3255
3256extern int kernel_upl_commit(
3257	upl_t                   upl,
3258	upl_page_info_t         *pl,
3259	mach_msg_type_number_t	 count);
3260
3261extern int kernel_upl_commit_range(
3262	upl_t                   upl,
3263	upl_offset_t             offset,
3264	upl_size_t		size,
3265	int			flags,
3266	upl_page_info_array_t	pl,
3267	mach_msg_type_number_t	count);
3268
3269extern int kernel_upl_abort(
3270	upl_t                   upl,
3271	int                     abort_type);
3272
3273extern int kernel_upl_abort_range(
3274	upl_t                   upl,
3275	upl_offset_t             offset,
3276	upl_size_t               size,
3277	int                     abort_flags);
3278
3279
3280kern_return_t
3281kernel_upl_map(
3282	vm_map_t	map,
3283	upl_t		upl,
3284	vm_offset_t	*dst_addr)
3285{
3286	return vm_upl_map(map, upl, dst_addr);
3287}
3288
3289
3290kern_return_t
3291kernel_upl_unmap(
3292	vm_map_t	map,
3293	upl_t		upl)
3294{
3295	return vm_upl_unmap(map, upl);
3296}
3297
3298kern_return_t
3299kernel_upl_commit(
3300	upl_t                   upl,
3301	upl_page_info_t        *pl,
3302	mach_msg_type_number_t  count)
3303{
3304	kern_return_t 	kr;
3305
3306	kr = upl_commit(upl, pl, count);
3307	upl_deallocate(upl);
3308	return kr;
3309}
3310
3311
3312kern_return_t
3313kernel_upl_commit_range(
3314	upl_t 			upl,
3315	upl_offset_t		offset,
3316	upl_size_t		size,
3317	int			flags,
3318	upl_page_info_array_t   pl,
3319	mach_msg_type_number_t  count)
3320{
3321	boolean_t		finished = FALSE;
3322	kern_return_t 		kr;
3323
3324	if (flags & UPL_COMMIT_FREE_ON_EMPTY)
3325		flags |= UPL_COMMIT_NOTIFY_EMPTY;
3326
3327	if (flags & UPL_COMMIT_KERNEL_ONLY_FLAGS) {
3328		return KERN_INVALID_ARGUMENT;
3329	}
3330
3331	kr = upl_commit_range(upl, offset, size, flags, pl, count, &finished);
3332
3333	if ((flags & UPL_COMMIT_NOTIFY_EMPTY) && finished)
3334		upl_deallocate(upl);
3335
3336	return kr;
3337}
3338
3339kern_return_t
3340kernel_upl_abort_range(
3341	upl_t			upl,
3342	upl_offset_t		offset,
3343	upl_size_t		size,
3344	int			abort_flags)
3345{
3346	kern_return_t 		kr;
3347	boolean_t		finished = FALSE;
3348
3349	if (abort_flags & UPL_COMMIT_FREE_ON_EMPTY)
3350		abort_flags |= UPL_COMMIT_NOTIFY_EMPTY;
3351
3352	kr = upl_abort_range(upl, offset, size, abort_flags, &finished);
3353
3354	if ((abort_flags & UPL_COMMIT_FREE_ON_EMPTY) && finished)
3355		upl_deallocate(upl);
3356
3357	return kr;
3358}
3359
3360kern_return_t
3361kernel_upl_abort(
3362	upl_t			upl,
3363	int			abort_type)
3364{
3365	kern_return_t	kr;
3366
3367	kr = upl_abort(upl, abort_type);
3368	upl_deallocate(upl);
3369	return kr;
3370}
3371
3372/*
3373 * Now a kernel-private interface (for BootCache
3374 * use only).  Need a cleaner way to create an
3375 * empty vm_map() and return a handle to it.
3376 */
3377
3378kern_return_t
3379vm_region_object_create(
3380	__unused vm_map_t	target_map,
3381	vm_size_t		size,
3382	ipc_port_t		*object_handle)
3383{
3384	vm_named_entry_t	user_entry;
3385	ipc_port_t		user_handle;
3386
3387	vm_map_t	new_map;
3388
3389	if (mach_memory_entry_allocate(&user_entry, &user_handle)
3390	    != KERN_SUCCESS) {
3391		return KERN_FAILURE;
3392	}
3393
3394	/* Create a named object based on a submap of specified size */
3395
3396	new_map = vm_map_create(PMAP_NULL, VM_MAP_MIN_ADDRESS,
3397				vm_map_round_page(size,
3398						  VM_MAP_PAGE_MASK(target_map)),
3399				TRUE);
3400	vm_map_set_page_shift(new_map, VM_MAP_PAGE_SHIFT(target_map));
3401
3402	user_entry->backing.map = new_map;
3403	user_entry->internal = TRUE;
3404	user_entry->is_sub_map = TRUE;
3405	user_entry->offset = 0;
3406	user_entry->protection = VM_PROT_ALL;
3407	user_entry->size = size;
3408	assert(user_entry->ref_count == 1);
3409
3410	*object_handle = user_handle;
3411	return KERN_SUCCESS;
3412
3413}
3414
3415ppnum_t vm_map_get_phys_page(		/* forward */
3416	vm_map_t	map,
3417	vm_offset_t	offset);
3418
3419ppnum_t
3420vm_map_get_phys_page(
3421	vm_map_t		map,
3422	vm_offset_t		addr)
3423{
3424	vm_object_offset_t	offset;
3425	vm_object_t		object;
3426	vm_map_offset_t 	map_offset;
3427	vm_map_entry_t		entry;
3428	ppnum_t			phys_page = 0;
3429
3430	map_offset = vm_map_trunc_page(addr, PAGE_MASK);
3431
3432	vm_map_lock(map);
3433	while (vm_map_lookup_entry(map, map_offset, &entry)) {
3434
3435		if (entry->object.vm_object == VM_OBJECT_NULL) {
3436			vm_map_unlock(map);
3437			return (ppnum_t) 0;
3438		}
3439		if (entry->is_sub_map) {
3440			vm_map_t	old_map;
3441			vm_map_lock(entry->object.sub_map);
3442			old_map = map;
3443			map = entry->object.sub_map;
3444			map_offset = entry->offset + (map_offset - entry->vme_start);
3445			vm_map_unlock(old_map);
3446			continue;
3447		}
3448		if (entry->object.vm_object->phys_contiguous) {
			/* These are not standard pageable memory mappings  */
3450			/* If they are not present in the object they will  */
3451			/* have to be picked up from the pager through the  */
3452			/* fault mechanism.  */
3453			if(entry->object.vm_object->vo_shadow_offset == 0) {
3454				/* need to call vm_fault */
3455				vm_map_unlock(map);
3456				vm_fault(map, map_offset, VM_PROT_NONE,
3457					FALSE, THREAD_UNINT, NULL, 0);
3458				vm_map_lock(map);
3459				continue;
3460			}
3461			offset = entry->offset + (map_offset - entry->vme_start);
3462			phys_page = (ppnum_t)
3463				((entry->object.vm_object->vo_shadow_offset
3464							+ offset) >> PAGE_SHIFT);
3465			break;
3466
3467		}
3468		offset = entry->offset + (map_offset - entry->vme_start);
3469		object = entry->object.vm_object;
3470		vm_object_lock(object);
3471		while (TRUE) {
3472			vm_page_t dst_page = vm_page_lookup(object,offset);
			if(dst_page == VM_PAGE_NULL) {
3474				if(object->shadow) {
3475					vm_object_t old_object;
3476					vm_object_lock(object->shadow);
3477					old_object = object;
3478					offset = offset + object->vo_shadow_offset;
3479					object = object->shadow;
3480					vm_object_unlock(old_object);
3481				} else {
3482					vm_object_unlock(object);
3483					break;
3484				}
3485			} else {
3486				phys_page = (ppnum_t)(dst_page->phys_page);
3487				vm_object_unlock(object);
3488				break;
3489			}
3490		}
3491		break;
3492
3493	}
3494
3495	vm_map_unlock(map);
3496	return phys_page;
3497}
3498
3499
3500
3501kern_return_t kernel_object_iopl_request(	/* forward */
3502	vm_named_entry_t	named_entry,
3503	memory_object_offset_t	offset,
3504	upl_size_t		*upl_size,
3505	upl_t			*upl_ptr,
3506	upl_page_info_array_t	user_page_list,
3507	unsigned int		*page_list_count,
3508	int			*flags);
3509
3510kern_return_t
3511kernel_object_iopl_request(
3512	vm_named_entry_t	named_entry,
3513	memory_object_offset_t	offset,
3514	upl_size_t		*upl_size,
3515	upl_t			*upl_ptr,
3516	upl_page_info_array_t	user_page_list,
3517	unsigned int		*page_list_count,
3518	int			*flags)
3519{
3520	vm_object_t		object;
3521	kern_return_t		ret;
3522
3523	int			caller_flags;
3524
3525	caller_flags = *flags;
3526
3527	if (caller_flags & ~UPL_VALID_FLAGS) {
3528		/*
3529		 * For forward compatibility's sake,
3530		 * reject any unknown flag.
3531		 */
3532		return KERN_INVALID_VALUE;
3533	}
3534
3535	/* a few checks to make sure user is obeying rules */
3536	if(*upl_size == 0) {
3537		if(offset >= named_entry->size)
3538			return(KERN_INVALID_RIGHT);
3539		*upl_size = (upl_size_t) (named_entry->size - offset);
3540		if (*upl_size != named_entry->size - offset)
3541			return KERN_INVALID_ARGUMENT;
3542	}
3543	if(caller_flags & UPL_COPYOUT_FROM) {
3544		if((named_entry->protection & VM_PROT_READ)
3545					!= VM_PROT_READ) {
3546			return(KERN_INVALID_RIGHT);
3547		}
3548	} else {
3549		if((named_entry->protection &
3550			(VM_PROT_READ | VM_PROT_WRITE))
3551			!= (VM_PROT_READ | VM_PROT_WRITE)) {
3552			return(KERN_INVALID_RIGHT);
3553		}
3554	}
3555	if(named_entry->size < (offset + *upl_size))
3556		return(KERN_INVALID_ARGUMENT);
3557
	/* The caller's "offset" parameter is relative to the start of   */
	/* the named entry; convert it to an offset within the object.   */
3560	offset = offset + named_entry->offset;
3561
3562	if (named_entry->is_sub_map ||
3563	    named_entry->is_copy)
3564		return KERN_INVALID_ARGUMENT;
3565
3566	named_entry_lock(named_entry);
3567
3568	if (named_entry->is_pager) {
3569		object = vm_object_enter(named_entry->backing.pager,
3570				named_entry->offset + named_entry->size,
3571				named_entry->internal,
3572				FALSE,
3573				FALSE);
3574		if (object == VM_OBJECT_NULL) {
3575			named_entry_unlock(named_entry);
3576			return(KERN_INVALID_OBJECT);
3577		}
3578
3579		/* JMM - drop reference on the pager here? */
3580
3581		/* create an extra reference for the object */
3582		vm_object_lock(object);
3583		vm_object_reference_locked(object);
3584		named_entry->backing.object = object;
3585		named_entry->is_pager = FALSE;
3586		named_entry_unlock(named_entry);
3587
3588		/* wait for object (if any) to be ready */
3589		if (!named_entry->internal) {
3590			while (!object->pager_ready) {
3591				vm_object_wait(object,
3592					       VM_OBJECT_EVENT_PAGER_READY,
3593					       THREAD_UNINT);
3594				vm_object_lock(object);
3595			}
3596		}
3597		vm_object_unlock(object);
3598
3599	} else {
		/* This is the case where we are going to operate */
		/* on an already-known object.  If the object is  */
		/* not ready, it is internal.  An external object */
		/* cannot be mapped until it is ready, so we can  */
		/* avoid the ready check in this case.            */
3606		object = named_entry->backing.object;
3607		vm_object_reference(object);
3608		named_entry_unlock(named_entry);
3609	}
3610
3611	if (!object->private) {
3612		if (*upl_size > (MAX_UPL_TRANSFER*PAGE_SIZE))
3613			*upl_size = (MAX_UPL_TRANSFER*PAGE_SIZE);
3614		if (object->phys_contiguous) {
3615			*flags = UPL_PHYS_CONTIG;
3616		} else {
3617			*flags = 0;
3618		}
3619	} else {
3620		*flags = UPL_DEV_MEMORY | UPL_PHYS_CONTIG;
3621	}
3622
3623	ret = vm_object_iopl_request(object,
3624				     offset,
3625				     *upl_size,
3626				     upl_ptr,
3627				     user_page_list,
3628				     page_list_count,
3629				     caller_flags);
3630	vm_object_deallocate(object);
3631	return ret;
3632}
3633