1/*
2 * Copyright (c) 2007 Apple Inc. All rights reserved.
3 *
4 * @APPLE_LICENSE_HEADER_START@
5 *
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. Please obtain a copy of the License at
10 * http://www.opensource.apple.com/apsl/ and read it before using this
11 * file.
12 *
13 * The Original Code and all software distributed under the License are
14 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
15 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
16 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
18 * Please see the License for the specific language governing rights and
19 * limitations under the License.
20 *
21 * @APPLE_LICENSE_HEADER_END@
22 */
23
24/*
25 * Shared region (... and comm page)
26 *
27 * This file handles the VM shared region and comm page.
28 *
29 */
30/*
31 * SHARED REGIONS
32 * --------------
33 *
34 * A shared region is a submap that contains the most common system shared
35 * libraries for a given environment.
36 * An environment is defined by (cpu-type, 64-bitness, root directory).
37 *
38 * The point of a shared region is to reduce the setup overhead when exec'ing
39 * a new process.
40 * A shared region uses a shared VM submap that gets mapped automatically
41 * at exec() time (see vm_map_exec()).  The first process of a given
42 * environment sets up the shared region and all further processes in that
43 * environment can re-use that shared region without having to re-create
44 * the same mappings in their VM map.  All they need is contained in the shared
45 * region.
 * It can also share a pmap (mostly for read-only parts but also for the
47 * initial version of some writable parts), which gets "nested" into the
48 * process's pmap.  This reduces the number of soft faults:  once one process
49 * brings in a page in the shared region, all the other processes can access
50 * it without having to enter it in their own pmap.
51 *
52 *
53 * When a process is being exec'ed, vm_map_exec() calls vm_shared_region_enter()
54 * to map the appropriate shared region in the process's address space.
55 * We look up the appropriate shared region for the process's environment.
56 * If we can't find one, we create a new (empty) one and add it to the list.
57 * Otherwise, we just take an extra reference on the shared region we found.
58 *
59 * The "dyld" runtime (mapped into the process's address space at exec() time)
60 * will then use the shared_region_check_np() and shared_region_map_np()
 * system calls to validate and/or populate the shared region with the
62 * appropriate dyld_shared_cache file.
63 *
64 * The shared region is inherited on fork() and the child simply takes an
65 * extra reference on its parent's shared region.
66 *
67 * When the task terminates, we release a reference on its shared region.
68 * When the last reference is released, we destroy the shared region.
69 *
70 * After a chroot(), the calling process keeps using its original shared region,
71 * since that's what was mapped when it was started.  But its children
72 * will use a different shared region, because they need to use the shared
73 * cache that's relative to the new root directory.
74 */
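
/*
 * Illustration only (not compiled): a minimal sketch of the exec-time
 * hand-off described above.  The real call site is vm_map_exec(); the
 * function and variable names below are hypothetical, but
 * vm_shared_region_enter() is the entry point implemented later in
 * this file.
 */
#if 0
static kern_return_t
example_exec_time_setup(
	vm_map_t	new_map,	/* fresh VM map built for the exec */
	task_t		task,		/* task being exec'ed */
	void		*fsroot,	/* its root directory (chroot-aware) */
	cpu_type_t	cpu)		/* its CPU type */
{
	/*
	 * Looks up (or creates) the shared region for this environment,
	 * makes it the task's shared region, and maps its VM submap into
	 * "new_map" with the appropriate pmap nesting.
	 */
	return vm_shared_region_enter(new_map, task, fsroot, cpu);
}
#endif
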
75/*
76 * COMM PAGE
77 *
78 * A "comm page" is an area of memory that is populated by the kernel with
79 * the appropriate platform-specific version of some commonly used code.
80 * There is one "comm page" per platform (cpu-type, 64-bitness) but only
81 * for the native cpu-type.  No need to overly optimize translated code
 * for hardware that is not really there!
83 *
84 * The comm pages are created and populated at boot time.
85 *
86 * The appropriate comm page is mapped into a process's address space
87 * at exec() time, in vm_map_exec().
88 * It is then inherited on fork().
89 *
90 * The comm page is shared between the kernel and all applications of
91 * a given platform.  Only the kernel can modify it.
92 *
93 * Applications just branch to fixed addresses in the comm page and find
94 * the right version of the code for the platform.  There is also some
95 * data provided and updated by the kernel for processes to retrieve easily
96 * without having to do a system call.
97 */
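
/*
 * Illustration only (not compiled): how user space typically consumes
 * the comm page, by reading kernel-maintained data at a fixed address
 * instead of making a system call.  The _COMM_PAGE_* constant comes
 * from <machine/cpu_capabilities.h>; treat the exact field width used
 * here as an assumption of this sketch.
 */
#if 0
#include <stdint.h>
#include <machine/cpu_capabilities.h>

static uint32_t
example_read_cpu_capabilities(void)
{
	/* the kernel keeps this word up to date; no syscall required */
	return *(volatile uint32_t *)_COMM_PAGE_CPU_CAPABILITIES;
}
#endif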
98
99#include <debug.h>
100
101#include <kern/ipc_tt.h>
102#include <kern/kalloc.h>
103#include <kern/thread_call.h>
104
105#include <mach/mach_vm.h>
106
107#include <vm/vm_map.h>
108#include <vm/vm_shared_region.h>
109
110#include <vm/vm_protos.h>
111
112#include <machine/commpage.h>
113#include <machine/cpu_capabilities.h>
114
115/* "dyld" uses this to figure out what the kernel supports */
116int shared_region_version = 3;
117
118/* trace level, output is sent to the system log file */
119int shared_region_trace_level = SHARED_REGION_TRACE_ERROR_LVL;
120
/* should local (non-chroot) shared regions persist when no task uses them? */
122int shared_region_persistence = 0;	/* no by default */
123
124/* delay before reclaiming an unused shared region */
125int shared_region_destroy_delay = 120; /* in seconds */
126
127/*
128 * Only one cache gets to slide on Desktop, since we can't
129 * tear down slide info properly today and the desktop actually
130 * produces lots of shared caches.
131 */
132boolean_t shared_region_completed_slide = FALSE;
133
134/* this lock protects all the shared region data structures */
135lck_grp_t *vm_shared_region_lck_grp;
136lck_mtx_t vm_shared_region_lock;
137
138#define vm_shared_region_lock() lck_mtx_lock(&vm_shared_region_lock)
139#define vm_shared_region_unlock() lck_mtx_unlock(&vm_shared_region_lock)
140#define vm_shared_region_sleep(event, interruptible)			\
141	lck_mtx_sleep(&vm_shared_region_lock,				\
142		      LCK_SLEEP_DEFAULT,				\
143		      (event_t) (event),				\
144		      (interruptible))
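
/*
 * Condensed sketch (not compiled) of how the macros above are used by
 * the mapping and sliding paths later in this file; "sr" stands for a
 * hypothetical shared region the caller already holds a reference on.
 */
#if 0
	vm_shared_region_lock();
	while (sr->sr_mapping_in_progress) {
		/* someone else is working on this shared region: wait */
		vm_shared_region_sleep(&sr->sr_mapping_in_progress,
				       THREAD_UNINT);
	}
	sr->sr_mapping_in_progress = TRUE;	/* claim the region */
	vm_shared_region_unlock();

	/* ... work on the shared region without holding the lock ... */

	vm_shared_region_lock();
	sr->sr_mapping_in_progress = FALSE;
	thread_wakeup((event_t) &sr->sr_mapping_in_progress);
	vm_shared_region_unlock();
#endif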
145
146/* the list of currently available shared regions (one per environment) */
147queue_head_t	vm_shared_region_queue;
148
149static void vm_shared_region_reference_locked(vm_shared_region_t shared_region);
150static vm_shared_region_t vm_shared_region_create(
151	void			*root_dir,
152	cpu_type_t		cputype,
153	boolean_t		is_64bit);
154static void vm_shared_region_destroy(vm_shared_region_t shared_region);
155
156static void vm_shared_region_timeout(thread_call_param_t param0,
157				     thread_call_param_t param1);
158
159static int __commpage_setup = 0;
160#if defined(__i386__) || defined(__x86_64__)
static int __system_power_source = 1;	/* init to external power source */
162static void post_sys_powersource_internal(int i, int internal);
163#endif /* __i386__ || __x86_64__ */
164
165
166/*
167 * Initialize the module...
168 */
169void
170vm_shared_region_init(void)
171{
172	SHARED_REGION_TRACE_DEBUG(
173		("shared_region: -> init\n"));
174
175	vm_shared_region_lck_grp = lck_grp_alloc_init("vm shared region",
176						      LCK_GRP_ATTR_NULL);
177	lck_mtx_init(&vm_shared_region_lock,
178		     vm_shared_region_lck_grp,
179		     LCK_ATTR_NULL);
180
181	queue_init(&vm_shared_region_queue);
182
183	SHARED_REGION_TRACE_DEBUG(
184		("shared_region: <- init\n"));
185}
186
187/*
188 * Retrieve a task's shared region and grab an extra reference to
189 * make sure it doesn't disappear while the caller is using it.
190 * The caller is responsible for consuming that extra reference if
191 * necessary.
192 */
193vm_shared_region_t
194vm_shared_region_get(
195	task_t		task)
196{
197	vm_shared_region_t	shared_region;
198
199	SHARED_REGION_TRACE_DEBUG(
200		("shared_region: -> get(%p)\n",
201		 (void *)VM_KERNEL_ADDRPERM(task)));
202
203	task_lock(task);
204	vm_shared_region_lock();
205	shared_region = task->shared_region;
206	if (shared_region) {
207		assert(shared_region->sr_ref_count > 0);
208		vm_shared_region_reference_locked(shared_region);
209	}
210	vm_shared_region_unlock();
211	task_unlock(task);
212
213	SHARED_REGION_TRACE_DEBUG(
214		("shared_region: get(%p) <- %p\n",
215		 (void *)VM_KERNEL_ADDRPERM(task),
216		 (void *)VM_KERNEL_ADDRPERM(shared_region)));
217
218	return shared_region;
219}
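
/*
 * Illustration only (not compiled): the usual get/use/release pattern,
 * assuming a hypothetical caller that just wants the address dyld
 * checks.  vm_shared_region_get() returns the region with an extra
 * reference that the caller must drop with vm_shared_region_deallocate().
 */
#if 0
static kern_return_t
example_query_start_address(
	task_t			task,
	mach_vm_offset_t	*addrp)
{
	vm_shared_region_t	sr;
	kern_return_t		kr;

	sr = vm_shared_region_get(task);
	if (sr == NULL) {
		return KERN_FAILURE;	/* task has no shared region */
	}
	kr = vm_shared_region_start_address(sr, addrp);
	/* consume the extra reference taken by vm_shared_region_get() */
	vm_shared_region_deallocate(sr);
	return kr;
}
#endif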
220
221/*
222 * Get the base address of the shared region.
223 * That's the address at which it needs to be mapped in the process's address
224 * space.
225 * No need to lock since this data is set when the shared region is
226 * created and is never modified after that.  The caller must hold an extra
227 * reference on the shared region to prevent it from being destroyed.
228 */
229mach_vm_offset_t
230vm_shared_region_base_address(
231	vm_shared_region_t	shared_region)
232{
233	SHARED_REGION_TRACE_DEBUG(
234		("shared_region: -> base_address(%p)\n",
235		 (void *)VM_KERNEL_ADDRPERM(shared_region)));
236	assert(shared_region->sr_ref_count > 1);
237	SHARED_REGION_TRACE_DEBUG(
238		("shared_region: base_address(%p) <- 0x%llx\n",
239		 (void *)VM_KERNEL_ADDRPERM(shared_region),
240		 (long long)shared_region->sr_base_address));
241	return shared_region->sr_base_address;
242}
243
244/*
245 * Get the size of the shared region.
246 * That's the size that needs to be mapped in the process's address
247 * space.
248 * No need to lock since this data is set when the shared region is
249 * created and is never modified after that.  The caller must hold an extra
250 * reference on the shared region to prevent it from being destroyed.
251 */
252mach_vm_size_t
253vm_shared_region_size(
254	vm_shared_region_t	shared_region)
255{
256	SHARED_REGION_TRACE_DEBUG(
257		("shared_region: -> size(%p)\n",
258		 (void *)VM_KERNEL_ADDRPERM(shared_region)));
259	assert(shared_region->sr_ref_count > 1);
260	SHARED_REGION_TRACE_DEBUG(
261		("shared_region: size(%p) <- 0x%llx\n",
262		 (void *)VM_KERNEL_ADDRPERM(shared_region),
263		 (long long)shared_region->sr_size));
264	return shared_region->sr_size;
265}
266
267/*
268 * Get the memory entry of the shared region.
269 * That's the "memory object" that needs to be mapped in the process's address
270 * space.
271 * No need to lock since this data is set when the shared region is
272 * created and is never modified after that.  The caller must hold an extra
273 * reference on the shared region to prevent it from being destroyed.
274 */
275ipc_port_t
276vm_shared_region_mem_entry(
277	vm_shared_region_t	shared_region)
278{
279	SHARED_REGION_TRACE_DEBUG(
280		("shared_region: -> mem_entry(%p)\n",
281		 (void *)VM_KERNEL_ADDRPERM(shared_region)));
282	assert(shared_region->sr_ref_count > 1);
283	SHARED_REGION_TRACE_DEBUG(
284		("shared_region: mem_entry(%p) <- %p\n",
285		 (void *)VM_KERNEL_ADDRPERM(shared_region),
286		 (void *)VM_KERNEL_ADDRPERM(shared_region->sr_mem_entry)));
287	return shared_region->sr_mem_entry;
288}
289
290uint32_t
291vm_shared_region_get_slide(
292	vm_shared_region_t	shared_region)
293{
294	SHARED_REGION_TRACE_DEBUG(
295		("shared_region: -> vm_shared_region_get_slide(%p)\n",
296		 (void *)VM_KERNEL_ADDRPERM(shared_region)));
297	assert(shared_region->sr_ref_count > 1);
298	SHARED_REGION_TRACE_DEBUG(
299		("shared_region: vm_shared_region_get_slide(%p) <- %u\n",
300		 (void *)VM_KERNEL_ADDRPERM(shared_region),
301		 shared_region->sr_slide_info.slide));
302
303	/* 0 if we haven't slid */
304	assert(shared_region->sr_slide_info.slide_object != NULL ||
305			shared_region->sr_slide_info.slide == 0);
306
307	return shared_region->sr_slide_info.slide;
308}
309
310vm_shared_region_slide_info_t
311vm_shared_region_get_slide_info(
312	vm_shared_region_t	shared_region)
313{
314	SHARED_REGION_TRACE_DEBUG(
315		("shared_region: -> vm_shared_region_get_slide_info(%p)\n",
316		 (void *)VM_KERNEL_ADDRPERM(shared_region)));
317	assert(shared_region->sr_ref_count > 1);
318	SHARED_REGION_TRACE_DEBUG(
319		("shared_region: vm_shared_region_get_slide_info(%p) <- %p\n",
320		 (void *)VM_KERNEL_ADDRPERM(shared_region),
321		 (void *)VM_KERNEL_ADDRPERM(&shared_region->sr_slide_info)));
322	return &shared_region->sr_slide_info;
323}
324
325/*
326 * Set the shared region the process should use.
327 * A NULL new shared region means that we just want to release the old
328 * shared region.
329 * The caller should already have an extra reference on the new shared region
330 * (if any).  We release a reference on the old shared region (if any).
331 */
332void
333vm_shared_region_set(
334	task_t			task,
335	vm_shared_region_t	new_shared_region)
336{
337	vm_shared_region_t	old_shared_region;
338
339	SHARED_REGION_TRACE_DEBUG(
340		("shared_region: -> set(%p, %p)\n",
341		 (void *)VM_KERNEL_ADDRPERM(task),
342		 (void *)VM_KERNEL_ADDRPERM(new_shared_region)));
343
344	task_lock(task);
345	vm_shared_region_lock();
346
347	old_shared_region = task->shared_region;
348	if (new_shared_region) {
349		assert(new_shared_region->sr_ref_count > 0);
350	}
351
352	task->shared_region = new_shared_region;
353
354	vm_shared_region_unlock();
355	task_unlock(task);
356
357	if (old_shared_region) {
358		assert(old_shared_region->sr_ref_count > 0);
359		vm_shared_region_deallocate(old_shared_region);
360	}
361
362	SHARED_REGION_TRACE_DEBUG(
363		("shared_region: set(%p) <- old=%p new=%p\n",
364		 (void *)VM_KERNEL_ADDRPERM(task),
365		 (void *)VM_KERNEL_ADDRPERM(old_shared_region),
366		 (void *)VM_KERNEL_ADDRPERM(new_shared_region)));
367}
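
/*
 * Illustration only (not compiled): handing a freshly looked-up shared
 * region to a task, as vm_shared_region_enter() does below.  The lookup
 * returns with an extra reference, which vm_shared_region_set()
 * effectively transfers to the task; the task's previous region, if
 * any, has one reference released.  The wrapper name is hypothetical.
 */
#if 0
static void
example_adopt_shared_region(
	task_t		task,
	void		*fsroot,
	cpu_type_t	cpu,
	boolean_t	is_64bit)
{
	vm_shared_region_t	sr;

	sr = vm_shared_region_lookup(fsroot, cpu, is_64bit);
	/* the task now owns the reference returned by the lookup */
	vm_shared_region_set(task, sr);
}
#endif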
368
369/*
 * Look up the shared region for the desired environment.
371 * If none is found, create a new (empty) one.
372 * Grab an extra reference on the returned shared region, to make sure
373 * it doesn't get destroyed before the caller is done with it.  The caller
374 * is responsible for consuming that extra reference if necessary.
375 */
376vm_shared_region_t
377vm_shared_region_lookup(
378	void		*root_dir,
379	cpu_type_t	cputype,
380	boolean_t	is_64bit)
381{
382	vm_shared_region_t	shared_region;
383	vm_shared_region_t	new_shared_region;
384
385	SHARED_REGION_TRACE_DEBUG(
		("shared_region: -> lookup(root=%p,cpu=%d,64bit=%d)\n",
		 (void *)VM_KERNEL_ADDRPERM(root_dir), cputype, is_64bit));
389
390	shared_region = NULL;
391	new_shared_region = NULL;
392
393	vm_shared_region_lock();
394	for (;;) {
395		queue_iterate(&vm_shared_region_queue,
396			      shared_region,
397			      vm_shared_region_t,
398			      sr_q) {
399			assert(shared_region->sr_ref_count > 0);
400			if (shared_region->sr_cpu_type == cputype &&
401			    shared_region->sr_root_dir == root_dir &&
402			    shared_region->sr_64bit == is_64bit) {
403				/* found a match ! */
404				vm_shared_region_reference_locked(shared_region);
405				goto done;
406			}
407		}
408		if (new_shared_region == NULL) {
409			/* no match: create a new one */
410			vm_shared_region_unlock();
411			new_shared_region = vm_shared_region_create(root_dir,
412								    cputype,
413								    is_64bit);
414			/* do the lookup again, in case we lost a race */
415			vm_shared_region_lock();
416			continue;
417		}
418		/* still no match: use our new one */
419		shared_region = new_shared_region;
420		new_shared_region = NULL;
421		queue_enter(&vm_shared_region_queue,
422			    shared_region,
423			    vm_shared_region_t,
424			    sr_q);
425		break;
426	}
427
428done:
429	vm_shared_region_unlock();
430
431	if (new_shared_region) {
432		/*
433		 * We lost a race with someone else to create a new shared
434		 * region for that environment.  Get rid of our unused one.
435		 */
436		assert(new_shared_region->sr_ref_count == 1);
437		new_shared_region->sr_ref_count--;
438		vm_shared_region_destroy(new_shared_region);
439		new_shared_region = NULL;
440	}
441
442	SHARED_REGION_TRACE_DEBUG(
443		("shared_region: lookup(root=%p,cpu=%d,64bit=%d) <- %p\n",
444		 (void *)VM_KERNEL_ADDRPERM(root_dir),
445		 cputype, is_64bit,
446		 (void *)VM_KERNEL_ADDRPERM(shared_region)));
447
448	assert(shared_region->sr_ref_count > 0);
449	return shared_region;
450}
451
452/*
453 * Take an extra reference on a shared region.
454 * The vm_shared_region_lock should already be held by the caller.
455 */
456static void
457vm_shared_region_reference_locked(
458	vm_shared_region_t	shared_region)
459{
460#if DEBUG
461	lck_mtx_assert(&vm_shared_region_lock, LCK_MTX_ASSERT_OWNED);
462#endif
463
464	SHARED_REGION_TRACE_DEBUG(
465		("shared_region: -> reference_locked(%p)\n",
466		 (void *)VM_KERNEL_ADDRPERM(shared_region)));
467	assert(shared_region->sr_ref_count > 0);
468	shared_region->sr_ref_count++;
469
470	if (shared_region->sr_timer_call != NULL) {
471		boolean_t cancelled;
472
473		/* cancel and free any pending timeout */
474		cancelled = thread_call_cancel(shared_region->sr_timer_call);
475		if (cancelled) {
476			thread_call_free(shared_region->sr_timer_call);
477			shared_region->sr_timer_call = NULL;
478			/* release the reference held by the cancelled timer */
479			shared_region->sr_ref_count--;
480		} else {
481			/* the timer will drop the reference and free itself */
482		}
483	}
484
485	SHARED_REGION_TRACE_DEBUG(
486		("shared_region: reference_locked(%p) <- %d\n",
487		 (void *)VM_KERNEL_ADDRPERM(shared_region),
488		 shared_region->sr_ref_count));
489}
490
491/*
492 * Release a reference on the shared region.
493 * Destroy it if there are no references left.
494 */
495void
496vm_shared_region_deallocate(
497	vm_shared_region_t	shared_region)
498{
499	SHARED_REGION_TRACE_DEBUG(
500		("shared_region: -> deallocate(%p)\n",
501		 (void *)VM_KERNEL_ADDRPERM(shared_region)));
502
503	vm_shared_region_lock();
504
505	assert(shared_region->sr_ref_count > 0);
506
507	if (shared_region->sr_root_dir == NULL) {
508		/*
509		 * Local (i.e. based on the boot volume) shared regions
510		 * can persist or not based on the "shared_region_persistence"
511		 * sysctl.
512		 * Make sure that this one complies.
513		 *
514		 * See comments in vm_shared_region_slide() for notes about
515		 * shared regions we have slid (which are not torn down currently).
516		 */
517		if (shared_region_persistence &&
518		    !shared_region->sr_persists) {
519			/* make this one persistent */
520			shared_region->sr_ref_count++;
521			shared_region->sr_persists = TRUE;
522		} else if (!shared_region_persistence &&
523			   shared_region->sr_persists) {
524			/* make this one no longer persistent */
525			assert(shared_region->sr_ref_count > 1);
526			shared_region->sr_ref_count--;
527			shared_region->sr_persists = FALSE;
528		}
529	}
530
531	assert(shared_region->sr_ref_count > 0);
532	shared_region->sr_ref_count--;
533	SHARED_REGION_TRACE_DEBUG(
534		("shared_region: deallocate(%p): ref now %d\n",
535		 (void *)VM_KERNEL_ADDRPERM(shared_region),
536		 shared_region->sr_ref_count));
537
538	if (shared_region->sr_ref_count == 0) {
539		uint64_t deadline;
540
541		assert(!shared_region->sr_slid);
542
543		if (shared_region->sr_timer_call == NULL) {
544			/* hold one reference for the timer */
545			assert(! shared_region->sr_mapping_in_progress);
546			shared_region->sr_ref_count++;
547
548			/* set up the timer */
549			shared_region->sr_timer_call = thread_call_allocate(
550				(thread_call_func_t) vm_shared_region_timeout,
551				(thread_call_param_t) shared_region);
552
553			/* schedule the timer */
554			clock_interval_to_deadline(shared_region_destroy_delay,
555						   1000 * 1000 * 1000,
556						   &deadline);
557			thread_call_enter_delayed(shared_region->sr_timer_call,
558						  deadline);
559
560			SHARED_REGION_TRACE_DEBUG(
561				("shared_region: deallocate(%p): armed timer\n",
562				 (void *)VM_KERNEL_ADDRPERM(shared_region)));
563
564			vm_shared_region_unlock();
565		} else {
566			/* timer expired: let go of this shared region */
567
568			/*
569			 * We can't properly handle teardown of a slid object today.
570			 */
571			assert(!shared_region->sr_slid);
572
573			/*
574			 * Remove it from the queue first, so no one can find
575			 * it...
576			 */
577			queue_remove(&vm_shared_region_queue,
578				     shared_region,
579				     vm_shared_region_t,
580				     sr_q);
581			vm_shared_region_unlock();
582
583			/* ... and destroy it */
584			vm_shared_region_destroy(shared_region);
585			shared_region = NULL;
586		}
587	} else {
588		vm_shared_region_unlock();
589	}
590
591	SHARED_REGION_TRACE_DEBUG(
592		("shared_region: deallocate(%p) <-\n",
593		 (void *)VM_KERNEL_ADDRPERM(shared_region)));
594}
595
596void
597vm_shared_region_timeout(
598	thread_call_param_t	param0,
599	__unused thread_call_param_t	param1)
600{
601	vm_shared_region_t	shared_region;
602
603	shared_region = (vm_shared_region_t) param0;
604
605	vm_shared_region_deallocate(shared_region);
606}
607
608/*
609 * Create a new (empty) shared region for a new environment.
610 */
611static vm_shared_region_t
612vm_shared_region_create(
613	void			*root_dir,
614	cpu_type_t		cputype,
615	boolean_t		is_64bit)
616{
617	kern_return_t		kr;
618	vm_named_entry_t	mem_entry;
619	ipc_port_t		mem_entry_port;
620	vm_shared_region_t	shared_region;
621	vm_shared_region_slide_info_t si;
622	vm_map_t		sub_map;
623	mach_vm_offset_t	base_address, pmap_nesting_start;
624	mach_vm_size_t		size, pmap_nesting_size;
625
626	SHARED_REGION_TRACE_DEBUG(
627		("shared_region: -> create(root=%p,cpu=%d,64bit=%d)\n",
628		 (void *)VM_KERNEL_ADDRPERM(root_dir), cputype, is_64bit));
629
630	base_address = 0;
631	size = 0;
632	mem_entry = NULL;
633	mem_entry_port = IPC_PORT_NULL;
634	sub_map = VM_MAP_NULL;
635
636	/* create a new shared region structure... */
637	shared_region = kalloc(sizeof (*shared_region));
638	if (shared_region == NULL) {
639		SHARED_REGION_TRACE_ERROR(
640			("shared_region: create: couldn't allocate\n"));
641		goto done;
642	}
643
644	/* figure out the correct settings for the desired environment */
645	if (is_64bit) {
646		switch (cputype) {
647		case CPU_TYPE_I386:
648			base_address = SHARED_REGION_BASE_X86_64;
649			size = SHARED_REGION_SIZE_X86_64;
650			pmap_nesting_start = SHARED_REGION_NESTING_BASE_X86_64;
651			pmap_nesting_size = SHARED_REGION_NESTING_SIZE_X86_64;
652			break;
653		case CPU_TYPE_POWERPC:
654			base_address = SHARED_REGION_BASE_PPC64;
655			size = SHARED_REGION_SIZE_PPC64;
656			pmap_nesting_start = SHARED_REGION_NESTING_BASE_PPC64;
657			pmap_nesting_size = SHARED_REGION_NESTING_SIZE_PPC64;
658			break;
659		default:
660			SHARED_REGION_TRACE_ERROR(
661				("shared_region: create: unknown cpu type %d\n",
662				 cputype));
663			kfree(shared_region, sizeof (*shared_region));
664			shared_region = NULL;
665			goto done;
666		}
667	} else {
668		switch (cputype) {
669		case CPU_TYPE_I386:
670			base_address = SHARED_REGION_BASE_I386;
671			size = SHARED_REGION_SIZE_I386;
672			pmap_nesting_start = SHARED_REGION_NESTING_BASE_I386;
673			pmap_nesting_size = SHARED_REGION_NESTING_SIZE_I386;
674			break;
675		case CPU_TYPE_POWERPC:
676			base_address = SHARED_REGION_BASE_PPC;
677			size = SHARED_REGION_SIZE_PPC;
678			pmap_nesting_start = SHARED_REGION_NESTING_BASE_PPC;
679			pmap_nesting_size = SHARED_REGION_NESTING_SIZE_PPC;
680			break;
681		default:
682			SHARED_REGION_TRACE_ERROR(
683				("shared_region: create: unknown cpu type %d\n",
684				 cputype));
685			kfree(shared_region, sizeof (*shared_region));
686			shared_region = NULL;
687			goto done;
688		}
689	}
690
691	/* create a memory entry structure and a Mach port handle */
692	kr = mach_memory_entry_allocate(&mem_entry,
693					&mem_entry_port);
694	if (kr != KERN_SUCCESS) {
695		kfree(shared_region, sizeof (*shared_region));
696		shared_region = NULL;
697		SHARED_REGION_TRACE_ERROR(
698			("shared_region: create: "
699			 "couldn't allocate mem_entry\n"));
700		goto done;
701	}
702
703	/* create a VM sub map and its pmap */
704	sub_map = vm_map_create(pmap_create(NULL, 0, is_64bit),
705				0, size,
706				TRUE);
707	if (sub_map == VM_MAP_NULL) {
708		ipc_port_release_send(mem_entry_port);
709		kfree(shared_region, sizeof (*shared_region));
710		shared_region = NULL;
711		SHARED_REGION_TRACE_ERROR(
712			("shared_region: create: "
713			 "couldn't allocate map\n"));
714		goto done;
715	}
716
717	/* make the memory entry point to the VM sub map */
718	mem_entry->is_sub_map = TRUE;
719	mem_entry->backing.map = sub_map;
720	mem_entry->size = size;
721	mem_entry->protection = VM_PROT_ALL;
722
723	/* make the shared region point at the memory entry */
724	shared_region->sr_mem_entry = mem_entry_port;
725
726	/* fill in the shared region's environment and settings */
727	shared_region->sr_base_address = base_address;
728	shared_region->sr_size = size;
729	shared_region->sr_pmap_nesting_start = pmap_nesting_start;
730	shared_region->sr_pmap_nesting_size = pmap_nesting_size;
731	shared_region->sr_cpu_type = cputype;
732	shared_region->sr_64bit = is_64bit;
733	shared_region->sr_root_dir = root_dir;
734
735	queue_init(&shared_region->sr_q);
736	shared_region->sr_mapping_in_progress = FALSE;
737	shared_region->sr_slide_in_progress = FALSE;
738	shared_region->sr_persists = FALSE;
739	shared_region->sr_slid = FALSE;
740	shared_region->sr_timer_call = NULL;
741	shared_region->sr_first_mapping = (mach_vm_offset_t) -1;
742
743	/* grab a reference for the caller */
744	shared_region->sr_ref_count = 1;
745
746	/* And set up slide info */
747	si = &shared_region->sr_slide_info;
748	si->start = 0;
749	si->end = 0;
750	si->slide = 0;
751	si->slide_object = NULL;
752	si->slide_info_size = 0;
753	si->slide_info_entry = NULL;
754
755done:
756	if (shared_region) {
757		SHARED_REGION_TRACE_INFO(
758			("shared_region: create(root=%p,cpu=%d,64bit=%d,"
759			 "base=0x%llx,size=0x%llx) <- "
760			 "%p mem=(%p,%p) map=%p pmap=%p\n",
761			 (void *)VM_KERNEL_ADDRPERM(root_dir),
762			 cputype, is_64bit, (long long)base_address,
763			 (long long)size,
764			 (void *)VM_KERNEL_ADDRPERM(shared_region),
765			 (void *)VM_KERNEL_ADDRPERM(mem_entry_port),
766			 (void *)VM_KERNEL_ADDRPERM(mem_entry),
767			 (void *)VM_KERNEL_ADDRPERM(sub_map),
768			 (void *)VM_KERNEL_ADDRPERM(sub_map->pmap)));
769	} else {
770		SHARED_REGION_TRACE_INFO(
771			("shared_region: create(root=%p,cpu=%d,64bit=%d,"
772			 "base=0x%llx,size=0x%llx) <- NULL",
773			 (void *)VM_KERNEL_ADDRPERM(root_dir),
774			 cputype, is_64bit, (long long)base_address,
775			 (long long)size));
776	}
777	return shared_region;
778}
779
780/*
781 * Destroy a now-unused shared region.
 * The shared region is no longer in the queue and cannot be looked up.
783 */
784static void
785vm_shared_region_destroy(
786	vm_shared_region_t	shared_region)
787{
788	vm_named_entry_t	mem_entry;
789	vm_map_t		map;
790
791	SHARED_REGION_TRACE_INFO(
792		("shared_region: -> destroy(%p) (root=%p,cpu=%d,64bit=%d)\n",
793		 (void *)VM_KERNEL_ADDRPERM(shared_region),
794		 (void *)VM_KERNEL_ADDRPERM(shared_region->sr_root_dir),
795		 shared_region->sr_cpu_type,
796		 shared_region->sr_64bit));
797
798	assert(shared_region->sr_ref_count == 0);
799	assert(!shared_region->sr_persists);
800	assert(!shared_region->sr_slid);
801
802	mem_entry = (vm_named_entry_t) shared_region->sr_mem_entry->ip_kobject;
803	assert(mem_entry->is_sub_map);
804	assert(!mem_entry->internal);
805	assert(!mem_entry->is_pager);
806	assert(!mem_entry->is_copy);
807	map = mem_entry->backing.map;
808
809	/*
810	 * Clean up the pmap first.  The virtual addresses that were
811	 * entered in this possibly "nested" pmap may have different values
812	 * than the VM map's min and max offsets, if the VM sub map was
813	 * mapped at a non-zero offset in the processes' main VM maps, which
814	 * is usually the case, so the clean-up we do in vm_map_destroy() would
815	 * not be enough.
816	 */
817	if (map->pmap) {
818		pmap_remove(map->pmap,
819			    shared_region->sr_base_address,
820			    (shared_region->sr_base_address +
821			     shared_region->sr_size));
822	}
823
824	/*
825	 * Release our (one and only) handle on the memory entry.
826	 * This will generate a no-senders notification, which will be processed
827	 * by ipc_kobject_notify(), which will release the one and only
828	 * reference on the memory entry and cause it to be destroyed, along
829	 * with the VM sub map and its pmap.
830	 */
831	mach_memory_entry_port_release(shared_region->sr_mem_entry);
832	mem_entry = NULL;
833	shared_region->sr_mem_entry = IPC_PORT_NULL;
834
835	if (shared_region->sr_timer_call) {
836		thread_call_free(shared_region->sr_timer_call);
837	}
838
839#if 0
840	/*
841	 * If slid, free those resources.  We'll want this eventually,
842	 * but can't handle it properly today.
843	 */
844	si = &shared_region->sr_slide_info;
845	if (si->slide_info_entry) {
846		kmem_free(kernel_map,
847			  (vm_offset_t) si->slide_info_entry,
848			  (vm_size_t) si->slide_info_size);
849		vm_object_deallocate(si->slide_object);
850	}
851#endif
852
853	/* release the shared region structure... */
854	kfree(shared_region, sizeof (*shared_region));
855
856	SHARED_REGION_TRACE_DEBUG(
857		("shared_region: destroy(%p) <-\n",
858		 (void *)VM_KERNEL_ADDRPERM(shared_region)));
859	shared_region = NULL;
860
861}
862
863/*
864 * Gets the address of the first (in time) mapping in the shared region.
865 */
866kern_return_t
867vm_shared_region_start_address(
868	vm_shared_region_t	shared_region,
869	mach_vm_offset_t	*start_address)
870{
871	kern_return_t		kr;
872	mach_vm_offset_t	sr_base_address;
873	mach_vm_offset_t	sr_first_mapping;
874
875	SHARED_REGION_TRACE_DEBUG(
876		("shared_region: -> start_address(%p)\n",
877		 (void *)VM_KERNEL_ADDRPERM(shared_region)));
878	assert(shared_region->sr_ref_count > 1);
879
880	vm_shared_region_lock();
881
882	/*
883	 * Wait if there's another thread establishing a mapping
884	 * in this shared region right when we're looking at it.
885	 * We want a consistent view of the map...
886	 */
887	while (shared_region->sr_mapping_in_progress) {
888		/* wait for our turn... */
889		assert(shared_region->sr_ref_count > 1);
890		vm_shared_region_sleep(&shared_region->sr_mapping_in_progress,
891				       THREAD_UNINT);
892	}
893	assert(! shared_region->sr_mapping_in_progress);
894	assert(shared_region->sr_ref_count > 1);
895
896	sr_base_address = shared_region->sr_base_address;
897	sr_first_mapping = shared_region->sr_first_mapping;
898
899	if (sr_first_mapping == (mach_vm_offset_t) -1) {
900		/* shared region is empty */
901		kr = KERN_INVALID_ADDRESS;
902	} else {
903		kr = KERN_SUCCESS;
904		*start_address = sr_base_address + sr_first_mapping;
905	}
906
907	vm_shared_region_unlock();
908
909	SHARED_REGION_TRACE_DEBUG(
910		("shared_region: start_address(%p) <- 0x%llx\n",
911		 (void *)VM_KERNEL_ADDRPERM(shared_region),
912		 (long long)shared_region->sr_base_address));
913
914	return kr;
915}
916
917void
918vm_shared_region_undo_mappings(
919	vm_map_t sr_map,
920	mach_vm_offset_t sr_base_address,
921	struct shared_file_mapping_np *mappings,
922	unsigned int mappings_count)
923{
924	unsigned int		j = 0;
925	vm_shared_region_t	shared_region = NULL;
926	boolean_t		reset_shared_region_state = FALSE;
927
928	shared_region = vm_shared_region_get(current_task());
929	if (shared_region == NULL) {
930		printf("Failed to undo mappings because of NULL shared region.\n");
931		return;
932	}
933
934
935	if (sr_map == NULL) {
936		ipc_port_t		sr_handle;
937		vm_named_entry_t	sr_mem_entry;
938
939		vm_shared_region_lock();
940		assert(shared_region->sr_ref_count > 1);
941
942		while (shared_region->sr_mapping_in_progress) {
943			/* wait for our turn... */
944			vm_shared_region_sleep(&shared_region->sr_mapping_in_progress,
945					       THREAD_UNINT);
946		}
947		assert(! shared_region->sr_mapping_in_progress);
948		assert(shared_region->sr_ref_count > 1);
949		/* let others know we're working in this shared region */
950		shared_region->sr_mapping_in_progress = TRUE;
951
952		vm_shared_region_unlock();
953
954		reset_shared_region_state = TRUE;
955
956		/* no need to lock because this data is never modified... */
957		sr_handle = shared_region->sr_mem_entry;
958		sr_mem_entry = (vm_named_entry_t) sr_handle->ip_kobject;
959		sr_map = sr_mem_entry->backing.map;
960		sr_base_address = shared_region->sr_base_address;
961	}
962	/*
963	 * Undo the mappings we've established so far.
964	 */
965	for (j = 0; j < mappings_count; j++) {
966		kern_return_t kr2;
967
968		if (mappings[j].sfm_size == 0) {
969			/*
970			 * We didn't establish this
971			 * mapping, so nothing to undo.
972			 */
973			continue;
974		}
975		SHARED_REGION_TRACE_INFO(
976			("shared_region: mapping[%d]: "
977			 "address:0x%016llx "
978			 "size:0x%016llx "
979			 "offset:0x%016llx "
980			 "maxprot:0x%x prot:0x%x: "
981			 "undoing...\n",
982			 j,
983			 (long long)mappings[j].sfm_address,
984			 (long long)mappings[j].sfm_size,
985			 (long long)mappings[j].sfm_file_offset,
986			 mappings[j].sfm_max_prot,
987			 mappings[j].sfm_init_prot));
988		kr2 = mach_vm_deallocate(
989			sr_map,
990			(mappings[j].sfm_address -
991			 sr_base_address),
992			mappings[j].sfm_size);
993		assert(kr2 == KERN_SUCCESS);
994	}
995
996	if (reset_shared_region_state) {
997		vm_shared_region_lock();
998		assert(shared_region->sr_ref_count > 1);
999		assert(shared_region->sr_mapping_in_progress);
1000		/* we're done working on that shared region */
1001		shared_region->sr_mapping_in_progress = FALSE;
1002		thread_wakeup((event_t) &shared_region->sr_mapping_in_progress);
1003		vm_shared_region_unlock();
1004		reset_shared_region_state = FALSE;
1005	}
1006
1007	vm_shared_region_deallocate(shared_region);
1008}
1009
1010/*
1011 * Establish some mappings of a file in the shared region.
1012 * This is used by "dyld" via the shared_region_map_np() system call
1013 * to populate the shared region with the appropriate shared cache.
1014 *
1015 * One could also call it several times to incrementally load several
1016 * libraries, as long as they do not overlap.
1017 * It will return KERN_SUCCESS if the mappings were successfully established
1018 * or if they were already established identically by another process.
1019 */
1020kern_return_t
1021vm_shared_region_map_file(
1022	vm_shared_region_t		shared_region,
1023	unsigned int			mappings_count,
1024	struct shared_file_mapping_np	*mappings,
1025	memory_object_control_t		file_control,
1026	memory_object_size_t		file_size,
1027	void				*root_dir,
1028	uint32_t			slide,
1029	user_addr_t			slide_start,
1030	user_addr_t			slide_size)
1031{
1032	kern_return_t		kr;
1033	vm_object_t		file_object;
1034	ipc_port_t		sr_handle;
1035	vm_named_entry_t	sr_mem_entry;
1036	vm_map_t		sr_map;
1037	mach_vm_offset_t	sr_base_address;
1038	unsigned int		i;
1039	mach_port_t		map_port;
1040	vm_map_offset_t		target_address;
1041	vm_object_t		object;
1042	vm_object_size_t	obj_size;
1043	struct shared_file_mapping_np	*mapping_to_slide = NULL;
1044	mach_vm_offset_t	first_mapping = (mach_vm_offset_t) -1;
1045
1046
1047	kr = KERN_SUCCESS;
1048
1049	vm_shared_region_lock();
1050	assert(shared_region->sr_ref_count > 1);
1051
1052	if (shared_region->sr_root_dir != root_dir) {
1053		/*
1054		 * This shared region doesn't match the current root
1055		 * directory of this process.  Deny the mapping to
1056		 * avoid tainting the shared region with something that
		 * doesn't quite belong in it.
1058		 */
1059		vm_shared_region_unlock();
1060		kr = KERN_PROTECTION_FAILURE;
1061		goto done;
1062	}
1063
1064	/*
1065	 * Make sure we handle only one mapping at a time in a given
1066	 * shared region, to avoid race conditions.  This should not
1067	 * happen frequently...
1068	 */
1069	while (shared_region->sr_mapping_in_progress) {
1070		/* wait for our turn... */
1071		vm_shared_region_sleep(&shared_region->sr_mapping_in_progress,
1072				       THREAD_UNINT);
1073	}
1074	assert(! shared_region->sr_mapping_in_progress);
1075	assert(shared_region->sr_ref_count > 1);
1076	/* let others know we're working in this shared region */
1077	shared_region->sr_mapping_in_progress = TRUE;
1078
1079	vm_shared_region_unlock();
1080
1081	/* no need to lock because this data is never modified... */
1082	sr_handle = shared_region->sr_mem_entry;
1083	sr_mem_entry = (vm_named_entry_t) sr_handle->ip_kobject;
1084	sr_map = sr_mem_entry->backing.map;
1085	sr_base_address = shared_region->sr_base_address;
1086
1087	SHARED_REGION_TRACE_DEBUG(
1088		("shared_region: -> map(%p,%d,%p,%p,0x%llx)\n",
1089		 (void *)VM_KERNEL_ADDRPERM(shared_region), mappings_count,
1090		 (void *)VM_KERNEL_ADDRPERM(mappings),
1091		 (void *)VM_KERNEL_ADDRPERM(file_control), file_size));
1092
1093	/* get the VM object associated with the file to be mapped */
1094	file_object = memory_object_control_to_vm_object(file_control);
1095
1096	/* establish the mappings */
1097	for (i = 0; i < mappings_count; i++) {
1098		SHARED_REGION_TRACE_INFO(
1099			("shared_region: mapping[%d]: "
1100			 "address:0x%016llx size:0x%016llx offset:0x%016llx "
1101			 "maxprot:0x%x prot:0x%x\n",
1102			 i,
1103			 (long long)mappings[i].sfm_address,
1104			 (long long)mappings[i].sfm_size,
1105			 (long long)mappings[i].sfm_file_offset,
1106			 mappings[i].sfm_max_prot,
1107			 mappings[i].sfm_init_prot));
1108
1109		if (mappings[i].sfm_init_prot & VM_PROT_ZF) {
1110			/* zero-filled memory */
1111			map_port = MACH_PORT_NULL;
1112		} else {
1113			/* file-backed memory */
1114			map_port = (ipc_port_t) file_object->pager;
1115		}
1116
1117		if (mappings[i].sfm_init_prot & VM_PROT_SLIDE) {
1118			/*
1119			 * This is the mapping that needs to be slid.
1120			 */
1121			if (mapping_to_slide != NULL) {
1122				SHARED_REGION_TRACE_INFO(
1123					("shared_region: mapping[%d]: "
1124					 "address:0x%016llx size:0x%016llx "
1125					 "offset:0x%016llx "
1126					 "maxprot:0x%x prot:0x%x "
1127					 "will not be slid as only one such mapping is allowed...\n",
1128					 i,
1129					 (long long)mappings[i].sfm_address,
1130					 (long long)mappings[i].sfm_size,
1131					 (long long)mappings[i].sfm_file_offset,
1132					 mappings[i].sfm_max_prot,
1133					 mappings[i].sfm_init_prot));
1134			} else {
1135				mapping_to_slide = &mappings[i];
1136			}
1137		}
1138
1139		/* mapping's address is relative to the shared region base */
1140		target_address =
1141			mappings[i].sfm_address - sr_base_address;
1142
1143		/* establish that mapping, OK if it's "already" there */
1144		if (map_port == MACH_PORT_NULL) {
1145			/*
1146			 * We want to map some anonymous memory in a
1147			 * shared region.
1148			 * We have to create the VM object now, so that it
1149			 * can be mapped "copy-on-write".
1150			 */
1151			obj_size = vm_map_round_page(mappings[i].sfm_size,
1152						     VM_MAP_PAGE_MASK(sr_map));
1153			object = vm_object_allocate(obj_size);
1154			if (object == VM_OBJECT_NULL) {
1155				kr = KERN_RESOURCE_SHORTAGE;
1156			} else {
1157				kr = vm_map_enter(
1158					sr_map,
1159					&target_address,
1160					vm_map_round_page(mappings[i].sfm_size,
1161							  VM_MAP_PAGE_MASK(sr_map)),
1162					0,
1163					VM_FLAGS_FIXED | VM_FLAGS_ALREADY,
1164					object,
1165					0,
1166					TRUE,
1167					mappings[i].sfm_init_prot & VM_PROT_ALL,
1168					mappings[i].sfm_max_prot & VM_PROT_ALL,
1169					VM_INHERIT_DEFAULT);
1170			}
1171		} else {
1172			object = VM_OBJECT_NULL; /* no anonymous memory here */
1173			kr = vm_map_enter_mem_object(
1174				sr_map,
1175				&target_address,
1176				vm_map_round_page(mappings[i].sfm_size,
1177						  VM_MAP_PAGE_MASK(sr_map)),
1178				0,
1179				VM_FLAGS_FIXED | VM_FLAGS_ALREADY,
1180				map_port,
1181				mappings[i].sfm_file_offset,
1182				TRUE,
1183				mappings[i].sfm_init_prot & VM_PROT_ALL,
1184				mappings[i].sfm_max_prot & VM_PROT_ALL,
1185				VM_INHERIT_DEFAULT);
1186		}
1187
1188		if (kr == KERN_SUCCESS) {
1189			/*
1190			 * Record the first (chronologically) successful
1191			 * mapping in this shared region.
1192			 * We're protected by "sr_mapping_in_progress" here,
1193			 * so no need to lock "shared_region".
1194			 */
1195			if (first_mapping == (mach_vm_offset_t) -1) {
1196				first_mapping = target_address;
1197			}
1198		} else {
1199			if (map_port == MACH_PORT_NULL) {
1200				/*
1201				 * Get rid of the VM object we just created
1202				 * but failed to map.
1203				 */
1204				vm_object_deallocate(object);
1205				object = VM_OBJECT_NULL;
1206			}
1207			if (kr == KERN_MEMORY_PRESENT) {
1208				/*
1209				 * This exact mapping was already there:
1210				 * that's fine.
1211				 */
1212				SHARED_REGION_TRACE_INFO(
1213					("shared_region: mapping[%d]: "
1214					 "address:0x%016llx size:0x%016llx "
1215					 "offset:0x%016llx "
1216					 "maxprot:0x%x prot:0x%x "
1217					 "already mapped...\n",
1218					 i,
1219					 (long long)mappings[i].sfm_address,
1220					 (long long)mappings[i].sfm_size,
1221					 (long long)mappings[i].sfm_file_offset,
1222					 mappings[i].sfm_max_prot,
1223					 mappings[i].sfm_init_prot));
1224				/*
1225				 * We didn't establish this mapping ourselves;
1226				 * let's reset its size, so that we do not
1227				 * attempt to undo it if an error occurs later.
1228				 */
1229				mappings[i].sfm_size = 0;
1230				kr = KERN_SUCCESS;
1231			} else {
1232				/* this mapping failed ! */
1233				SHARED_REGION_TRACE_ERROR(
1234					("shared_region: mapping[%d]: "
1235					 "address:0x%016llx size:0x%016llx "
1236					 "offset:0x%016llx "
1237					 "maxprot:0x%x prot:0x%x failed 0x%x\n",
1238					 i,
1239					 (long long)mappings[i].sfm_address,
1240					 (long long)mappings[i].sfm_size,
1241					 (long long)mappings[i].sfm_file_offset,
1242					 mappings[i].sfm_max_prot,
1243					 mappings[i].sfm_init_prot,
1244					 kr));
1245
1246				vm_shared_region_undo_mappings(sr_map, sr_base_address, mappings, i);
1247				break;
1248			}
1249
1250		}
1251
1252	}
1253
1254	if (kr == KERN_SUCCESS &&
1255	    slide &&
1256	    mapping_to_slide != NULL) {
1257		kr = vm_shared_region_slide(slide,
1258					    mapping_to_slide->sfm_file_offset,
1259					    mapping_to_slide->sfm_size,
1260					    slide_start,
1261					    slide_size,
1262					    file_control);
		if (kr != KERN_SUCCESS) {
1264			SHARED_REGION_TRACE_ERROR(
1265				("shared_region: region_slide("
1266				 "slide:0x%x start:0x%016llx "
1267				 "size:0x%016llx) failed 0x%x\n",
1268				 slide,
1269				 (long long)slide_start,
1270				 (long long)slide_size,
1271				 kr));
1272			vm_shared_region_undo_mappings(sr_map,
1273						       sr_base_address,
1274						       mappings,
1275						       mappings_count);
1276		}
1277	}
1278
1279	vm_shared_region_lock();
1280	assert(shared_region->sr_ref_count > 1);
1281	assert(shared_region->sr_mapping_in_progress);
1282	/* set "sr_first_mapping"; dyld uses it to validate the shared cache */
1283	if (kr == KERN_SUCCESS &&
1284	    shared_region->sr_first_mapping == (mach_vm_offset_t) -1) {
1285		shared_region->sr_first_mapping = first_mapping;
1286	}
1287	/* we're done working on that shared region */
1288	shared_region->sr_mapping_in_progress = FALSE;
1289	thread_wakeup((event_t) &shared_region->sr_mapping_in_progress);
1290	vm_shared_region_unlock();
1291
1292done:
1293	SHARED_REGION_TRACE_DEBUG(
1294		("shared_region: map(%p,%d,%p,%p,0x%llx) <- 0x%x \n",
1295		 (void *)VM_KERNEL_ADDRPERM(shared_region), mappings_count,
1296		 (void *)VM_KERNEL_ADDRPERM(mappings),
1297		 (void *)VM_KERNEL_ADDRPERM(file_control), file_size, kr));
1298	return kr;
1299}
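
/*
 * Illustration only (not compiled): roughly what dyld passes down for
 * vm_shared_region_map_file() above.  struct shared_file_mapping_np
 * comes from <mach/shared_region.h>; the "__shared_region_map_np"
 * wrapper name and prototype are paraphrased from dyld and may differ
 * between releases, and the sizes/offsets are made-up placeholders.
 */
#if 0
#include <mach/shared_region.h>
#include <mach/vm_prot.h>

static int
example_map_shared_cache(int fd)
{
	struct shared_file_mapping_np	mappings[2];

	/* read-only, executable portion of the shared cache file */
	mappings[0].sfm_address	    = SHARED_REGION_BASE;
	mappings[0].sfm_size	    = 0x10000000;
	mappings[0].sfm_file_offset = 0;
	mappings[0].sfm_max_prot    = VM_PROT_READ | VM_PROT_EXECUTE;
	mappings[0].sfm_init_prot   = VM_PROT_READ | VM_PROT_EXECUTE;

	/* writable portion, mapped copy-on-write */
	mappings[1].sfm_address	    = SHARED_REGION_BASE + 0x10000000;
	mappings[1].sfm_size	    = 0x01000000;
	mappings[1].sfm_file_offset = 0x10000000;
	mappings[1].sfm_max_prot    = VM_PROT_READ | VM_PROT_WRITE;
	mappings[1].sfm_init_prot   = VM_PROT_READ | VM_PROT_WRITE;

	/* hypothetical wrapper around the shared_region_map_np() syscall */
	return __shared_region_map_np(fd, 2, mappings);
}
#endif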
1300
1301/*
1302 * Enter the appropriate shared region into "map" for "task".
1303 * This involves looking up the shared region (and possibly creating a new
1304 * one) for the desired environment, then mapping the VM sub map into the
1305 * task's VM "map", with the appropriate level of pmap-nesting.
1306 */
1307kern_return_t
1308vm_shared_region_enter(
1309	struct _vm_map		*map,
1310	struct task		*task,
1311	void			*fsroot,
1312	cpu_type_t		cpu)
1313{
1314	kern_return_t		kr;
1315	vm_shared_region_t	shared_region;
1316	vm_map_offset_t		sr_address, sr_offset, target_address;
1317	vm_map_size_t		sr_size, mapping_size;
1318	vm_map_offset_t		sr_pmap_nesting_start;
1319	vm_map_size_t		sr_pmap_nesting_size;
1320	ipc_port_t		sr_handle;
1321	boolean_t		is_64bit;
1322
1323	is_64bit = task_has_64BitAddr(task);
1324
1325	SHARED_REGION_TRACE_DEBUG(
1326		("shared_region: -> "
1327		 "enter(map=%p,task=%p,root=%p,cpu=%d,64bit=%d)\n",
1328		 (void *)VM_KERNEL_ADDRPERM(map),
1329		 (void *)VM_KERNEL_ADDRPERM(task),
1330		 (void *)VM_KERNEL_ADDRPERM(fsroot), cpu, is_64bit));
1331
1332	/* lookup (create if needed) the shared region for this environment */
1333	shared_region = vm_shared_region_lookup(fsroot, cpu, is_64bit);
1334	if (shared_region == NULL) {
1335		/* this should not happen ! */
1336		SHARED_REGION_TRACE_ERROR(
1337			("shared_region: -> "
1338			 "enter(map=%p,task=%p,root=%p,cpu=%d,64bit=%d): "
1339			 "lookup failed !\n",
1340			 (void *)VM_KERNEL_ADDRPERM(map),
1341			 (void *)VM_KERNEL_ADDRPERM(task),
1342			 (void *)VM_KERNEL_ADDRPERM(fsroot), cpu, is_64bit));
1343		//panic("shared_region_enter: lookup failed\n");
1344		return KERN_FAILURE;
1345	}
1346
1347	/* let the task use that shared region */
1348	vm_shared_region_set(task, shared_region);
1349
1350	kr = KERN_SUCCESS;
1351	/* no need to lock since this data is never modified */
1352	sr_address = shared_region->sr_base_address;
1353	sr_size = shared_region->sr_size;
1354	sr_handle = shared_region->sr_mem_entry;
1355	sr_pmap_nesting_start = shared_region->sr_pmap_nesting_start;
1356	sr_pmap_nesting_size = shared_region->sr_pmap_nesting_size;
1357
1358	/*
1359	 * Start mapping the shared region's VM sub map into the task's VM map.
1360	 */
1361	sr_offset = 0;
1362
1363	if (sr_pmap_nesting_start > sr_address) {
1364		/* we need to map a range without pmap-nesting first */
1365		target_address = sr_address;
1366		mapping_size = sr_pmap_nesting_start - sr_address;
1367		kr = vm_map_enter_mem_object(
1368			map,
1369			&target_address,
1370			mapping_size,
1371			0,
1372			VM_FLAGS_FIXED,
1373			sr_handle,
1374			sr_offset,
1375			TRUE,
1376			VM_PROT_READ,
1377			VM_PROT_ALL,
1378			VM_INHERIT_SHARE);
1379		if (kr != KERN_SUCCESS) {
1380			SHARED_REGION_TRACE_ERROR(
1381				("shared_region: enter(%p,%p,%p,%d,%d): "
1382				 "vm_map_enter(0x%llx,0x%llx,%p) error 0x%x\n",
1383				 (void *)VM_KERNEL_ADDRPERM(map),
1384				 (void *)VM_KERNEL_ADDRPERM(task),
1385				 (void *)VM_KERNEL_ADDRPERM(fsroot),
1386				 cpu, is_64bit,
1387				 (long long)target_address,
1388				 (long long)mapping_size,
1389				 (void *)VM_KERNEL_ADDRPERM(sr_handle), kr));
1390			goto done;
1391		}
1392		SHARED_REGION_TRACE_DEBUG(
1393			("shared_region: enter(%p,%p,%p,%d,%d): "
1394			 "vm_map_enter(0x%llx,0x%llx,%p) error 0x%x\n",
1395			 (void *)VM_KERNEL_ADDRPERM(map),
1396			 (void *)VM_KERNEL_ADDRPERM(task),
1397			 (void *)VM_KERNEL_ADDRPERM(fsroot), cpu, is_64bit,
1398			 (long long)target_address, (long long)mapping_size,
1399			 (void *)VM_KERNEL_ADDRPERM(sr_handle), kr));
1400		sr_offset += mapping_size;
1401		sr_size -= mapping_size;
1402	}
1403	/*
1404	 * We may need to map several pmap-nested portions, due to platform
1405	 * specific restrictions on pmap nesting.
1406	 * The pmap-nesting is triggered by the "VM_MEMORY_SHARED_PMAP" alias...
1407	 */
1408	for (;
1409	     sr_pmap_nesting_size > 0;
1410	     sr_offset += mapping_size,
1411		     sr_size -= mapping_size,
1412		     sr_pmap_nesting_size -= mapping_size) {
1413		target_address = sr_address + sr_offset;
1414		mapping_size = sr_pmap_nesting_size;
1415		if (mapping_size > pmap_nesting_size_max) {
1416			mapping_size = (vm_map_offset_t) pmap_nesting_size_max;
1417		}
1418		kr = vm_map_enter_mem_object(
1419			map,
1420			&target_address,
1421			mapping_size,
1422			0,
1423			(VM_FLAGS_FIXED | VM_MAKE_TAG(VM_MEMORY_SHARED_PMAP)),
1424			sr_handle,
1425			sr_offset,
1426			TRUE,
1427			VM_PROT_READ,
1428			VM_PROT_ALL,
1429			VM_INHERIT_SHARE);
1430		if (kr != KERN_SUCCESS) {
1431			SHARED_REGION_TRACE_ERROR(
1432				("shared_region: enter(%p,%p,%p,%d,%d): "
1433				 "vm_map_enter(0x%llx,0x%llx,%p) error 0x%x\n",
1434				 (void *)VM_KERNEL_ADDRPERM(map),
1435				 (void *)VM_KERNEL_ADDRPERM(task),
1436				 (void *)VM_KERNEL_ADDRPERM(fsroot),
1437				 cpu, is_64bit,
1438				 (long long)target_address,
1439				 (long long)mapping_size,
1440				 (void *)VM_KERNEL_ADDRPERM(sr_handle), kr));
1441			goto done;
1442		}
1443		SHARED_REGION_TRACE_DEBUG(
1444			("shared_region: enter(%p,%p,%p,%d,%d): "
1445			 "nested vm_map_enter(0x%llx,0x%llx,%p) error 0x%x\n",
1446			 (void *)VM_KERNEL_ADDRPERM(map),
1447			 (void *)VM_KERNEL_ADDRPERM(task),
1448			 (void *)VM_KERNEL_ADDRPERM(fsroot), cpu, is_64bit,
1449			 (long long)target_address, (long long)mapping_size,
1450			 (void *)VM_KERNEL_ADDRPERM(sr_handle), kr));
1451	}
1452	if (sr_size > 0) {
1453		/* and there's some left to be mapped without pmap-nesting */
1454		target_address = sr_address + sr_offset;
1455		mapping_size = sr_size;
1456		kr = vm_map_enter_mem_object(
1457			map,
1458			&target_address,
1459			mapping_size,
1460			0,
1461			VM_FLAGS_FIXED,
1462			sr_handle,
1463			sr_offset,
1464			TRUE,
1465			VM_PROT_READ,
1466			VM_PROT_ALL,
1467			VM_INHERIT_SHARE);
1468		if (kr != KERN_SUCCESS) {
1469			SHARED_REGION_TRACE_ERROR(
1470				("shared_region: enter(%p,%p,%p,%d,%d): "
1471				 "vm_map_enter(0x%llx,0x%llx,%p) error 0x%x\n",
1472				 (void *)VM_KERNEL_ADDRPERM(map),
1473				 (void *)VM_KERNEL_ADDRPERM(task),
1474				 (void *)VM_KERNEL_ADDRPERM(fsroot),
1475				 cpu, is_64bit,
1476				 (long long)target_address,
1477				 (long long)mapping_size,
1478				 (void *)VM_KERNEL_ADDRPERM(sr_handle), kr));
1479			goto done;
1480		}
1481		SHARED_REGION_TRACE_DEBUG(
1482			("shared_region: enter(%p,%p,%p,%d,%d): "
1483			 "vm_map_enter(0x%llx,0x%llx,%p) error 0x%x\n",
1484			 (void *)VM_KERNEL_ADDRPERM(map),
1485			 (void *)VM_KERNEL_ADDRPERM(task),
1486			 (void *)VM_KERNEL_ADDRPERM(fsroot), cpu, is_64bit,
1487			 (long long)target_address, (long long)mapping_size,
1488			 (void *)VM_KERNEL_ADDRPERM(sr_handle), kr));
1489		sr_offset += mapping_size;
1490		sr_size -= mapping_size;
1491	}
1492	assert(sr_size == 0);
1493
1494done:
1495	SHARED_REGION_TRACE_DEBUG(
1496		("shared_region: enter(%p,%p,%p,%d,%d) <- 0x%x\n",
1497		 (void *)VM_KERNEL_ADDRPERM(map),
1498		 (void *)VM_KERNEL_ADDRPERM(task),
1499		 (void *)VM_KERNEL_ADDRPERM(fsroot), cpu, is_64bit, kr));
1500	return kr;
1501}
1502
#define SANE_SLIDE_INFO_SIZE		(2048*1024) /* Can be changed if needed */
1504struct vm_shared_region_slide_info	slide_info;
1505
1506kern_return_t
1507vm_shared_region_sliding_valid(uint32_t slide)
1508{
1509	kern_return_t kr = KERN_SUCCESS;
1510	vm_shared_region_t sr = vm_shared_region_get(current_task());
1511
1512	/* No region yet? we're fine. */
1513	if (sr == NULL) {
1514		return kr;
1515	}
1516
1517	if ((sr->sr_slid == TRUE) && slide) {
1518	        if (slide != vm_shared_region_get_slide_info(sr)->slide) {
1519			printf("Only one shared region can be slid\n");
1520			kr = KERN_FAILURE;
1521		} else {
1522			/*
1523			 * Request for sliding when we've
1524			 * already done it with exactly the
1525			 * same slide value before.
1526			 * This isn't wrong technically but
1527			 * we don't want to slide again and
1528			 * so we return this value.
1529			 */
1530			kr = KERN_INVALID_ARGUMENT;
1531		}
1532	}
1533	vm_shared_region_deallocate(sr);
1534	return kr;
1535}
1536
1537kern_return_t
1538vm_shared_region_slide_init(
1539		vm_shared_region_t sr,
1540		mach_vm_size_t	slide_info_size,
1541		mach_vm_offset_t start,
1542		mach_vm_size_t size,
1543		uint32_t slide,
1544		memory_object_control_t	sr_file_control)
1545{
1546	kern_return_t kr = KERN_SUCCESS;
1547	vm_object_t object = VM_OBJECT_NULL;
1548	vm_object_offset_t offset = 0;
1549	vm_shared_region_slide_info_t si = vm_shared_region_get_slide_info(sr);
1550	vm_offset_t slide_info_entry;
1551
1552	vm_map_t map = NULL, cur_map = NULL;
1553	boolean_t	is_map_locked = FALSE;
1554
1555	assert(sr->sr_slide_in_progress);
1556	assert(!sr->sr_slid);
1557	assert(si->slide_object == NULL);
1558	assert(si->slide_info_entry == NULL);
1559
1560	if (slide_info_size > SANE_SLIDE_INFO_SIZE) {
1561		printf("Slide_info_size too large: %lx\n", (uintptr_t)slide_info_size);
1562		kr = KERN_FAILURE;
1563		return kr;
1564	}
1565
1566	kr = kmem_alloc(kernel_map,
1567			(vm_offset_t *) &slide_info_entry,
1568			(vm_size_t) slide_info_size);
1569	if (kr != KERN_SUCCESS) {
1570		return kr;
1571	}
1572
1573	if (sr_file_control != MEMORY_OBJECT_CONTROL_NULL) {
1574
1575		object = memory_object_control_to_vm_object(sr_file_control);
1576		vm_object_reference(object);
1577		offset = start;
1578
1579		vm_object_lock(object);
1580	} else {
1581		/*
1582		 * Remove this entire "else" block and all "map" references
1583		 * once we get rid of the shared_region_slide_np()
1584		 * system call.
1585		 */
1586		vm_map_entry_t entry = VM_MAP_ENTRY_NULL;
1587		map = current_map();
1588		vm_map_lock_read(map);
1589		is_map_locked = TRUE;
1590	Retry:
1591		cur_map = map;
1592		if(!vm_map_lookup_entry(map, start, &entry)) {
1593			kr = KERN_INVALID_ARGUMENT;
1594		} else {
1595			vm_object_t shadow_obj = VM_OBJECT_NULL;
1596
1597			if (entry->is_sub_map == TRUE) {
1598				map = entry->object.sub_map;
1599				start -= entry->vme_start;
1600				start += entry->offset;
1601				vm_map_lock_read(map);
1602				vm_map_unlock_read(cur_map);
1603				goto Retry;
1604			} else {
1605				object = entry->object.vm_object;
1606				offset = (start - entry->vme_start) + entry->offset;
1607			}
1608
1609			vm_object_lock(object);
1610			while (object->shadow != VM_OBJECT_NULL) {
1611				shadow_obj = object->shadow;
1612				vm_object_lock(shadow_obj);
1613				vm_object_unlock(object);
1614				object = shadow_obj;
1615			}
1616		}
1617	}
1618
1619	if (object->internal == TRUE) {
1620		kr = KERN_INVALID_ADDRESS;
1621	} else if (object->object_slid) {
1622		/* Can only be slid once */
1623		printf("%s: found vm_object %p already slid?\n", __FUNCTION__, object);
1624		kr = KERN_FAILURE;
1625	} else {
1626
1627		si->slide_info_entry = (vm_shared_region_slide_info_entry_t)slide_info_entry;
1628		si->slide_info_size = slide_info_size;
1629		si->slide_object = object;
1630		si->start = offset;
1631		si->end = si->start + size;
1632		si->slide = slide;
1633
1634		/*
1635		 * If we want to have this region get deallocated/freed
1636		 * then we will have to make sure that we msync(..MS_INVALIDATE..)
1637		 * the pages associated with this shared region. Those pages would
1638		 * have been slid with an older slide value.
1639		 */
1640
1641		/*
1642		 * Pointers in object are held without references; they
1643		 * are disconnected at the time that we destroy the
1644		 * shared region, and since the shared region holds
1645		 * a reference on the object, no references in the other
1646		 * direction are required.
1647		 */
1648		object->object_slid = TRUE;
1649		object->vo_slide_info = si;
1650	}
1651
1652	vm_object_unlock(object);
1653	if (is_map_locked == TRUE) {
1654		vm_map_unlock_read(map);
1655	}
1656
1657	if (kr != KERN_SUCCESS) {
1658		kmem_free(kernel_map, slide_info_entry, slide_info_size);
1659	}
1660	return kr;
1661}
1662
void *
vm_shared_region_get_slide_info_entry(vm_shared_region_t sr)
{
	return (void *)sr->sr_slide_info.slide_info_entry;
}
1667
1668
1669kern_return_t
1670vm_shared_region_slide_sanity_check(vm_shared_region_t sr)
1671{
1672	uint32_t pageIndex=0;
1673	uint16_t entryIndex=0;
1674	uint16_t *toc = NULL;
1675	vm_shared_region_slide_info_t si;
1676	vm_shared_region_slide_info_entry_t s_info;
1677	kern_return_t kr;
1678
1679	si = vm_shared_region_get_slide_info(sr);
1680	s_info = si->slide_info_entry;
1681	toc = (uint16_t*)((uintptr_t)s_info + s_info->toc_offset);
1682
1683	kr = mach_vm_protect(kernel_map,
1684			     (mach_vm_offset_t)(vm_offset_t)s_info,
1685			     (mach_vm_size_t) si->slide_info_size,
1686			     TRUE, VM_PROT_READ);
1687	if (kr != KERN_SUCCESS) {
1688		panic("vm_shared_region_slide_sanity_check: vm_protect() error 0x%x\n", kr);
1689	}
1690
	for (; pageIndex < s_info->toc_count; pageIndex++) {
1692
1693		entryIndex =  (uint16_t)(toc[pageIndex]);
1694
1695		if (entryIndex >= s_info->entry_count) {
1696			printf("No sliding bitmap entry for pageIndex: %d at entryIndex: %d amongst %d entries\n", pageIndex, entryIndex, s_info->entry_count);
1697			goto fail;
1698		}
1699
1700	}
1701	return KERN_SUCCESS;
1702fail:
1703	if (si->slide_info_entry != NULL) {
1704		kmem_free(kernel_map,
1705			  (vm_offset_t) si->slide_info_entry,
1706			  (vm_size_t) si->slide_info_size);
1707
1708		vm_object_lock(si->slide_object);
1709		si->slide_object->object_slid = FALSE;
1710		si->slide_object->vo_slide_info = NULL;
1711		vm_object_unlock(si->slide_object);
1712
1713		vm_object_deallocate(si->slide_object);
1714	        si->slide_object	= NULL;
1715		si->start = 0;
1716		si->end = 0;
1717		si->slide = 0;
1718		si->slide_info_entry = NULL;
1719		si->slide_info_size = 0;
1720	}
1721	return KERN_FAILURE;
1722}
1723
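/*
 * Apply the slide to one page of the shared cache.  "vaddr" is a writable
 * mapping of the page and "pageIndex" is the page's index within the slid
 * range, used to look up the page's bitmap in the slide info's table of
 * contents.
 */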
1724kern_return_t
1725vm_shared_region_slide_page(vm_shared_region_slide_info_t si, vm_offset_t vaddr, uint32_t pageIndex)
1726{
1727	uint16_t *toc = NULL;
1728	slide_info_entry_toc_t bitmap = NULL;
1729	uint32_t i=0, j=0;
1730	uint8_t b = 0;
1731	uint32_t slide = si->slide;
1732	int is_64 = task_has_64BitAddr(current_task());
1733
1734	vm_shared_region_slide_info_entry_t s_info = si->slide_info_entry;
1735	toc = (uint16_t*)((uintptr_t)s_info + s_info->toc_offset);
1736
1737	if (pageIndex >= s_info->toc_count) {
1738		printf("No slide entry for this page in toc. PageIndex: %d Toc Count: %d\n", pageIndex, s_info->toc_count);
1739	} else {
1740		uint16_t entryIndex =  (uint16_t)(toc[pageIndex]);
1741		slide_info_entry_toc_t slide_info_entries = (slide_info_entry_toc_t)((uintptr_t)s_info + s_info->entry_offset);
1742
1743		if (entryIndex >= s_info->entry_count) {
1744			printf("No sliding bitmap entry for entryIndex: %d amongst %d entries\n", entryIndex, s_info->entry_count);
1745		} else {
1746			bitmap = &slide_info_entries[entryIndex];
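			/*
			 * Each set bit in the bitmap marks a 32-bit word of
			 * this page that needs to be rebased: bit (i * 8 + j)
			 * covers the word at byte offset
			 * sizeof(uint32_t) * (i * 8 + j).  For example, with
			 * slide = 0x2000, a word holding 0x86f41000 would be
			 * rewritten as 0x86f43000.
			 */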
1747
1748			for (i = 0; i < NUM_SLIDING_BITMAPS_PER_PAGE; ++i) {
1749				b = bitmap->entry[i];
1750				if (b != 0) {
1751					for (j = 0; j < 8; ++j) {
1752						if (b & (1 << j)) {
1753							uint32_t *ptr_to_slide;
1754							uint32_t old_value;
1755
1756							ptr_to_slide = (uint32_t*)((uintptr_t)(vaddr)+(sizeof(uint32_t)*(i*8 +j)));
1757							old_value = *ptr_to_slide;
1758							*ptr_to_slide += slide;
1759							if (is_64 && *ptr_to_slide < old_value) {
1760								/*
1761								 * We just slid the low 32 bits of a 64-bit pointer
1762								 * and it looks like there should have been a carry-over
1763								 * to the upper 32 bits.
1764								 * The sliding failed...
1765								 */
1766							printf("vm_shared_region_slide_page() carry over: i=%d j=%d b=0x%x slide=0x%x old=0x%x new=0x%x\n",
1767								       i, j, b, slide, old_value, *ptr_to_slide);
1768								return KERN_FAILURE;
1769							}
1770						}
1771					}
1772				}
1773			}
1774		}
1775	}
1776
1777	return KERN_SUCCESS;
1778}
1779
1780/******************************************************************************/
1781/* Comm page support                                                          */
1782/******************************************************************************/
1783
1784ipc_port_t commpage32_handle = IPC_PORT_NULL;
1785ipc_port_t commpage64_handle = IPC_PORT_NULL;
1786vm_named_entry_t commpage32_entry = NULL;
1787vm_named_entry_t commpage64_entry = NULL;
1788vm_map_t commpage32_map = VM_MAP_NULL;
1789vm_map_t commpage64_map = VM_MAP_NULL;
1790
1791ipc_port_t commpage_text32_handle = IPC_PORT_NULL;
1792ipc_port_t commpage_text64_handle = IPC_PORT_NULL;
1793vm_named_entry_t commpage_text32_entry = NULL;
1794vm_named_entry_t commpage_text64_entry = NULL;
1795vm_map_t commpage_text32_map = VM_MAP_NULL;
1796vm_map_t commpage_text64_map = VM_MAP_NULL;
1797
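/*
 * Default (unslid) locations of the text comm pages; vm_commpage_text_init()
 * adds a random, page-aligned slide to these at boot.
 */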
1798user32_addr_t commpage_text32_location = (user32_addr_t) _COMM_PAGE32_TEXT_START;
1799user64_addr_t commpage_text64_location = (user64_addr_t) _COMM_PAGE64_TEXT_START;
1800
1801#if defined(__i386__) || defined(__x86_64__)
1802/*
1803 * Create a memory entry, VM submap and pmap for one commpage.
1804 */
1805static void
1806_vm_commpage_init(
1807	ipc_port_t	*handlep,
1808	vm_map_size_t	size)
1809{
1810	kern_return_t		kr;
1811	vm_named_entry_t	mem_entry;
1812	vm_map_t		new_map;
1813
1814	SHARED_REGION_TRACE_DEBUG(
1815		("commpage: -> _init(0x%llx)\n",
1816		 (long long)size));
1817
1818	kr = mach_memory_entry_allocate(&mem_entry,
1819					handlep);
1820	if (kr != KERN_SUCCESS) {
1821		panic("_vm_commpage_init: could not allocate mem_entry");
1822	}
1823	new_map = vm_map_create(pmap_create(NULL, 0, FALSE), 0, size, TRUE);
1824	if (new_map == VM_MAP_NULL) {
1825		panic("_vm_commpage_init: could not allocate VM map");
1826	}
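	/* have the memory entry be backed by the new submap */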
1827	mem_entry->backing.map = new_map;
1828	mem_entry->internal = TRUE;
1829	mem_entry->is_sub_map = TRUE;
1830	mem_entry->offset = 0;
1831	mem_entry->protection = VM_PROT_ALL;
1832	mem_entry->size = size;
1833
1834	SHARED_REGION_TRACE_DEBUG(
1835		("commpage: _init(0x%llx) <- %p\n",
1836		 (long long)size, (void *)VM_KERNEL_ADDRPERM(*handlep)));
1837}
1838#endif
1839
1840
1841/*
1842 * Initialize the comm text pages at boot time.
1843 */
1844extern u_int32_t random(void);
1845void
1846vm_commpage_text_init(void)
1847{
1848	SHARED_REGION_TRACE_DEBUG(
1849		("commpage text: ->init()\n"));
1850#if defined(__i386__) || defined(__x86_64__)
1851	/* create the 32-bit comm text page */
1852	unsigned int offset = (random() % _PFZ32_SLIDE_RANGE) << PAGE_SHIFT; /* random page-aligned slide, restricted to 32-bit max minus 2 pages */
1853	_vm_commpage_init(&commpage_text32_handle, _COMM_PAGE_TEXT_AREA_LENGTH);
1854	commpage_text32_entry = (vm_named_entry_t) commpage_text32_handle->ip_kobject;
1855	commpage_text32_map = commpage_text32_entry->backing.map;
1856	commpage_text32_location = (user32_addr_t) (_COMM_PAGE32_TEXT_START + offset);
1857	/* XXX if (cpu_is_64bit_capable()) ? */
1858	/* create the 64-bit comm text page */
1859	offset = (random() % _PFZ64_SLIDE_RANGE) << PAGE_SHIFT; /* restrict the slide to a range of up to 2MB */
1860	_vm_commpage_init(&commpage_text64_handle, _COMM_PAGE_TEXT_AREA_LENGTH);
1861	commpage_text64_entry = (vm_named_entry_t) commpage_text64_handle->ip_kobject;
1862	commpage_text64_map = commpage_text64_entry->backing.map;
1863	commpage_text64_location = (user64_addr_t) (_COMM_PAGE64_TEXT_START + offset);
1864
1865	commpage_text_populate();
1866#else
1867#error Unknown architecture.
1868#endif /* __i386__ || __x86_64__ */
1869	/* populate the routines in here */
1870	SHARED_REGION_TRACE_DEBUG(
1871		("commpage text: init() <-\n"));
1872
1873}
1874
1875/*
1876 * Initialize the comm pages at boot time.
1877 */
1878void
1879vm_commpage_init(void)
1880{
1881	SHARED_REGION_TRACE_DEBUG(
1882		("commpage: -> init()\n"));
1883
1884#if defined(__i386__) || defined(__x86_64__)
1885	/* create the 32-bit comm page */
1886	_vm_commpage_init(&commpage32_handle, _COMM_PAGE32_AREA_LENGTH);
1887	commpage32_entry = (vm_named_entry_t) commpage32_handle->ip_kobject;
1888	commpage32_map = commpage32_entry->backing.map;
1889
1890	/* XXX if (cpu_is_64bit_capable()) ? */
1891	/* create the 64-bit comm page */
1892	_vm_commpage_init(&commpage64_handle, _COMM_PAGE64_AREA_LENGTH);
1893	commpage64_entry = (vm_named_entry_t) commpage64_handle->ip_kobject;
1894	commpage64_map = commpage64_entry->backing.map;
1895
1896#endif /* __i386__ || __x86_64__ */
1897
1898	/* populate them according to this specific platform */
1899	commpage_populate();
1900	__commpage_setup = 1;
1901#if defined(__i386__) || defined(__x86_64__)
1902	if (__system_power_source == 0) {
1903		post_sys_powersource_internal(0, 1);
1904	}
1905#endif /* __i386__ || __x86_64__ */
1906
1907	SHARED_REGION_TRACE_DEBUG(
1908		("commpage: init() <-\n"));
1909}
1910
1911/*
1912 * Enter the appropriate comm page into the task's address space.
1913 * This is called at exec() time via vm_map_exec().
1914 */
1915kern_return_t
1916vm_commpage_enter(
1917	vm_map_t	map,
1918	task_t		task)
1919{
1920	ipc_port_t		commpage_handle, commpage_text_handle;
1921	vm_map_offset_t		commpage_address, objc_address, commpage_text_address;
1922	vm_map_size_t		commpage_size, objc_size, commpage_text_size;
1923	int			vm_flags;
1924	kern_return_t		kr;
1925
1926	SHARED_REGION_TRACE_DEBUG(
1927		("commpage: -> enter(%p,%p)\n",
1928		 (void *)VM_KERNEL_ADDRPERM(map),
1929		 (void *)VM_KERNEL_ADDRPERM(task)));
1930
1931	commpage_text_size = _COMM_PAGE_TEXT_AREA_LENGTH;
1932	/* the comm page is likely to be beyond the actual end of the VM map */
1933	vm_flags = VM_FLAGS_FIXED | VM_FLAGS_BEYOND_MAX;
1934
1935	/* select the appropriate comm page for this task */
1936	assert(! (task_has_64BitAddr(task) ^ vm_map_is_64bit(map)));
1937	if (task_has_64BitAddr(task)) {
1938		commpage_handle = commpage64_handle;
1939		commpage_address = (vm_map_offset_t) _COMM_PAGE64_BASE_ADDRESS;
1940		commpage_size = _COMM_PAGE64_AREA_LENGTH;
1941		objc_size = _COMM_PAGE64_OBJC_SIZE;
1942		objc_address = _COMM_PAGE64_OBJC_BASE;
1943		commpage_text_handle = commpage_text64_handle;
1944		commpage_text_address = (vm_map_offset_t) commpage_text64_location;
1945	} else {
1946		commpage_handle = commpage32_handle;
1947		commpage_address =
1948			(vm_map_offset_t)(unsigned) _COMM_PAGE32_BASE_ADDRESS;
1949		commpage_size = _COMM_PAGE32_AREA_LENGTH;
1950		objc_size = _COMM_PAGE32_OBJC_SIZE;
1951		objc_address = _COMM_PAGE32_OBJC_BASE;
1952		commpage_text_handle = commpage_text32_handle;
1953		commpage_text_address = (vm_map_offset_t) commpage_text32_location;
1954	}
1955
1956	if ((commpage_address & (pmap_nesting_size_min - 1)) == 0 &&
1957	    (commpage_size & (pmap_nesting_size_min - 1)) == 0) {
1958		/* the commpage is properly aligned and sized for pmap-nesting */
1959		vm_flags |= VM_MAKE_TAG(VM_MEMORY_SHARED_PMAP);
1960	}
1961	/* map the comm page in the task's address space */
1962	assert(commpage_handle != IPC_PORT_NULL);
1963	kr = vm_map_enter_mem_object(
1964		map,
1965		&commpage_address,
1966		commpage_size,
1967		0,
1968		vm_flags,
1969		commpage_handle,
1970		0,
1971		FALSE,
1972		VM_PROT_READ,
1973		VM_PROT_READ,
1974		VM_INHERIT_SHARE);
1975	if (kr != KERN_SUCCESS) {
1976		SHARED_REGION_TRACE_ERROR(
1977			("commpage: enter(%p,0x%llx,0x%llx) "
1978			 "commpage %p mapping failed 0x%x\n",
1979			 (void *)VM_KERNEL_ADDRPERM(map),
1980			 (long long)commpage_address,
1981			 (long long)commpage_size,
1982			 (void *)VM_KERNEL_ADDRPERM(commpage_handle), kr));
1983	}
1984
1985	/* map the comm text page in the task's address space */
1986	assert(commpage_text_handle != IPC_PORT_NULL);
1987	kr = vm_map_enter_mem_object(
1988		map,
1989		&commpage_text_address,
1990		commpage_text_size,
1991		0,
1992		vm_flags,
1993		commpage_text_handle,
1994		0,
1995		FALSE,
1996		VM_PROT_READ|VM_PROT_EXECUTE,
1997		VM_PROT_READ|VM_PROT_EXECUTE,
1998		VM_INHERIT_SHARE);
1999	if (kr != KERN_SUCCESS) {
2000		SHARED_REGION_TRACE_ERROR(
2001			("commpage text: enter(%p,0x%llx,0x%llx) "
2002			 "commpage text %p mapping failed 0x%x\n",
2003			 (void *)VM_KERNEL_ADDRPERM(map),
2004			 (long long)commpage_text_address,
2005			 (long long)commpage_text_size,
2006			 (void *)VM_KERNEL_ADDRPERM(commpage_text_handle), kr));
2007	}
2008
2009	/*
2010	 * Since we're here, we also pre-allocate some virtual space for the
2011	 * Objective-C run-time, if needed...
2012	 */
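	/*
	 * The mapping below is backed by a null memory object, so it
	 * effectively just reserves the address range; pages are zero-filled
	 * on demand.
	 */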
2013	if (objc_size != 0) {
2014		kr = vm_map_enter_mem_object(
2015			map,
2016			&objc_address,
2017			objc_size,
2018			0,
2019			VM_FLAGS_FIXED | VM_FLAGS_BEYOND_MAX,
2020			IPC_PORT_NULL,
2021			0,
2022			FALSE,
2023			VM_PROT_ALL,
2024			VM_PROT_ALL,
2025			VM_INHERIT_DEFAULT);
2026		if (kr != KERN_SUCCESS) {
2027			SHARED_REGION_TRACE_ERROR(
2028				("commpage: enter(%p,0x%llx,0x%llx) "
2029				 "objc mapping failed 0x%x\n",
2030				 (void *)VM_KERNEL_ADDRPERM(map),
2031				 (long long)objc_address,
2032				 (long long)objc_size, kr));
2033		}
2034	}
2035
2036	SHARED_REGION_TRACE_DEBUG(
2037		("commpage: enter(%p,%p) <- 0x%x\n",
2038		 (void *)VM_KERNEL_ADDRPERM(map),
2039		 (void *)VM_KERNEL_ADDRPERM(task), kr));
2040	return kr;
2041}
2042
2043int
2044vm_shared_region_slide(uint32_t slide,
2045			mach_vm_offset_t	entry_start_address,
2046			mach_vm_size_t		entry_size,
2047			mach_vm_offset_t	slide_start,
2048			mach_vm_size_t		slide_size,
2049			memory_object_control_t	sr_file_control)
2050{
2051	void *slide_info_entry = NULL;
2052	int			error;
2053	vm_shared_region_t	sr;
2054
2055	SHARED_REGION_TRACE_DEBUG(
2056		("vm_shared_region_slide: -> slide %#x, entry_start %#llx, entry_size %#llx, slide_start %#llx, slide_size %#llx\n",
2057		 slide, entry_start_address, entry_size, slide_start, slide_size));
2058
2059	sr = vm_shared_region_get(current_task());
2060	if (sr == NULL) {
2061		printf("%s: no shared region?\n", __FUNCTION__);
2062		SHARED_REGION_TRACE_DEBUG(
2063			("vm_shared_region_slide: <- %d (no shared region)\n",
2064			 KERN_FAILURE));
2065		return KERN_FAILURE;
2066	}
2067
2068	/*
2069	 * Protect from concurrent access.
2070	 */
2071	vm_shared_region_lock();
2072	while (sr->sr_slide_in_progress) {
2073		vm_shared_region_sleep(&sr->sr_slide_in_progress, THREAD_UNINT);
2074	}
2075	if (sr->sr_slid || shared_region_completed_slide) {
2078		vm_shared_region_unlock();
2079
2080		vm_shared_region_deallocate(sr);
2081		printf("%s: shared region already slid?\n", __FUNCTION__);
2082		SHARED_REGION_TRACE_DEBUG(
2083			("vm_shared_region_slide: <- %d (already slid)\n",
2084			 KERN_FAILURE));
2085		return KERN_FAILURE;
2086	}
2087
2088	sr->sr_slide_in_progress = TRUE;
2089	vm_shared_region_unlock();
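	/*
	 * With sr_slide_in_progress set, we can safely drop the lock while
	 * we copy in and sanity-check the slide info; any other slide
	 * attempt will wait on the flag above.
	 */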
2090
2091	if ((error = vm_shared_region_slide_init(sr, slide_size, entry_start_address, entry_size, slide, sr_file_control))) {
2092		printf("slide_info initialization failed with kr=%d\n", error);
2093		goto done;
2094	}
2095
2096	slide_info_entry = vm_shared_region_get_slide_info_entry(sr);
2097	if (slide_info_entry == NULL) {
2098		error = KERN_FAILURE;
2099	} else {
2100		error = copyin((user_addr_t)slide_start,
2101			       slide_info_entry,
2102			       (vm_size_t)slide_size);
2103		if (error) {
2104			error = KERN_INVALID_ADDRESS;
2105		}
2106	}
2107	if (error) {
2108		goto done;
2109	}
2110
2111	if (vm_shared_region_slide_sanity_check(sr) != KERN_SUCCESS) {
2112		error = KERN_INVALID_ARGUMENT;
2113		printf("Sanity check failed for slide_info\n");
2114	} else {
2115#if DEBUG
2116		printf("Successfully initialized slide_info with start_address: %p region_size: %ld slide_header_size: %ld\n",
2117				(void*)(uintptr_t)entry_start_address,
2118				(unsigned long)entry_size,
2119				(unsigned long)slide_size);
2120#endif
2121	}
2122done:
2123	vm_shared_region_lock();
2124
2125	assert(sr->sr_slide_in_progress);
2126	assert(sr->sr_slid == FALSE);
2127	sr->sr_slide_in_progress = FALSE;
2128	thread_wakeup(&sr->sr_slide_in_progress);
2129
2130	if (error == KERN_SUCCESS) {
2131		sr->sr_slid = TRUE;
2132
2133		/*
2134		 * We don't know how to tear down a slid shared region today, because
2135		 * we would have to invalidate all the pages that have been slid
2136		 * atomically with respect to anyone mapping the shared region afresh.
2137		 * Therefore, take a dangling reference to prevent teardown.
2138		 */
2139		sr->sr_ref_count++;
2140		shared_region_completed_slide = TRUE;
2141	}
2142	vm_shared_region_unlock();
2143
2144	vm_shared_region_deallocate(sr);
2145
2146	SHARED_REGION_TRACE_DEBUG(
2147		("vm_shared_region_slide: <- %d\n",
2148		 error));
2149
2150	return error;
2151}
2152
2153/*
2154 * Called from the power-management code to let the kernel know the current source of power:
2155 *	0 if running on an external source (connected to AC power)
2156 *	1 if running on an internal source, i.e. battery
2157 */
2158void
2159#if defined(__i386__) || defined(__x86_64__)
2160post_sys_powersource(int i)
2161#else
2162post_sys_powersource(__unused int i)
2163#endif
2164{
2165#if defined(__i386__) || defined(__x86_64__)
2166	post_sys_powersource_internal(i, 0);
2167#endif /* __i386__ || __x86_64__ */
2168}
2169
2170
2171#if defined(__i386__) || defined(__x86_64__)
2172static void
2173post_sys_powersource_internal(int i, int internal)
2174{
2175	if (internal == 0)
2176		__system_power_source = i;
2177
2178	if (__commpage_setup != 0) {
2179		if (__system_power_source != 0)
2180			commpage_set_spin_count(0);
2181		else
2182			commpage_set_spin_count(MP_SPIN_TRIES);
2183	}
2184}
2185#endif /* __i386__ || __x86_64__ */
2186
2187