1/*
2 * Copyright (c) 2000-2007 Apple Inc. All rights reserved.
3 *
4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5 *
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. The rights granted to you under the License
10 * may not be used to create, or enable the creation or redistribution of,
11 * unlawful or unlicensed copies of an Apple operating system, or to
12 * circumvent, violate, or enable the circumvention or violation of, any
13 * terms of an Apple operating system software license agreement.
14 *
15 * Please obtain a copy of the License at
16 * http://www.opensource.apple.com/apsl/ and read it before using this file.
17 *
18 * The Original Code and all software distributed under the License are
19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23 * Please see the License for the specific language governing rights and
24 * limitations under the License.
25 *
26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27 */
28/*
29 * Mach Operating System
30 * Copyright (c) 1987 Carnegie-Mellon University
31 * All rights reserved.  The CMU software License Agreement specifies
32 * the terms and conditions for use and redistribution.
33 */
34/*
35 * NOTICE: This file was modified by SPARTA, Inc. in 2006 to introduce
36 * support for mandatory and extensible security protections.  This notice
37 * is included in support of clause 2.2 (b) of the Apple Public License,
38 * Version 2.0.
39 */
40
41#include <meta_features.h>
42
43#include <kern/task.h>
44#include <kern/thread.h>
45#include <kern/debug.h>
46#include <kern/lock.h>
47#include <mach/mach_traps.h>
48#include <mach/port.h>
49#include <mach/task.h>
50#include <mach/task_access.h>
51#include <mach/task_special_ports.h>
52#include <mach/time_value.h>
53#include <mach/vm_map.h>
54#include <mach/vm_param.h>
55#include <mach/vm_prot.h>
56
57#include <sys/file_internal.h>
58#include <sys/param.h>
59#include <sys/systm.h>
60#include <sys/dir.h>
61#include <sys/namei.h>
62#include <sys/proc_internal.h>
63#include <sys/kauth.h>
64#include <sys/vm.h>
65#include <sys/file.h>
66#include <sys/vnode_internal.h>
67#include <sys/mount.h>
68#include <sys/trace.h>
69#include <sys/kernel.h>
70#include <sys/ubc_internal.h>
71#include <sys/user.h>
72#include <sys/syslog.h>
73#include <sys/stat.h>
74#include <sys/sysproto.h>
75#include <sys/mman.h>
76#include <sys/sysctl.h>
77
78#include <bsm/audit_kernel.h>
79#include <bsm/audit_kevents.h>
80
81#include <kern/kalloc.h>
82#include <vm/vm_map.h>
83#include <vm/vm_kern.h>
84
85#include <machine/spl.h>
86
87#include <mach/shared_region.h>
88#include <vm/vm_shared_region.h>
89
90#include <vm/vm_protos.h>
91
92/*
93 * Sysctl's related to data/stack execution.  See osfmk/vm/vm_map.c
94 */
95
#ifndef SECURE_KERNEL
/*
 * Tunables controlling whether the VM layer permits execution from
 * stack/data pages; the policy itself lives in osfmk/vm/vm_map.c.
 * Not exposed on SECURE_KERNEL builds.
 */
extern int allow_stack_exec, allow_data_exec;

SYSCTL_INT(_vm, OID_AUTO, allow_stack_exec, CTLFLAG_RW, &allow_stack_exec, 0, "");
SYSCTL_INT(_vm, OID_AUTO, allow_data_exec, CTLFLAG_RW, &allow_data_exec, 0, "");
#endif /* !SECURE_KERNEL */
102
#if CONFIG_NO_PRINTF_STRINGS
/*
 * Stub used when the kernel is built without printf format strings:
 * execution failures are silently dropped.
 */
void
log_stack_execution_failure(__unused addr64_t a, __unused vm_prot_t b)
{
}
#else
/* Human-readable names for each VM_PROT_READ/WRITE/EXECUTE combination,
 * indexed by (prot & VM_PROT_ALL). */
static const char *prot_values[] = {
	"none",
	"read-only",
	"write-only",
	"read-write",
	"execute-only",
	"read-execute",
	"write-execute",
	"read-write-execute"
};

/*
 * Log an attempt to execute code from a page that does not permit it
 * (data/stack execution prevention), identifying the current process,
 * the faulting virtual address, and the page's protections.
 */
void
log_stack_execution_failure(addr64_t vaddr, vm_prot_t prot)
{
	printf("Data/Stack execution not permitted: %s[pid %d] at virtual address 0x%qx, protections were %s\n",
		current_proc()->p_comm, current_proc()->p_pid, vaddr, prot_values[prot & VM_PROT_ALL]);
}
#endif
127
128
129int
130useracc(
131	user_addr_t	addr,
132	user_size_t	len,
133	int	prot)
134{
135	return (vm_map_check_protection(
136			current_map(),
137			vm_map_trunc_page(addr), vm_map_round_page(addr+len),
138			prot == B_READ ? VM_PROT_READ : VM_PROT_WRITE));
139}
140
141int
142vslock(
143	user_addr_t	addr,
144	user_size_t	len)
145{
146	kern_return_t kret;
147	kret = vm_map_wire(current_map(), vm_map_trunc_page(addr),
148			vm_map_round_page(addr+len),
149			VM_PROT_READ | VM_PROT_WRITE ,FALSE);
150
151	switch (kret) {
152	case KERN_SUCCESS:
153		return (0);
154	case KERN_INVALID_ADDRESS:
155	case KERN_NO_SPACE:
156		return (ENOMEM);
157	case KERN_PROTECTION_FAILURE:
158		return (EACCES);
159	default:
160		return (EINVAL);
161	}
162}
163
/*
 * Unwire the user address range [addr, addr+len) previously wired by
 * vslock().  The "dirtied" parameter is currently unused: the code that
 * would mark the underlying pages modified is compiled out (see the
 * FIXME blocks below).  Returns 0 on success or a BSD errno translated
 * from the Mach result.
 */
int
vsunlock(
	user_addr_t addr,
	user_size_t len,
	__unused int dirtied)
{
#if FIXME  /* [ */
	pmap_t		pmap;
	vm_page_t	pg;
	vm_map_offset_t	vaddr;
	ppnum_t		paddr;
#endif  /* FIXME ] */
	kern_return_t kret;

#if FIXME  /* [ */
	/* If the caller dirtied the pages, mark each one modified so the
	 * pageout daemon knows to write it back.  (Currently disabled.) */
	if (dirtied) {
		pmap = get_task_pmap(current_task());
		for (vaddr = vm_map_trunc_page(addr);
		     vaddr < vm_map_round_page(addr+len);
				vaddr += PAGE_SIZE) {
			paddr = pmap_extract(pmap, vaddr);
			pg = PHYS_TO_VM_PAGE(paddr);
			vm_page_set_modified(pg);
		}
	}
#endif  /* FIXME ] */
#ifdef	lint
	dirtied++;
#endif	/* lint */
	kret = vm_map_unwire(current_map(), vm_map_trunc_page(addr),
				vm_map_round_page(addr+len), FALSE);
	/* Translate the Mach return code into a BSD errno */
	switch (kret) {
	case KERN_SUCCESS:
		return (0);
	case KERN_INVALID_ADDRESS:
	case KERN_NO_SPACE:
		return (ENOMEM);
	case KERN_PROTECTION_FAILURE:
		return (EACCES);
	default:
		return (EINVAL);
	}
}
207
208int
209subyte(
210	user_addr_t addr,
211	int byte)
212{
213	char character;
214
215	character = (char)byte;
216	return (copyout((void *)&(character), addr, sizeof(char)) == 0 ? 0 : -1);
217}
218
219int
220suibyte(
221	user_addr_t addr,
222	int byte)
223{
224	char character;
225
226	character = (char)byte;
227	return (copyout((void *)&(character), addr, sizeof(char)) == 0 ? 0 : -1);
228}
229
230int fubyte(user_addr_t addr)
231{
232	unsigned char byte;
233
234	if (copyin(addr, (void *) &byte, sizeof(char)))
235		return(-1);
236	return(byte);
237}
238
239int fuibyte(user_addr_t addr)
240{
241	unsigned char byte;
242
243	if (copyin(addr, (void *) &(byte), sizeof(char)))
244		return(-1);
245	return(byte);
246}
247
248int
249suword(
250	user_addr_t addr,
251	long word)
252{
253	return (copyout((void *) &word, addr, sizeof(int)) == 0 ? 0 : -1);
254}
255
/*
 * Fetch a word from the given user-space address.  Note that only
 * sizeof(int) bytes are copied in; on an ILP32 kernel this fills the
 * whole long.  Returns the word on success, -1 on failure.
 */
long fuword(user_addr_t addr)
{
	long word;

	if (copyin(addr, (void *) &word, sizeof(int)))
		return(-1);
	return(word);
}
264
265/* suiword and fuiword are the same as suword and fuword, respectively */
266
267int
268suiword(
269	user_addr_t addr,
270	long word)
271{
272	return (copyout((void *) &word, addr, sizeof(int)) == 0 ? 0 : -1);
273}
274
/*
 * Fetch a word from the given user-space address; identical to fuword()
 * (see the comment above).  Only sizeof(int) bytes are copied in.
 * Returns the word on success, -1 on failure.
 */
long fuiword(user_addr_t addr)
{
	long word;

	if (copyin(addr, (void *) &word, sizeof(int)))
		return(-1);
	return(word);
}
283
284/*
285 * With a 32-bit kernel and mixed 32/64-bit user tasks, this interface allows the
286 * fetching and setting of process-sized size_t and pointer values.
287 */
288int
289sulong(user_addr_t addr, int64_t word)
290{
291
292	if (IS_64BIT_PROCESS(current_proc())) {
293		return(copyout((void *)&word, addr, sizeof(word)) == 0 ? 0 : -1);
294	} else {
295		return(suiword(addr, (long)word));
296	}
297}
298
299int64_t
300fulong(user_addr_t addr)
301{
302	int64_t longword;
303
304	if (IS_64BIT_PROCESS(current_proc())) {
305		if (copyin(addr, (void *)&longword, sizeof(longword)) != 0)
306			return(-1);
307		return(longword);
308	} else {
309		return((int64_t)fuiword(addr));
310	}
311}
312
313int
314suulong(user_addr_t addr, uint64_t uword)
315{
316
317	if (IS_64BIT_PROCESS(current_proc())) {
318		return(copyout((void *)&uword, addr, sizeof(uword)) == 0 ? 0 : -1);
319	} else {
320		return(suiword(addr, (u_long)uword));
321	}
322}
323
324uint64_t
325fuulong(user_addr_t addr)
326{
327	uint64_t ulongword;
328
329	if (IS_64BIT_PROCESS(current_proc())) {
330		if (copyin(addr, (void *)&ulongword, sizeof(ulongword)) != 0)
331			return(-1ULL);
332		return(ulongword);
333	} else {
334		return((uint64_t)fuiword(addr));
335	}
336}
337
/*
 * swapon() system call stub: dynamic pager configuration via swapon()
 * is not supported here, so always fail with ENOTSUP.
 */
int
swapon(__unused proc_t procp, __unused struct swapon_args *uap, __unused int *retval)
{
	return(ENOTSUP);
}
343
344
/*
 * pid_for_task Mach trap: translate a task port name into the BSD pid
 * of the process backing that task.
 *
 * On any failure (bad port, or a task with no BSD process) the pid
 * copied out is -1 and KERN_FAILURE is returned.  The pid is copied
 * out unconditionally, even on failure.
 */
kern_return_t
pid_for_task(
	struct pid_for_task_args *args)
{
	mach_port_name_t	t = args->t;
	user_addr_t		pid_addr  = args->pid;
	proc_t p;
	task_t		t1;
	int	pid = -1;	/* default answer when lookup fails */
	kern_return_t	err = KERN_SUCCESS;

	AUDIT_MACH_SYSCALL_ENTER(AUE_PIDFORTASK);
	AUDIT_ARG(mach_port1, t);

	/* Convert the port name to a task reference (takes a ref on t1) */
	t1 = port_name_to_task(t);

	if (t1 == TASK_NULL) {
		err = KERN_FAILURE;
		goto pftout;
	} else {
		/* A task without BSD process info yields KERN_FAILURE */
		p = get_bsdtask_info(t1);
		if (p) {
			pid  = proc_pid(p);
			err = KERN_SUCCESS;
		} else {
			err = KERN_FAILURE;
		}
	}
	task_deallocate(t1);
pftout:
	AUDIT_ARG(pid, pid);
	/* copyout result regardless of success; errors are ignored */
	(void) copyout((char *) &pid, pid_addr, sizeof(int));
	AUDIT_MACH_SYSCALL_EXIT(err);
	return(err);
}
380
381/*
382 *
383 * tfp_policy = KERN_TFP_POLICY_DENY; Deny Mode: None allowed except for self
384 * tfp_policy = KERN_TFP_POLICY_DEFAULT; default mode: all posix checks and upcall via task port for authentication
385 *
386 */
387static  int tfp_policy = KERN_TFP_POLICY_DEFAULT;
388
389/*
390 *	Routine:	task_for_pid_posix_check
391 *	Purpose:
392 *			Verify that the current process should be allowed to
393 *			get the target process's task port. This is only
394 *			permitted if:
395 *			- The current process is root
396 *			OR all of the following are true:
397 *			- The target process's real, effective, and saved uids
398 *			  are the same as the current proc's euid,
399 *			- The target process's group set is a subset of the
400 *			  calling process's group set, and
401 *			- The target process hasn't switched credentials.
402 *
403 *	Returns:	TRUE: permitted
404 *			FALSE: denied
405 */
406static int
407task_for_pid_posix_check(proc_t target)
408{
409	kauth_cred_t targetcred, mycred;
410	uid_t myuid;
411	int allowed;
412
413	/* No task_for_pid on bad targets */
414	if (target == PROC_NULL || target->p_stat == SZOMB) {
415		return FALSE;
416	}
417
418	mycred = kauth_cred_get();
419	myuid = kauth_cred_getuid(mycred);
420
421	/* If we're running as root, the check passes */
422	if (kauth_cred_issuser(mycred))
423		return TRUE;
424
425	/* We're allowed to get our own task port */
426	if (target == current_proc())
427		return TRUE;
428
429	/*
430	 * Under DENY, only root can get another proc's task port,
431	 * so no more checks are needed.
432	 */
433	if (tfp_policy == KERN_TFP_POLICY_DENY) {
434		return FALSE;
435	}
436
437	targetcred = kauth_cred_proc_ref(target);
438	allowed = TRUE;
439
440	/* Do target's ruid, euid, and saved uid match my euid? */
441	if ((kauth_cred_getuid(targetcred) != myuid) ||
442			(targetcred->cr_ruid != myuid) ||
443			(targetcred->cr_svuid != myuid)) {
444		allowed = FALSE;
445		goto out;
446	}
447
448	/* Are target's groups a subset of my groups? */
449	if (kauth_cred_gid_subset(targetcred, mycred, &allowed) ||
450			allowed == 0) {
451		allowed = FALSE;
452		goto out;
453	}
454
455	/* Has target switched credentials? */
456	if (target->p_flag & P_SUGID) {
457		allowed = FALSE;
458		goto out;
459	}
460
461out:
462	kauth_cred_unref(&targetcred);
463	return allowed;
464}
465
466/*
467 *	Routine:	task_for_pid
468 *	Purpose:
469 *		Get the task port for another "process", named by its
470 *		process ID on the same host as "target_task".
471 *
472 *		Only permitted to privileged processes, or processes
473 *		with the same user ID.
474 *
475 * XXX This should be a BSD system call, not a Mach trap!!!
476 */
/*
 * task_for_pid Mach trap: look up the process named by "pid" and, if
 * all policy checks pass, copy out a send right to its task port.
 *
 * On every exit path a port name (MACH_PORT_NULL on failure) is copied
 * out to args->t.  Checks applied, in order: the POSIX uid/gid policy
 * (task_for_pid_posix_check), an optional upcall to the target's task
 * access server, and (if configured) a MAC framework check.
 */
kern_return_t
task_for_pid(
	struct task_for_pid_args *args)
{
	mach_port_name_t	target_tport = args->target_tport;
	int			pid = args->pid;
	user_addr_t		task_addr = args->t;
	struct uthread		*uthread;
	proc_t 			p = PROC_NULL;
	task_t			t1 = TASK_NULL;
	mach_port_name_t	tret = MACH_PORT_NULL;
	ipc_port_t 		tfpport;
	void * sright;
	int error = 0;

	AUDIT_MACH_SYSCALL_ENTER(AUE_TASKFORPID);
	AUDIT_ARG(pid, pid);
	AUDIT_ARG(mach_port1, target_tport);

#if defined(SECURE_KERNEL)
	/* Secure kernels refuse task_for_pid(0) (the kernel task) outright */
	if (0 == pid) {
		(void ) copyout((char *)&t1, task_addr, sizeof(mach_port_name_t));
		AUDIT_MACH_SYSCALL_EXIT(KERN_FAILURE);
		return(KERN_FAILURE);
	}
#endif

	/* Validate the caller-supplied task port (takes a ref on t1) */
	t1 = port_name_to_task(target_tport);
	if (t1 == TASK_NULL) {
		(void) copyout((char *)&t1, task_addr, sizeof(mach_port_name_t));
		AUDIT_MACH_SYSCALL_EXIT(KERN_FAILURE);
		return(KERN_FAILURE);
	}


	/*
	 * Delayed binding of thread credential to process credential, if we
	 * are not running with an explicitly set thread credential.
	 */
	uthread = get_bsdthread_info(current_thread());
	kauth_cred_uthread_update(uthread, current_proc());

	p = proc_find(pid);	/* takes a proc ref, released at tfpout */
	AUDIT_ARG(process, p);

	/* POSIX uid/gid policy check (handles p == PROC_NULL too) */
	if (!(task_for_pid_posix_check(p))) {
		error = KERN_FAILURE;
		goto tfpout;
	}

	if (p->task != TASK_NULL) {
		/* If we aren't root and target's task access port is set... */
		if (!kauth_cred_issuser(kauth_cred_get()) &&
			p != current_proc() &&
			(task_get_task_access_port(p->task, &tfpport) == 0) &&
			(tfpport != IPC_PORT_NULL)) {

			if (tfpport == IPC_PORT_DEAD) {
				error = KERN_PROTECTION_FAILURE;
				goto tfpout;
			}

			/* Call up to the task access server */
			error = check_task_access(tfpport, proc_selfpid(), kauth_getgid(), pid);

			/*
			 * NOTE(review): the tfpport right obtained from
			 * task_get_task_access_port() does not appear to be
			 * released on these paths — confirm ownership
			 * semantics of task_get_task_access_port().
			 */
			if (error != MACH_MSG_SUCCESS) {
				if (error == MACH_RCV_INTERRUPTED)
					error = KERN_ABORTED;
				else
					error = KERN_FAILURE;
				goto tfpout;
			}
		}
#if CONFIG_MACF
		error = mac_proc_check_get_task(kauth_cred_get(), p);
		if (error) {
			error = KERN_FAILURE;
			goto tfpout;
		}
#endif

		/* Grant task port access */
		task_reference(p->task);
		sright = (void *) convert_task_to_port(p->task);
		tret = ipc_port_copyout_send(
				sright,
				get_task_ipcspace(current_task()));
	}
	error = KERN_SUCCESS;

tfpout:
	task_deallocate(t1);
	AUDIT_ARG(mach_port2, tret);
	/* Always copy out a port name; MACH_PORT_NULL on failure */
	(void) copyout((char *) &tret, task_addr, sizeof(mach_port_name_t));
	if (p != PROC_NULL)
		proc_rele(p);
	AUDIT_MACH_SYSCALL_EXIT(error);
	return(error);
}
576
577/*
578 *	Routine:	task_name_for_pid
579 *	Purpose:
580 *		Get the task name port for another "process", named by its
581 *		process ID on the same host as "target_task".
582 *
583 *		Only permitted to privileged processes, or processes
584 *		with the same user ID.
585 *
586 * XXX This should be a BSD system call, not a Mach trap!!!
587 */
588
/*
 * task_name_for_pid Mach trap: copy out a send right to the (inspect
 * only) task *name* port of the process named by "pid".
 *
 * Permitted when the target is not a zombie AND the caller is the
 * target itself, is root, or shares both effective and real uid with
 * the target.  A port name (MACH_PORT_NULL on failure) is copied out
 * on every exit path.
 */
kern_return_t
task_name_for_pid(
	struct task_name_for_pid_args *args)
{
	mach_port_name_t	target_tport = args->target_tport;
	int			pid = args->pid;
	user_addr_t		task_addr = args->t;
	struct uthread		*uthread;
	proc_t		p = PROC_NULL;
	task_t		t1;
	mach_port_name_t	tret;
	void * sright;
	int error = 0, refheld = 0;
	kauth_cred_t target_cred;

	AUDIT_MACH_SYSCALL_ENTER(AUE_TASKNAMEFORPID);
	AUDIT_ARG(pid, pid);
	AUDIT_ARG(mach_port1, target_tport);

	/* Validate the caller-supplied task port (takes a ref on t1) */
	t1 = port_name_to_task(target_tport);
	if (t1 == TASK_NULL) {
		(void) copyout((char *)&t1, task_addr, sizeof(mach_port_name_t));
		AUDIT_MACH_SYSCALL_EXIT(KERN_FAILURE);
		return(KERN_FAILURE);
	}


	/*
	 * Delayed binding of thread credential to process credential, if we
	 * are not running with an explicitly set thread credential.
	 */
	uthread = get_bsdthread_info(current_thread());
	kauth_cred_uthread_update(uthread, current_proc());

	p = proc_find(pid);	/* takes a proc ref, released at tnfpout */
	AUDIT_ARG(process, p);
	if (p != PROC_NULL) {
		target_cred = kauth_cred_proc_ref(p);
		refheld = 1;

		/* Not a zombie, and caller is self, root, or uid-matched */
		if ((p->p_stat != SZOMB)
		    && ((current_proc() == p)
			|| kauth_cred_issuser(kauth_cred_get())
			|| ((kauth_cred_getuid(target_cred) == kauth_cred_getuid(kauth_cred_get())) &&
			    ((target_cred->cr_ruid == kauth_cred_get()->cr_ruid))))) {

			if (p->task != TASK_NULL) {
				task_reference(p->task);
#if CONFIG_MACF
				error = mac_proc_check_get_task_name(kauth_cred_get(),  p);
				if (error) {
					task_deallocate(p->task);
					goto noperm;
				}
#endif
				/* convert_task_name_to_port consumes the task ref */
				sright = (void *)convert_task_name_to_port(p->task);
				tret = ipc_port_copyout_send(sright,
						get_task_ipcspace(current_task()));
			} else
				tret  = MACH_PORT_NULL;

			AUDIT_ARG(mach_port2, tret);
			(void) copyout((char *)&tret, task_addr, sizeof(mach_port_name_t));
			task_deallocate(t1);
			error = KERN_SUCCESS;
			goto tnfpout;
		}
	}

	/* Failure path: copy out MACH_PORT_NULL and report KERN_FAILURE */
#if CONFIG_MACF
noperm:
#endif
    task_deallocate(t1);
	tret = MACH_PORT_NULL;
	(void) copyout((char *) &tret, task_addr, sizeof(mach_port_name_t));
	error = KERN_FAILURE;
tnfpout:
	if (refheld != 0)
		kauth_cred_unref(&target_cred);
	if (p != PROC_NULL)
		proc_rele(p);
	AUDIT_MACH_SYSCALL_EXIT(error);
	return(error);
}
673
674static int
675sysctl_settfp_policy(__unused struct sysctl_oid *oidp, void *arg1,
676    __unused int arg2, struct sysctl_req *req)
677{
678    int error = 0;
679	int new_value;
680
681    error = SYSCTL_OUT(req, arg1, sizeof(int));
682    if (error || req->newptr == USER_ADDR_NULL)
683        return(error);
684
685	if (!is_suser())
686		return(EPERM);
687
688	if ((error = SYSCTL_IN(req, &new_value, sizeof(int)))) {
689		goto out;
690	}
691	if ((new_value == KERN_TFP_POLICY_DENY)
692		|| (new_value == KERN_TFP_POLICY_DEFAULT))
693			tfp_policy = new_value;
694	else
695			error = EINVAL;
696out:
697    return(error);
698
699}
700
/* Read-only flag telling user space whether this is a SECURE_KERNEL build */
#if defined(SECURE_KERNEL)
static int kern_secure_kernel = 1;
#else
static int kern_secure_kernel = 0;
#endif

SYSCTL_INT(_kern, OID_AUTO, secure_kernel, CTLFLAG_RD, &kern_secure_kernel, 0, "");

/* kern.tfp.policy: task_for_pid policy knob (see sysctl_settfp_policy) */
SYSCTL_NODE(_kern, KERN_TFP, tfp, CTLFLAG_RW|CTLFLAG_LOCKED, 0, "tfp");
SYSCTL_PROC(_kern_tfp, KERN_TFP_POLICY, policy, CTLTYPE_INT | CTLFLAG_RW,
    &tfp_policy, sizeof(uint32_t), &sysctl_settfp_policy ,"I","policy");

/* Shared-region knobs; the variables live in osfmk/vm/vm_shared_region.c */
SYSCTL_INT(_vm, OID_AUTO, shared_region_trace_level, CTLFLAG_RW,
	   &shared_region_trace_level, 0, "");
SYSCTL_INT(_vm, OID_AUTO, shared_region_version, CTLFLAG_RD,
	   &shared_region_version, 0, "");
SYSCTL_INT(_vm, OID_AUTO, shared_region_persistence, CTLFLAG_RW,
	   &shared_region_persistence, 0, "");
719
720/*
721 * shared_region_check_np:
722 *
723 * This system call is intended for dyld.
724 *
725 * dyld calls this when any process starts to see if the process's shared
726 * region is already set up and ready to use.
727 * This call returns the base address of the first mapping in the
728 * process's shared region's first mapping.
729 * dyld will then check what's mapped at that address.
730 *
731 * If the shared region is empty, dyld will then attempt to map the shared
732 * cache file in the shared region via the shared_region_map_np() system call.
733 *
734 * If something's already mapped in the shared region, dyld will check if it
735 * matches the shared cache it would like to use for that process.
 * If it matches, everything's ready and the process can proceed and use the
737 * shared region.
738 * If it doesn't match, dyld will unmap the shared region and map the shared
739 * cache into the process's address space via mmap().
740 *
741 * ERROR VALUES
742 * EINVAL	no shared region
743 * ENOMEM	shared region is empty
744 * EFAULT	bad address for "start_address"
745 */
746int
747shared_region_check_np(
748	__unused struct proc			*p,
749	struct shared_region_check_np_args	*uap,
750	__unused int				*retvalp)
751{
752	vm_shared_region_t	shared_region;
753	mach_vm_offset_t	start_address;
754	int			error;
755	kern_return_t		kr;
756
757	SHARED_REGION_TRACE_DEBUG(
758		("shared_region: %p [%d(%s)] -> check_np(0x%llx)\n",
759		 current_thread(), p->p_pid, p->p_comm,
760		 (uint64_t)uap->start_address));
761
762	/* retrieve the current tasks's shared region */
763	shared_region = vm_shared_region_get(current_task());
764	if (shared_region != NULL) {
765		/* retrieve address of its first mapping... */
766		kr = vm_shared_region_start_address(shared_region,
767						    &start_address);
768		if (kr != KERN_SUCCESS) {
769			error = ENOMEM;
770		} else {
771			/* ... and give it to the caller */
772			error = copyout(&start_address,
773					(user_addr_t) uap->start_address,
774					sizeof (start_address));
775			if (error) {
776				SHARED_REGION_TRACE_ERROR(
777					("shared_region: %p [%d(%s)] "
778					 "check_np(0x%llx) "
779					 "copyout(0x%llx) error %d\n",
780					 current_thread(), p->p_pid, p->p_comm,
781					 (uint64_t)uap->start_address, (uint64_t)start_address,
782					 error));
783			}
784		}
785		vm_shared_region_deallocate(shared_region);
786	} else {
787		/* no shared region ! */
788		error = EINVAL;
789	}
790
791	SHARED_REGION_TRACE_DEBUG(
792		("shared_region: %p [%d(%s)] check_np(0x%llx) <- 0x%llx %d\n",
793		 current_thread(), p->p_pid, p->p_comm,
794		 (uint64_t)uap->start_address, (uint64_t)start_address, error));
795
796	return error;
797}
798
799/*
800 * shared_region_map_np()
801 *
802 * This system call is intended for dyld.
803 *
804 * dyld uses this to map a shared cache file into a shared region.
805 * This is usually done only the first time a shared cache is needed.
806 * Subsequent processes will just use the populated shared region without
807 * requiring any further setup.
808 */
/*
 * shared_region_map_np system call (see the block comment above): map
 * the shared cache file named by uap->fd into the current process's
 * shared region, using the caller-supplied list of mappings.
 *
 * The file must be a regular vnode, readable via the descriptor, on
 * the process's root volume, and owned by root.  At most SFM_MAX_STACK
 * mappings are accepted (they are held in a stack array).
 *
 * Returns 0 on success or a BSD errno; all resources are released via
 * the goto-cleanup path at "done".
 */
int
shared_region_map_np(
	struct proc				*p,
	struct shared_region_map_np_args	*uap,
	__unused int				*retvalp)
{
	int				error;
	kern_return_t			kr;
	int				fd;
	struct fileproc			*fp;
	struct vnode			*vp, *root_vp;
	struct vnode_attr		va;
	off_t				fs;
	memory_object_size_t		file_size;
	user_addr_t			user_mappings;
	struct shared_file_mapping_np	*mappings;
#define SFM_MAX_STACK	8
	struct shared_file_mapping_np	stack_mappings[SFM_MAX_STACK];
	unsigned int			mappings_count;
	vm_size_t			mappings_size;
	memory_object_control_t		file_control;
	struct vm_shared_region		*shared_region;

	SHARED_REGION_TRACE_DEBUG(
		("shared_region: %p [%d(%s)] -> map\n",
		 current_thread(), p->p_pid, p->p_comm));

	/* Initialize everything the cleanup path at "done" inspects */
	shared_region = NULL;
	mappings_count = 0;
	mappings_size = 0;
	mappings = NULL;
	fp = NULL;
	vp = NULL;

	/* get file descriptor for shared region cache file */
	fd = uap->fd;

	/* get file structure from file descriptor */
	error = fp_lookup(p, fd, &fp, 0);
	if (error) {
		SHARED_REGION_TRACE_ERROR(
			("shared_region: %p [%d(%s)] map: "
			 "fd=%d lookup failed (error=%d)\n",
			 current_thread(), p->p_pid, p->p_comm, fd, error));
		goto done;
	}

	/* make sure we're attempting to map a vnode */
	if (fp->f_fglob->fg_type != DTYPE_VNODE) {
		SHARED_REGION_TRACE_ERROR(
			("shared_region: %p [%d(%s)] map: "
			 "fd=%d not a vnode (type=%d)\n",
			 current_thread(), p->p_pid, p->p_comm,
			 fd, fp->f_fglob->fg_type));
		error = EINVAL;
		goto done;
	}

	/* we need at least read permission on the file */
	if (! (fp->f_fglob->fg_flag & FREAD)) {
		SHARED_REGION_TRACE_ERROR(
			("shared_region: %p [%d(%s)] map: "
			 "fd=%d not readable\n",
			 current_thread(), p->p_pid, p->p_comm, fd));
		error = EPERM;
		goto done;
	}

	/* get vnode from file structure */
	error = vnode_getwithref((vnode_t) fp->f_fglob->fg_data);
	if (error) {
		SHARED_REGION_TRACE_ERROR(
			("shared_region: %p [%d(%s)] map: "
			 "fd=%d getwithref failed (error=%d)\n",
			 current_thread(), p->p_pid, p->p_comm, fd, error));
		goto done;
	}
	vp = (struct vnode *) fp->f_fglob->fg_data;

	/* make sure the vnode is a regular file */
	if (vp->v_type != VREG) {
		SHARED_REGION_TRACE_ERROR(
			("shared_region: %p [%d(%s)] map(%p:'%s'): "
			 "not a file (type=%d)\n",
			 current_thread(), p->p_pid, p->p_comm,
			 vp, vp->v_name, vp->v_type));
		error = EINVAL;
		goto done;
	}

	/* make sure vnode is on the process's root volume */
	root_vp = p->p_fd->fd_rdir;
	if (root_vp == NULL) {
		/* no chroot in effect: compare against the system root */
		root_vp = rootvnode;
	}
	if (vp->v_mount != root_vp->v_mount) {
		SHARED_REGION_TRACE_ERROR(
			("shared_region: %p [%d(%s)] map(%p:'%s'): "
			 "not on process's root volume\n",
			 current_thread(), p->p_pid, p->p_comm,
			 vp, vp->v_name));
		error = EPERM;
		goto done;
	}

	/* make sure vnode is owned by "root" */
	VATTR_INIT(&va);
	VATTR_WANTED(&va, va_uid);
	error = vnode_getattr(vp, &va, vfs_context_current());
	if (error) {
		SHARED_REGION_TRACE_ERROR(
			("shared_region: %p [%d(%s)] map(%p:'%s'): "
			 "vnode_getattr(%p) failed (error=%d)\n",
			 current_thread(), p->p_pid, p->p_comm,
			 vp, vp->v_name, vp, error));
		goto done;
	}
	if (va.va_uid != 0) {
		SHARED_REGION_TRACE_ERROR(
			("shared_region: %p [%d(%s)] map(%p:'%s'): "
			 "owned by uid=%d instead of 0\n",
			 current_thread(), p->p_pid, p->p_comm,
			 vp, vp->v_name, va.va_uid));
		error = EPERM;
		goto done;
	}

	/* get vnode size */
	error = vnode_size(vp, &fs, vfs_context_current());
	if (error) {
		SHARED_REGION_TRACE_ERROR(
			("shared_region: %p [%d(%s)] map(%p:'%s'): "
			 "vnode_size(%p) failed (error=%d)\n",
			 current_thread(), p->p_pid, p->p_comm,
			 vp, vp->v_name, vp, error));
		goto done;
	}
	file_size = fs;

	/* get the file's memory object handle */
	file_control = ubc_getobject(vp, UBC_HOLDOBJECT);
	if (file_control == MEMORY_OBJECT_CONTROL_NULL) {
		SHARED_REGION_TRACE_ERROR(
			("shared_region: %p [%d(%s)] map(%p:'%s'): "
			 "no memory object\n",
			 current_thread(), p->p_pid, p->p_comm,
			 vp, vp->v_name));
		error = EINVAL;
		goto done;
	}

	/* get the list of mappings the caller wants us to establish */
	mappings_count = uap->count;	/* number of mappings */
	mappings_size = (vm_size_t) (mappings_count * sizeof (mappings[0]));
	if (mappings_count == 0) {
		SHARED_REGION_TRACE_INFO(
			("shared_region: %p [%d(%s)] map(%p:'%s'): "
			 "no mappings\n",
			 current_thread(), p->p_pid, p->p_comm,
			 vp, vp->v_name));
		error = 0;	/* no mappings: we're done ! */
		goto done;
	} else if (mappings_count <= SFM_MAX_STACK) {
		/* small enough to hold on the kernel stack */
		mappings = &stack_mappings[0];
	} else {
		SHARED_REGION_TRACE_ERROR(
			("shared_region: %p [%d(%s)] map(%p:'%s'): "
			 "too many mappings (%d)\n",
			 current_thread(), p->p_pid, p->p_comm,
			 vp, vp->v_name, mappings_count));
		error = EINVAL;
		goto done;
	}

	user_mappings = uap->mappings;	/* the mappings, in user space */
	error = copyin(user_mappings,
		       mappings,
		       mappings_size);
	if (error) {
		SHARED_REGION_TRACE_ERROR(
			("shared_region: %p [%d(%s)] map(%p:'%s'): "
			 "copyin(0x%llx, %d) failed (error=%d)\n",
			 current_thread(), p->p_pid, p->p_comm,
			 vp, vp->v_name, (uint64_t)user_mappings, mappings_count, error));
		goto done;
	}

	/* get the process's shared region (setup in vm_map_exec()) */
	shared_region = vm_shared_region_get(current_task());
	if (shared_region == NULL) {
		SHARED_REGION_TRACE_ERROR(
			("shared_region: %p [%d(%s)] map(%p:'%s'): "
			 "no shared region\n",
			 current_thread(), p->p_pid, p->p_comm,
			 vp, vp->v_name));
		goto done;
	}

	/* map the file into that shared region's submap */
	kr = vm_shared_region_map_file(shared_region,
				       mappings_count,
				       mappings,
				       file_control,
				       file_size,
				       (void *) p->p_fd->fd_rdir);
	if (kr != KERN_SUCCESS) {
		SHARED_REGION_TRACE_ERROR(
			("shared_region: %p [%d(%s)] map(%p:'%s'): "
			 "vm_shared_region_map_file() failed kr=0x%x\n",
			 current_thread(), p->p_pid, p->p_comm,
			 vp, vp->v_name, kr));
		/* translate the Mach result into a BSD errno */
		switch (kr) {
		case KERN_INVALID_ADDRESS:
			error = EFAULT;
			break;
		case KERN_PROTECTION_FAILURE:
			error = EPERM;
			break;
		case KERN_NO_SPACE:
			error = ENOMEM;
			break;
		case KERN_FAILURE:
		case KERN_INVALID_ARGUMENT:
		default:
			error = EINVAL;
			break;
		}
		goto done;
	}

	error = 0;

	/* update the vnode's access time */
	if (! (vnode_vfsvisflags(vp) & MNT_NOATIME)) {
		VATTR_INIT(&va);
		nanotime(&va.va_access_time);
		VATTR_SET_ACTIVE(&va, va_access_time);
		vnode_setattr(vp, &va, vfs_context_current());
	}

	if (p->p_flag & P_NOSHLIB) {
		/* signal that this process is now using split libraries */
		OSBitAndAtomic(~((uint32_t)P_NOSHLIB), (UInt32 *)&p->p_flag);
	}

done:
	if (vp != NULL) {
		/*
		 * release the vnode...
		 * ubc_map() still holds it for us in the non-error case
		 */
		(void) vnode_put(vp);
		vp = NULL;
	}
	if (fp != NULL) {
		/* release the file descriptor */
		fp_drop(p, fd, fp, 0);
		fp = NULL;
	}

	if (shared_region != NULL) {
		vm_shared_region_deallocate(shared_region);
	}

	SHARED_REGION_TRACE_DEBUG(
		("shared_region: %p [%d(%s)] <- map\n",
		 current_thread(), p->p_pid, p->p_comm));

	return error;
}
1079
1080
1081/* sysctl overflow room */
1082
/* vm_page_free_target is provided as a makeshift solution for applications that want to
	allocate buffer space, possibly purgeable memory, but not cause inactive pages to be
	reclaimed. It allows the app to calculate how much memory is free outside the free target. */
/* Defined by the pageout daemon in osfmk/vm; exported read-only here. */
extern unsigned int	vm_page_free_target;
SYSCTL_INT(_vm, OID_AUTO, vm_page_free_target, CTLFLAG_RD,
		   &vm_page_free_target, 0, "Pageout daemon free target");
1089
1090