/*
 * Copyright (c) 2000-2010 Apple Inc. All rights reserved.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
 *
 * This file contains Original Code and/or Modifications of Original Code
 * as defined in and that are subject to the Apple Public Source License
 * Version 2.0 (the 'License'). You may not use this file except in
 * compliance with the License. The rights granted to you under the License
 * may not be used to create, or enable the creation or redistribution of,
 * unlawful or unlicensed copies of an Apple operating system, or to
 * circumvent, violate, or enable the circumvention or violation of, any
 * terms of an Apple operating system software license agreement.
 *
 * Please obtain a copy of the License at
 * http://www.opensource.apple.com/apsl/ and read it before using this file.
 *
 * The Original Code and all software distributed under the License are
 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
 * Please see the License for the specific language governing rights and
 * limitations under the License.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
 */
/*
 * Mach Operating System
 * Copyright (c) 1987 Carnegie-Mellon University
 * All rights reserved.  The CMU software License Agreement specifies
 * the terms and conditions for use and redistribution.
 */
/*
 * NOTICE: This file was modified by SPARTA, Inc. in 2006 to introduce
 * support for mandatory and extensible security protections.  This notice
 * is included in support of clause 2.2 (b) of the Apple Public License,
 * Version 2.0.
 */

#include <meta_features.h>

#include <kern/task.h>
#include <kern/thread.h>
#include <kern/debug.h>
#include <kern/lock.h>
#include <kern/extmod_statistics.h>
#include <mach/mach_traps.h>
#include <mach/port.h>
#include <mach/task.h>
#include <mach/task_access.h>
#include <mach/task_special_ports.h>
#include <mach/time_value.h>
#include <mach/vm_map.h>
#include <mach/vm_param.h>
#include <mach/vm_prot.h>

#include <sys/file_internal.h>
#include <sys/param.h>
#include <sys/systm.h>
#include <sys/dir.h>
#include <sys/namei.h>
#include <sys/proc_internal.h>
#include <sys/kauth.h>
#include <sys/vm.h>
#include <sys/file.h>
#include <sys/vnode_internal.h>
#include <sys/mount.h>
#include <sys/trace.h>
#include <sys/kernel.h>
#include <sys/ubc_internal.h>
#include <sys/user.h>
#include <sys/syslog.h>
#include <sys/stat.h>
#include <sys/sysproto.h>
#include <sys/mman.h>
#include <sys/sysctl.h>
#include <sys/cprotect.h>
#include <sys/kpi_socket.h>
#include <sys/kas_info.h>

#include <security/audit/audit.h>
#include <security/mac.h>
#include <bsm/audit_kevents.h>

#include <kern/kalloc.h>
#include <vm/vm_map.h>
#include <vm/vm_kern.h>
#include <vm/vm_pageout.h>

#include <machine/spl.h>

#include <mach/shared_region.h>
#include <vm/vm_shared_region.h>

#include <vm/vm_protos.h>

#include <sys/kern_memorystatus.h>
int _shared_region_map(struct proc *, int, unsigned int,
	struct shared_file_mapping_np *,
	memory_object_control_t *,
	struct shared_file_mapping_np *);
int _shared_region_slide(uint32_t, mach_vm_offset_t, mach_vm_size_t,
	mach_vm_offset_t, mach_vm_size_t, memory_object_control_t);
int shared_region_copyin_mappings(struct proc *, user_addr_t, unsigned int,
	struct shared_file_mapping_np *);

SYSCTL_INT(_vm, OID_AUTO, vm_debug_events, CTLFLAG_RW | CTLFLAG_LOCKED, &vm_debug_events, 0, "");


/*
 * Sysctls related to data/stack execution.  See osfmk/vm/vm_map.c
 */

#if !SECURE_KERNEL
extern int allow_stack_exec, allow_data_exec;

SYSCTL_INT(_vm, OID_AUTO, allow_stack_exec, CTLFLAG_RW | CTLFLAG_LOCKED, &allow_stack_exec, 0, "");
SYSCTL_INT(_vm, OID_AUTO, allow_data_exec, CTLFLAG_RW | CTLFLAG_LOCKED, &allow_data_exec, 0, "");
#endif /* !SECURE_KERNEL */

static const char *prot_values[] = {
	"none",
	"read-only",
	"write-only",
	"read-write",
	"execute-only",
	"read-execute",
	"write-execute",
	"read-write-execute"
};

void
log_stack_execution_failure(addr64_t vaddr, vm_prot_t prot)
{
	printf("Data/Stack execution not permitted: %s[pid %d] at virtual address 0x%qx, protections were %s\n",
		current_proc()->p_comm, current_proc()->p_pid, vaddr, prot_values[prot & VM_PROT_ALL]);
}

int shared_region_unnest_logging = 1;

SYSCTL_INT(_vm, OID_AUTO, shared_region_unnest_logging, CTLFLAG_RW | CTLFLAG_LOCKED,
	   &shared_region_unnest_logging, 0, "");

int vm_shared_region_unnest_log_interval = 10;
int shared_region_unnest_log_count_threshold = 5;

/*
 * These log rate throttling state variables aren't thread safe, but
 * are sufficient unto the task.
 */
static int64_t last_unnest_log_time = 0;
static int shared_region_unnest_log_count = 0;

void
log_unnest_badness(vm_map_t m, vm_map_offset_t s, vm_map_offset_t e)
{
	struct timeval tv;

	if (shared_region_unnest_logging == 0)
		return;

	if (shared_region_unnest_logging == 1) {
		microtime(&tv);
		if ((tv.tv_sec - last_unnest_log_time) < vm_shared_region_unnest_log_interval) {
			if (shared_region_unnest_log_count++ > shared_region_unnest_log_count_threshold)
				return;
		} else {
			last_unnest_log_time = tv.tv_sec;
			shared_region_unnest_log_count = 0;
		}
	}

	printf("%s (map: %p) triggered DYLD shared region unnest for map: %p, region 0x%qx->0x%qx. While not abnormal for debuggers, this increases system memory footprint until the target exits.\n", current_proc()->p_comm, get_task_map(current_proc()->task), m, (uint64_t)s, (uint64_t)e);
}

int
useracc(
	user_addr_t	addr,
	user_size_t	len,
	int	prot)
{
	return (vm_map_check_protection(
			current_map(),
			vm_map_trunc_page(addr), vm_map_round_page(addr+len),
			prot == B_READ ? VM_PROT_READ : VM_PROT_WRITE));
}

int
vslock(
	user_addr_t	addr,
	user_size_t	len)
{
	kern_return_t kret;
	kret = vm_map_wire(current_map(), vm_map_trunc_page(addr),
			vm_map_round_page(addr+len),
			VM_PROT_READ | VM_PROT_WRITE, FALSE);

	switch (kret) {
	case KERN_SUCCESS:
		return (0);
	case KERN_INVALID_ADDRESS:
	case KERN_NO_SPACE:
		return (ENOMEM);
	case KERN_PROTECTION_FAILURE:
		return (EACCES);
	default:
		return (EINVAL);
	}
}

int
vsunlock(
	user_addr_t addr,
	user_size_t len,
	__unused int dirtied)
{
#if FIXME  /* [ */
	pmap_t		pmap;
	vm_page_t	pg;
	vm_map_offset_t	vaddr;
	ppnum_t		paddr;
#endif  /* FIXME ] */
	kern_return_t kret;

#if FIXME  /* [ */
	if (dirtied) {
		pmap = get_task_pmap(current_task());
		for (vaddr = vm_map_trunc_page(addr);
		     vaddr < vm_map_round_page(addr+len);
		     vaddr += PAGE_SIZE) {
			paddr = pmap_extract(pmap, vaddr);
			pg = PHYS_TO_VM_PAGE(paddr);
			vm_page_set_modified(pg);
		}
	}
#endif  /* FIXME ] */
#ifdef	lint
	dirtied++;
#endif	/* lint */
	kret = vm_map_unwire(current_map(), vm_map_trunc_page(addr),
				vm_map_round_page(addr+len), FALSE);
	switch (kret) {
	case KERN_SUCCESS:
		return (0);
	case KERN_INVALID_ADDRESS:
	case KERN_NO_SPACE:
		return (ENOMEM);
	case KERN_PROTECTION_FAILURE:
		return (EACCES);
	default:
		return (EINVAL);
	}
}
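
/*
 * Typical in-kernel usage of vslock()/vsunlock() (an illustrative sketch,
 * not code from this file): wire a user buffer across an I/O operation,
 * then unwire it, passing a non-zero "dirtied" if the device wrote into it.
 * "uaddr" and "len" are hypothetical.
 *
 *	error = vslock(uaddr, len);
 *	if (error)
 *		return (error);
 *	// ... perform the I/O against the wired pages ...
 *	vsunlock(uaddr, len, 1);
 */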

int
subyte(
	user_addr_t addr,
	int byte)
{
	char character;

	character = (char)byte;
	return (copyout((void *)&(character), addr, sizeof(char)) == 0 ? 0 : -1);
}

int
suibyte(
	user_addr_t addr,
	int byte)
{
	char character;

	character = (char)byte;
	return (copyout((void *)&(character), addr, sizeof(char)) == 0 ? 0 : -1);
}

int
fubyte(user_addr_t addr)
{
	unsigned char byte;

	if (copyin(addr, (void *) &byte, sizeof(char)))
		return (-1);
	return (byte);
}

int
fuibyte(user_addr_t addr)
{
	unsigned char byte;

	if (copyin(addr, (void *) &(byte), sizeof(char)))
		return (-1);
	return (byte);
}

int
suword(
	user_addr_t addr,
	long word)
{
	return (copyout((void *) &word, addr, sizeof(int)) == 0 ? 0 : -1);
}

long
fuword(user_addr_t addr)
{
	long word = 0;

	if (copyin(addr, (void *) &word, sizeof(int)))
		return (-1);
	return (word);
}

/* suiword and fuiword are the same as suword and fuword, respectively */

int
suiword(
	user_addr_t addr,
	long word)
{
	return (copyout((void *) &word, addr, sizeof(int)) == 0 ? 0 : -1);
}

long
fuiword(user_addr_t addr)
{
	long word = 0;

	if (copyin(addr, (void *) &word, sizeof(int)))
		return (-1);
	return (word);
}

/*
 * With a 32-bit kernel and mixed 32/64-bit user tasks, this interface allows the
 * fetching and setting of process-sized size_t and pointer values.
 */
int
sulong(user_addr_t addr, int64_t word)
{
	if (IS_64BIT_PROCESS(current_proc())) {
		return (copyout((void *)&word, addr, sizeof(word)) == 0 ? 0 : -1);
	} else {
		return (suiword(addr, (long)word));
	}
}

int64_t
fulong(user_addr_t addr)
{
	int64_t longword;

	if (IS_64BIT_PROCESS(current_proc())) {
		if (copyin(addr, (void *)&longword, sizeof(longword)) != 0)
			return (-1);
		return (longword);
	} else {
		return ((int64_t)fuiword(addr));
	}
}

int
suulong(user_addr_t addr, uint64_t uword)
{
	if (IS_64BIT_PROCESS(current_proc())) {
		return (copyout((void *)&uword, addr, sizeof(uword)) == 0 ? 0 : -1);
	} else {
		return (suiword(addr, (uint32_t)uword));
	}
}

uint64_t
fuulong(user_addr_t addr)
{
	uint64_t ulongword;

	if (IS_64BIT_PROCESS(current_proc())) {
		if (copyin(addr, (void *)&ulongword, sizeof(ulongword)) != 0)
			return (-1ULL);
		return (ulongword);
	} else {
		return ((uint64_t)fuiword(addr));
	}
}
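
/*
 * Typical in-kernel usage of the process-sized accessors above (an
 * illustrative sketch, not code from this file); "uaddr" and "value"
 * are hypothetical:
 *
 *	uint64_t value = 0;
 *	if (suulong(uaddr, value) != 0)
 *		return (EFAULT);
 *	value = fuulong(uaddr);		// -1ULL doubles as the error return
 */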

int
swapon(__unused proc_t procp, __unused struct swapon_args *uap, __unused int *retval)
{
	return (ENOTSUP);
}

/*
 * pid_for_task
 *
 * Find the BSD process ID for the Mach task associated with the given Mach
 * port name.
 *
 * Parameters:	args		User argument descriptor (see below)
 *
 * Indirect parameters:	args->t		Mach port name
 * 			args->pid	Process ID (returned value; see below)
 *
 * Returns:	KERN_SUCCESS	Success
 * 		KERN_FAILURE	Not success
 *
 * Implicit returns: args->pid		Process ID
 *
 */
kern_return_t
pid_for_task(
	struct pid_for_task_args *args)
{
	mach_port_name_t	t = args->t;
	user_addr_t		pid_addr = args->pid;
	proc_t			p;
	task_t			t1;
	int			pid = -1;
	kern_return_t		err = KERN_SUCCESS;

	AUDIT_MACH_SYSCALL_ENTER(AUE_PIDFORTASK);
	AUDIT_ARG(mach_port1, t);

	t1 = port_name_to_task(t);

	if (t1 == TASK_NULL) {
		err = KERN_FAILURE;
		goto pftout;
	} else {
		p = get_bsdtask_info(t1);
		if (p) {
			pid = proc_pid(p);
			err = KERN_SUCCESS;
		} else {
			err = KERN_FAILURE;
		}
	}
	task_deallocate(t1);
pftout:
	AUDIT_ARG(pid, pid);
	(void) copyout((char *) &pid, pid_addr, sizeof(int));
	AUDIT_MACH_SYSCALL_EXIT(err);
	return (err);
}
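
/*
 * Illustrative userspace use of the pid_for_task() trap (a sketch, not
 * code from this file), via the prototype in <mach/mach_traps.h>:
 *
 *	int pid;
 *	if (pid_for_task(mach_task_self(), &pid) == KERN_SUCCESS)
 *		printf("my pid is %d\n", pid);
 */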

/*
 * tfp_policy = KERN_TFP_POLICY_DENY; Deny Mode: None allowed except for self
 * tfp_policy = KERN_TFP_POLICY_DEFAULT; default mode: all posix checks and
 * upcall via task port for authentication
 */
static int tfp_policy = KERN_TFP_POLICY_DEFAULT;

/*
 *	Routine:	task_for_pid_posix_check
 *	Purpose:
 *			Verify that the current process should be allowed to
 *			get the target process's task port. This is only
 *			permitted if:
 *			- The current process is root
 *			OR all of the following are true:
 *			- The target process's real, effective, and saved uids
 *			  are the same as the current proc's euid,
 *			- The target process's group set is a subset of the
 *			  calling process's group set, and
 *			- The target process hasn't switched credentials.
 *
 *	Returns:	TRUE: permitted
 *			FALSE: denied
 */
static int
task_for_pid_posix_check(proc_t target)
{
	kauth_cred_t targetcred, mycred;
	uid_t myuid;
	int allowed;

	/* No task_for_pid on bad targets */
	if (target->p_stat == SZOMB) {
		return FALSE;
	}

	mycred = kauth_cred_get();
	myuid = kauth_cred_getuid(mycred);

	/* If we're running as root, the check passes */
	if (kauth_cred_issuser(mycred))
		return TRUE;

	/* We're allowed to get our own task port */
	if (target == current_proc())
		return TRUE;

	/*
	 * Under DENY, only root can get another proc's task port,
	 * so no more checks are needed.
	 */
	if (tfp_policy == KERN_TFP_POLICY_DENY) {
		return FALSE;
	}

	targetcred = kauth_cred_proc_ref(target);
	allowed = TRUE;

	/* Do target's ruid, euid, and saved uid match my euid? */
	if ((kauth_cred_getuid(targetcred) != myuid) ||
			(kauth_cred_getruid(targetcred) != myuid) ||
			(kauth_cred_getsvuid(targetcred) != myuid)) {
		allowed = FALSE;
		goto out;
	}

	/* Are target's groups a subset of my groups? */
	if (kauth_cred_gid_subset(targetcred, mycred, &allowed) ||
			allowed == 0) {
		allowed = FALSE;
		goto out;
	}

	/* Has target switched credentials? */
	if (target->p_flag & P_SUGID) {
		allowed = FALSE;
		goto out;
	}

out:
	kauth_cred_unref(&targetcred);
	return allowed;
}

/*
 *	Routine:	task_for_pid
 *	Purpose:
 *		Get the task port for another "process", named by its
 *		process ID on the same host as "target_task".
 *
 *		Only permitted to privileged processes, or processes
 *		with the same user ID.
 *
 *		Note: if pid == 0, an error is returned no matter who is calling.
 *
 * XXX This should be a BSD system call, not a Mach trap!!!
 */
kern_return_t
task_for_pid(
	struct task_for_pid_args *args)
{
	mach_port_name_t	target_tport = args->target_tport;
	int			pid = args->pid;
	user_addr_t		task_addr = args->t;
	proc_t			p = PROC_NULL;
	task_t			t1 = TASK_NULL;
	mach_port_name_t	tret = MACH_PORT_NULL;
	ipc_port_t		tfpport;
	void			*sright;
	int			error = 0;

	AUDIT_MACH_SYSCALL_ENTER(AUE_TASKFORPID);
	AUDIT_ARG(pid, pid);
	AUDIT_ARG(mach_port1, target_tport);

	/* Always check if pid == 0 */
	if (pid == 0) {
		(void) copyout((char *)&t1, task_addr, sizeof(mach_port_name_t));
		AUDIT_MACH_SYSCALL_EXIT(KERN_FAILURE);
		return (KERN_FAILURE);
	}

	t1 = port_name_to_task(target_tport);
	if (t1 == TASK_NULL) {
		(void) copyout((char *)&t1, task_addr, sizeof(mach_port_name_t));
		AUDIT_MACH_SYSCALL_EXIT(KERN_FAILURE);
		return (KERN_FAILURE);
	}

	p = proc_find(pid);
	if (p == PROC_NULL) {
		error = KERN_FAILURE;
		goto tfpout;
	}

#if CONFIG_AUDIT
	AUDIT_ARG(process, p);
#endif

	if (!(task_for_pid_posix_check(p))) {
		error = KERN_FAILURE;
		goto tfpout;
	}

	if (p->task != TASK_NULL) {
		/* If we aren't root and target's task access port is set... */
		if (!kauth_cred_issuser(kauth_cred_get()) &&
			p != current_proc() &&
			(task_get_task_access_port(p->task, &tfpport) == 0) &&
			(tfpport != IPC_PORT_NULL)) {

			if (tfpport == IPC_PORT_DEAD) {
				error = KERN_PROTECTION_FAILURE;
				goto tfpout;
			}

			/* Call up to the task access server */
			error = check_task_access(tfpport, proc_selfpid(), kauth_getgid(), pid);

			if (error != MACH_MSG_SUCCESS) {
				if (error == MACH_RCV_INTERRUPTED)
					error = KERN_ABORTED;
				else
					error = KERN_FAILURE;
				goto tfpout;
			}
		}
#if CONFIG_MACF
		error = mac_proc_check_get_task(kauth_cred_get(), p);
		if (error) {
			error = KERN_FAILURE;
			goto tfpout;
		}
#endif

		/* Grant task port access */
		task_reference(p->task);
		extmod_statistics_incr_task_for_pid(p->task);

		sright = (void *) convert_task_to_port(p->task);
		tret = ipc_port_copyout_send(
				sright,
				get_task_ipcspace(current_task()));
	}
	error = KERN_SUCCESS;

tfpout:
	task_deallocate(t1);
	AUDIT_ARG(mach_port2, tret);
	(void) copyout((char *) &tret, task_addr, sizeof(mach_port_name_t));
	if (p != PROC_NULL)
		proc_rele(p);
	AUDIT_MACH_SYSCALL_EXIT(error);
	return (error);
}
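
/*
 * Illustrative userspace use of the task_for_pid() trap (a sketch, not
 * code from this file); the caller must satisfy the checks above (root,
 * the posix check, the task access server upcall, and MACF):
 *
 *	mach_port_t task;
 *	kern_return_t kr = task_for_pid(mach_task_self(), target_pid, &task);
 *	if (kr == KERN_SUCCESS) {
 *		// "task" is now a send right for the target's task port
 *	}
 */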

/*
 *	Routine:	task_name_for_pid
 *	Purpose:
 *		Get the task name port for another "process", named by its
 *		process ID on the same host as "target_task".
 *
 *		Only permitted to privileged processes, or processes
 *		with the same user ID.
 *
 * XXX This should be a BSD system call, not a Mach trap!!!
 */

kern_return_t
task_name_for_pid(
	struct task_name_for_pid_args *args)
{
	mach_port_name_t	target_tport = args->target_tport;
	int			pid = args->pid;
	user_addr_t		task_addr = args->t;
	proc_t			p = PROC_NULL;
	task_t			t1;
	mach_port_name_t	tret;
	void			*sright;
	int			error = 0, refheld = 0;
	kauth_cred_t		target_cred;

	AUDIT_MACH_SYSCALL_ENTER(AUE_TASKNAMEFORPID);
	AUDIT_ARG(pid, pid);
	AUDIT_ARG(mach_port1, target_tport);

	t1 = port_name_to_task(target_tport);
	if (t1 == TASK_NULL) {
		(void) copyout((char *)&t1, task_addr, sizeof(mach_port_name_t));
		AUDIT_MACH_SYSCALL_EXIT(KERN_FAILURE);
		return (KERN_FAILURE);
	}

	p = proc_find(pid);
	if (p != PROC_NULL) {
		AUDIT_ARG(process, p);
		target_cred = kauth_cred_proc_ref(p);
		refheld = 1;

		if ((p->p_stat != SZOMB)
		    && ((current_proc() == p)
			|| kauth_cred_issuser(kauth_cred_get())
			|| ((kauth_cred_getuid(target_cred) == kauth_cred_getuid(kauth_cred_get())) &&
			    ((kauth_cred_getruid(target_cred) == kauth_getruid()))))) {

			if (p->task != TASK_NULL) {
				task_reference(p->task);
#if CONFIG_MACF
				error = mac_proc_check_get_task_name(kauth_cred_get(), p);
				if (error) {
					task_deallocate(p->task);
					goto noperm;
				}
#endif
				sright = (void *)convert_task_name_to_port(p->task);
				tret = ipc_port_copyout_send(sright,
						get_task_ipcspace(current_task()));
			} else
				tret = MACH_PORT_NULL;

			AUDIT_ARG(mach_port2, tret);
			(void) copyout((char *)&tret, task_addr, sizeof(mach_port_name_t));
			task_deallocate(t1);
			error = KERN_SUCCESS;
			goto tnfpout;
		}
	}

#if CONFIG_MACF
noperm:
#endif
	task_deallocate(t1);
	tret = MACH_PORT_NULL;
	(void) copyout((char *) &tret, task_addr, sizeof(mach_port_name_t));
	error = KERN_FAILURE;
tnfpout:
	if (refheld != 0)
		kauth_cred_unref(&target_cred);
	if (p != PROC_NULL)
		proc_rele(p);
	AUDIT_MACH_SYSCALL_EXIT(error);
	return (error);
}

kern_return_t
pid_suspend(struct proc *p __unused, struct pid_suspend_args *args, int *ret)
{
	task_t	target = NULL;
	proc_t	targetproc = PROC_NULL;
	int	pid = args->pid;
	int	error = 0;

#if CONFIG_MACF
	error = mac_proc_check_suspend_resume(p, MAC_PROC_CHECK_SUSPEND);
	if (error) {
		error = EPERM;
		goto out;
	}
#endif

	if (pid == 0) {
		error = EPERM;
		goto out;
	}

	targetproc = proc_find(pid);
	if (targetproc == PROC_NULL) {
		error = ESRCH;
		goto out;
	}

	if (!task_for_pid_posix_check(targetproc)) {
		error = EPERM;
		goto out;
	}

	target = targetproc->task;
#ifndef CONFIG_EMBEDDED
	if (target != TASK_NULL) {
		mach_port_t tfpport;

		/* If we aren't root and target's task access port is set... */
		if (!kauth_cred_issuser(kauth_cred_get()) &&
			targetproc != current_proc() &&
			(task_get_task_access_port(target, &tfpport) == 0) &&
			(tfpport != IPC_PORT_NULL)) {

			if (tfpport == IPC_PORT_DEAD) {
				error = EACCES;
				goto out;
			}

			/* Call up to the task access server */
			error = check_task_access(tfpport, proc_selfpid(), kauth_getgid(), pid);

			if (error != MACH_MSG_SUCCESS) {
				if (error == MACH_RCV_INTERRUPTED)
					error = EINTR;
				else
					error = EPERM;
				goto out;
			}
		}
	}
#endif

	task_reference(target);
	error = task_pidsuspend(target);
	if (error) {
		if (error == KERN_INVALID_ARGUMENT) {
			error = EINVAL;
		} else {
			error = EPERM;
		}
	}
#if CONFIG_MEMORYSTATUS
	else {
		memorystatus_on_suspend(pid);
	}
#endif

	task_deallocate(target);

out:
	if (targetproc != PROC_NULL)
		proc_rele(targetproc);
	*ret = error;
	return error;
}

kern_return_t
pid_resume(struct proc *p __unused, struct pid_resume_args *args, int *ret)
{
	task_t	target = NULL;
	proc_t	targetproc = PROC_NULL;
	int	pid = args->pid;
	int	error = 0;

#if CONFIG_MACF
	error = mac_proc_check_suspend_resume(p, MAC_PROC_CHECK_RESUME);
	if (error) {
		error = EPERM;
		goto out;
	}
#endif

	if (pid == 0) {
		error = EPERM;
		goto out;
	}

	targetproc = proc_find(pid);
	if (targetproc == PROC_NULL) {
		error = ESRCH;
		goto out;
	}

	if (!task_for_pid_posix_check(targetproc)) {
		error = EPERM;
		goto out;
	}

	target = targetproc->task;
#ifndef CONFIG_EMBEDDED
	if (target != TASK_NULL) {
		mach_port_t tfpport;

		/* If we aren't root and target's task access port is set... */
		if (!kauth_cred_issuser(kauth_cred_get()) &&
			targetproc != current_proc() &&
			(task_get_task_access_port(target, &tfpport) == 0) &&
			(tfpport != IPC_PORT_NULL)) {

			if (tfpport == IPC_PORT_DEAD) {
				error = EACCES;
				goto out;
			}

			/* Call up to the task access server */
			error = check_task_access(tfpport, proc_selfpid(), kauth_getgid(), pid);

			if (error != MACH_MSG_SUCCESS) {
				if (error == MACH_RCV_INTERRUPTED)
					error = EINTR;
				else
					error = EPERM;
				goto out;
			}
		}
	}
#endif

	task_reference(target);

#if CONFIG_MEMORYSTATUS
	memorystatus_on_resume(pid);
#endif

	error = task_pidresume(target);
	if (error) {
		if (error == KERN_INVALID_ARGUMENT) {
			error = EINVAL;
		} else {
			error = EPERM;
		}
	}

	task_deallocate(target);

out:
	if (targetproc != PROC_NULL)
		proc_rele(targetproc);

	*ret = error;
	return error;
}

#if CONFIG_EMBEDDED
kern_return_t
pid_hibernate(struct proc *p __unused, struct pid_hibernate_args *args, int *ret)
{
	int	error = 0;
	proc_t	targetproc = PROC_NULL;
	int	pid = args->pid;

#ifndef CONFIG_FREEZE
	#pragma unused(pid)
#else

#if CONFIG_MACF
	error = mac_proc_check_suspend_resume(p, MAC_PROC_CHECK_HIBERNATE);
	if (error) {
		error = EPERM;
		goto out;
	}
#endif

	/*
	 * The only accepted pid value here is currently -1, since we just kick
	 * off the freeze thread here - individual ids aren't required. However,
	 * this call is intended to change in the future to initiate freezing of
	 * individual processes. In anticipation, we obtain the process handle
	 * for potentially valid values and call task_for_pid_posix_check();
	 * this way, everything is validated correctly and set up for further
	 * refactoring. See <rdar://problem/7839708> for more details.
	 */
	if (pid >= 0) {
		targetproc = proc_find(pid);
		if (targetproc == PROC_NULL) {
			error = ESRCH;
			goto out;
		}

		if (!task_for_pid_posix_check(targetproc)) {
			error = EPERM;
			goto out;
		}
	}

	if (pid == -1) {
		memorystatus_on_inactivity(pid);
	} else {
		error = EPERM;
	}

out:

#endif /* CONFIG_FREEZE */

	if (targetproc != PROC_NULL)
		proc_rele(targetproc);
	*ret = error;
	return error;
}

int
pid_shutdown_sockets(struct proc *p __unused, struct pid_shutdown_sockets_args *args, int *ret)
{
	int			error = 0;
	proc_t			targetproc = PROC_NULL;
	struct filedesc		*fdp;
	struct fileproc		*fp;
	int			pid = args->pid;
	int			level = args->level;
	int			i;

	if (level != SHUTDOWN_SOCKET_LEVEL_DISCONNECT_SVC &&
	    level != SHUTDOWN_SOCKET_LEVEL_DISCONNECT_ALL) {
		error = EINVAL;
		goto out;
	}

#if CONFIG_MACF
	error = mac_proc_check_suspend_resume(p, MAC_PROC_CHECK_SHUTDOWN_SOCKETS);
	if (error) {
		error = EPERM;
		goto out;
	}
#endif

	targetproc = proc_find(pid);
	if (targetproc == PROC_NULL) {
		error = ESRCH;
		goto out;
	}

	if (!task_for_pid_posix_check(targetproc)) {
		error = EPERM;
		goto out;
	}

	proc_fdlock(targetproc);
	fdp = targetproc->p_fd;

	for (i = 0; i < fdp->fd_nfiles; i++) {
		struct socket *sockp;

		fp = fdp->fd_ofiles[i];
		if (fp == NULL || (fdp->fd_ofileflags[i] & UF_RESERVED) != 0 ||
		    fp->f_fglob->fg_type != DTYPE_SOCKET) {
			continue;
		}

		sockp = (struct socket *)fp->f_fglob->fg_data;

		/* Call networking stack with socket and level */
		(void) socket_defunct(targetproc, sockp, level);
	}

	proc_fdunlock(targetproc);

out:
	if (targetproc != PROC_NULL)
		proc_rele(targetproc);
	*ret = error;
	return error;
}
#endif /* CONFIG_EMBEDDED */

static int
sysctl_settfp_policy(__unused struct sysctl_oid *oidp, void *arg1,
    __unused int arg2, struct sysctl_req *req)
{
	int error = 0;
	int new_value;

	error = SYSCTL_OUT(req, arg1, sizeof(int));
	if (error || req->newptr == USER_ADDR_NULL)
		return (error);

	if (!is_suser())
		return (EPERM);

	if ((error = SYSCTL_IN(req, &new_value, sizeof(int)))) {
		goto out;
	}
	if ((new_value == KERN_TFP_POLICY_DENY)
	    || (new_value == KERN_TFP_POLICY_DEFAULT))
		tfp_policy = new_value;
	else
		error = EINVAL;
out:
	return (error);
}

#if defined(SECURE_KERNEL)
static int kern_secure_kernel = 1;
#else
static int kern_secure_kernel = 0;
#endif

SYSCTL_INT(_kern, OID_AUTO, secure_kernel, CTLFLAG_RD | CTLFLAG_LOCKED, &kern_secure_kernel, 0, "");

SYSCTL_NODE(_kern, KERN_TFP, tfp, CTLFLAG_RW | CTLFLAG_LOCKED, 0, "tfp");
SYSCTL_PROC(_kern_tfp, KERN_TFP_POLICY, policy, CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_LOCKED,
    &tfp_policy, sizeof(uint32_t), &sysctl_settfp_policy, "I", "policy");

SYSCTL_INT(_vm, OID_AUTO, shared_region_trace_level, CTLFLAG_RW | CTLFLAG_LOCKED,
	   &shared_region_trace_level, 0, "");
SYSCTL_INT(_vm, OID_AUTO, shared_region_version, CTLFLAG_RD | CTLFLAG_LOCKED,
	   &shared_region_version, 0, "");
SYSCTL_INT(_vm, OID_AUTO, shared_region_persistence, CTLFLAG_RW | CTLFLAG_LOCKED,
	   &shared_region_persistence, 0, "");
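
/*
 * These are visible from userspace via sysctl(3), e.g. (an illustrative
 * sketch, not code from this file):
 *
 *	int version;
 *	size_t len = sizeof(version);
 *	if (sysctlbyname("vm.shared_region_version", &version, &len, NULL, 0) == 0)
 *		printf("shared region version: %d\n", version);
 */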

/*
 * shared_region_check_np:
 *
 * This system call is intended for dyld.
 *
 * dyld calls this when any process starts to see if the process's shared
 * region is already set up and ready to use.
 * This call returns the base address of the first mapping in the
 * process's shared region.
 * dyld will then check what's mapped at that address.
 *
 * If the shared region is empty, dyld will then attempt to map the shared
 * cache file in the shared region via the shared_region_map_np() system call.
 *
 * If something's already mapped in the shared region, dyld will check if it
 * matches the shared cache it would like to use for that process.
 * If it matches, everything's ready and the process can proceed and use the
 * shared region.
 * If it doesn't match, dyld will unmap the shared region and map the shared
 * cache into the process's address space via mmap().
 *
 * ERROR VALUES
 * EINVAL	no shared region
 * ENOMEM	shared region is empty
 * EFAULT	bad address for "start_address"
 */
int
shared_region_check_np(
	__unused struct proc			*p,
	struct shared_region_check_np_args	*uap,
	__unused int				*retvalp)
{
	vm_shared_region_t	shared_region;
	mach_vm_offset_t	start_address = 0;
	int			error;
	kern_return_t		kr;

	SHARED_REGION_TRACE_DEBUG(
		("shared_region: %p [%d(%s)] -> check_np(0x%llx)\n",
		 current_thread(), p->p_pid, p->p_comm,
		 (uint64_t)uap->start_address));

	/* retrieve the current task's shared region */
	shared_region = vm_shared_region_get(current_task());
	if (shared_region != NULL) {
		/* retrieve address of its first mapping... */
		kr = vm_shared_region_start_address(shared_region,
						    &start_address);
		if (kr != KERN_SUCCESS) {
			error = ENOMEM;
		} else {
			/* ... and give it to the caller */
			error = copyout(&start_address,
					(user_addr_t) uap->start_address,
					sizeof (start_address));
			if (error) {
				SHARED_REGION_TRACE_ERROR(
					("shared_region: %p [%d(%s)] "
					 "check_np(0x%llx) "
					 "copyout(0x%llx) error %d\n",
					 current_thread(), p->p_pid, p->p_comm,
					 (uint64_t)uap->start_address, (uint64_t)start_address,
					 error));
			}
		}
		vm_shared_region_deallocate(shared_region);
	} else {
		/* no shared region! */
		error = EINVAL;
	}

	SHARED_REGION_TRACE_DEBUG(
		("shared_region: %p [%d(%s)] check_np(0x%llx) <- 0x%llx %d\n",
		 current_thread(), p->p_pid, p->p_comm,
		 (uint64_t)uap->start_address, (uint64_t)start_address, error));

	return error;
}
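
/*
 * dyld-side flow for the call above (an illustrative sketch, not dyld's
 * actual code), using the __shared_region_check_np() syscall stub:
 *
 *	uint64_t start_address;
 *	if (__shared_region_check_np(&start_address) == 0) {
 *		// compare what's mapped at start_address against the
 *		// shared cache dyld wants to use
 *	} else {
 *		// no usable shared region: map the cache via
 *		// shared_region_map_and_slide_np() or fall back to mmap()
 *	}
 */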

int
shared_region_copyin_mappings(
		struct proc			*p,
		user_addr_t			user_mappings,
		unsigned int			mappings_count,
		struct shared_file_mapping_np	*mappings)
{
	int		error = 0;
	vm_size_t	mappings_size = 0;

	/* get the list of mappings the caller wants us to establish */
	mappings_size = (vm_size_t) (mappings_count * sizeof (mappings[0]));
	error = copyin(user_mappings,
		       mappings,
		       mappings_size);
	if (error) {
		SHARED_REGION_TRACE_ERROR(
			("shared_region: %p [%d(%s)] map(): "
			 "copyin(0x%llx, %d) failed (error=%d)\n",
			 current_thread(), p->p_pid, p->p_comm,
			 (uint64_t)user_mappings, mappings_count, error));
	}
	return error;
}

/*
 * shared_region_map_np()
 *
 * This system call is intended for dyld.
 *
 * dyld uses this to map a shared cache file into a shared region.
 * This is usually done only the first time a shared cache is needed.
 * Subsequent processes will just use the populated shared region without
 * requiring any further setup.
 */
int
_shared_region_map(
	struct proc				*p,
	int					fd,
	uint32_t				mappings_count,
	struct shared_file_mapping_np		*mappings,
	memory_object_control_t			*sr_file_control,
	struct shared_file_mapping_np		*mapping_to_slide)
{
	int				error;
	kern_return_t			kr;
	struct fileproc			*fp;
	struct vnode			*vp, *root_vp;
	struct vnode_attr		va;
	off_t				fs;
	memory_object_size_t		file_size;
	vm_prot_t			maxprot = VM_PROT_ALL;
	memory_object_control_t		file_control;
	struct vm_shared_region		*shared_region;

	SHARED_REGION_TRACE_DEBUG(
		("shared_region: %p [%d(%s)] -> map\n",
		 current_thread(), p->p_pid, p->p_comm));

	shared_region = NULL;
	fp = NULL;
	vp = NULL;

	/* get file structure from file descriptor */
	error = fp_lookup(p, fd, &fp, 0);
	if (error) {
		SHARED_REGION_TRACE_ERROR(
			("shared_region: %p [%d(%s)] map: "
			 "fd=%d lookup failed (error=%d)\n",
			 current_thread(), p->p_pid, p->p_comm, fd, error));
		goto done;
	}

	/* make sure we're attempting to map a vnode */
	if (fp->f_fglob->fg_type != DTYPE_VNODE) {
		SHARED_REGION_TRACE_ERROR(
			("shared_region: %p [%d(%s)] map: "
			 "fd=%d not a vnode (type=%d)\n",
			 current_thread(), p->p_pid, p->p_comm,
			 fd, fp->f_fglob->fg_type));
		error = EINVAL;
		goto done;
	}

	/* we need at least read permission on the file */
	if (!(fp->f_fglob->fg_flag & FREAD)) {
		SHARED_REGION_TRACE_ERROR(
			("shared_region: %p [%d(%s)] map: "
			 "fd=%d not readable\n",
			 current_thread(), p->p_pid, p->p_comm, fd));
		error = EPERM;
		goto done;
	}

	/* get vnode from file structure */
	error = vnode_getwithref((vnode_t) fp->f_fglob->fg_data);
	if (error) {
		SHARED_REGION_TRACE_ERROR(
			("shared_region: %p [%d(%s)] map: "
			 "fd=%d getwithref failed (error=%d)\n",
			 current_thread(), p->p_pid, p->p_comm, fd, error));
		goto done;
	}
	vp = (struct vnode *) fp->f_fglob->fg_data;

	/* make sure the vnode is a regular file */
	if (vp->v_type != VREG) {
		SHARED_REGION_TRACE_ERROR(
			("shared_region: %p [%d(%s)] map(%p:'%s'): "
			 "not a file (type=%d)\n",
			 current_thread(), p->p_pid, p->p_comm,
			 vp, vp->v_name, vp->v_type));
		error = EINVAL;
		goto done;
	}

#if CONFIG_MACF
	error = mac_file_check_mmap(vfs_context_ucred(vfs_context_current()),
			fp->f_fglob, VM_PROT_ALL, MAP_FILE, &maxprot);
	if (error) {
		goto done;
	}
#endif /* MAC */

	/* make sure vnode is on the process's root volume */
	root_vp = p->p_fd->fd_rdir;
	if (root_vp == NULL) {
		root_vp = rootvnode;
	} else {
		/*
		 * Chroot-ed processes can't use the shared_region.
		 */
		error = EINVAL;
		goto done;
	}

	if (vp->v_mount != root_vp->v_mount) {
		SHARED_REGION_TRACE_ERROR(
			("shared_region: %p [%d(%s)] map(%p:'%s'): "
			 "not on process's root volume\n",
			 current_thread(), p->p_pid, p->p_comm,
			 vp, vp->v_name));
		error = EPERM;
		goto done;
	}

	/* make sure vnode is owned by "root" */
	VATTR_INIT(&va);
	VATTR_WANTED(&va, va_uid);
	error = vnode_getattr(vp, &va, vfs_context_current());
	if (error) {
		SHARED_REGION_TRACE_ERROR(
			("shared_region: %p [%d(%s)] map(%p:'%s'): "
			 "vnode_getattr(%p) failed (error=%d)\n",
			 current_thread(), p->p_pid, p->p_comm,
			 vp, vp->v_name, vp, error));
		goto done;
	}
	if (va.va_uid != 0) {
		SHARED_REGION_TRACE_ERROR(
			("shared_region: %p [%d(%s)] map(%p:'%s'): "
			 "owned by uid=%d instead of 0\n",
			 current_thread(), p->p_pid, p->p_comm,
			 vp, vp->v_name, va.va_uid));
		error = EPERM;
		goto done;
	}

	/* get vnode size */
	error = vnode_size(vp, &fs, vfs_context_current());
	if (error) {
		SHARED_REGION_TRACE_ERROR(
			("shared_region: %p [%d(%s)] map(%p:'%s'): "
			 "vnode_size(%p) failed (error=%d)\n",
			 current_thread(), p->p_pid, p->p_comm,
			 vp, vp->v_name, vp, error));
		goto done;
	}
	file_size = fs;

	/* get the file's memory object handle */
	file_control = ubc_getobject(vp, UBC_HOLDOBJECT);
	if (file_control == MEMORY_OBJECT_CONTROL_NULL) {
		SHARED_REGION_TRACE_ERROR(
			("shared_region: %p [%d(%s)] map(%p:'%s'): "
			 "no memory object\n",
			 current_thread(), p->p_pid, p->p_comm,
			 vp, vp->v_name));
		error = EINVAL;
		goto done;
	}

	if (sr_file_control != NULL) {
		*sr_file_control = file_control;
	}

	/* get the process's shared region (setup in vm_map_exec()) */
	shared_region = vm_shared_region_get(current_task());
	if (shared_region == NULL) {
		SHARED_REGION_TRACE_ERROR(
			("shared_region: %p [%d(%s)] map(%p:'%s'): "
			 "no shared region\n",
			 current_thread(), p->p_pid, p->p_comm,
			 vp, vp->v_name));
		goto done;
	}

	/* map the file into that shared region's submap */
	kr = vm_shared_region_map_file(shared_region,
				       mappings_count,
				       mappings,
				       file_control,
				       file_size,
				       (void *) p->p_fd->fd_rdir,
				       mapping_to_slide);
	if (kr != KERN_SUCCESS) {
		SHARED_REGION_TRACE_ERROR(
			("shared_region: %p [%d(%s)] map(%p:'%s'): "
			 "vm_shared_region_map_file() failed kr=0x%x\n",
			 current_thread(), p->p_pid, p->p_comm,
			 vp, vp->v_name, kr));
		switch (kr) {
		case KERN_INVALID_ADDRESS:
			error = EFAULT;
			break;
		case KERN_PROTECTION_FAILURE:
			error = EPERM;
			break;
		case KERN_NO_SPACE:
			error = ENOMEM;
			break;
		case KERN_FAILURE:
		case KERN_INVALID_ARGUMENT:
		default:
			error = EINVAL;
			break;
		}
		goto done;
	}

	error = 0;

	vnode_lock_spin(vp);

	vp->v_flag |= VSHARED_DYLD;

	vnode_unlock(vp);

	/* update the vnode's access time */
	if (!(vnode_vfsvisflags(vp) & MNT_NOATIME)) {
		VATTR_INIT(&va);
		nanotime(&va.va_access_time);
		VATTR_SET_ACTIVE(&va, va_access_time);
		vnode_setattr(vp, &va, vfs_context_current());
	}

	if (p->p_flag & P_NOSHLIB) {
		/* signal that this process is now using split libraries */
		OSBitAndAtomic(~((uint32_t)P_NOSHLIB), &p->p_flag);
	}

done:
	if (vp != NULL) {
		/*
		 * release the vnode...
		 * ubc_map() still holds it for us in the non-error case
		 */
		(void) vnode_put(vp);
		vp = NULL;
	}
	if (fp != NULL) {
		/* release the file descriptor */
		fp_drop(p, fd, fp, 0);
		fp = NULL;
	}

	if (shared_region != NULL) {
		vm_shared_region_deallocate(shared_region);
	}

	SHARED_REGION_TRACE_DEBUG(
		("shared_region: %p [%d(%s)] <- map\n",
		 current_thread(), p->p_pid, p->p_comm));

	return error;
}

int
_shared_region_slide(uint32_t slide,
		mach_vm_offset_t	entry_start_address,
		mach_vm_size_t		entry_size,
		mach_vm_offset_t	slide_start,
		mach_vm_size_t		slide_size,
		memory_object_control_t	sr_file_control)
{
	void	*slide_info_entry = NULL;
	int	error;

	if ((error = vm_shared_region_slide_init(slide_size, entry_start_address,
			entry_size, slide, sr_file_control))) {
		printf("slide_info initialization failed with kr=%d\n", error);
		goto done;
	}

	slide_info_entry = vm_shared_region_get_slide_info_entry();
	if (slide_info_entry == NULL) {
		error = EFAULT;
	} else {
		error = copyin((user_addr_t)slide_start,
			       slide_info_entry,
			       (vm_size_t)slide_size);
	}
	if (error) {
		goto done;
	}

	if (vm_shared_region_slide_sanity_check() != KERN_SUCCESS) {
		error = EFAULT;
		printf("Sanity check failed for slide_info\n");
	} else {
#if DEBUG
		printf("Successfully initialized slide_info with start_address: %p region_size: %ld slide_header_size: %ld\n",
				(void*)(uintptr_t)entry_start_address,
				(unsigned long)entry_size,
				(unsigned long)slide_size);
#endif
	}
done:
	return error;
}

int
shared_region_map_and_slide_np(
	struct proc				*p,
	struct shared_region_map_and_slide_np_args	*uap,
	__unused int					*retvalp)
{
	struct shared_file_mapping_np	mapping_to_slide;
	struct shared_file_mapping_np	*mappings;
	unsigned int			mappings_count = uap->count;

	memory_object_control_t		sr_file_control;
	kern_return_t			kr = KERN_SUCCESS;
	uint32_t			slide = uap->slide;

#define SFM_MAX_STACK	8
	struct shared_file_mapping_np	stack_mappings[SFM_MAX_STACK];

	/* Is the process chrooted? */
	if (p->p_fd->fd_rdir != NULL) {
		kr = EINVAL;
		goto done;
	}

	if ((kr = vm_shared_region_sliding_valid(slide)) != KERN_SUCCESS) {
		if (kr == KERN_INVALID_ARGUMENT) {
			/*
			 * This will happen if we request sliding again
			 * with the same slide value that was used earlier
			 * for the very first sliding.
			 */
			kr = KERN_SUCCESS;
		}
		goto done;
	}

	if (mappings_count == 0) {
		SHARED_REGION_TRACE_INFO(
			("shared_region: %p [%d(%s)] map(): "
			 "no mappings\n",
			 current_thread(), p->p_pid, p->p_comm));
		kr = 0;	/* no mappings: we're done! */
		goto done;
	} else if (mappings_count <= SFM_MAX_STACK) {
		mappings = &stack_mappings[0];
	} else {
		SHARED_REGION_TRACE_ERROR(
			("shared_region: %p [%d(%s)] map(): "
			 "too many mappings (%d)\n",
			 current_thread(), p->p_pid, p->p_comm,
			 mappings_count));
		kr = KERN_FAILURE;
		goto done;
	}

	if ((kr = shared_region_copyin_mappings(p, uap->mappings, uap->count, mappings))) {
		goto done;
	}

	kr = _shared_region_map(p, uap->fd, mappings_count, mappings,
			&sr_file_control, &mapping_to_slide);
	if (kr != KERN_SUCCESS) {
		return kr;
	}

	if (slide) {
		kr = _shared_region_slide(slide,
				mapping_to_slide.sfm_file_offset,
				mapping_to_slide.sfm_size,
				uap->slide_start,
				uap->slide_size,
				sr_file_control);
		if (kr != KERN_SUCCESS) {
			vm_shared_region_undo_mappings(NULL, 0, mappings, mappings_count);
			return kr;
		}
	}
done:
	return kr;
}
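
/*
 * Shape of one mapping entry as dyld would hand it to this call (an
 * illustrative sketch; the sfm_* field names come from
 * <mach/shared_region.h>, the seg* variables are hypothetical):
 *
 *	struct shared_file_mapping_np m;
 *	m.sfm_address		= sharedRegionStart + segVMOffset;
 *	m.sfm_size		= segSize;
 *	m.sfm_file_offset	= segFileOffset;
 *	m.sfm_max_prot		= VM_PROT_READ | VM_PROT_EXECUTE;
 *	m.sfm_init_prot		= VM_PROT_READ | VM_PROT_EXECUTE;
 */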

/* sysctl overflow room */

/*
 * vm_page_free_target is provided as a makeshift solution for applications
 * that want to allocate buffer space, possibly purgeable memory, but not
 * cause inactive pages to be reclaimed. It allows the app to calculate how
 * much memory is free outside the free target.
 */
extern unsigned int	vm_page_free_target;
SYSCTL_INT(_vm, OID_AUTO, vm_page_free_target, CTLFLAG_RD | CTLFLAG_LOCKED,
		   &vm_page_free_target, 0, "Pageout daemon free target");

extern unsigned int	vm_memory_pressure;
SYSCTL_INT(_vm, OID_AUTO, memory_pressure, CTLFLAG_RD | CTLFLAG_LOCKED,
	   &vm_memory_pressure, 0, "Memory pressure indicator");

static int
vm_ctl_page_free_wanted SYSCTL_HANDLER_ARGS
{
#pragma unused(oidp, arg1, arg2)
	unsigned int page_free_wanted;

	page_free_wanted = mach_vm_ctl_page_free_wanted();
	return SYSCTL_OUT(req, &page_free_wanted, sizeof (page_free_wanted));
}
SYSCTL_PROC(_vm, OID_AUTO, page_free_wanted,
	    CTLTYPE_INT | CTLFLAG_RD | CTLFLAG_LOCKED,
	    0, 0, vm_ctl_page_free_wanted, "I", "");

extern unsigned int	vm_page_purgeable_count;
SYSCTL_INT(_vm, OID_AUTO, page_purgeable_count, CTLFLAG_RD | CTLFLAG_LOCKED,
	   &vm_page_purgeable_count, 0, "Purgeable page count");

extern unsigned int	vm_page_purgeable_wired_count;
SYSCTL_INT(_vm, OID_AUTO, page_purgeable_wired_count, CTLFLAG_RD | CTLFLAG_LOCKED,
	   &vm_page_purgeable_wired_count, 0, "Wired purgeable page count");

SYSCTL_INT(_vm, OID_AUTO, page_reusable_count, CTLFLAG_RD | CTLFLAG_LOCKED,
	   &vm_page_stats_reusable.reusable_count, 0, "Reusable page count");
SYSCTL_QUAD(_vm, OID_AUTO, reusable_success, CTLFLAG_RD | CTLFLAG_LOCKED,
	   &vm_page_stats_reusable.reusable_pages_success, "");
SYSCTL_QUAD(_vm, OID_AUTO, reusable_failure, CTLFLAG_RD | CTLFLAG_LOCKED,
	   &vm_page_stats_reusable.reusable_pages_failure, "");
SYSCTL_QUAD(_vm, OID_AUTO, reusable_shared, CTLFLAG_RD | CTLFLAG_LOCKED,
	   &vm_page_stats_reusable.reusable_pages_shared, "");
SYSCTL_QUAD(_vm, OID_AUTO, all_reusable_calls, CTLFLAG_RD | CTLFLAG_LOCKED,
	   &vm_page_stats_reusable.all_reusable_calls, "");
SYSCTL_QUAD(_vm, OID_AUTO, partial_reusable_calls, CTLFLAG_RD | CTLFLAG_LOCKED,
	   &vm_page_stats_reusable.partial_reusable_calls, "");
SYSCTL_QUAD(_vm, OID_AUTO, reuse_success, CTLFLAG_RD | CTLFLAG_LOCKED,
	   &vm_page_stats_reusable.reuse_pages_success, "");
SYSCTL_QUAD(_vm, OID_AUTO, reuse_failure, CTLFLAG_RD | CTLFLAG_LOCKED,
	   &vm_page_stats_reusable.reuse_pages_failure, "");
SYSCTL_QUAD(_vm, OID_AUTO, all_reuse_calls, CTLFLAG_RD | CTLFLAG_LOCKED,
	   &vm_page_stats_reusable.all_reuse_calls, "");
SYSCTL_QUAD(_vm, OID_AUTO, partial_reuse_calls, CTLFLAG_RD | CTLFLAG_LOCKED,
	   &vm_page_stats_reusable.partial_reuse_calls, "");
SYSCTL_QUAD(_vm, OID_AUTO, can_reuse_success, CTLFLAG_RD | CTLFLAG_LOCKED,
	   &vm_page_stats_reusable.can_reuse_success, "");
SYSCTL_QUAD(_vm, OID_AUTO, can_reuse_failure, CTLFLAG_RD | CTLFLAG_LOCKED,
	   &vm_page_stats_reusable.can_reuse_failure, "");


extern unsigned int vm_page_free_count, vm_page_speculative_count;
SYSCTL_UINT(_vm, OID_AUTO, page_free_count, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_page_free_count, 0, "");
SYSCTL_UINT(_vm, OID_AUTO, page_speculative_count, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_page_speculative_count, 0, "");

extern unsigned int vm_page_cleaned_count;
SYSCTL_UINT(_vm, OID_AUTO, page_cleaned_count, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_page_cleaned_count, 0, "Cleaned queue size");

/* pageout counts */
extern unsigned int vm_pageout_inactive_dirty_internal, vm_pageout_inactive_dirty_external, vm_pageout_inactive_clean, vm_pageout_speculative_clean, vm_pageout_inactive_used;
extern unsigned int vm_pageout_freed_from_inactive_clean, vm_pageout_freed_from_speculative;
SYSCTL_UINT(_vm, OID_AUTO, pageout_inactive_dirty_internal, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_pageout_inactive_dirty_internal, 0, "");
SYSCTL_UINT(_vm, OID_AUTO, pageout_inactive_dirty_external, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_pageout_inactive_dirty_external, 0, "");
SYSCTL_UINT(_vm, OID_AUTO, pageout_inactive_clean, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_pageout_inactive_clean, 0, "");
SYSCTL_UINT(_vm, OID_AUTO, pageout_speculative_clean, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_pageout_speculative_clean, 0, "");
SYSCTL_UINT(_vm, OID_AUTO, pageout_inactive_used, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_pageout_inactive_used, 0, "");
SYSCTL_UINT(_vm, OID_AUTO, pageout_freed_from_inactive_clean, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_pageout_freed_from_inactive_clean, 0, "");
SYSCTL_UINT(_vm, OID_AUTO, pageout_freed_from_speculative, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_pageout_freed_from_speculative, 0, "");

extern unsigned int vm_pageout_freed_from_cleaned;
SYSCTL_UINT(_vm, OID_AUTO, pageout_freed_from_cleaned, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_pageout_freed_from_cleaned, 0, "");

/* counts of pages entering the cleaned queue */
extern unsigned int vm_pageout_enqueued_cleaned, vm_pageout_enqueued_cleaned_from_inactive_clean, vm_pageout_enqueued_cleaned_from_inactive_dirty;
SYSCTL_UINT(_vm, OID_AUTO, pageout_enqueued_cleaned, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_pageout_enqueued_cleaned, 0, ""); /* sum of next two */
SYSCTL_UINT(_vm, OID_AUTO, pageout_enqueued_cleaned_from_inactive_clean, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_pageout_enqueued_cleaned_from_inactive_clean, 0, "");
SYSCTL_UINT(_vm, OID_AUTO, pageout_enqueued_cleaned_from_inactive_dirty, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_pageout_enqueued_cleaned_from_inactive_dirty, 0, "");

/* counts of pages leaving the cleaned queue */
extern unsigned int vm_pageout_cleaned_reclaimed, vm_pageout_cleaned_reactivated, vm_pageout_cleaned_reference_reactivated, vm_pageout_cleaned_volatile_reactivated, vm_pageout_cleaned_fault_reactivated, vm_pageout_cleaned_commit_reactivated, vm_pageout_cleaned_busy, vm_pageout_cleaned_nolock;
SYSCTL_UINT(_vm, OID_AUTO, pageout_cleaned, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_pageout_cleaned_reclaimed, 0, "Cleaned pages reclaimed");
SYSCTL_UINT(_vm, OID_AUTO, pageout_cleaned_reactivated, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_pageout_cleaned_reactivated, 0, "Cleaned pages reactivated"); /* sum of all reactivated AND busy and nolock (even though those actually get re-deactivated) */
SYSCTL_UINT(_vm, OID_AUTO, pageout_cleaned_reference_reactivated, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_pageout_cleaned_reference_reactivated, 0, "Cleaned pages reference reactivated");
SYSCTL_UINT(_vm, OID_AUTO, pageout_cleaned_volatile_reactivated, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_pageout_cleaned_volatile_reactivated, 0, "Cleaned pages volatile reactivated");
SYSCTL_UINT(_vm, OID_AUTO, pageout_cleaned_fault_reactivated, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_pageout_cleaned_fault_reactivated, 0, "Cleaned pages fault reactivated");
SYSCTL_UINT(_vm, OID_AUTO, pageout_cleaned_commit_reactivated, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_pageout_cleaned_commit_reactivated, 0, "Cleaned pages commit reactivated");
SYSCTL_UINT(_vm, OID_AUTO, pageout_cleaned_busy, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_pageout_cleaned_busy, 0, "Cleaned pages busy (deactivated)");
SYSCTL_UINT(_vm, OID_AUTO, pageout_cleaned_nolock, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_pageout_cleaned_nolock, 0, "Cleaned pages no-lock (deactivated)");

#include <kern/thread.h>
#include <sys/user.h>

void vm_pageout_io_throttle(void);

void
vm_pageout_io_throttle(void)
{
	struct uthread *uthread = get_bsdthread_info(current_thread());

	/*
	 * If the thread is marked as a low priority I/O type and the I/O
	 * we issued while in this cleaning operation collided with normal
	 * I/O operations, delay in order to mitigate the impact of this
	 * task on the normal operation of the system.
	 */
	if (uthread->uu_lowpri_window) {
		throttle_lowpri_io(TRUE);
	}
}

int
vm_pressure_monitor(
	__unused struct proc *p,
	struct vm_pressure_monitor_args *uap,
	int *retval)
{
	kern_return_t	kr;
	uint32_t	pages_reclaimed;
	uint32_t	pages_wanted;

	kr = mach_vm_pressure_monitor(
		(boolean_t) uap->wait_for_pressure,
		uap->nsecs_monitored,
		(uap->pages_reclaimed) ? &pages_reclaimed : NULL,
		&pages_wanted);

	switch (kr) {
	case KERN_SUCCESS:
		break;
	case KERN_ABORTED:
		return EINTR;
	default:
		return EINVAL;
	}

	if (uap->pages_reclaimed) {
		if (copyout((void *)&pages_reclaimed,
			    uap->pages_reclaimed,
			    sizeof (pages_reclaimed)) != 0) {
			return EFAULT;
		}
	}

	*retval = (int) pages_wanted;
	return 0;
}

int
kas_info(struct proc *p,
	 struct kas_info_args *uap,
	 int *retval __unused)
{
#ifdef SECURE_KERNEL
	(void)p;
	(void)uap;
	return ENOTSUP;
#else /* !SECURE_KERNEL */
	int		selector = uap->selector;
	user_addr_t	valuep = uap->value;
	user_addr_t	sizep = uap->size;
	user_size_t	size;
	int		error;

	if (!kauth_cred_issuser(kauth_cred_get())) {
		return EPERM;
	}

#if CONFIG_MACF
	error = mac_system_check_kas_info(kauth_cred_get(), selector);
	if (error) {
		return error;
	}
#endif

	if (IS_64BIT_PROCESS(p)) {
		user64_size_t size64;
		error = copyin(sizep, &size64, sizeof(size64));
		size = (user_size_t)size64;
	} else {
		user32_size_t size32;
		error = copyin(sizep, &size32, sizeof(size32));
		size = (user_size_t)size32;
	}
	if (error) {
		return error;
	}

	switch (selector) {
	case KAS_INFO_KERNEL_TEXT_SLIDE_SELECTOR:
		{
			uint64_t slide = vm_kernel_slide;

			if (sizeof(slide) != size) {
				return EINVAL;
			}

			if (IS_64BIT_PROCESS(p)) {
				user64_size_t size64 = (user64_size_t)size;
				error = copyout(&size64, sizep, sizeof(size64));
			} else {
				user32_size_t size32 = (user32_size_t)size;
				error = copyout(&size32, sizep, sizeof(size32));
			}
			if (error) {
				return error;
			}

			error = copyout(&slide, valuep, sizeof(slide));
			if (error) {
				return error;
			}
		}
		break;
	default:
		return EINVAL;
	}

	return 0;
#endif /* !SECURE_KERNEL */
}
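
/*
 * Illustrative userspace use of kas_info() (a sketch, not code from this
 * file; root only), via the wrapper declared in <sys/kas_info.h>:
 *
 *	uint64_t slide;
 *	size_t size = sizeof(slide);
 *	if (kas_info(KAS_INFO_KERNEL_TEXT_SLIDE_SELECTOR, &slide, &size) == 0)
 *		printf("kernel text slide: 0x%llx\n", slide);
 */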