/*
 * Copyright (c) 2000-2010 Apple Inc. All rights reserved.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
 *
 * This file contains Original Code and/or Modifications of Original Code
 * as defined in and that are subject to the Apple Public Source License
 * Version 2.0 (the 'License'). You may not use this file except in
 * compliance with the License. The rights granted to you under the License
 * may not be used to create, or enable the creation or redistribution of,
 * unlawful or unlicensed copies of an Apple operating system, or to
 * circumvent, violate, or enable the circumvention or violation of, any
 * terms of an Apple operating system software license agreement.
 *
 * Please obtain a copy of the License at
 * http://www.opensource.apple.com/apsl/ and read it before using this file.
 *
 * The Original Code and all software distributed under the License are
 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
 * Please see the License for the specific language governing rights and
 * limitations under the License.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
 */
/*
 * Mach Operating System
 * Copyright (c) 1987 Carnegie-Mellon University
 * All rights reserved.  The CMU software License Agreement specifies
 * the terms and conditions for use and redistribution.
 */
/*
 * NOTICE: This file was modified by SPARTA, Inc. in 2006 to introduce
 * support for mandatory and extensible security protections.  This notice
 * is included in support of clause 2.2 (b) of the Apple Public License,
 * Version 2.0.
 */

#include <meta_features.h>

#include <vm/vm_options.h>

#include <kern/task.h>
#include <kern/thread.h>
#include <kern/debug.h>
#include <kern/extmod_statistics.h>
#include <mach/mach_traps.h>
#include <mach/port.h>
#include <mach/task.h>
#include <mach/task_access.h>
#include <mach/task_special_ports.h>
#include <mach/time_value.h>
#include <mach/vm_map.h>
#include <mach/vm_param.h>
#include <mach/vm_prot.h>

#include <sys/file_internal.h>
#include <sys/param.h>
#include <sys/systm.h>
#include <sys/dir.h>
#include <sys/namei.h>
#include <sys/proc_internal.h>
#include <sys/kauth.h>
#include <sys/vm.h>
#include <sys/file.h>
#include <sys/vnode_internal.h>
#include <sys/mount.h>
#include <sys/trace.h>
#include <sys/kernel.h>
#include <sys/ubc_internal.h>
#include <sys/user.h>
#include <sys/syslog.h>
#include <sys/stat.h>
#include <sys/sysproto.h>
#include <sys/mman.h>
#include <sys/sysctl.h>
#include <sys/cprotect.h>
#include <sys/kpi_socket.h>
#include <sys/kas_info.h>
#include <sys/socket.h>
#include <sys/socketvar.h>

#include <security/audit/audit.h>
#include <security/mac.h>
#include <bsm/audit_kevents.h>

#include <kern/kalloc.h>
#include <vm/vm_map.h>
#include <vm/vm_kern.h>
#include <vm/vm_pageout.h>

#include <machine/spl.h>

#include <mach/shared_region.h>
#include <vm/vm_shared_region.h>

#include <vm/vm_protos.h>

#include <sys/kern_memorystatus.h>

int _shared_region_map_and_slide(struct proc*, int, unsigned int, struct shared_file_mapping_np*, uint32_t, user_addr_t, user_addr_t);
int shared_region_copyin_mappings(struct proc*, user_addr_t, unsigned int, struct shared_file_mapping_np *);

SYSCTL_INT(_vm, OID_AUTO, vm_do_collapse_compressor, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_counters.do_collapse_compressor, 0, "");
SYSCTL_INT(_vm, OID_AUTO, vm_do_collapse_compressor_pages, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_counters.do_collapse_compressor_pages, 0, "");
SYSCTL_INT(_vm, OID_AUTO, vm_do_collapse_terminate, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_counters.do_collapse_terminate, 0, "");
SYSCTL_INT(_vm, OID_AUTO, vm_do_collapse_terminate_failure, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_counters.do_collapse_terminate_failure, 0, "");
SYSCTL_INT(_vm, OID_AUTO, vm_should_cow_but_wired, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_counters.should_cow_but_wired, 0, "");
SYSCTL_INT(_vm, OID_AUTO, vm_create_upl_extra_cow, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_counters.create_upl_extra_cow, 0, "");
SYSCTL_INT(_vm, OID_AUTO, vm_create_upl_extra_cow_pages, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_counters.create_upl_extra_cow_pages, 0, "");
SYSCTL_INT(_vm, OID_AUTO, vm_create_upl_lookup_failure_write, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_counters.create_upl_lookup_failure_write, 0, "");
SYSCTL_INT(_vm, OID_AUTO, vm_create_upl_lookup_failure_copy, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_counters.create_upl_lookup_failure_copy, 0, "");
#if VM_SCAN_FOR_SHADOW_CHAIN
static int vm_shadow_max_enabled = 0;	/* Disabled by default */
extern int proc_shadow_max(void);
static int
vm_shadow_max SYSCTL_HANDLER_ARGS
{
#pragma unused(arg1, arg2, oidp)
	int value = 0;

	if (vm_shadow_max_enabled)
		value = proc_shadow_max();

	return SYSCTL_OUT(req, &value, sizeof(value));
}
SYSCTL_PROC(_vm, OID_AUTO, vm_shadow_max, CTLTYPE_INT|CTLFLAG_RD|CTLFLAG_LOCKED,
    0, 0, &vm_shadow_max, "I", "");

SYSCTL_INT(_vm, OID_AUTO, vm_shadow_max_enabled, CTLFLAG_RW | CTLFLAG_LOCKED, &vm_shadow_max_enabled, 0, "");

#endif /* VM_SCAN_FOR_SHADOW_CHAIN */

SYSCTL_INT(_vm, OID_AUTO, vm_debug_events, CTLFLAG_RW | CTLFLAG_LOCKED, &vm_debug_events, 0, "");

__attribute__((noinline)) int __KERNEL_WAITING_ON_TASKGATED_CHECK_ACCESS_UPCALL__(
	mach_port_t task_access_port, int32_t calling_pid, uint32_t calling_gid, int32_t target_pid);
/*
 * Sysctls related to data/stack execution.  See osfmk/vm/vm_map.c
 */

#ifndef SECURE_KERNEL
extern int allow_stack_exec, allow_data_exec;

SYSCTL_INT(_vm, OID_AUTO, allow_stack_exec, CTLFLAG_RW | CTLFLAG_LOCKED, &allow_stack_exec, 0, "");
SYSCTL_INT(_vm, OID_AUTO, allow_data_exec, CTLFLAG_RW | CTLFLAG_LOCKED, &allow_data_exec, 0, "");
#endif /* !SECURE_KERNEL */

static const char *prot_values[] = {
	"none",
	"read-only",
	"write-only",
	"read-write",
	"execute-only",
	"read-execute",
	"write-execute",
	"read-write-execute"
};

void
log_stack_execution_failure(addr64_t vaddr, vm_prot_t prot)
{
	printf("Data/Stack execution not permitted: %s[pid %d] at virtual address 0x%qx, protections were %s\n",
		current_proc()->p_comm, current_proc()->p_pid, vaddr, prot_values[prot & VM_PROT_ALL]);
}

int shared_region_unnest_logging = 1;

SYSCTL_INT(_vm, OID_AUTO, shared_region_unnest_logging, CTLFLAG_RW | CTLFLAG_LOCKED,
	   &shared_region_unnest_logging, 0, "");

int vm_shared_region_unnest_log_interval = 10;
int shared_region_unnest_log_count_threshold = 5;

/*
 * Shared cache path enforcement.
 */

static int scdir_enforce = 1;
static char scdir_path[] = "/var/db/dyld/";

#ifndef SECURE_KERNEL
SYSCTL_INT(_vm, OID_AUTO, enforce_shared_cache_dir, CTLFLAG_RW | CTLFLAG_LOCKED, &scdir_enforce, 0, "");
#endif

/* These log rate throttling state variables aren't thread safe, but
 * are sufficient unto the task.
 */
static int64_t last_unnest_log_time = 0;
static int shared_region_unnest_log_count = 0;

void
log_unnest_badness(vm_map_t m, vm_map_offset_t s, vm_map_offset_t e)
{
	struct timeval tv;
	const char *pcommstr;

	if (shared_region_unnest_logging == 0)
		return;

	if (shared_region_unnest_logging == 1) {
		microtime(&tv);
		if ((tv.tv_sec - last_unnest_log_time) < vm_shared_region_unnest_log_interval) {
			if (shared_region_unnest_log_count++ > shared_region_unnest_log_count_threshold)
				return;
		} else {
			last_unnest_log_time = tv.tv_sec;
			shared_region_unnest_log_count = 0;
		}
	}

	pcommstr = current_proc()->p_comm;

	printf("%s (map: %p) triggered DYLD shared region unnest for map: %p, region 0x%qx->0x%qx. While not abnormal for debuggers, this increases system memory footprint until the target exits.\n", pcommstr, get_task_map(current_proc()->task), m, (uint64_t)s, (uint64_t)e);
}

int
useracc(
	user_addr_t	addr,
	user_size_t	len,
	int	prot)
{
	vm_map_t	map;

	map = current_map();
	return (vm_map_check_protection(
			map,
			vm_map_trunc_page(addr,
					  vm_map_page_mask(map)),
			vm_map_round_page(addr+len,
					  vm_map_page_mask(map)),
			prot == B_READ ? VM_PROT_READ : VM_PROT_WRITE));
}

int
vslock(
	user_addr_t	addr,
	user_size_t	len)
{
	kern_return_t	kret;
	vm_map_t	map;

	map = current_map();
	kret = vm_map_wire(map,
			   vm_map_trunc_page(addr,
					     vm_map_page_mask(map)),
			   vm_map_round_page(addr+len,
					     vm_map_page_mask(map)),
			   VM_PROT_READ | VM_PROT_WRITE,
			   FALSE);

	switch (kret) {
	case KERN_SUCCESS:
		return (0);
	case KERN_INVALID_ADDRESS:
	case KERN_NO_SPACE:
		return (ENOMEM);
	case KERN_PROTECTION_FAILURE:
		return (EACCES);
	default:
		return (EINVAL);
	}
}

int
vsunlock(
	user_addr_t addr,
	user_size_t len,
	__unused int dirtied)
{
#if FIXME  /* [ */
	pmap_t		pmap;
	vm_page_t	pg;
	vm_map_offset_t	vaddr;
	ppnum_t		paddr;
#endif  /* FIXME ] */
	kern_return_t	kret;
	vm_map_t	map;

	map = current_map();

#if FIXME  /* [ */
	if (dirtied) {
		pmap = get_task_pmap(current_task());
		for (vaddr = vm_map_trunc_page(addr, PAGE_MASK);
		     vaddr < vm_map_round_page(addr+len, PAGE_MASK);
		     vaddr += PAGE_SIZE) {
			paddr = pmap_extract(pmap, vaddr);
			pg = PHYS_TO_VM_PAGE(paddr);
			vm_page_set_modified(pg);
		}
	}
#endif  /* FIXME ] */
#ifdef	lint
	dirtied++;
#endif	/* lint */
	kret = vm_map_unwire(map,
			     vm_map_trunc_page(addr,
					       vm_map_page_mask(map)),
			     vm_map_round_page(addr+len,
					       vm_map_page_mask(map)),
			     FALSE);
	switch (kret) {
	case KERN_SUCCESS:
		return (0);
	case KERN_INVALID_ADDRESS:
	case KERN_NO_SPACE:
		return (ENOMEM);
	case KERN_PROTECTION_FAILURE:
		return (EACCES);
	default:
		return (EINVAL);
	}
}
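
/*
 * Illustrative sketch (not part of this file; the helper and its caller are
 * hypothetical): vslock() wires a user range so it cannot fault out, and
 * vsunlock() unwires it, so kernel code can bracket a copy like this:
 *
 *	static int
 *	read_user_buffer_wired(user_addr_t uaddr, user_size_t len, void *kbuf)
 *	{
 *		int error;
 *
 *		error = vslock(uaddr, len);	// 0, ENOMEM, EACCES or EINVAL
 *		if (error)
 *			return (error);
 *		error = copyin(uaddr, kbuf, len);
 *		vsunlock(uaddr, len, 0);	// "dirtied" is currently ignored
 *		return (error);
 *	}
 */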

int
subyte(
	user_addr_t addr,
	int byte)
{
	char character;

	character = (char)byte;
	return (copyout((void *)&(character), addr, sizeof(char)) == 0 ? 0 : -1);
}

int
suibyte(
	user_addr_t addr,
	int byte)
{
	char character;

	character = (char)byte;
	return (copyout((void *)&(character), addr, sizeof(char)) == 0 ? 0 : -1);
}

int fubyte(user_addr_t addr)
{
	unsigned char byte;

	if (copyin(addr, (void *) &byte, sizeof(char)))
		return(-1);
	return(byte);
}

int fuibyte(user_addr_t addr)
{
	unsigned char byte;

	if (copyin(addr, (void *) &(byte), sizeof(char)))
		return(-1);
	return(byte);
}

int
suword(
	user_addr_t addr,
	long word)
{
	return (copyout((void *) &word, addr, sizeof(int)) == 0 ? 0 : -1);
}

long fuword(user_addr_t addr)
{
	long word = 0;

	if (copyin(addr, (void *) &word, sizeof(int)))
		return(-1);
	return(word);
}

/* suiword and fuiword are the same as suword and fuword, respectively */

int
suiword(
	user_addr_t addr,
	long word)
{
	return (copyout((void *) &word, addr, sizeof(int)) == 0 ? 0 : -1);
}

long fuiword(user_addr_t addr)
{
	long word = 0;

	if (copyin(addr, (void *) &word, sizeof(int)))
		return(-1);
	return(word);
}

/*
 * With a 32-bit kernel and mixed 32/64-bit user tasks, this interface allows the
 * fetching and setting of process-sized size_t and pointer values.
 */
int
sulong(user_addr_t addr, int64_t word)
{
	if (IS_64BIT_PROCESS(current_proc())) {
		return(copyout((void *)&word, addr, sizeof(word)) == 0 ? 0 : -1);
	} else {
		return(suiword(addr, (long)word));
	}
}

int64_t
fulong(user_addr_t addr)
{
	int64_t longword;

	if (IS_64BIT_PROCESS(current_proc())) {
		if (copyin(addr, (void *)&longword, sizeof(longword)) != 0)
			return(-1);
		return(longword);
	} else {
		return((int64_t)fuiword(addr));
	}
}

int
suulong(user_addr_t addr, uint64_t uword)
{
	if (IS_64BIT_PROCESS(current_proc())) {
		return(copyout((void *)&uword, addr, sizeof(uword)) == 0 ? 0 : -1);
	} else {
		return(suiword(addr, (uint32_t)uword));
	}
}

uint64_t
fuulong(user_addr_t addr)
{
	uint64_t ulongword;

	if (IS_64BIT_PROCESS(current_proc())) {
		if (copyin(addr, (void *)&ulongword, sizeof(ulongword)) != 0)
			return(-1ULL);
		return(ulongword);
	} else {
		return((uint64_t)fuiword(addr));
	}
}
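
/*
 * Illustrative sketch (hypothetical helper): fulong()/sulong() pick the 32- or
 * 64-bit width for the current process, so callers can round-trip a
 * process-sized slot without knowing the ABI. Note that -1 doubles as the
 * error value, so a stored -1 is indistinguishable from a fault:
 *
 *	static int
 *	increment_user_slot(user_addr_t slot)
 *	{
 *		int64_t v = fulong(slot);
 *
 *		if (v == -1)			// fault... or a genuine -1
 *			return (EFAULT);
 *		return (sulong(slot, v + 1) == 0 ? 0 : EFAULT);
 *	}
 */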

int
swapon(__unused proc_t procp, __unused struct swapon_args *uap, __unused int *retval)
{
	return(ENOTSUP);
}

/*
 * pid_for_task
 *
 * Find the BSD process ID for the Mach task associated with the given Mach port
 * name
 *
 * Parameters:	args		User argument descriptor (see below)
 *
 * Indirect parameters:	args->t		Mach port name
 * 			args->pid	Process ID (returned value; see below)
 *
 * Returns:	KERN_SUCCESS	Success
 * 		KERN_FAILURE	Failure
 *
 * Implicit returns: args->pid		Process ID
 *
 */
kern_return_t
pid_for_task(
	struct pid_for_task_args *args)
{
	mach_port_name_t	t = args->t;
	user_addr_t		pid_addr  = args->pid;
	proc_t p;
	task_t		t1;
	int	pid = -1;
	kern_return_t	err = KERN_SUCCESS;

	AUDIT_MACH_SYSCALL_ENTER(AUE_PIDFORTASK);
	AUDIT_ARG(mach_port1, t);

	t1 = port_name_to_task(t);

	if (t1 == TASK_NULL) {
		err = KERN_FAILURE;
		goto pftout;
	} else {
		p = get_bsdtask_info(t1);
		if (p) {
			pid  = proc_pid(p);
			err = KERN_SUCCESS;
		} else {
			err = KERN_FAILURE;
		}
	}
	task_deallocate(t1);
pftout:
	AUDIT_ARG(pid, pid);
	(void) copyout((char *) &pid, pid_addr, sizeof(int));
	AUDIT_MACH_SYSCALL_EXIT(err);
	return(err);
}
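
/*
 * Illustrative user-space sketch (not kernel code; assumes the libc trap
 * stub): pid_for_task() maps a task port the caller already holds back to a
 * BSD pid.
 *
 *	#include <stdio.h>
 *	#include <mach/mach.h>
 *
 *	int pid;
 *	if (pid_for_task(mach_task_self(), &pid) == KERN_SUCCESS)
 *		printf("my pid is %d\n", pid);	// matches getpid()
 */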

/*
 * tfp_policy = KERN_TFP_POLICY_DENY; Deny Mode: None allowed except for self
 * tfp_policy = KERN_TFP_POLICY_DEFAULT; default mode: all posix checks and upcall via task port for authentication
 */
static int tfp_policy = KERN_TFP_POLICY_DEFAULT;

/*
 *	Routine:	task_for_pid_posix_check
 *	Purpose:
 *			Verify that the current process should be allowed to
 *			get the target process's task port. This is only
 *			permitted if:
 *			- The current process is root
 *			OR all of the following are true:
 *			- The target process's real, effective, and saved uids
 *			  are the same as the current proc's euid,
 *			- The target process's group set is a subset of the
 *			  calling process's group set, and
 *			- The target process hasn't switched credentials.
 *
 *	Returns:	TRUE: permitted
 *			FALSE: denied
 */
static int
task_for_pid_posix_check(proc_t target)
{
	kauth_cred_t targetcred, mycred;
	uid_t myuid;
	int allowed;

	/* No task_for_pid on bad targets */
	if (target->p_stat == SZOMB) {
		return FALSE;
	}

	mycred = kauth_cred_get();
	myuid = kauth_cred_getuid(mycred);

	/* If we're running as root, the check passes */
	if (kauth_cred_issuser(mycred))
		return TRUE;

	/* We're allowed to get our own task port */
	if (target == current_proc())
		return TRUE;

	/*
	 * Under DENY, only root can get another proc's task port,
	 * so no more checks are needed.
	 */
	if (tfp_policy == KERN_TFP_POLICY_DENY) {
		return FALSE;
	}

	targetcred = kauth_cred_proc_ref(target);
	allowed = TRUE;

	/* Do target's ruid, euid, and saved uid match my euid? */
	if ((kauth_cred_getuid(targetcred) != myuid) ||
			(kauth_cred_getruid(targetcred) != myuid) ||
			(kauth_cred_getsvuid(targetcred) != myuid)) {
		allowed = FALSE;
		goto out;
	}

	/* Are target's groups a subset of my groups? */
	if (kauth_cred_gid_subset(targetcred, mycred, &allowed) ||
			allowed == 0) {
		allowed = FALSE;
		goto out;
	}

	/* Has target switched credentials? */
	if (target->p_flag & P_SUGID) {
		allowed = FALSE;
		goto out;
	}

out:
	kauth_cred_unref(&targetcred);
	return allowed;
}

/*
 *	__KERNEL_WAITING_ON_TASKGATED_CHECK_ACCESS_UPCALL__
 *
 *	Description:	Waits for the user-space daemon to respond to the
 *			request we made. The function is declared noinline so
 *			that it is visible in stackshots and spindumps, and to
 *			aid debugging.
 */
__attribute__((noinline)) int __KERNEL_WAITING_ON_TASKGATED_CHECK_ACCESS_UPCALL__(
	mach_port_t task_access_port, int32_t calling_pid, uint32_t calling_gid, int32_t target_pid)
{
	return check_task_access(task_access_port, calling_pid, calling_gid, target_pid);
}

/*
 *	Routine:	task_for_pid
 *	Purpose:
 *		Get the task port for another "process", named by its
 *		process ID on the same host as "target_task".
 *
 *		Only permitted to privileged processes, or processes
 *		with the same user ID.
 *
 *		Note: if pid == 0, an error is returned no matter who is calling.
 *
 * XXX This should be a BSD system call, not a Mach trap!!!
 */
kern_return_t
task_for_pid(
	struct task_for_pid_args *args)
{
	mach_port_name_t	target_tport = args->target_tport;
	int			pid = args->pid;
	user_addr_t		task_addr = args->t;
	proc_t 			p = PROC_NULL;
	task_t			t1 = TASK_NULL;
	mach_port_name_t	tret = MACH_PORT_NULL;
	ipc_port_t 		tfpport;
	void * sright;
	int error = 0;

	AUDIT_MACH_SYSCALL_ENTER(AUE_TASKFORPID);
	AUDIT_ARG(pid, pid);
	AUDIT_ARG(mach_port1, target_tport);

	/* Always check if pid == 0 */
	if (pid == 0) {
		(void) copyout((char *)&t1, task_addr, sizeof(mach_port_name_t));
		AUDIT_MACH_SYSCALL_EXIT(KERN_FAILURE);
		return(KERN_FAILURE);
	}

	t1 = port_name_to_task(target_tport);
	if (t1 == TASK_NULL) {
		(void) copyout((char *)&t1, task_addr, sizeof(mach_port_name_t));
		AUDIT_MACH_SYSCALL_EXIT(KERN_FAILURE);
		return(KERN_FAILURE);
	}

	p = proc_find(pid);
	if (p == PROC_NULL) {
		error = KERN_FAILURE;
		goto tfpout;
	}

#if CONFIG_AUDIT
	AUDIT_ARG(process, p);
#endif

	if (!(task_for_pid_posix_check(p))) {
		error = KERN_FAILURE;
		goto tfpout;
	}

	if (p->task != TASK_NULL) {
		/* If we aren't root and target's task access port is set... */
		if (!kauth_cred_issuser(kauth_cred_get()) &&
			p != current_proc() &&
			(task_get_task_access_port(p->task, &tfpport) == 0) &&
			(tfpport != IPC_PORT_NULL)) {

			if (tfpport == IPC_PORT_DEAD) {
				error = KERN_PROTECTION_FAILURE;
				goto tfpout;
			}

			/* Call up to the task access server */
			error = __KERNEL_WAITING_ON_TASKGATED_CHECK_ACCESS_UPCALL__(tfpport, proc_selfpid(), kauth_getgid(), pid);

			if (error != MACH_MSG_SUCCESS) {
				if (error == MACH_RCV_INTERRUPTED)
					error = KERN_ABORTED;
				else
					error = KERN_FAILURE;
				goto tfpout;
			}
		}
#if CONFIG_MACF
		error = mac_proc_check_get_task(kauth_cred_get(), p);
		if (error) {
			error = KERN_FAILURE;
			goto tfpout;
		}
#endif

		/* Grant task port access */
		task_reference(p->task);
		extmod_statistics_incr_task_for_pid(p->task);

		sright = (void *) convert_task_to_port(p->task);
		tret = ipc_port_copyout_send(
				sright,
				get_task_ipcspace(current_task()));
	}
	error = KERN_SUCCESS;

tfpout:
	task_deallocate(t1);
	AUDIT_ARG(mach_port2, tret);
	(void) copyout((char *) &tret, task_addr, sizeof(mach_port_name_t));
	if (p != PROC_NULL)
		proc_rele(p);
	AUDIT_MACH_SYSCALL_EXIT(error);
	return(error);
}
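
/*
 * Illustrative user-space sketch (not kernel code): on success the caller
 * receives a send right to the target's task port and is responsible for
 * dropping it. Expect KERN_FAILURE unless the posix check, the task access
 * server and MACF all agree. "target_pid" is a placeholder.
 *
 *	#include <mach/mach.h>
 *
 *	mach_port_t task = MACH_PORT_NULL;
 *	if (task_for_pid(mach_task_self(), target_pid, &task) == KERN_SUCCESS) {
 *		// inspect the task, then release the send right
 *		mach_port_deallocate(mach_task_self(), task);
 *	}
 */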

/*
 *	Routine:	task_name_for_pid
 *	Purpose:
 *		Get the task name port for another "process", named by its
 *		process ID on the same host as "target_task".
 *
 *		Only permitted to privileged processes, or processes
 *		with the same user ID.
 *
 * XXX This should be a BSD system call, not a Mach trap!!!
 */

kern_return_t
task_name_for_pid(
	struct task_name_for_pid_args *args)
{
	mach_port_name_t	target_tport = args->target_tport;
	int			pid = args->pid;
	user_addr_t		task_addr = args->t;
	proc_t		p = PROC_NULL;
	task_t		t1;
	mach_port_name_t	tret;
	void * sright;
	int error = 0, refheld = 0;
	kauth_cred_t target_cred;

	AUDIT_MACH_SYSCALL_ENTER(AUE_TASKNAMEFORPID);
	AUDIT_ARG(pid, pid);
	AUDIT_ARG(mach_port1, target_tport);

	t1 = port_name_to_task(target_tport);
	if (t1 == TASK_NULL) {
		(void) copyout((char *)&t1, task_addr, sizeof(mach_port_name_t));
		AUDIT_MACH_SYSCALL_EXIT(KERN_FAILURE);
		return(KERN_FAILURE);
	}

	p = proc_find(pid);
	if (p != PROC_NULL) {
		AUDIT_ARG(process, p);
		target_cred = kauth_cred_proc_ref(p);
		refheld = 1;

		if ((p->p_stat != SZOMB)
		    && ((current_proc() == p)
			|| kauth_cred_issuser(kauth_cred_get())
			|| ((kauth_cred_getuid(target_cred) == kauth_cred_getuid(kauth_cred_get())) &&
			    ((kauth_cred_getruid(target_cred) == kauth_getruid()))))) {

			if (p->task != TASK_NULL) {
				task_reference(p->task);
#if CONFIG_MACF
				error = mac_proc_check_get_task_name(kauth_cred_get(), p);
				if (error) {
					task_deallocate(p->task);
					goto noperm;
				}
#endif
				sright = (void *)convert_task_name_to_port(p->task);
				tret = ipc_port_copyout_send(sright,
						get_task_ipcspace(current_task()));
			} else
				tret  = MACH_PORT_NULL;

			AUDIT_ARG(mach_port2, tret);
			(void) copyout((char *)&tret, task_addr, sizeof(mach_port_name_t));
			task_deallocate(t1);
			error = KERN_SUCCESS;
			goto tnfpout;
		}
	}

#if CONFIG_MACF
noperm:
#endif
	task_deallocate(t1);
	tret = MACH_PORT_NULL;
	(void) copyout((char *) &tret, task_addr, sizeof(mach_port_name_t));
	error = KERN_FAILURE;
tnfpout:
	if (refheld != 0)
		kauth_cred_unref(&target_cred);
	if (p != PROC_NULL)
		proc_rele(p);
	AUDIT_MACH_SYSCALL_EXIT(error);
	return(error);
}

kern_return_t
pid_suspend(struct proc *p __unused, struct pid_suspend_args *args, int *ret)
{
	task_t	target = NULL;
	proc_t	targetproc = PROC_NULL;
	int	pid = args->pid;
	int	error = 0;

#if CONFIG_MACF
	error = mac_proc_check_suspend_resume(p, MAC_PROC_CHECK_SUSPEND);
	if (error) {
		error = EPERM;
		goto out;
	}
#endif

	if (pid == 0) {
		error = EPERM;
		goto out;
	}

	targetproc = proc_find(pid);
	if (targetproc == PROC_NULL) {
		error = ESRCH;
		goto out;
	}

	if (!task_for_pid_posix_check(targetproc)) {
		error = EPERM;
		goto out;
	}

	target = targetproc->task;
	if (target != TASK_NULL) {
		mach_port_t tfpport;

		/* If we aren't root and target's task access port is set... */
		if (!kauth_cred_issuser(kauth_cred_get()) &&
			targetproc != current_proc() &&
			(task_get_task_access_port(target, &tfpport) == 0) &&
			(tfpport != IPC_PORT_NULL)) {

			if (tfpport == IPC_PORT_DEAD) {
				error = EACCES;
				goto out;
			}

			/* Call up to the task access server */
			error = __KERNEL_WAITING_ON_TASKGATED_CHECK_ACCESS_UPCALL__(tfpport, proc_selfpid(), kauth_getgid(), pid);

			if (error != MACH_MSG_SUCCESS) {
				if (error == MACH_RCV_INTERRUPTED)
					error = EINTR;
				else
					error = EPERM;
				goto out;
			}
		}
	}

	task_reference(target);
	error = task_pidsuspend(target);
	if (error) {
		if (error == KERN_INVALID_ARGUMENT) {
			error = EINVAL;
		} else {
			error = EPERM;
		}
	}
#if CONFIG_MEMORYSTATUS
	else {
		memorystatus_on_suspend(targetproc);
	}
#endif

	task_deallocate(target);

out:
	if (targetproc != PROC_NULL)
		proc_rele(targetproc);
	*ret = error;
	return error;
}

kern_return_t
pid_resume(struct proc *p __unused, struct pid_resume_args *args, int *ret)
{
	task_t	target = NULL;
	proc_t	targetproc = PROC_NULL;
	int	pid = args->pid;
	int	error = 0;

#if CONFIG_MACF
	error = mac_proc_check_suspend_resume(p, MAC_PROC_CHECK_RESUME);
	if (error) {
		error = EPERM;
		goto out;
	}
#endif

	if (pid == 0) {
		error = EPERM;
		goto out;
	}

	targetproc = proc_find(pid);
	if (targetproc == PROC_NULL) {
		error = ESRCH;
		goto out;
	}

	if (!task_for_pid_posix_check(targetproc)) {
		error = EPERM;
		goto out;
	}

	target = targetproc->task;
	if (target != TASK_NULL) {
		mach_port_t tfpport;

		/* If we aren't root and target's task access port is set... */
		if (!kauth_cred_issuser(kauth_cred_get()) &&
			targetproc != current_proc() &&
			(task_get_task_access_port(target, &tfpport) == 0) &&
			(tfpport != IPC_PORT_NULL)) {

			if (tfpport == IPC_PORT_DEAD) {
				error = EACCES;
				goto out;
			}

			/* Call up to the task access server */
			error = __KERNEL_WAITING_ON_TASKGATED_CHECK_ACCESS_UPCALL__(tfpport, proc_selfpid(), kauth_getgid(), pid);

			if (error != MACH_MSG_SUCCESS) {
				if (error == MACH_RCV_INTERRUPTED)
					error = EINTR;
				else
					error = EPERM;
				goto out;
			}
		}
	}

	task_reference(target);

#if CONFIG_MEMORYSTATUS
	memorystatus_on_resume(targetproc);
#endif

	error = task_pidresume(target);
	if (error) {
		if (error == KERN_INVALID_ARGUMENT) {
			error = EINVAL;
		} else {
			if (error == KERN_MEMORY_ERROR) {
				psignal(targetproc, SIGKILL);
				error = EIO;
			} else
				error = EPERM;
		}
	}

	task_deallocate(target);

out:
	if (targetproc != PROC_NULL)
		proc_rele(targetproc);

	*ret = error;
	return error;
}

static int
sysctl_settfp_policy(__unused struct sysctl_oid *oidp, void *arg1,
    __unused int arg2, struct sysctl_req *req)
{
	int error = 0;
	int new_value;

	error = SYSCTL_OUT(req, arg1, sizeof(int));
	if (error || req->newptr == USER_ADDR_NULL)
		return(error);

	if (!kauth_cred_issuser(kauth_cred_get()))
		return(EPERM);

	if ((error = SYSCTL_IN(req, &new_value, sizeof(int)))) {
		goto out;
	}
	if ((new_value == KERN_TFP_POLICY_DENY)
	    || (new_value == KERN_TFP_POLICY_DEFAULT))
		tfp_policy = new_value;
	else
		error = EINVAL;
out:
	return(error);
}

#if defined(SECURE_KERNEL)
static int kern_secure_kernel = 1;
#else
static int kern_secure_kernel = 0;
#endif

SYSCTL_INT(_kern, OID_AUTO, secure_kernel, CTLFLAG_RD | CTLFLAG_LOCKED, &kern_secure_kernel, 0, "");

SYSCTL_NODE(_kern, KERN_TFP, tfp, CTLFLAG_RW | CTLFLAG_LOCKED, 0, "tfp");
SYSCTL_PROC(_kern_tfp, KERN_TFP_POLICY, policy, CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_LOCKED,
    &tfp_policy, sizeof(uint32_t), &sysctl_settfp_policy, "I", "policy");
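
/*
 * Illustrative user-space sketch (assumes <sys/sysctl.h>): the policy above is
 * published as "kern.tfp.policy". Reading is unprivileged; writing requires
 * root and accepts only the KERN_TFP_POLICY_DENY / KERN_TFP_POLICY_DEFAULT
 * values (anything else returns EINVAL).
 *
 *	#include <stdio.h>
 *	#include <sys/sysctl.h>
 *
 *	int policy;
 *	size_t len = sizeof(policy);
 *	if (sysctlbyname("kern.tfp.policy", &policy, &len, NULL, 0) == 0)
 *		printf("tfp policy: %d\n", policy);
 */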

SYSCTL_INT(_vm, OID_AUTO, shared_region_trace_level, CTLFLAG_RW | CTLFLAG_LOCKED,
	   &shared_region_trace_level, 0, "");
SYSCTL_INT(_vm, OID_AUTO, shared_region_version, CTLFLAG_RD | CTLFLAG_LOCKED,
	   &shared_region_version, 0, "");
SYSCTL_INT(_vm, OID_AUTO, shared_region_persistence, CTLFLAG_RW | CTLFLAG_LOCKED,
	   &shared_region_persistence, 0, "");

/*
 * shared_region_check_np:
 *
 * This system call is intended for dyld.
 *
 * dyld calls this when any process starts to see if the process's shared
 * region is already set up and ready to use.
 * This call returns the base address of the first mapping in the
 * process's shared region.
 * dyld will then check what's mapped at that address.
 *
 * If the shared region is empty, dyld will then attempt to map the shared
 * cache file in the shared region via the shared_region_map_np() system call.
 *
 * If something's already mapped in the shared region, dyld will check if it
 * matches the shared cache it would like to use for that process.
 * If it matches, everything's ready and the process can proceed and use the
 * shared region.
 * If it doesn't match, dyld will unmap the shared region and map the shared
 * cache into the process's address space via mmap().
 *
 * ERROR VALUES
 * EINVAL	no shared region
 * ENOMEM	shared region is empty
 * EFAULT	bad address for "start_address"
 */
int
shared_region_check_np(
	__unused struct proc			*p,
	struct shared_region_check_np_args	*uap,
	__unused int				*retvalp)
{
	vm_shared_region_t	shared_region;
	mach_vm_offset_t	start_address = 0;
	int			error;
	kern_return_t		kr;

	SHARED_REGION_TRACE_DEBUG(
		("shared_region: %p [%d(%s)] -> check_np(0x%llx)\n",
		 (void *)VM_KERNEL_ADDRPERM(current_thread()),
		 p->p_pid, p->p_comm,
		 (uint64_t)uap->start_address));

	/* retrieve the current task's shared region */
	shared_region = vm_shared_region_get(current_task());
	if (shared_region != NULL) {
		/* retrieve address of its first mapping... */
		kr = vm_shared_region_start_address(shared_region,
						    &start_address);
		if (kr != KERN_SUCCESS) {
			error = ENOMEM;
		} else {
			/* ... and give it to the caller */
			error = copyout(&start_address,
					(user_addr_t) uap->start_address,
					sizeof (start_address));
			if (error) {
				SHARED_REGION_TRACE_ERROR(
					("shared_region: %p [%d(%s)] "
					 "check_np(0x%llx) "
					 "copyout(0x%llx) error %d\n",
					 (void *)VM_KERNEL_ADDRPERM(current_thread()),
					 p->p_pid, p->p_comm,
					 (uint64_t)uap->start_address, (uint64_t)start_address,
					 error));
			}
		}
		vm_shared_region_deallocate(shared_region);
	} else {
		/* no shared region ! */
		error = EINVAL;
	}

	SHARED_REGION_TRACE_DEBUG(
		("shared_region: %p [%d(%s)] check_np(0x%llx) <- 0x%llx %d\n",
		 (void *)VM_KERNEL_ADDRPERM(current_thread()),
		 p->p_pid, p->p_comm,
		 (uint64_t)uap->start_address, (uint64_t)start_address, error));

	return error;
}
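
/*
 * Illustrative sketch of the dyld side (the user-space stub is an
 * assumption): dyld passes the address of a local and, on success, that
 * local holds the base of the shared region's first mapping.
 *
 *	uint64_t base = 0;
 *	int err = shared_region_check_np(&base);
 *	if (err == 0) {
 *		// something is mapped at "base": verify it is the right cache
 *	} else {
 *		// EINVAL: no shared region; ENOMEM: region still empty
 *	}
 */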

int
shared_region_copyin_mappings(
		struct proc			*p,
		user_addr_t			user_mappings,
		unsigned int			mappings_count,
		struct shared_file_mapping_np	*mappings)
{
	int		error = 0;
	vm_size_t	mappings_size = 0;

	/* get the list of mappings the caller wants us to establish */
	mappings_size = (vm_size_t) (mappings_count * sizeof (mappings[0]));
	error = copyin(user_mappings,
		       mappings,
		       mappings_size);
	if (error) {
		SHARED_REGION_TRACE_ERROR(
			("shared_region: %p [%d(%s)] map(): "
			 "copyin(0x%llx, %d) failed (error=%d)\n",
			 (void *)VM_KERNEL_ADDRPERM(current_thread()),
			 p->p_pid, p->p_comm,
			 (uint64_t)user_mappings, mappings_count, error));
	}
	return error;
}

/*
 * shared_region_map_np()
 *
 * This system call is intended for dyld.
 *
 * dyld uses this to map a shared cache file into a shared region.
 * This is usually done only the first time a shared cache is needed.
 * Subsequent processes will just use the populated shared region without
 * requiring any further setup.
 */
int
_shared_region_map_and_slide(
	struct proc				*p,
	int					fd,
	uint32_t				mappings_count,
	struct shared_file_mapping_np		*mappings,
	uint32_t				slide,
	user_addr_t				slide_start,
	user_addr_t				slide_size)
{
	int				error;
	kern_return_t			kr;
	struct fileproc			*fp;
	struct vnode			*vp, *root_vp, *scdir_vp;
	struct vnode_attr		va;
	off_t				fs;
	memory_object_size_t		file_size;
#if CONFIG_MACF
	vm_prot_t			maxprot = VM_PROT_ALL;
#endif
	memory_object_control_t		file_control;
	struct vm_shared_region		*shared_region;

	SHARED_REGION_TRACE_DEBUG(
		("shared_region: %p [%d(%s)] -> map\n",
		 (void *)VM_KERNEL_ADDRPERM(current_thread()),
		 p->p_pid, p->p_comm));

	shared_region = NULL;
	fp = NULL;
	vp = NULL;
	scdir_vp = NULL;

	/* get file structure from file descriptor */
	error = fp_lookup(p, fd, &fp, 0);
	if (error) {
		SHARED_REGION_TRACE_ERROR(
			("shared_region: %p [%d(%s)] map: "
			 "fd=%d lookup failed (error=%d)\n",
			 (void *)VM_KERNEL_ADDRPERM(current_thread()),
			 p->p_pid, p->p_comm, fd, error));
		goto done;
	}

	/* make sure we're attempting to map a vnode */
	if (FILEGLOB_DTYPE(fp->f_fglob) != DTYPE_VNODE) {
		SHARED_REGION_TRACE_ERROR(
			("shared_region: %p [%d(%s)] map: "
			 "fd=%d not a vnode (type=%d)\n",
			 (void *)VM_KERNEL_ADDRPERM(current_thread()),
			 p->p_pid, p->p_comm,
			 fd, FILEGLOB_DTYPE(fp->f_fglob)));
		error = EINVAL;
		goto done;
	}

	/* we need at least read permission on the file */
	if (! (fp->f_fglob->fg_flag & FREAD)) {
		SHARED_REGION_TRACE_ERROR(
			("shared_region: %p [%d(%s)] map: "
			 "fd=%d not readable\n",
			 (void *)VM_KERNEL_ADDRPERM(current_thread()),
			 p->p_pid, p->p_comm, fd));
		error = EPERM;
		goto done;
	}

	/* get vnode from file structure */
	error = vnode_getwithref((vnode_t) fp->f_fglob->fg_data);
	if (error) {
		SHARED_REGION_TRACE_ERROR(
			("shared_region: %p [%d(%s)] map: "
			 "fd=%d getwithref failed (error=%d)\n",
			 (void *)VM_KERNEL_ADDRPERM(current_thread()),
			 p->p_pid, p->p_comm, fd, error));
		goto done;
	}
	vp = (struct vnode *) fp->f_fglob->fg_data;

	/* make sure the vnode is a regular file */
	if (vp->v_type != VREG) {
		SHARED_REGION_TRACE_ERROR(
			("shared_region: %p [%d(%s)] map(%p:'%s'): "
			 "not a file (type=%d)\n",
			 (void *)VM_KERNEL_ADDRPERM(current_thread()),
			 p->p_pid, p->p_comm,
			 (void *)VM_KERNEL_ADDRPERM(vp),
			 vp->v_name, vp->v_type));
		error = EINVAL;
		goto done;
	}

#if CONFIG_MACF
	error = mac_file_check_mmap(vfs_context_ucred(vfs_context_current()),
			fp->f_fglob, VM_PROT_ALL, MAP_FILE, &maxprot);
	if (error) {
		goto done;
	}
#endif /* MAC */

#if CONFIG_PROTECT
	/* check for content protection access */
	{
		error = cp_handle_vnop(vp, CP_READ_ACCESS | CP_WRITE_ACCESS, 0);
		if (error) {
			goto done;
		}
	}
#endif /* CONFIG_PROTECT */

	/* make sure vnode is on the process's root volume */
	root_vp = p->p_fd->fd_rdir;
	if (root_vp == NULL) {
		root_vp = rootvnode;
	} else {
		/*
		 * Chroot-ed processes can't use the shared_region.
		 */
		error = EINVAL;
		goto done;
	}

	if (vp->v_mount != root_vp->v_mount) {
		SHARED_REGION_TRACE_ERROR(
			("shared_region: %p [%d(%s)] map(%p:'%s'): "
			 "not on process's root volume\n",
			 (void *)VM_KERNEL_ADDRPERM(current_thread()),
			 p->p_pid, p->p_comm,
			 (void *)VM_KERNEL_ADDRPERM(vp), vp->v_name));
		error = EPERM;
		goto done;
	}

	/* make sure vnode is owned by "root" */
	VATTR_INIT(&va);
	VATTR_WANTED(&va, va_uid);
	error = vnode_getattr(vp, &va, vfs_context_current());
	if (error) {
		SHARED_REGION_TRACE_ERROR(
			("shared_region: %p [%d(%s)] map(%p:'%s'): "
			 "vnode_getattr(%p) failed (error=%d)\n",
			 (void *)VM_KERNEL_ADDRPERM(current_thread()),
			 p->p_pid, p->p_comm,
			 (void *)VM_KERNEL_ADDRPERM(vp), vp->v_name,
			 (void *)VM_KERNEL_ADDRPERM(vp), error));
		goto done;
	}
	if (va.va_uid != 0) {
		SHARED_REGION_TRACE_ERROR(
			("shared_region: %p [%d(%s)] map(%p:'%s'): "
			 "owned by uid=%d instead of 0\n",
			 (void *)VM_KERNEL_ADDRPERM(current_thread()),
			 p->p_pid, p->p_comm,
			 (void *)VM_KERNEL_ADDRPERM(vp),
			 vp->v_name, va.va_uid));
		error = EPERM;
		goto done;
	}

	if (scdir_enforce) {
		/* get vnode for scdir_path */
		error = vnode_lookup(scdir_path, 0, &scdir_vp, vfs_context_current());
		if (error) {
			SHARED_REGION_TRACE_ERROR(
				("shared_region: %p [%d(%s)] map(%p:'%s'): "
				 "vnode_lookup(%s) failed (error=%d)\n",
				 (void *)VM_KERNEL_ADDRPERM(current_thread()),
				 p->p_pid, p->p_comm,
				 (void *)VM_KERNEL_ADDRPERM(vp), vp->v_name,
				 scdir_path, error));
			goto done;
		}

		/* ensure parent is scdir_vp */
		if (vnode_parent(vp) != scdir_vp) {
			SHARED_REGION_TRACE_ERROR(
				("shared_region: %p [%d(%s)] map(%p:'%s'): "
				 "shared cache file not in %s\n",
				 (void *)VM_KERNEL_ADDRPERM(current_thread()),
				 p->p_pid, p->p_comm,
				 (void *)VM_KERNEL_ADDRPERM(vp),
				 vp->v_name, scdir_path));
			error = EPERM;
			goto done;
		}
	}

	/* get vnode size */
	error = vnode_size(vp, &fs, vfs_context_current());
	if (error) {
		SHARED_REGION_TRACE_ERROR(
			("shared_region: %p [%d(%s)] map(%p:'%s'): "
			 "vnode_size(%p) failed (error=%d)\n",
			 (void *)VM_KERNEL_ADDRPERM(current_thread()),
			 p->p_pid, p->p_comm,
			 (void *)VM_KERNEL_ADDRPERM(vp), vp->v_name,
			 (void *)VM_KERNEL_ADDRPERM(vp), error));
		goto done;
	}
	file_size = fs;

	/* get the file's memory object handle */
	file_control = ubc_getobject(vp, UBC_HOLDOBJECT);
	if (file_control == MEMORY_OBJECT_CONTROL_NULL) {
		SHARED_REGION_TRACE_ERROR(
			("shared_region: %p [%d(%s)] map(%p:'%s'): "
			 "no memory object\n",
			 (void *)VM_KERNEL_ADDRPERM(current_thread()),
			 p->p_pid, p->p_comm,
			 (void *)VM_KERNEL_ADDRPERM(vp), vp->v_name));
		error = EINVAL;
		goto done;
	}

	/* get the process's shared region (setup in vm_map_exec()) */
	shared_region = vm_shared_region_get(current_task());
	if (shared_region == NULL) {
		SHARED_REGION_TRACE_ERROR(
			("shared_region: %p [%d(%s)] map(%p:'%s'): "
			 "no shared region\n",
			 (void *)VM_KERNEL_ADDRPERM(current_thread()),
			 p->p_pid, p->p_comm,
			 (void *)VM_KERNEL_ADDRPERM(vp), vp->v_name));
		goto done;
	}

	/* map the file into that shared region's submap */
	kr = vm_shared_region_map_file(shared_region,
				       mappings_count,
				       mappings,
				       file_control,
				       file_size,
				       (void *) p->p_fd->fd_rdir,
				       slide,
				       slide_start,
				       slide_size);
	if (kr != KERN_SUCCESS) {
		SHARED_REGION_TRACE_ERROR(
			("shared_region: %p [%d(%s)] map(%p:'%s'): "
			 "vm_shared_region_map_file() failed kr=0x%x\n",
			 (void *)VM_KERNEL_ADDRPERM(current_thread()),
			 p->p_pid, p->p_comm,
			 (void *)VM_KERNEL_ADDRPERM(vp), vp->v_name, kr));
		switch (kr) {
		case KERN_INVALID_ADDRESS:
			error = EFAULT;
			break;
		case KERN_PROTECTION_FAILURE:
			error = EPERM;
			break;
		case KERN_NO_SPACE:
			error = ENOMEM;
			break;
		case KERN_FAILURE:
		case KERN_INVALID_ARGUMENT:
		default:
			error = EINVAL;
			break;
		}
		goto done;
	}

	error = 0;

	vnode_lock_spin(vp);

	vp->v_flag |= VSHARED_DYLD;

	vnode_unlock(vp);

	/* update the vnode's access time */
	if (! (vnode_vfsvisflags(vp) & MNT_NOATIME)) {
		VATTR_INIT(&va);
		nanotime(&va.va_access_time);
		VATTR_SET_ACTIVE(&va, va_access_time);
		vnode_setattr(vp, &va, vfs_context_current());
	}

	if (p->p_flag & P_NOSHLIB) {
		/* signal that this process is now using split libraries */
		OSBitAndAtomic(~((uint32_t)P_NOSHLIB), &p->p_flag);
	}

done:
	if (vp != NULL) {
		/*
		 * release the vnode...
		 * ubc_map() still holds it for us in the non-error case
		 */
		(void) vnode_put(vp);
		vp = NULL;
	}
	if (fp != NULL) {
		/* release the file descriptor */
		fp_drop(p, fd, fp, 0);
		fp = NULL;
	}
	if (scdir_vp != NULL) {
		(void)vnode_put(scdir_vp);
		scdir_vp = NULL;
	}

	if (shared_region != NULL) {
		vm_shared_region_deallocate(shared_region);
	}

	SHARED_REGION_TRACE_DEBUG(
		("shared_region: %p [%d(%s)] <- map\n",
		 (void *)VM_KERNEL_ADDRPERM(current_thread()),
		 p->p_pid, p->p_comm));

	return error;
}

int
shared_region_map_and_slide_np(
	struct proc				*p,
	struct shared_region_map_and_slide_np_args	*uap,
	__unused int					*retvalp)
{
	struct shared_file_mapping_np	*mappings;
	unsigned int			mappings_count = uap->count;
	kern_return_t			kr = KERN_SUCCESS;
	uint32_t			slide = uap->slide;

#define SFM_MAX_STACK	8
	struct shared_file_mapping_np	stack_mappings[SFM_MAX_STACK];

	/* Is the process chrooted? */
	if (p->p_fd->fd_rdir != NULL) {
		kr = EINVAL;
		goto done;
	}

	if ((kr = vm_shared_region_sliding_valid(slide)) != KERN_SUCCESS) {
		if (kr == KERN_INVALID_ARGUMENT) {
			/*
			 * This will happen if we request sliding again
			 * with the same slide value that was used earlier
			 * for the very first sliding.
			 */
			kr = KERN_SUCCESS;
		}
		goto done;
	}

	if (mappings_count == 0) {
		SHARED_REGION_TRACE_INFO(
			("shared_region: %p [%d(%s)] map(): "
			 "no mappings\n",
			 (void *)VM_KERNEL_ADDRPERM(current_thread()),
			 p->p_pid, p->p_comm));
		kr = 0;	/* no mappings: we're done! */
		goto done;
	} else if (mappings_count <= SFM_MAX_STACK) {
		mappings = &stack_mappings[0];
	} else {
		SHARED_REGION_TRACE_ERROR(
			("shared_region: %p [%d(%s)] map(): "
			 "too many mappings (%d)\n",
			 (void *)VM_KERNEL_ADDRPERM(current_thread()),
			 p->p_pid, p->p_comm,
			 mappings_count));
		kr = KERN_FAILURE;
		goto done;
	}

	if ((kr = shared_region_copyin_mappings(p, uap->mappings, uap->count, mappings))) {
		goto done;
	}

	kr = _shared_region_map_and_slide(p, uap->fd, mappings_count, mappings,
					  slide,
					  uap->slide_start, uap->slide_size);
	if (kr != KERN_SUCCESS) {
		return kr;
	}

done:
	return kr;
}
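
/*
 * Illustrative user-space sketch (hypothetical values; the "__..._np" stub
 * name is an assumption): each entry describes one segment of the shared
 * cache file, and dyld passes slide == 0 with empty slide_start/slide_size
 * when no sliding is requested.
 *
 *	struct shared_file_mapping_np m = {
 *		.sfm_address     = base,	// target address in the region
 *		.sfm_size        = seg_size,
 *		.sfm_file_offset = seg_fileoff,
 *		.sfm_max_prot    = VM_PROT_READ | VM_PROT_EXECUTE,
 *		.sfm_init_prot   = VM_PROT_READ | VM_PROT_EXECUTE,
 *	};
 *	int err = __shared_region_map_and_slide_np(fd, 1, &m, 0, NULL, 0);
 */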

/* sysctl overflow room */

SYSCTL_INT (_vm, OID_AUTO, pagesize, CTLFLAG_RD | CTLFLAG_LOCKED,
	    (int *) &page_size, 0, "vm page size");

/*
 * vm_page_free_target is provided as a makeshift solution for applications
 * that want to allocate buffer space, possibly purgeable memory, but not
 * cause inactive pages to be reclaimed. It allows the app to calculate how
 * much memory is free outside the free target.
 */
extern unsigned int	vm_page_free_target;
SYSCTL_INT(_vm, OID_AUTO, vm_page_free_target, CTLFLAG_RD | CTLFLAG_LOCKED,
		   &vm_page_free_target, 0, "Pageout daemon free target");

extern unsigned int	vm_memory_pressure;
SYSCTL_INT(_vm, OID_AUTO, memory_pressure, CTLFLAG_RD | CTLFLAG_LOCKED,
	   &vm_memory_pressure, 0, "Memory pressure indicator");
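
/*
 * Illustrative user-space sketch (assumes <sys/sysctl.h>): these read-only
 * counters are plain integers, so one sysctlbyname() call reads each.
 *
 *	unsigned int free_target = 0;
 *	size_t len = sizeof(free_target);
 *	sysctlbyname("vm.vm_page_free_target", &free_target, &len, NULL, 0);
 */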

static int
vm_ctl_page_free_wanted SYSCTL_HANDLER_ARGS
{
#pragma unused(oidp, arg1, arg2)
	unsigned int page_free_wanted;

	page_free_wanted = mach_vm_ctl_page_free_wanted();
	return SYSCTL_OUT(req, &page_free_wanted, sizeof (page_free_wanted));
}
SYSCTL_PROC(_vm, OID_AUTO, page_free_wanted,
	    CTLTYPE_INT | CTLFLAG_RD | CTLFLAG_LOCKED,
	    0, 0, vm_ctl_page_free_wanted, "I", "");

extern unsigned int	vm_page_purgeable_count;
SYSCTL_INT(_vm, OID_AUTO, page_purgeable_count, CTLFLAG_RD | CTLFLAG_LOCKED,
	   &vm_page_purgeable_count, 0, "Purgeable page count");

extern unsigned int	vm_page_purgeable_wired_count;
SYSCTL_INT(_vm, OID_AUTO, page_purgeable_wired_count, CTLFLAG_RD | CTLFLAG_LOCKED,
	   &vm_page_purgeable_wired_count, 0, "Wired purgeable page count");

extern int madvise_free_debug;
SYSCTL_INT(_vm, OID_AUTO, madvise_free_debug, CTLFLAG_RW | CTLFLAG_LOCKED,
	   &madvise_free_debug, 0, "zero-fill on madvise(MADV_FREE*)");

SYSCTL_INT(_vm, OID_AUTO, page_reusable_count, CTLFLAG_RD | CTLFLAG_LOCKED,
	   &vm_page_stats_reusable.reusable_count, 0, "Reusable page count");
SYSCTL_QUAD(_vm, OID_AUTO, reusable_success, CTLFLAG_RD | CTLFLAG_LOCKED,
	   &vm_page_stats_reusable.reusable_pages_success, "");
SYSCTL_QUAD(_vm, OID_AUTO, reusable_failure, CTLFLAG_RD | CTLFLAG_LOCKED,
	   &vm_page_stats_reusable.reusable_pages_failure, "");
SYSCTL_QUAD(_vm, OID_AUTO, reusable_shared, CTLFLAG_RD | CTLFLAG_LOCKED,
	   &vm_page_stats_reusable.reusable_pages_shared, "");
SYSCTL_QUAD(_vm, OID_AUTO, all_reusable_calls, CTLFLAG_RD | CTLFLAG_LOCKED,
	   &vm_page_stats_reusable.all_reusable_calls, "");
SYSCTL_QUAD(_vm, OID_AUTO, partial_reusable_calls, CTLFLAG_RD | CTLFLAG_LOCKED,
	   &vm_page_stats_reusable.partial_reusable_calls, "");
SYSCTL_QUAD(_vm, OID_AUTO, reuse_success, CTLFLAG_RD | CTLFLAG_LOCKED,
	   &vm_page_stats_reusable.reuse_pages_success, "");
SYSCTL_QUAD(_vm, OID_AUTO, reuse_failure, CTLFLAG_RD | CTLFLAG_LOCKED,
	   &vm_page_stats_reusable.reuse_pages_failure, "");
SYSCTL_QUAD(_vm, OID_AUTO, all_reuse_calls, CTLFLAG_RD | CTLFLAG_LOCKED,
	   &vm_page_stats_reusable.all_reuse_calls, "");
SYSCTL_QUAD(_vm, OID_AUTO, partial_reuse_calls, CTLFLAG_RD | CTLFLAG_LOCKED,
	   &vm_page_stats_reusable.partial_reuse_calls, "");
SYSCTL_QUAD(_vm, OID_AUTO, can_reuse_success, CTLFLAG_RD | CTLFLAG_LOCKED,
	   &vm_page_stats_reusable.can_reuse_success, "");
SYSCTL_QUAD(_vm, OID_AUTO, can_reuse_failure, CTLFLAG_RD | CTLFLAG_LOCKED,
	   &vm_page_stats_reusable.can_reuse_failure, "");
SYSCTL_QUAD(_vm, OID_AUTO, reusable_reclaimed, CTLFLAG_RD | CTLFLAG_LOCKED,
	   &vm_page_stats_reusable.reusable_reclaimed, "");

extern unsigned int vm_page_free_count, vm_page_speculative_count;
SYSCTL_UINT(_vm, OID_AUTO, page_free_count, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_page_free_count, 0, "");
SYSCTL_UINT(_vm, OID_AUTO, page_speculative_count, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_page_speculative_count, 0, "");

extern unsigned int vm_page_cleaned_count;
SYSCTL_UINT(_vm, OID_AUTO, page_cleaned_count, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_page_cleaned_count, 0, "Cleaned queue size");

/* pageout counts */
extern unsigned int vm_pageout_inactive_dirty_internal, vm_pageout_inactive_dirty_external, vm_pageout_inactive_clean, vm_pageout_speculative_clean, vm_pageout_inactive_used;
extern unsigned int vm_pageout_freed_from_inactive_clean, vm_pageout_freed_from_speculative;
SYSCTL_UINT(_vm, OID_AUTO, pageout_inactive_dirty_internal, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_pageout_inactive_dirty_internal, 0, "");
SYSCTL_UINT(_vm, OID_AUTO, pageout_inactive_dirty_external, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_pageout_inactive_dirty_external, 0, "");
SYSCTL_UINT(_vm, OID_AUTO, pageout_inactive_clean, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_pageout_inactive_clean, 0, "");
SYSCTL_UINT(_vm, OID_AUTO, pageout_speculative_clean, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_pageout_speculative_clean, 0, "");
SYSCTL_UINT(_vm, OID_AUTO, pageout_inactive_used, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_pageout_inactive_used, 0, "");
SYSCTL_UINT(_vm, OID_AUTO, pageout_freed_from_inactive_clean, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_pageout_freed_from_inactive_clean, 0, "");
SYSCTL_UINT(_vm, OID_AUTO, pageout_freed_from_speculative, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_pageout_freed_from_speculative, 0, "");

extern unsigned int vm_pageout_freed_from_cleaned;
SYSCTL_UINT(_vm, OID_AUTO, pageout_freed_from_cleaned, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_pageout_freed_from_cleaned, 0, "");

/* counts of pages entering the cleaned queue */
extern unsigned int vm_pageout_enqueued_cleaned, vm_pageout_enqueued_cleaned_from_inactive_clean, vm_pageout_enqueued_cleaned_from_inactive_dirty;
SYSCTL_UINT(_vm, OID_AUTO, pageout_enqueued_cleaned, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_pageout_enqueued_cleaned, 0, ""); /* sum of next two */
SYSCTL_UINT(_vm, OID_AUTO, pageout_enqueued_cleaned_from_inactive_clean, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_pageout_enqueued_cleaned_from_inactive_clean, 0, "");
SYSCTL_UINT(_vm, OID_AUTO, pageout_enqueued_cleaned_from_inactive_dirty, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_pageout_enqueued_cleaned_from_inactive_dirty, 0, "");

/* counts of pages leaving the cleaned queue */
extern unsigned int vm_pageout_cleaned_reclaimed, vm_pageout_cleaned_reactivated, vm_pageout_cleaned_reference_reactivated, vm_pageout_cleaned_volatile_reactivated, vm_pageout_cleaned_fault_reactivated, vm_pageout_cleaned_commit_reactivated, vm_pageout_cleaned_busy, vm_pageout_cleaned_nolock;
SYSCTL_UINT(_vm, OID_AUTO, pageout_cleaned, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_pageout_cleaned_reclaimed, 0, "Cleaned pages reclaimed");
SYSCTL_UINT(_vm, OID_AUTO, pageout_cleaned_reactivated, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_pageout_cleaned_reactivated, 0, "Cleaned pages reactivated"); /* sum of all reactivated AND busy and nolock (even though those actually get reDEactivated) */
SYSCTL_UINT(_vm, OID_AUTO, pageout_cleaned_reference_reactivated, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_pageout_cleaned_reference_reactivated, 0, "Cleaned pages reference reactivated");
SYSCTL_UINT(_vm, OID_AUTO, pageout_cleaned_volatile_reactivated, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_pageout_cleaned_volatile_reactivated, 0, "Cleaned pages volatile reactivated");
SYSCTL_UINT(_vm, OID_AUTO, pageout_cleaned_fault_reactivated, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_pageout_cleaned_fault_reactivated, 0, "Cleaned pages fault reactivated");
SYSCTL_UINT(_vm, OID_AUTO, pageout_cleaned_commit_reactivated, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_pageout_cleaned_commit_reactivated, 0, "Cleaned pages commit reactivated");
SYSCTL_UINT(_vm, OID_AUTO, pageout_cleaned_busy, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_pageout_cleaned_busy, 0, "Cleaned pages busy (deactivated)");
SYSCTL_UINT(_vm, OID_AUTO, pageout_cleaned_nolock, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_pageout_cleaned_nolock, 0, "Cleaned pages no-lock (deactivated)");

/* counts of pages prefaulted when entering a memory object */
extern int64_t vm_prefault_nb_pages, vm_prefault_nb_bailout;
SYSCTL_QUAD(_vm, OID_AUTO, prefault_nb_pages, CTLFLAG_RW | CTLFLAG_LOCKED, &vm_prefault_nb_pages, "");
SYSCTL_QUAD(_vm, OID_AUTO, prefault_nb_bailout, CTLFLAG_RW | CTLFLAG_LOCKED, &vm_prefault_nb_bailout, "");

#include <kern/thread.h>
#include <sys/user.h>

void vm_pageout_io_throttle(void);

void
vm_pageout_io_throttle(void)
{
	struct uthread *uthread = get_bsdthread_info(current_thread());

	/*
	 * thread is marked as a low priority I/O type
	 * and the I/O we issued while in this cleaning operation
	 * collided with normal I/O operations... we'll
	 * delay in order to mitigate the impact of this
	 * task on the normal operation of the system
	 */
	if (uthread->uu_lowpri_window) {
		throttle_lowpri_io(1);
	}
}

int
vm_pressure_monitor(
	__unused struct proc *p,
	struct vm_pressure_monitor_args *uap,
	int *retval)
{
	kern_return_t	kr;
	uint32_t	pages_reclaimed;
	uint32_t	pages_wanted;

	kr = mach_vm_pressure_monitor(
		(boolean_t) uap->wait_for_pressure,
		uap->nsecs_monitored,
		(uap->pages_reclaimed) ? &pages_reclaimed : NULL,
		&pages_wanted);

	switch (kr) {
	case KERN_SUCCESS:
		break;
	case KERN_ABORTED:
		return EINTR;
	default:
		return EINVAL;
	}

	if (uap->pages_reclaimed) {
		if (copyout((void *)&pages_reclaimed,
			    uap->pages_reclaimed,
			    sizeof (pages_reclaimed)) != 0) {
			return EFAULT;
		}
	}

	*retval = (int) pages_wanted;
	return 0;
}
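
/*
 * Illustrative user-space sketch (the libc stub's exact declaration is an
 * assumption): with wait_for_pressure set, the call blocks until the pageout
 * daemon wants pages; *retval above becomes the syscall's return value.
 *
 *	uint32_t reclaimed = 0;
 *	int wanted = vm_pressure_monitor(1, 0, &reclaimed);	// 1: block
 *	if (wanted > 0) {
 *		// release roughly "wanted" pages worth of caches
 *	}
 */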

int
kas_info(struct proc *p,
	 struct kas_info_args *uap,
	 int *retval __unused)
{
#ifdef SECURE_KERNEL
	(void)p;
	(void)uap;
	return ENOTSUP;
#else /* !SECURE_KERNEL */
	int		selector = uap->selector;
	user_addr_t	valuep = uap->value;
	user_addr_t	sizep = uap->size;
	user_size_t	size;
	int		error;

	if (!kauth_cred_issuser(kauth_cred_get())) {
		return EPERM;
	}

#if CONFIG_MACF
	error = mac_system_check_kas_info(kauth_cred_get(), selector);
	if (error) {
		return error;
	}
#endif

	if (IS_64BIT_PROCESS(p)) {
		user64_size_t size64;
		error = copyin(sizep, &size64, sizeof(size64));
		size = (user_size_t)size64;
	} else {
		user32_size_t size32;
		error = copyin(sizep, &size32, sizeof(size32));
		size = (user_size_t)size32;
	}
	if (error) {
		return error;
	}

	switch (selector) {
		case KAS_INFO_KERNEL_TEXT_SLIDE_SELECTOR:
			{
				uint64_t slide = vm_kernel_slide;

				if (sizeof(slide) != size) {
					return EINVAL;
				}

				if (IS_64BIT_PROCESS(p)) {
					user64_size_t size64 = (user64_size_t)size;
					error = copyout(&size64, sizep, sizeof(size64));
				} else {
					user32_size_t size32 = (user32_size_t)size;
					error = copyout(&size32, sizep, sizeof(size32));
				}
				if (error) {
					return error;
				}

				error = copyout(&slide, valuep, sizeof(slide));
				if (error) {
					return error;
				}
			}
			break;
		default:
			return EINVAL;
	}

	return 0;
#endif /* !SECURE_KERNEL */
}
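
/*
 * Illustrative user-space sketch (root only; assumes <sys/kas_info.h>):
 *
 *	#include <stdio.h>
 *	#include <sys/kas_info.h>
 *
 *	uint64_t slide = 0;
 *	size_t size = sizeof(slide);
 *	if (kas_info(KAS_INFO_KERNEL_TEXT_SLIDE_SELECTOR, &slide, &size) == 0)
 *		printf("kernel text slide: 0x%llx\n", slide);
 */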