1/*
2 * Copyright (c) 2000-2010 Apple Inc. All rights reserved.
3 *
4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5 *
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. The rights granted to you under the License
10 * may not be used to create, or enable the creation or redistribution of,
11 * unlawful or unlicensed copies of an Apple operating system, or to
12 * circumvent, violate, or enable the circumvention or violation of, any
13 * terms of an Apple operating system software license agreement.
14 *
15 * Please obtain a copy of the License at
16 * http://www.opensource.apple.com/apsl/ and read it before using this file.
17 *
18 * The Original Code and all software distributed under the License are
19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23 * Please see the License for the specific language governing rights and
24 * limitations under the License.
25 *
26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27 */
28/*
29 * @OSF_FREE_COPYRIGHT@
30 */
31/*
32 * Mach Operating System
33 * Copyright (c) 1991,1990,1989,1988 Carnegie Mellon University
34 * All Rights Reserved.
35 *
36 * Permission to use, copy, modify and distribute this software and its
37 * documentation is hereby granted, provided that both the copyright
38 * notice and this permission notice appear in all copies of the
39 * software, derivative works or modified versions, and any portions
40 * thereof, and that both notices appear in supporting documentation.
41 *
42 * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
43 * CONDITION.  CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND FOR
44 * ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
45 *
46 * Carnegie Mellon requests users of this software to return to
47 *
48 *  Software Distribution Coordinator  or  Software.Distribution@CS.CMU.EDU
49 *  School of Computer Science
50 *  Carnegie Mellon University
51 *  Pittsburgh PA 15213-3890
52 *
53 * any improvements or extensions that they make and grant Carnegie Mellon
54 * the rights to redistribute these changes.
55 */
56/*
57 *	File:	kern/task.c
58 *	Author:	Avadis Tevanian, Jr., Michael Wayne Young, David Golub,
59 *		David Black
60 *
61 *	Task management primitives implementation.
62 */
63/*
64 * Copyright (c) 1993 The University of Utah and
65 * the Computer Systems Laboratory (CSL).  All rights reserved.
66 *
67 * Permission to use, copy, modify and distribute this software and its
68 * documentation is hereby granted, provided that both the copyright
69 * notice and this permission notice appear in all copies of the
70 * software, derivative works or modified versions, and any portions
71 * thereof, and that both notices appear in supporting documentation.
72 *
73 * THE UNIVERSITY OF UTAH AND CSL ALLOW FREE USE OF THIS SOFTWARE IN ITS "AS
74 * IS" CONDITION.  THE UNIVERSITY OF UTAH AND CSL DISCLAIM ANY LIABILITY OF
75 * ANY KIND FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
76 *
77 * CSL requests users of this software to return to csl-dist@cs.utah.edu any
78 * improvements that they make and grant CSL redistribution rights.
79 *
80 */
81/*
82 * NOTICE: This file was modified by McAfee Research in 2004 to introduce
83 * support for mandatory and extensible security protections.  This notice
84 * is included in support of clause 2.2 (b) of the Apple Public License,
85 * Version 2.0.
86 * Copyright (c) 2005 SPARTA, Inc.
87 */
88
89#include <fast_tas.h>
90#include <platforms.h>
91
92#include <mach/mach_types.h>
93#include <mach/boolean.h>
94#include <mach/host_priv.h>
95#include <mach/machine/vm_types.h>
96#include <mach/vm_param.h>
97#include <mach/semaphore.h>
98#include <mach/task_info.h>
99#include <mach/task_special_ports.h>
100
101#include <ipc/ipc_types.h>
102#include <ipc/ipc_space.h>
103#include <ipc/ipc_entry.h>
104#include <ipc/ipc_hash.h>
105
106#include <kern/kern_types.h>
107#include <kern/mach_param.h>
108#include <kern/misc_protos.h>
109#include <kern/task.h>
110#include <kern/thread.h>
111#include <kern/zalloc.h>
112#include <kern/kalloc.h>
113#include <kern/processor.h>
114#include <kern/sched_prim.h>	/* for thread_wakeup */
115#include <kern/ipc_tt.h>
116#include <kern/host.h>
117#include <kern/clock.h>
118#include <kern/timer.h>
119#include <kern/assert.h>
120#include <kern/sync_lock.h>
121#include <kern/affinity.h>
122#include <kern/exc_resource.h>
123#if CONFIG_TELEMETRY
124#include <kern/telemetry.h>
125#endif
126
127#include <vm/pmap.h>
128#include <vm/vm_map.h>
129#include <vm/vm_kern.h>		/* for kernel_map, ipc_kernel_map */
130#include <vm/vm_pageout.h>
131#include <vm/vm_protos.h>
132#include <vm/vm_purgeable_internal.h>
133
134#include <sys/resource.h>
135/*
136 * Exported interfaces
137 */
138
139#include <mach/task_server.h>
140#include <mach/mach_host_server.h>
141#include <mach/host_security_server.h>
142#include <mach/mach_port_server.h>
143#include <mach/security_server.h>
144
145#include <vm/vm_shared_region.h>
146
147#if CONFIG_MACF_MACH
148#include <security/mac_mach_internal.h>
149#endif
150
151#if CONFIG_COUNTERS
152#include <pmc/pmc.h>
153#endif /* CONFIG_COUNTERS */
154
155#include <libkern/OSDebug.h>
156#include <libkern/OSAtomic.h>
157
158task_t			kernel_task;
159zone_t			task_zone;
160lck_attr_t      task_lck_attr;
161lck_grp_t       task_lck_grp;
162lck_grp_attr_t  task_lck_grp_attr;
163
164/* Flag set by core audio when audio is playing. Used to stifle EXC_RESOURCE generation when active. */
165int audio_active = 0;
166
167zinfo_usage_store_t tasks_tkm_private;
168zinfo_usage_store_t tasks_tkm_shared;
169
170/* A container to accumulate statistics for expired tasks */
171expired_task_statistics_t		dead_task_statistics;
172lck_spin_t		dead_task_statistics_lock;
173
174static ledger_template_t task_ledger_template = NULL;
175struct _task_ledger_indices task_ledgers __attribute__((used)) = {-1, -1, -1, -1, -1, -1, -1, -1, -1, -1};
176void init_task_ledgers(void);
177void task_footprint_exceeded(int warning, __unused const void *param0, __unused const void *param1);
178void task_wakeups_rate_exceeded(int warning, __unused const void *param0, __unused const void *param1);
179void __attribute__((noinline)) THIS_PROCESS_IS_CAUSING_TOO_MANY_WAKEUPS__SENDING_EXC_RESOURCE(void);
180void __attribute__((noinline)) THIS_PROCESS_CROSSED_HIGH_WATERMARK__SENDING_EXC_RESOURCE(int max_footprint_mb);
181int coredump(void *core_proc, int reserve_mb, int ignore_ulimit);
182
183kern_return_t task_suspend_internal(task_t);
184kern_return_t task_resume_internal(task_t);
185
186void proc_init_cpumon_params(void);
187
188// Warn tasks when they hit 80% of their memory limit.
189#define	PHYS_FOOTPRINT_WARNING_LEVEL 80
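/*
 * Illustrative example: with a 600 MB limit configured via the "max_task_pmem"
 * boot-arg (or kern.max_task_pmem in the device tree), the phys_footprint
 * ledger issues its warning callback once the task crosses 80% of the limit
 * (480 MB), and the limit callback (task_footprint_exceeded) fires at the
 * full 600 MB.
 */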
190
191#define TASK_WAKEUPS_MONITOR_DEFAULT_LIMIT		150 /* wakeups per second */
192#define TASK_WAKEUPS_MONITOR_DEFAULT_INTERVAL	300 /* in seconds. */
193
/*
 * Level (in terms of percentage of the limit) at which the wakeups monitor triggers telemetry.
 *
 * (i.e., when the task's wakeups rate exceeds 70% of the limit, start taking user
 *  stacktraces, aka micro-stackshots)
 */
200#define	TASK_WAKEUPS_MONITOR_DEFAULT_USTACKSHOTS_TRIGGER	70
201
202int task_wakeups_monitor_interval; /* In seconds. Time period over which wakeups rate is observed */
203int task_wakeups_monitor_rate;     /* In hz. Maximum allowable wakeups per task before EXC_RESOURCE is sent */
204
205int task_wakeups_monitor_ustackshots_trigger_pct; /* Percentage. Level at which we start gathering telemetry. */
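/*
 * Worked example with the defaults above (illustrative): wakeups are observed
 * over a 300-second window against a limit of 150 wakeups/sec.  Micro-stackshot
 * telemetry begins once the task's rate crosses 70% of the limit (105
 * wakeups/sec); sustaining more than the full limit over the window leads to
 * an EXC_RESOURCE exception, unless suppressed via disable_exc_resource or
 * while audio is playing (audio_active).
 */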
206
int disable_exc_resource; /* Global override to suppress EXC_RESOURCE for resource monitor violations. */
208
209int max_task_footprint = 0; /* Per-task limit on physical memory consumption */
210int task_max = CONFIG_TASK_MAX; /* Max number of tasks */
211
212int hwm_user_cores = 0; /* high watermark violations generate user core files */
213
214#ifdef MACH_BSD
215extern void	proc_getexecutableuuid(void *, unsigned char *, unsigned long);
216extern int	proc_pid(struct proc *p);
217extern int	proc_selfpid(void);
218extern char	*proc_name_address(struct proc *p);
219#if CONFIG_JETSAM
220extern void	memorystatus_on_ledger_footprint_exceeded(int warning, const int max_footprint_mb);
221#endif
222#endif
223
224/* Forwards */
225
226void		task_hold_locked(
227			task_t		task);
228void		task_wait_locked(
229			task_t		task,
230			boolean_t	until_not_runnable);
231void		task_release_locked(
232			task_t		task);
233void		task_free(
234			task_t		task );
235void		task_synchronizer_destroy_all(
236			task_t		task);
237
238int check_for_tasksuspend(
239			task_t task);
240
241void
242task_backing_store_privileged(
243			task_t task)
244{
245	task_lock(task);
246	task->priv_flags |= VM_BACKING_STORE_PRIV;
247	task_unlock(task);
248	return;
249}
250
251
252void
253task_set_64bit(
254		task_t task,
255		boolean_t is64bit)
256{
257#if defined(__i386__) || defined(__x86_64__)
258	thread_t thread;
259#endif /* defined(__i386__) || defined(__x86_64__) */
260
261	task_lock(task);
262
263	if (is64bit) {
264		if (task_has_64BitAddr(task))
265			goto out;
266		task_set_64BitAddr(task);
267	} else {
268		if ( !task_has_64BitAddr(task))
269			goto out;
270		task_clear_64BitAddr(task);
271	}
272	/* FIXME: On x86, the thread save state flavor can diverge from the
273	 * task's 64-bit feature flag due to the 32-bit/64-bit register save
274	 * state dichotomy. Since we can be pre-empted in this interval,
275	 * certain routines may observe the thread as being in an inconsistent
276	 * state with respect to its task's 64-bitness.
277	 */
278
279#if defined(__i386__) || defined(__x86_64__)
280	queue_iterate(&task->threads, thread, thread_t, task_threads) {
281		thread_mtx_lock(thread);
282		machine_thread_switch_addrmode(thread);
283		thread_mtx_unlock(thread);
284	}
285#endif /* defined(__i386__) || defined(__x86_64__) */
286
287out:
288	task_unlock(task);
289}
290
291
292void
293task_set_dyld_info(task_t task, mach_vm_address_t addr, mach_vm_size_t size)
294{
295	task_lock(task);
296	task->all_image_info_addr = addr;
297	task->all_image_info_size = size;
298	task_unlock(task);
299}
300
301#if TASK_REFERENCE_LEAK_DEBUG
302#include <kern/btlog.h>
303
304decl_simple_lock_data(static,task_ref_lock);
305static btlog_t *task_ref_btlog;
306#define TASK_REF_OP_INCR	0x1
307#define TASK_REF_OP_DECR	0x2
308
309#define TASK_REF_BTDEPTH	7
310
311static void
312task_ref_lock_lock(void *context)
313{
314	simple_lock((simple_lock_t)context);
315}
316static void
317task_ref_lock_unlock(void *context)
318{
319	simple_unlock((simple_lock_t)context);
320}
321
322void
323task_reference_internal(task_t task)
324{
325	void *       bt[TASK_REF_BTDEPTH];
326	int             numsaved = 0;
327
328	numsaved = OSBacktrace(bt, TASK_REF_BTDEPTH);
329
330	(void)hw_atomic_add(&(task)->ref_count, 1);
331	btlog_add_entry(task_ref_btlog, task, TASK_REF_OP_INCR,
332					bt, numsaved);
333}
334
335uint32_t
336task_deallocate_internal(task_t task)
337{
338	void *       bt[TASK_REF_BTDEPTH];
339	int             numsaved = 0;
340
341	numsaved = OSBacktrace(bt, TASK_REF_BTDEPTH);
342
343	btlog_add_entry(task_ref_btlog, task, TASK_REF_OP_DECR,
344					bt, numsaved);
345	return hw_atomic_sub(&(task)->ref_count, 1);
346}
347
348#endif /* TASK_REFERENCE_LEAK_DEBUG */
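/*
 * With TASK_REFERENCE_LEAK_DEBUG enabled, every task reference increment and
 * decrement records a TASK_REF_BTDEPTH-deep backtrace in task_ref_btlog, keyed
 * by the task pointer.  A task whose increments are never matched by
 * decrements is a reference-leak candidate, and the log can be examined from a
 * kernel debugger; entries are purged in task_deallocate() once the task is
 * finally freed.
 */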
349
350void
351task_init(void)
352{
353
354	lck_grp_attr_setdefault(&task_lck_grp_attr);
355	lck_grp_init(&task_lck_grp, "task", &task_lck_grp_attr);
356	lck_attr_setdefault(&task_lck_attr);
357	lck_mtx_init(&tasks_threads_lock, &task_lck_grp, &task_lck_attr);
358
359	task_zone = zinit(
360			sizeof(struct task),
361			task_max * sizeof(struct task),
362			TASK_CHUNK * sizeof(struct task),
363			"tasks");
364
365	zone_change(task_zone, Z_NOENCRYPT, TRUE);
366
367	/*
368	 * Configure per-task memory limit. The boot arg takes precedence over the
369	 * device tree.
370	 */
371	if (!PE_parse_boot_argn("max_task_pmem", &max_task_footprint,
372			sizeof (max_task_footprint))) {
373		max_task_footprint = 0;
374	}
375
376	if (max_task_footprint == 0) {
377		/*
378		 * No limit was found in boot-args, so go look in the device tree.
379		 */
380		if (!PE_get_default("kern.max_task_pmem", &max_task_footprint,
381				sizeof(max_task_footprint))) {
382			max_task_footprint = 0;
383		}
384	}
385
386	if (max_task_footprint != 0) {
387#if CONFIG_JETSAM
388		if (max_task_footprint < 50) {
389				printf("Warning: max_task_pmem %d below minimum.\n",
390				max_task_footprint);
391				max_task_footprint = 50;
392		}
393		printf("Limiting task physical memory footprint to %d MB\n",
394			max_task_footprint);
395		max_task_footprint *= 1024 * 1024; // Convert MB to bytes
396#else
		printf("Warning: max_task_pmem specified, but jetsam not configured; ignoring.\n");
398#endif
399	}
400
401	if (!PE_parse_boot_argn("hwm_user_cores", &hwm_user_cores,
402			sizeof (hwm_user_cores))) {
403		hwm_user_cores = 0;
404	}
405
406	proc_init_cpumon_params();
407
408	if (!PE_parse_boot_argn("task_wakeups_monitor_rate", &task_wakeups_monitor_rate, sizeof (task_wakeups_monitor_rate))) {
409		task_wakeups_monitor_rate = TASK_WAKEUPS_MONITOR_DEFAULT_LIMIT;
410	}
411
412	if (!PE_parse_boot_argn("task_wakeups_monitor_interval", &task_wakeups_monitor_interval, sizeof (task_wakeups_monitor_interval))) {
413		task_wakeups_monitor_interval = TASK_WAKEUPS_MONITOR_DEFAULT_INTERVAL;
414	}
415
416	if (!PE_parse_boot_argn("task_wakeups_monitor_ustackshots_trigger_pct", &task_wakeups_monitor_ustackshots_trigger_pct,
417		sizeof (task_wakeups_monitor_ustackshots_trigger_pct))) {
418		task_wakeups_monitor_ustackshots_trigger_pct = TASK_WAKEUPS_MONITOR_DEFAULT_USTACKSHOTS_TRIGGER;
419	}
420
421	if (!PE_parse_boot_argn("disable_exc_resource", &disable_exc_resource,
422		sizeof (disable_exc_resource))) {
423		disable_exc_resource = 0;
424	}
425
426	init_task_ledgers();
427
428#if TASK_REFERENCE_LEAK_DEBUG
429	simple_lock_init(&task_ref_lock, 0);
430	task_ref_btlog = btlog_create(100000,
431								  TASK_REF_BTDEPTH,
432								  task_ref_lock_lock,
433								  task_ref_lock_unlock,
434								  &task_ref_lock);
435	assert(task_ref_btlog);
436#endif
437
438	/*
439	 * Create the kernel task as the first task.
440	 */
441#ifdef __LP64__
442	if (task_create_internal(TASK_NULL, FALSE, TRUE, &kernel_task) != KERN_SUCCESS)
443#else
444	if (task_create_internal(TASK_NULL, FALSE, FALSE, &kernel_task) != KERN_SUCCESS)
445#endif
446		panic("task_init\n");
447
448	vm_map_deallocate(kernel_task->map);
449	kernel_task->map = kernel_map;
450	lck_spin_init(&dead_task_statistics_lock, &task_lck_grp, &task_lck_attr);
451}
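/*
 * The knobs parsed above can be tuned from the boot-args string, for example
 * (illustrative values):
 *
 *	max_task_pmem=600 hwm_user_cores=1 task_wakeups_monitor_rate=150
 *	task_wakeups_monitor_interval=300 disable_exc_resource=1
 *
 * Anything not supplied falls back to the defaults established above.
 */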
452
/*
 * Create a task running in the kernel address space.  It may
 * have its own map of size map_size and may have ipc privileges.
 */
457kern_return_t
458kernel_task_create(
459	__unused task_t		parent_task,
460	__unused vm_offset_t		map_base,
461	__unused vm_size_t		map_size,
462	__unused task_t		*child_task)
463{
464	return (KERN_INVALID_ARGUMENT);
465}
466
467kern_return_t
468task_create(
469	task_t				parent_task,
470	__unused ledger_port_array_t	ledger_ports,
471	__unused mach_msg_type_number_t	num_ledger_ports,
472	__unused boolean_t		inherit_memory,
473	__unused task_t			*child_task)	/* OUT */
474{
475	if (parent_task == TASK_NULL)
476		return(KERN_INVALID_ARGUMENT);
477
478	/*
479	 * No longer supported: too many calls assume that a task has a valid
480	 * process attached.
481	 */
482	return(KERN_FAILURE);
483}
484
485kern_return_t
486host_security_create_task_token(
487	host_security_t			host_security,
488	task_t				parent_task,
489	__unused security_token_t	sec_token,
490	__unused audit_token_t		audit_token,
491	__unused host_priv_t		host_priv,
492	__unused ledger_port_array_t	ledger_ports,
493	__unused mach_msg_type_number_t	num_ledger_ports,
494	__unused boolean_t		inherit_memory,
495	__unused task_t			*child_task)	/* OUT */
496{
497	if (parent_task == TASK_NULL)
498		return(KERN_INVALID_ARGUMENT);
499
500	if (host_security == HOST_NULL)
501		return(KERN_INVALID_SECURITY);
502
503	/*
504	 * No longer supported.
505	 */
506	return(KERN_FAILURE);
507}
508
509/*
510 * Task ledgers
511 * ------------
512 *
513 * phys_footprint
514 *   Physical footprint: This is the sum of:
515 *     + phys_mem [task's resident memory]
516 *     + phys_compressed
517 *     + iokit_mem
518 *
519 * iokit_mem
520 *   IOKit mappings: The total size of all IOKit mappings in this task [regardless of clean/dirty state].
521 *
522 * phys_compressed
523 *   Physical compressed: Amount of this task's resident memory which is held by the compressor.
524 *   Such memory is no longer actually resident for the task [i.e., resident in its pmap],
525 *   and could be either decompressed back into memory, or paged out to storage, depending
526 *   on our implementation.
527 */
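/*
 * Worked example (illustrative): a task with 100 MB resident in its pmap
 * (phys_mem), 30 MB held by the compressor (phys_compressed) and 10 MB of
 * IOKit mappings (iokit_mem) has a phys_footprint of 140 MB; it is this
 * combined figure that is compared against the max_task_pmem limit.
 */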
528void
529init_task_ledgers(void)
530{
531	ledger_template_t t;
532
533	assert(task_ledger_template == NULL);
534	assert(kernel_task == TASK_NULL);
535
536	if ((t = ledger_template_create("Per-task ledger")) == NULL)
537		panic("couldn't create task ledger template");
538
539	task_ledgers.cpu_time = ledger_entry_add(t, "cpu_time", "sched", "ns");
540	task_ledgers.tkm_private = ledger_entry_add(t, "tkm_private",
541	    "physmem", "bytes");
542	task_ledgers.tkm_shared = ledger_entry_add(t, "tkm_shared", "physmem",
543	    "bytes");
544	task_ledgers.phys_mem = ledger_entry_add(t, "phys_mem", "physmem",
545	    "bytes");
546	task_ledgers.wired_mem = ledger_entry_add(t, "wired_mem", "physmem",
547	    "bytes");
	task_ledgers.iokit_mem = ledger_entry_add(t, "iokit_mem", "mappings",
	    "bytes");
	task_ledgers.phys_footprint = ledger_entry_add(t, "phys_footprint", "physmem",
	    "bytes");
	task_ledgers.phys_compressed = ledger_entry_add(t, "phys_compressed", "physmem",
	    "bytes");
	task_ledgers.platform_idle_wakeups = ledger_entry_add(t, "platform_idle_wakeups", "power",
	    "count");
	task_ledgers.interrupt_wakeups = ledger_entry_add(t, "interrupt_wakeups", "power",
	    "count");
558
559	if ((task_ledgers.cpu_time < 0) || (task_ledgers.tkm_private < 0) ||
560	    (task_ledgers.tkm_shared < 0) || (task_ledgers.phys_mem < 0) ||
561	    (task_ledgers.wired_mem < 0) || (task_ledgers.iokit_mem < 0) ||
562	    (task_ledgers.phys_footprint < 0) || (task_ledgers.phys_compressed < 0) ||
563	    (task_ledgers.platform_idle_wakeups < 0) || (task_ledgers.interrupt_wakeups < 0)) {
564		panic("couldn't create entries for task ledger template");
565	}
566
567	ledger_track_maximum(t, task_ledgers.phys_footprint, 60);
568
569#if CONFIG_JETSAM
570	ledger_set_callback(t, task_ledgers.phys_footprint, task_footprint_exceeded, NULL, NULL);
571#endif
572
573	ledger_set_callback(t, task_ledgers.interrupt_wakeups,
574		task_wakeups_rate_exceeded, NULL, NULL);
575
576	task_ledger_template = t;
577}
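/*
 * A minimal sketch of how the template is consumed (illustrative; these calls
 * appear in this file or in kern/ledger.h):
 *
 *	ledger_t l = ledger_instantiate(task_ledger_template,
 *					LEDGER_CREATE_ACTIVE_ENTRIES);
 *	ledger_credit(l, task_ledgers.phys_footprint, PAGE_SIZE);   (charge)
 *	ledger_debit(l, task_ledgers.phys_footprint, PAGE_SIZE);    (release)
 *	ledger_get_entries(l, task_ledgers.phys_footprint, &credit, &debit);
 *	ledger_dereference(l);
 *
 * task_create_internal() below instantiates one such ledger per task and hands
 * it to the pmap/vm layer, which performs the actual charging.
 */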
578
579kern_return_t
580task_create_internal(
581	task_t		parent_task,
582	boolean_t	inherit_memory,
583	boolean_t	is_64bit,
584	task_t		*child_task)		/* OUT */
585{
586	task_t			new_task;
587	vm_shared_region_t	shared_region;
588	ledger_t		ledger = NULL;
589
590	new_task = (task_t) zalloc(task_zone);
591
592	if (new_task == TASK_NULL)
593		return(KERN_RESOURCE_SHORTAGE);
594
595	/* one ref for just being alive; one for our caller */
596	new_task->ref_count = 2;
597
598	/* allocate with active entries */
599	assert(task_ledger_template != NULL);
600	if ((ledger = ledger_instantiate(task_ledger_template,
601			LEDGER_CREATE_ACTIVE_ENTRIES)) == NULL) {
602		zfree(task_zone, new_task);
603		return(KERN_RESOURCE_SHORTAGE);
604	}
605
606	new_task->ledger = ledger;
607
608	/* if inherit_memory is true, parent_task MUST not be NULL */
609	if (inherit_memory)
610		new_task->map = vm_map_fork(ledger, parent_task->map);
611	else
612		new_task->map = vm_map_create(pmap_create(ledger, 0, is_64bit),
613				(vm_map_offset_t)(VM_MIN_ADDRESS),
614				(vm_map_offset_t)(VM_MAX_ADDRESS), TRUE);
615
616	/* Inherit memlock limit from parent */
617	if (parent_task)
618		vm_map_set_user_wire_limit(new_task->map, (vm_size_t)parent_task->map->user_wire_limit);
619
620	lck_mtx_init(&new_task->lock, &task_lck_grp, &task_lck_attr);
621	queue_init(&new_task->threads);
622	new_task->suspend_count = 0;
623	new_task->thread_count = 0;
624	new_task->active_thread_count = 0;
625	new_task->user_stop_count = 0;
626	new_task->legacy_stop_count = 0;
627	new_task->active = TRUE;
628	new_task->halting = FALSE;
629	new_task->user_data = NULL;
630	new_task->faults = 0;
631	new_task->cow_faults = 0;
632	new_task->pageins = 0;
633	new_task->messages_sent = 0;
634	new_task->messages_received = 0;
635	new_task->syscalls_mach = 0;
636	new_task->priv_flags = 0;
637	new_task->syscalls_unix=0;
638	new_task->c_switch = new_task->p_switch = new_task->ps_switch = 0;
639	new_task->t_flags = 0;
640	new_task->importance = 0;
641
642	zinfo_task_init(new_task);
643
644#ifdef MACH_BSD
645	new_task->bsd_info = NULL;
646#endif /* MACH_BSD */
647
648#if CONFIG_JETSAM
649	if (max_task_footprint != 0) {
650		ledger_set_limit(ledger, task_ledgers.phys_footprint, max_task_footprint, PHYS_FOOTPRINT_WARNING_LEVEL);
651	}
652#endif
653
654	if (task_wakeups_monitor_rate != 0) {
655		uint32_t flags = WAKEMON_ENABLE | WAKEMON_SET_DEFAULTS;
656		int32_t  rate; // Ignored because of WAKEMON_SET_DEFAULTS
657		task_wakeups_monitor_ctl(new_task, &flags, &rate);
658	}
659
660#if defined(__i386__) || defined(__x86_64__)
661	new_task->i386_ldt = 0;
662#endif
663
664	new_task->task_debug = NULL;
665
666	queue_init(&new_task->semaphore_list);
667	new_task->semaphores_owned = 0;
668
669#if CONFIG_MACF_MACH
670	new_task->label = labelh_new(1);
671	mac_task_label_init (&new_task->maclabel);
672#endif
673
674	ipc_task_init(new_task, parent_task);
675
676	new_task->total_user_time = 0;
677	new_task->total_system_time = 0;
678
679	new_task->vtimers = 0;
680
681	new_task->shared_region = NULL;
682
683	new_task->affinity_space = NULL;
684
685#if CONFIG_COUNTERS
686	new_task->t_chud = 0U;
687#endif
688
689	new_task->pidsuspended = FALSE;
690	new_task->frozen = FALSE;
691	new_task->changing_freeze_state = FALSE;
692	new_task->rusage_cpu_flags = 0;
693	new_task->rusage_cpu_percentage = 0;
694	new_task->rusage_cpu_interval = 0;
695	new_task->rusage_cpu_deadline = 0;
696	new_task->rusage_cpu_callt = NULL;
697#if MACH_ASSERT
698	new_task->suspends_outstanding = 0;
699#endif
700
701
702	new_task->low_mem_notified_warn = 0;
703	new_task->low_mem_notified_critical = 0;
704	new_task->purged_memory_warn = 0;
705	new_task->purged_memory_critical = 0;
706	new_task->mem_notify_reserved = 0;
707#if IMPORTANCE_INHERITANCE
708	new_task->imp_receiver = 0;
709	new_task->imp_donor = 0;
710	new_task->imp_reserved = 0;
711	new_task->task_imp_assertcnt = 0;
712	new_task->task_imp_externcnt = 0;
713#endif /* IMPORTANCE_INHERITANCE */
714
715#if	defined(__x86_64__)
716	new_task->uexc_range_start = new_task->uexc_range_size = new_task->uexc_handler = 0;
717#endif
718
719	new_task->requested_policy = default_task_requested_policy;
720	new_task->effective_policy = default_task_effective_policy;
721	new_task->pended_policy    = default_task_pended_policy;
722
723	if (parent_task != TASK_NULL) {
724		new_task->sec_token = parent_task->sec_token;
725		new_task->audit_token = parent_task->audit_token;
726
727		/* inherit the parent's shared region */
728		shared_region = vm_shared_region_get(parent_task);
729		vm_shared_region_set(new_task, shared_region);
730
731		if(task_has_64BitAddr(parent_task))
732			task_set_64BitAddr(new_task);
733		new_task->all_image_info_addr = parent_task->all_image_info_addr;
734		new_task->all_image_info_size = parent_task->all_image_info_size;
735
736#if defined(__i386__) || defined(__x86_64__)
737		if (inherit_memory && parent_task->i386_ldt)
738			new_task->i386_ldt = user_ldt_copy(parent_task->i386_ldt);
739#endif
740		if (inherit_memory && parent_task->affinity_space)
741			task_affinity_create(parent_task, new_task);
742
743		new_task->pset_hint = parent_task->pset_hint = task_choose_pset(parent_task);
744
745#if IMPORTANCE_INHERITANCE
746		new_task->imp_donor = parent_task->imp_donor;
747		/* Embedded doesn't want this to inherit */
748		new_task->imp_receiver = parent_task->imp_receiver;
749#endif /* IMPORTANCE_INHERITANCE */
750
751		new_task->requested_policy.t_apptype     = parent_task->requested_policy.t_apptype;
752
753		new_task->requested_policy.int_darwinbg  = parent_task->requested_policy.int_darwinbg;
754		new_task->requested_policy.ext_darwinbg  = parent_task->requested_policy.ext_darwinbg;
755		new_task->requested_policy.int_iotier    = parent_task->requested_policy.int_iotier;
756		new_task->requested_policy.ext_iotier    = parent_task->requested_policy.ext_iotier;
757		new_task->requested_policy.int_iopassive = parent_task->requested_policy.int_iopassive;
758		new_task->requested_policy.ext_iopassive = parent_task->requested_policy.ext_iopassive;
759		new_task->requested_policy.bg_iotier     = parent_task->requested_policy.bg_iotier;
760		new_task->requested_policy.terminated    = parent_task->requested_policy.terminated;
761
762		task_policy_create(new_task, parent_task->requested_policy.t_boosted);
763	} else {
764		new_task->sec_token = KERNEL_SECURITY_TOKEN;
765		new_task->audit_token = KERNEL_AUDIT_TOKEN;
766#ifdef __LP64__
767		if(is_64bit)
768			task_set_64BitAddr(new_task);
769#endif
770		new_task->all_image_info_addr = (mach_vm_address_t)0;
771		new_task->all_image_info_size = (mach_vm_size_t)0;
772
773		new_task->pset_hint = PROCESSOR_SET_NULL;
774	}
775
776	if (kernel_task == TASK_NULL) {
777		new_task->priority = BASEPRI_KERNEL;
778		new_task->max_priority = MAXPRI_KERNEL;
779	} else if (proc_get_effective_task_policy(new_task, TASK_POLICY_LOWPRI_CPU)) {
780		new_task->priority = MAXPRI_THROTTLE;
781		new_task->max_priority = MAXPRI_THROTTLE;
782	} else {
783		new_task->priority = BASEPRI_DEFAULT;
784		new_task->max_priority = MAXPRI_USER;
785	}
786
787	bzero(&new_task->extmod_statistics, sizeof(new_task->extmod_statistics));
788	new_task->task_timer_wakeups_bin_1 = new_task->task_timer_wakeups_bin_2 = 0;
789	lck_mtx_lock(&tasks_threads_lock);
790	queue_enter(&tasks, new_task, task_t, tasks);
791	tasks_count++;
792	lck_mtx_unlock(&tasks_threads_lock);
793
794	if (vm_backing_store_low && parent_task != NULL)
795		new_task->priv_flags |= (parent_task->priv_flags&VM_BACKING_STORE_PRIV);
796
797	new_task->task_volatile_objects = 0;
798
799	ipc_task_enable(new_task);
800
801	*child_task = new_task;
802	return(KERN_SUCCESS);
803}
804
805/*
806 *	task_deallocate:
807 *
808 *	Drop a reference on a task.
809 */
810void
811task_deallocate(
812	task_t		task)
813{
814	ledger_amount_t credit, debit, interrupt_wakeups, platform_idle_wakeups;
815
816	if (task == TASK_NULL)
817	    return;
818
819	if (task_deallocate_internal(task) > 0)
820		return;
821
822	lck_mtx_lock(&tasks_threads_lock);
823	queue_remove(&terminated_tasks, task, task_t, tasks);
824	terminated_tasks_count--;
825	lck_mtx_unlock(&tasks_threads_lock);
826
827	/*
828	 *	Give the machine dependent code a chance
829	 *	to perform cleanup before ripping apart
830	 *	the task.
831	 */
832	machine_task_terminate(task);
833
834	ipc_task_terminate(task);
835
836	if (task->affinity_space)
837		task_affinity_deallocate(task);
838
839	vm_map_deallocate(task->map);
840	is_release(task->itk_space);
841
842	ledger_get_entries(task->ledger, task_ledgers.interrupt_wakeups,
843	                   &interrupt_wakeups, &debit);
844	ledger_get_entries(task->ledger, task_ledgers.platform_idle_wakeups,
845	                   &platform_idle_wakeups, &debit);
846
847	/* Accumulate statistics for dead tasks */
848	lck_spin_lock(&dead_task_statistics_lock);
849	dead_task_statistics.total_user_time += task->total_user_time;
850	dead_task_statistics.total_system_time += task->total_system_time;
851
852	dead_task_statistics.task_interrupt_wakeups += interrupt_wakeups;
853	dead_task_statistics.task_platform_idle_wakeups += platform_idle_wakeups;
854
855	dead_task_statistics.task_timer_wakeups_bin_1 += task->task_timer_wakeups_bin_1;
856	dead_task_statistics.task_timer_wakeups_bin_2 += task->task_timer_wakeups_bin_2;
857
858	lck_spin_unlock(&dead_task_statistics_lock);
859	lck_mtx_destroy(&task->lock, &task_lck_grp);
860
861#if CONFIG_MACF_MACH
862	labelh_release(task->label);
863#endif
864
865	if (!ledger_get_entries(task->ledger, task_ledgers.tkm_private, &credit,
866	    &debit)) {
867		OSAddAtomic64(credit, (int64_t *)&tasks_tkm_private.alloc);
868		OSAddAtomic64(debit, (int64_t *)&tasks_tkm_private.free);
869	}
870	if (!ledger_get_entries(task->ledger, task_ledgers.tkm_shared, &credit,
871	    &debit)) {
872		OSAddAtomic64(credit, (int64_t *)&tasks_tkm_shared.alloc);
873		OSAddAtomic64(debit, (int64_t *)&tasks_tkm_shared.free);
874	}
875	ledger_dereference(task->ledger);
876	zinfo_task_free(task);
877
878#if TASK_REFERENCE_LEAK_DEBUG
879	btlog_remove_entries_for_element(task_ref_btlog, task);
880#endif
881
882	if (task->task_volatile_objects) {
883		/*
884		 * This task still "owns" some volatile VM objects.
885		 * Disown them now to avoid leaving them pointing back at
886		 * an invalid task.
887		 */
888		vm_purgeable_disown(task);
889		assert(task->task_volatile_objects == 0);
890	}
891
892	zfree(task_zone, task);
893}
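/*
 * Lifetime note: task_terminate_internal() moves a dying task from the tasks
 * queue onto terminated_tasks and drops the task's reference on itself; the
 * teardown above only runs when the last outstanding reference goes away,
 * which is why the task is unlinked from terminated_tasks here rather than at
 * termination time.
 */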
894
895/*
896 *	task_name_deallocate:
897 *
898 *	Drop a reference on a task name.
899 */
900void
901task_name_deallocate(
902	task_name_t		task_name)
903{
904	return(task_deallocate((task_t)task_name));
905}
906
907/*
908 *	task_suspension_token_deallocate:
909 *
910 *	Drop a reference on a task suspension token.
911 */
912void
913task_suspension_token_deallocate(
914	task_suspension_token_t		token)
915{
916	return(task_deallocate((task_t)token));
917}
918
919/*
920 *	task_terminate:
921 *
922 *	Terminate the specified task.  See comments on thread_terminate
923 *	(kern/thread.c) about problems with terminating the "current task."
924 */
925
926kern_return_t
927task_terminate(
928	task_t		task)
929{
930	if (task == TASK_NULL)
931		return (KERN_INVALID_ARGUMENT);
932
933	if (task->bsd_info)
934		return (KERN_FAILURE);
935
936	return (task_terminate_internal(task));
937}
938
939kern_return_t
940task_terminate_internal(
941	task_t			task)
942{
943	thread_t			thread, self;
944	task_t				self_task;
945	boolean_t			interrupt_save;
946
947	assert(task != kernel_task);
948
949	self = current_thread();
950	self_task = self->task;
951
952	/*
953	 *	Get the task locked and make sure that we are not racing
954	 *	with someone else trying to terminate us.
955	 */
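	/*
	 *	(Both task locks are taken in ascending pointer order below, so
	 *	 two tasks trying to terminate each other cannot deadlock on
	 *	 each other's task lock.)
	 */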
956	if (task == self_task)
957		task_lock(task);
958	else
959	if (task < self_task) {
960		task_lock(task);
961		task_lock(self_task);
962	}
963	else {
964		task_lock(self_task);
965		task_lock(task);
966	}
967
968	if (!task->active) {
969		/*
970		 *	Task is already being terminated.
971		 *	Just return an error. If we are dying, this will
972		 *	just get us to our AST special handler and that
973		 *	will get us to finalize the termination of ourselves.
974		 */
975		task_unlock(task);
976		if (self_task != task)
977			task_unlock(self_task);
978
979		return (KERN_FAILURE);
980	}
981
982#if MACH_ASSERT
983	if (task->suspends_outstanding != 0) {
984		printf("WARNING: %s (%d) exiting with %d outstanding suspensions\n",
985			proc_name_address(task->bsd_info), proc_pid(task->bsd_info),
986			task->suspends_outstanding);
987	}
988#endif
989
990	if (self_task != task)
991		task_unlock(self_task);
992
993	/*
994	 * Make sure the current thread does not get aborted out of
995	 * the waits inside these operations.
996	 */
997	interrupt_save = thread_interrupt_level(THREAD_UNINT);
998
999	/*
1000	 *	Indicate that we want all the threads to stop executing
1001	 *	at user space by holding the task (we would have held
1002	 *	each thread independently in thread_terminate_internal -
1003	 *	but this way we may be more likely to already find it
1004	 *	held there).  Mark the task inactive, and prevent
1005	 *	further task operations via the task port.
1006	 */
1007	task_hold_locked(task);
1008	task->active = FALSE;
1009	ipc_task_disable(task);
1010
1011#if CONFIG_TELEMETRY
1012	/*
1013	 * Notify telemetry that this task is going away.
1014	 */
1015	telemetry_task_ctl_locked(task, TF_TELEMETRY, 0);
1016#endif
1017
1018	/*
1019	 *	Terminate each thread in the task.
1020	 */
1021	queue_iterate(&task->threads, thread, thread_t, task_threads) {
1022			thread_terminate_internal(thread);
1023	}
1024
1025	task_unlock(task);
1026
1027
1028	/*
1029	 *	Destroy all synchronizers owned by the task.
1030	 */
1031	task_synchronizer_destroy_all(task);
1032
1033	/*
1034	 *	Destroy the IPC space, leaving just a reference for it.
1035	 */
1036	ipc_space_terminate(task->itk_space);
1037
1038	if (vm_map_has_4GB_pagezero(task->map))
1039		vm_map_clear_4GB_pagezero(task->map);
1040
	/*
	 * If the current thread is a member of the task
	 * being terminated, then the last reference to
	 * the task will not be dropped until the thread
	 * is finally reaped.  To avoid incurring the
	 * expense of removing the address space regions
	 * at reap time, we do it explicitly here.
	 */
1049	vm_map_remove(task->map,
1050		      task->map->min_offset,
1051		      task->map->max_offset,
1052		      VM_MAP_NO_FLAGS);
1053
1054	/* release our shared region */
1055	vm_shared_region_set(task, NULL);
1056
1057	lck_mtx_lock(&tasks_threads_lock);
1058	queue_remove(&tasks, task, task_t, tasks);
1059	queue_enter(&terminated_tasks, task, task_t, tasks);
1060	tasks_count--;
1061	terminated_tasks_count++;
1062	lck_mtx_unlock(&tasks_threads_lock);
1063
1064	/*
1065	 * We no longer need to guard against being aborted, so restore
1066	 * the previous interruptible state.
1067	 */
1068	thread_interrupt_level(interrupt_save);
1069
1070	/*
1071	 * Get rid of the task active reference on itself.
1072	 */
1073	task_deallocate(task);
1074
1075	return (KERN_SUCCESS);
1076}
1077
1078/*
1079 * task_start_halt:
1080 *
1081 * 	Shut the current task down (except for the current thread) in
1082 *	preparation for dramatic changes to the task (probably exec).
1083 *	We hold the task and mark all other threads in the task for
1084 *	termination.
1085 */
1086kern_return_t
1087task_start_halt(
1088	task_t		task)
1089{
1090	thread_t	thread, self;
1091
1092	assert(task != kernel_task);
1093
1094	self = current_thread();
1095
1096	if (task != self->task)
1097		return (KERN_INVALID_ARGUMENT);
1098
1099	task_lock(task);
1100
1101	if (task->halting || !task->active || !self->active) {
1102		/*
1103		 *	Task or current thread is already being terminated.
1104		 *	Hurry up and return out of the current kernel context
1105		 *	so that we run our AST special handler to terminate
1106		 *	ourselves.
1107		 */
1108		task_unlock(task);
1109
1110		return (KERN_FAILURE);
1111	}
1112
1113	task->halting = TRUE;
1114
1115	if (task->thread_count > 1) {
1116
1117		/*
1118		 * Mark all the threads to keep them from starting any more
1119		 * user-level execution.  The thread_terminate_internal code
1120		 * would do this on a thread by thread basis anyway, but this
1121		 * gives us a better chance of not having to wait there.
1122		 */
1123		task_hold_locked(task);
1124
1125		/*
1126		 *	Terminate all the other threads in the task.
1127		 */
1128		queue_iterate(&task->threads, thread, thread_t, task_threads) {
1129			if (thread != self)
1130				thread_terminate_internal(thread);
1131		}
1132
1133		task_release_locked(task);
1134	}
1135	task_unlock(task);
1136	return KERN_SUCCESS;
1137}
1138
1139
1140/*
1141 * task_complete_halt:
1142 *
1143 *	Complete task halt by waiting for threads to terminate, then clean
1144 *	up task resources (VM, port namespace, etc...) and then let the
1145 *	current thread go in the (practically empty) task context.
1146 */
1147void
1148task_complete_halt(task_t task)
1149{
1150	task_lock(task);
1151	assert(task->halting);
1152	assert(task == current_task());
1153
1154	/*
1155	 *	Wait for the other threads to get shut down.
1156	 *      When the last other thread is reaped, we'll be
1157	 *	woken up.
1158	 */
1159	if (task->thread_count > 1) {
1160		assert_wait((event_t)&task->halting, THREAD_UNINT);
1161		task_unlock(task);
1162		thread_block(THREAD_CONTINUE_NULL);
1163	} else {
1164		task_unlock(task);
1165	}
1166
1167	/*
1168	 *	Give the machine dependent code a chance
1169	 *	to perform cleanup of task-level resources
1170	 *	associated with the current thread before
1171	 *	ripping apart the task.
1172	 */
1173	machine_task_terminate(task);
1174
1175	/*
1176	 *	Destroy all synchronizers owned by the task.
1177	 */
1178	task_synchronizer_destroy_all(task);
1179
1180	/*
1181	 *	Destroy the contents of the IPC space, leaving just
1182	 *	a reference for it.
1183	 */
1184	ipc_space_clean(task->itk_space);
1185
1186	/*
1187	 * Clean out the address space, as we are going to be
1188	 * getting a new one.
1189	 */
1190	vm_map_remove(task->map, task->map->min_offset,
1191		      task->map->max_offset, VM_MAP_NO_FLAGS);
1192
1193	task->halting = FALSE;
1194}
1195
/*
 *	task_hold_locked:
 *
 *	Suspend execution of the specified task.
 *	This is a recursive-style suspension of the task; a count of
 *	suspends is maintained.
 *
 * 	CONDITIONS: the task is locked and active.
 */
1205void
1206task_hold_locked(
1207	register task_t		task)
1208{
1209	register thread_t	thread;
1210
1211	assert(task->active);
1212
1213	if (task->suspend_count++ > 0)
1214		return;
1215
1216	/*
1217	 *	Iterate through all the threads and hold them.
1218	 */
1219	queue_iterate(&task->threads, thread, thread_t, task_threads) {
1220		thread_mtx_lock(thread);
1221		thread_hold(thread);
1222		thread_mtx_unlock(thread);
1223	}
1224}
1225
/*
 *	task_hold:
 *
 *	Same as the internal routine above, except that it must lock
 *	and verify that the task is active.  This differs from task_suspend
 *	in that it places a kernel hold on the task rather than just a
 *	user-level hold.  This keeps users from over-resuming and setting
 *	it running out from under the kernel.
 *
 * 	CONDITIONS: the caller holds a reference on the task
 */
1237kern_return_t
1238task_hold(
1239	register task_t		task)
1240{
1241	if (task == TASK_NULL)
1242		return (KERN_INVALID_ARGUMENT);
1243
1244	task_lock(task);
1245
1246	if (!task->active) {
1247		task_unlock(task);
1248
1249		return (KERN_FAILURE);
1250	}
1251
1252	task_hold_locked(task);
1253	task_unlock(task);
1254
1255	return (KERN_SUCCESS);
1256}
1257
1258kern_return_t
1259task_wait(
1260		task_t		task,
1261		boolean_t	until_not_runnable)
1262{
1263	if (task == TASK_NULL)
1264		return (KERN_INVALID_ARGUMENT);
1265
1266	task_lock(task);
1267
1268	if (!task->active) {
1269		task_unlock(task);
1270
1271		return (KERN_FAILURE);
1272	}
1273
1274	task_wait_locked(task, until_not_runnable);
1275	task_unlock(task);
1276
1277	return (KERN_SUCCESS);
1278}
1279
1280/*
1281 *	task_wait_locked:
1282 *
1283 *	Wait for all threads in task to stop.
1284 *
1285 * Conditions:
1286 *	Called with task locked, active, and held.
1287 */
1288void
1289task_wait_locked(
1290	register task_t		task,
1291	boolean_t		until_not_runnable)
1292{
1293	register thread_t	thread, self;
1294
1295	assert(task->active);
1296	assert(task->suspend_count > 0);
1297
1298	self = current_thread();
1299
1300	/*
1301	 *	Iterate through all the threads and wait for them to
1302	 *	stop.  Do not wait for the current thread if it is within
1303	 *	the task.
1304	 */
1305	queue_iterate(&task->threads, thread, thread_t, task_threads) {
1306		if (thread != self)
1307			thread_wait(thread, until_not_runnable);
1308	}
1309}
1310
1311/*
1312 *	task_release_locked:
1313 *
1314 *	Release a kernel hold on a task.
1315 *
1316 * 	CONDITIONS: the task is locked and active
1317 */
1318void
1319task_release_locked(
1320	register task_t		task)
1321{
1322	register thread_t	thread;
1323
1324	assert(task->active);
1325	assert(task->suspend_count > 0);
1326
1327	if (--task->suspend_count > 0)
1328		return;
1329
1330	queue_iterate(&task->threads, thread, thread_t, task_threads) {
1331		thread_mtx_lock(thread);
1332		thread_release(thread);
1333		thread_mtx_unlock(thread);
1334	}
1335}
1336
1337/*
1338 *	task_release:
1339 *
1340 *	Same as the internal routine above, except that it must lock
1341 *	and verify that the task is active.
1342 *
1343 * 	CONDITIONS: The caller holds a reference to the task
1344 */
1345kern_return_t
1346task_release(
1347	task_t		task)
1348{
1349	if (task == TASK_NULL)
1350		return (KERN_INVALID_ARGUMENT);
1351
1352	task_lock(task);
1353
1354	if (!task->active) {
1355		task_unlock(task);
1356
1357		return (KERN_FAILURE);
1358	}
1359
1360	task_release_locked(task);
1361	task_unlock(task);
1362
1363	return (KERN_SUCCESS);
1364}
1365
1366kern_return_t
1367task_threads(
1368	task_t					task,
1369	thread_act_array_t		*threads_out,
1370	mach_msg_type_number_t	*count)
1371{
1372	mach_msg_type_number_t	actual;
1373	thread_t				*thread_list;
1374	thread_t				thread;
1375	vm_size_t				size, size_needed;
1376	void					*addr;
1377	unsigned int			i, j;
1378
1379	if (task == TASK_NULL)
1380		return (KERN_INVALID_ARGUMENT);
1381
1382	size = 0; addr = NULL;
1383
1384	for (;;) {
1385		task_lock(task);
1386		if (!task->active) {
1387			task_unlock(task);
1388
1389			if (size != 0)
1390				kfree(addr, size);
1391
1392			return (KERN_FAILURE);
1393		}
1394
1395		actual = task->thread_count;
1396
1397		/* do we have the memory we need? */
1398		size_needed = actual * sizeof (mach_port_t);
1399		if (size_needed <= size)
1400			break;
1401
1402		/* unlock the task and allocate more memory */
1403		task_unlock(task);
1404
1405		if (size != 0)
1406			kfree(addr, size);
1407
1408		assert(size_needed > 0);
1409		size = size_needed;
1410
1411		addr = kalloc(size);
1412		if (addr == 0)
1413			return (KERN_RESOURCE_SHORTAGE);
1414	}
1415
1416	/* OK, have memory and the task is locked & active */
1417	thread_list = (thread_t *)addr;
1418
1419	i = j = 0;
1420
1421	for (thread = (thread_t)queue_first(&task->threads); i < actual;
1422				++i, thread = (thread_t)queue_next(&thread->task_threads)) {
1423		thread_reference_internal(thread);
1424		thread_list[j++] = thread;
1425	}
1426
1427	assert(queue_end(&task->threads, (queue_entry_t)thread));
1428
1429	actual = j;
1430	size_needed = actual * sizeof (mach_port_t);
1431
1432	/* can unlock task now that we've got the thread refs */
1433	task_unlock(task);
1434
1435	if (actual == 0) {
1436		/* no threads, so return null pointer and deallocate memory */
1437
1438		*threads_out = NULL;
1439		*count = 0;
1440
1441		if (size != 0)
1442			kfree(addr, size);
1443	}
1444	else {
1445		/* if we allocated too much, must copy */
1446
1447		if (size_needed < size) {
1448			void *newaddr;
1449
1450			newaddr = kalloc(size_needed);
1451			if (newaddr == 0) {
1452				for (i = 0; i < actual; ++i)
1453					thread_deallocate(thread_list[i]);
1454				kfree(addr, size);
1455				return (KERN_RESOURCE_SHORTAGE);
1456			}
1457
1458			bcopy(addr, newaddr, size_needed);
1459			kfree(addr, size);
1460			thread_list = (thread_t *)newaddr;
1461		}
1462
1463		*threads_out = thread_list;
1464		*count = actual;
1465
1466		/* do the conversion that Mig should handle */
1467
1468		for (i = 0; i < actual; ++i)
1469			((ipc_port_t *) thread_list)[i] = convert_thread_to_port(thread_list[i]);
1470	}
1471
1472	return (KERN_SUCCESS);
1473}
1474
1475#define TASK_HOLD_NORMAL	0
1476#define TASK_HOLD_PIDSUSPEND	1
1477#define TASK_HOLD_LEGACY	2
1478#define TASK_HOLD_LEGACY_ALL	3
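/*
 * How the hold modes map onto the interfaces below (summary):
 *
 *	TASK_HOLD_NORMAL	- task_suspend_internal()/task_suspend2(); undone
 *				  by task_resume_internal()/task_resume2().
 *	TASK_HOLD_PIDSUSPEND	- task_pidsuspend()/task_pidresume(); at most one
 *				  outstanding per task (task->pidsuspended).
 *	TASK_HOLD_LEGACY	- task_suspend()/task_resume(); counted in
 *				  task->legacy_stop_count.
 *	TASK_HOLD_LEGACY_ALL	- used by task_suspension_notify() to drop all
 *				  remaining legacy holds when the resume port
 *				  loses its senders.
 */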
1479
1480static kern_return_t
1481place_task_hold    (
1482	register task_t task,
1483	int mode)
1484{
1485	if (!task->active) {
1486		return (KERN_FAILURE);
1487	}
1488
1489	KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE,
1490	    MACHDBG_CODE(DBG_MACH_IPC,MACH_TASK_SUSPEND) | DBG_FUNC_NONE,
1491	    proc_pid(task->bsd_info), ((thread_t)queue_first(&task->threads))->thread_id,
1492	    task->user_stop_count, task->user_stop_count + 1, 0);
1493
1494#if MACH_ASSERT
1495	current_task()->suspends_outstanding++;
1496#endif
1497
1498	if (mode == TASK_HOLD_LEGACY)
1499		task->legacy_stop_count++;
1500
1501	if (task->user_stop_count++ > 0) {
1502		/*
1503		 *	If the stop count was positive, the task is
1504		 *	already stopped and we can exit.
1505		 */
1506		return (KERN_SUCCESS);
1507	}
1508
1509	/*
1510	 * Put a kernel-level hold on the threads in the task (all
1511	 * user-level task suspensions added together represent a
1512	 * single kernel-level hold).  We then wait for the threads
1513	 * to stop executing user code.
1514	 */
1515	task_hold_locked(task);
1516	task_wait_locked(task, FALSE);
1517
1518	return (KERN_SUCCESS);
1519}
1520
1521static kern_return_t
1522release_task_hold    (
1523	register task_t		task,
1524	int           		mode)
1525{
1526	register boolean_t release = FALSE;
1527
1528	if (!task->active) {
1529		return (KERN_FAILURE);
1530	}
1531
1532	if (mode == TASK_HOLD_PIDSUSPEND) {
1533	    if (task->pidsuspended == FALSE) {
1534		    return (KERN_FAILURE);
1535	    }
1536	    task->pidsuspended = FALSE;
1537	}
1538
1539	if (task->user_stop_count > (task->pidsuspended ? 1 : 0)) {
1540
1541		KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE,
1542		    MACHDBG_CODE(DBG_MACH_IPC,MACH_TASK_RESUME) | DBG_FUNC_NONE,
1543		    proc_pid(task->bsd_info), ((thread_t)queue_first(&task->threads))->thread_id,
1544		    task->user_stop_count, mode, task->legacy_stop_count);
1545
1546#if MACH_ASSERT
1547		/*
1548		 * This is obviously not robust; if we suspend one task and then resume a different one,
1549		 * we'll fly under the radar. This is only meant to catch the common case of a crashed
1550		 * or buggy suspender.
1551		 */
1552		current_task()->suspends_outstanding--;
1553#endif
1554
1555		if (mode == TASK_HOLD_LEGACY_ALL) {
1556			if (task->legacy_stop_count >= task->user_stop_count) {
1557				task->user_stop_count = 0;
1558				release = TRUE;
1559			} else {
1560				task->user_stop_count -= task->legacy_stop_count;
1561			}
1562			task->legacy_stop_count = 0;
1563		} else {
1564			if (mode == TASK_HOLD_LEGACY && task->legacy_stop_count > 0)
1565				task->legacy_stop_count--;
1566			if (--task->user_stop_count == 0)
1567				release = TRUE;
1568		}
1569	}
1570	else {
1571		return (KERN_FAILURE);
1572	}
1573
1574	/*
1575	 *	Release the task if necessary.
1576	 */
1577	if (release)
1578		task_release_locked(task);
1579
1580    return (KERN_SUCCESS);
1581}
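/*
 * Worked example (illustrative): two task_suspend() calls leave
 * user_stop_count == legacy_stop_count == 2, with the threads held since the
 * first call.  One task_resume() drops each count to 1; if the caller instead
 * deallocates both cached send rights, the resulting no-senders notification
 * drops the remaining legacy holds in one shot via TASK_HOLD_LEGACY_ALL and
 * the threads are set running again.
 */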
1582
1583
/*
 *	task_suspend:
 *
 *	Implement an (old-fashioned) user-level suspension on a task.
 *
 *	Because the user isn't expecting to have to manage a suspension
 *	token, we'll track it on their behalf in the kernel in the form of a
 *	naked send right to the task's resume port.  All such send rights
 *	account for a single suspension against the task (unlike task_suspend2()
 *	where each caller gets a unique suspension count represented by a
 *	unique send-once right).
 *
 * Conditions:
 * 	The caller holds a reference to the task
 */
1599kern_return_t
1600task_suspend(
1601	register task_t		task)
1602{
1603	kern_return_t	 		kr;
1604	mach_port_t			port, send, old_notify;
1605	mach_port_name_t		name;
1606
1607	if (task == TASK_NULL || task == kernel_task)
1608		return (KERN_INVALID_ARGUMENT);
1609
1610	task_lock(task);
1611
1612	/*
1613	 * Claim a send right on the task resume port, and request a no-senders
1614	 * notification on that port (if none outstanding).
1615	 */
1616	if (task->itk_resume == IP_NULL) {
1617		task->itk_resume = ipc_port_alloc_kernel();
1618		if (!IP_VALID(task->itk_resume))
1619			panic("failed to create resume port");
1620		ipc_kobject_set(task->itk_resume, (ipc_kobject_t)task, IKOT_TASK_RESUME);
1621	}
1622
1623	port = task->itk_resume;
1624	ip_lock(port);
1625	assert(ip_active(port));
1626
1627	send = ipc_port_make_send_locked(port);
1628	assert(IP_VALID(send));
1629
1630	if (port->ip_nsrequest == IP_NULL) {
1631		ipc_port_nsrequest(port, port->ip_mscount, ipc_port_make_sonce_locked(port), &old_notify);
1632		assert(old_notify == IP_NULL);
1633		/* port unlocked */
1634	} else {
1635		ip_unlock(port);
1636	}
1637
1638	/*
1639	 * place a legacy hold on the task.
1640	 */
1641	kr = place_task_hold(task, TASK_HOLD_LEGACY);
1642	if (kr != KERN_SUCCESS) {
1643		task_unlock(task);
1644		ipc_port_release_send(send);
1645		return kr;
1646	}
1647
1648	task_unlock(task);
1649
1650	/*
1651	 * Copyout the send right into the calling task's IPC space.  It won't know it is there,
1652	 * but we'll look it up when calling a traditional resume.  Any IPC operations that
1653	 * deallocate the send right will auto-release the suspension.
1654	 */
1655	if ((kr = ipc_kmsg_copyout_object(current_task()->itk_space, (ipc_object_t)send,
1656		MACH_MSG_TYPE_MOVE_SEND, &name)) != KERN_SUCCESS) {
1657		printf("warning: %s(%d) failed to copyout suspension token for task %s(%d) with error: %d\n",
1658			proc_name_address(current_task()->bsd_info), proc_pid(current_task()->bsd_info),
1659			proc_name_address(task->bsd_info), proc_pid(task->bsd_info), kr);
1660		return (kr);
1661	}
1662
1663	return (kr);
1664}
1665
1666/*
1667 *	task_resume:
1668 *		Release a user hold on a task.
1669 *
1670 * Conditions:
1671 *		The caller holds a reference to the task
1672 */
1673kern_return_t
1674task_resume(
1675	register task_t	task)
1676{
1677	kern_return_t	 kr;
1678	mach_port_name_t resume_port_name;
1679	ipc_entry_t		 resume_port_entry;
1680	ipc_space_t		 space = current_task()->itk_space;
1681
1682	if (task == TASK_NULL || task == kernel_task )
1683		return (KERN_INVALID_ARGUMENT);
1684
1685	/* release a legacy task hold */
1686	task_lock(task);
1687	kr = release_task_hold(task, TASK_HOLD_LEGACY);
1688	task_unlock(task);
1689
1690	is_write_lock(space);
1691	if (is_active(space) && IP_VALID(task->itk_resume) &&
1692	    ipc_hash_lookup(space, (ipc_object_t)task->itk_resume, &resume_port_name, &resume_port_entry) == TRUE) {
1693		/*
1694		 * We found a suspension token in the caller's IPC space. Release a send right to indicate that
1695		 * we are holding one less legacy hold on the task from this caller.  If the release failed,
1696		 * go ahead and drop all the rights, as someone either already released our holds or the task
1697		 * is gone.
1698		 */
1699		if (kr == KERN_SUCCESS)
1700			ipc_right_dealloc(space, resume_port_name, resume_port_entry);
1701		else
1702			ipc_right_destroy(space, resume_port_name, resume_port_entry, FALSE, 0);
1703		/* space unlocked */
1704	} else {
1705		is_write_unlock(space);
1706		if (kr == KERN_SUCCESS)
1707			printf("warning: %s(%d) performed out-of-band resume on %s(%d)\n",
1708			       proc_name_address(current_task()->bsd_info), proc_pid(current_task()->bsd_info),
1709			       proc_name_address(task->bsd_info), proc_pid(task->bsd_info));
1710	}
1711
1712	return kr;
1713}
1714
/*
 * Suspend the target task.
 * Making/holding a token/reference/port is the caller's responsibility.
 */
1719kern_return_t
1720task_suspend_internal(task_t task)
1721{
1722	kern_return_t	 kr;
1723
1724	if (task == TASK_NULL || task == kernel_task)
1725		return (KERN_INVALID_ARGUMENT);
1726
1727	task_lock(task);
1728	kr = place_task_hold(task, TASK_HOLD_NORMAL);
1729	task_unlock(task);
1730	return (kr);
1731}
1732
1733/*
1734 * Suspend the target task, and return a suspension token. The token
1735 * represents a reference on the suspended task.
1736 */
1737kern_return_t
1738task_suspend2(
1739	register task_t			task,
1740	task_suspension_token_t *suspend_token)
1741{
1742	kern_return_t	 kr;
1743
1744	kr = task_suspend_internal(task);
1745	if (kr != KERN_SUCCESS) {
1746		*suspend_token = TASK_NULL;
1747		return (kr);
1748	}
1749
1750	/*
1751	 * Take a reference on the target task and return that to the caller
1752	 * as a "suspension token," which can be converted into an SO right to
1753	 * the now-suspended task's resume port.
1754	 */
1755	task_reference_internal(task);
1756	*suspend_token = task;
1757
1758	return (KERN_SUCCESS);
1759}
1760
1761/*
1762 * Resume the task
1763 * (reference/token/port management is caller's responsibility).
1764 */
1765kern_return_t
1766task_resume_internal(
1767	register task_suspension_token_t		task)
1768{
1769	kern_return_t kr;
1770
1771	if (task == TASK_NULL || task == kernel_task)
1772		return (KERN_INVALID_ARGUMENT);
1773
1774	task_lock(task);
1775	kr = release_task_hold(task, TASK_HOLD_NORMAL);
1776	task_unlock(task);
1777	return (kr);
1778}
1779
1780/*
1781 * Resume the task using a suspension token. Consumes the token's ref.
1782 */
1783kern_return_t
1784task_resume2(
1785	register task_suspension_token_t		task)
1786{
1787	kern_return_t kr;
1788
1789	kr = task_resume_internal(task);
1790	task_suspension_token_deallocate(task);
1791
1792	return (kr);
1793}
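/*
 * Typical use of the token-based interface (illustrative sketch; error
 * handling omitted):
 *
 *	task_suspension_token_t token;
 *
 *	if (task_suspend2(target_task, &token) == KERN_SUCCESS) {
 *		... examine the stopped task ...
 *		task_resume2(token);		(consumes the token)
 *	}
 *
 * Each task_suspend2() caller holds its own token/send-once right, so
 * independent suspenders cannot release each other's holds, unlike the legacy
 * task_suspend()/task_resume() pair above.
 */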
1794
1795boolean_t
1796task_suspension_notify(mach_msg_header_t *request_header)
1797{
1798	ipc_port_t port = (ipc_port_t) request_header->msgh_remote_port;
1799	task_t task = convert_port_to_task_suspension_token(port);
1800	mach_msg_type_number_t not_count;
1801
1802	if (task == TASK_NULL || task == kernel_task)
1803		return TRUE;  /* nothing to do */
1804
1805	switch (request_header->msgh_id) {
1806
1807	case MACH_NOTIFY_SEND_ONCE:
1808		/* release the hold held by this specific send-once right */
1809		task_lock(task);
1810		release_task_hold(task, TASK_HOLD_NORMAL);
1811		task_unlock(task);
1812		break;
1813
1814	case MACH_NOTIFY_NO_SENDERS:
1815		not_count = ((mach_no_senders_notification_t *)request_header)->not_count;
1816
1817		task_lock(task);
1818		ip_lock(port);
1819		if (port->ip_mscount == not_count) {
1820
1821			/* release all the [remaining] outstanding legacy holds */
1822			assert(port->ip_nsrequest == IP_NULL);
1823			ip_unlock(port);
1824			release_task_hold(task, TASK_HOLD_LEGACY_ALL);
1825			task_unlock(task);
1826
1827		} else if (port->ip_nsrequest == IP_NULL) {
1828			ipc_port_t old_notify;
1829
1830			task_unlock(task);
1831			/* new send rights, re-arm notification at current make-send count */
1832			ipc_port_nsrequest(port, port->ip_mscount, ipc_port_make_sonce_locked(port), &old_notify);
1833			assert(old_notify == IP_NULL);
1834			/* port unlocked */
1835		} else {
1836			ip_unlock(port);
1837			task_unlock(task);
1838		}
1839		break;
1840
1841	default:
1842		break;
1843	}
1844
1845	task_suspension_token_deallocate(task); /* drop token reference */
1846	return TRUE;
1847}
1848
1849kern_return_t
1850task_pidsuspend_locked(task_t task)
1851{
1852	kern_return_t kr;
1853
1854	if (task->pidsuspended) {
1855		kr = KERN_FAILURE;
1856		goto out;
1857	}
1858
1859	task->pidsuspended = TRUE;
1860
1861	kr = place_task_hold(task, TASK_HOLD_PIDSUSPEND);
1862	if (kr != KERN_SUCCESS) {
1863		task->pidsuspended = FALSE;
1864	}
1865out:
1866	return(kr);
1867}
1868
1869
1870/*
1871 *	task_pidsuspend:
1872 *
1873 *	Suspends a task by placing a hold on its threads.
1874 *
1875 * Conditions:
1876 * 	The caller holds a reference to the task
1877 */
1878kern_return_t
1879task_pidsuspend(
1880	register task_t		task)
1881{
1882	kern_return_t	 kr;
1883
1884	if (task == TASK_NULL || task == kernel_task)
1885		return (KERN_INVALID_ARGUMENT);
1886
1887	task_lock(task);
1888
1889	kr = task_pidsuspend_locked(task);
1890
1891	task_unlock(task);
1892
1893	return (kr);
1894}
1895
1896/* If enabled, we bring all the frozen pages back in prior to resumption; otherwise, they're faulted back in on demand */
1897#define THAW_ON_RESUME 1
1898
1899/*
1900 *	task_pidresume:
1901 *		Resumes a previously suspended task.
1902 *
1903 * Conditions:
1904 *		The caller holds a reference to the task
1905 */
1906kern_return_t
1907task_pidresume(
1908	register task_t	task)
1909{
1910	kern_return_t	 kr;
1911
1912	if (task == TASK_NULL || task == kernel_task)
1913		return (KERN_INVALID_ARGUMENT);
1914
1915	task_lock(task);
1916
1917#if (CONFIG_FREEZE && THAW_ON_RESUME)
1918
1919	while (task->changing_freeze_state) {
1920
1921		assert_wait((event_t)&task->changing_freeze_state, THREAD_UNINT);
1922		task_unlock(task);
1923		thread_block(THREAD_CONTINUE_NULL);
1924
1925		task_lock(task);
1926	}
1927	task->changing_freeze_state = TRUE;
1928#endif
1929
1930	kr = release_task_hold(task, TASK_HOLD_PIDSUSPEND);
1931
1932	task_unlock(task);
1933
1934#if (CONFIG_FREEZE && THAW_ON_RESUME)
1935	if ((kr == KERN_SUCCESS) && (task->frozen == TRUE)) {
1936
1937		if (COMPRESSED_PAGER_IS_ACTIVE || DEFAULT_FREEZER_COMPRESSED_PAGER_IS_ACTIVE) {
1938
1939			kr = KERN_SUCCESS;
1940		} else {
1941
1942			kr = vm_map_thaw(task->map);
1943		}
1944	}
1945	task_lock(task);
1946
1947	if (kr == KERN_SUCCESS)
1948		task->frozen = FALSE;
1949	task->changing_freeze_state = FALSE;
1950	thread_wakeup(&task->changing_freeze_state);
1951
1952	task_unlock(task);
1953#endif
1954
1955	return (kr);
1956}
1957
1958#if CONFIG_FREEZE
1959
1960/*
1961 *	task_freeze:
1962 *
1963 *	Freeze a task.
1964 *
1965 * Conditions:
1966 * 	The caller holds a reference to the task
1967 */
1968kern_return_t
1969task_freeze(
1970	register task_t    task,
1971	uint32_t           *purgeable_count,
1972	uint32_t           *wired_count,
1973	uint32_t           *clean_count,
1974	uint32_t           *dirty_count,
1975	uint32_t           dirty_budget,
1976	boolean_t          *shared,
1977	boolean_t          walk_only)
1978{
1979	kern_return_t kr;
1980
1981	if (task == TASK_NULL || task == kernel_task)
1982		return (KERN_INVALID_ARGUMENT);
1983
1984	task_lock(task);
1985
1986	while (task->changing_freeze_state) {
1987
1988		assert_wait((event_t)&task->changing_freeze_state, THREAD_UNINT);
1989		task_unlock(task);
1990		thread_block(THREAD_CONTINUE_NULL);
1991
1992		task_lock(task);
1993	}
1994	if (task->frozen) {
1995		task_unlock(task);
1996		return (KERN_FAILURE);
1997	}
1998	task->changing_freeze_state = TRUE;
1999
2000	task_unlock(task);
2001
2002	if (walk_only) {
2003		kr = vm_map_freeze_walk(task->map, purgeable_count, wired_count, clean_count, dirty_count, dirty_budget, shared);
2004	} else {
2005		kr = vm_map_freeze(task->map, purgeable_count, wired_count, clean_count, dirty_count, dirty_budget, shared);
2006	}
2007
2008	task_lock(task);
2009
2010	if (walk_only == FALSE && kr == KERN_SUCCESS)
2011		task->frozen = TRUE;
2012	task->changing_freeze_state = FALSE;
2013	thread_wakeup(&task->changing_freeze_state);
2014
2015	task_unlock(task);
2016
2017	return (kr);
2018}
2019
2020/*
2021 *	task_thaw:
2022 *
2023 *	Thaw a currently frozen task.
2024 *
2025 * Conditions:
2026 * 	The caller holds a reference to the task
2027 */
2028extern void
2029vm_consider_waking_compactor_swapper(void);
2030
2031kern_return_t
2032task_thaw(
2033	register task_t		task)
2034{
2035	kern_return_t kr;
2036
2037	if (task == TASK_NULL || task == kernel_task)
2038		return (KERN_INVALID_ARGUMENT);
2039
2040	task_lock(task);
2041
2042	while (task->changing_freeze_state) {
2043
2044		assert_wait((event_t)&task->changing_freeze_state, THREAD_UNINT);
2045		task_unlock(task);
2046		thread_block(THREAD_CONTINUE_NULL);
2047
2048		task_lock(task);
2049	}
2050	if (!task->frozen) {
2051		task_unlock(task);
2052		return (KERN_FAILURE);
2053	}
2054	task->changing_freeze_state = TRUE;
2055
2056	if (DEFAULT_PAGER_IS_ACTIVE || DEFAULT_FREEZER_IS_ACTIVE) {
2057		task_unlock(task);
2058
2059		kr = vm_map_thaw(task->map);
2060
2061		task_lock(task);
2062
2063		if (kr == KERN_SUCCESS)
2064			task->frozen = FALSE;
2065	} else {
2066		task->frozen = FALSE;
2067		kr = KERN_SUCCESS;
2068	}
2069
2070	task->changing_freeze_state = FALSE;
2071	thread_wakeup(&task->changing_freeze_state);
2072
2073	task_unlock(task);
2074
2075	if (COMPRESSED_PAGER_IS_ACTIVE || DEFAULT_FREEZER_COMPRESSED_PAGER_IS_ACTIVE) {
2076		vm_consider_waking_compactor_swapper();
2077	}
2078
2079	return (kr);
2080}
2081
2082#endif /* CONFIG_FREEZE */
2083
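/*
 *	host_security_set_task_token:
 *
 *	Stamp the task with the given security and audit tokens, then point
 *	its TASK_HOST_PORT special port at either the privileged or the
 *	unprivileged host port, depending on whether a host_priv port was
 *	supplied.  Requires a valid host security port.
 */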
2084kern_return_t
2085host_security_set_task_token(
	host_security_t  host_security,
	task_t		 task,
	security_token_t sec_token,
2089	audit_token_t	 audit_token,
2090	host_priv_t	 host_priv)
2091{
2092	ipc_port_t	 host_port;
2093	kern_return_t	 kr;
2094
2095	if (task == TASK_NULL)
2096		return(KERN_INVALID_ARGUMENT);
2097
2098	if (host_security == HOST_NULL)
2099		return(KERN_INVALID_SECURITY);
2100
	task_lock(task);
	task->sec_token = sec_token;
2103	task->audit_token = audit_token;
2104
2105	task_unlock(task);
2106
2107	if (host_priv != HOST_PRIV_NULL) {
2108		kr = host_get_host_priv_port(host_priv, &host_port);
2109	} else {
2110		kr = host_get_host_port(host_priv_self(), &host_port);
2111	}
2112	assert(kr == KERN_SUCCESS);
2113	kr = task_set_special_port(task, TASK_HOST_PORT, host_port);
	return(kr);
2115}
2116
2117/*
2118 * This routine was added, pretty much exclusively, for registering the
 * RPC glue vector for in-kernel short-circuited tasks.  Rather than
 * removing it completely, I have only disabled that feature (which was
 * the only feature at the time).  It just appears that we are going to
 * want to add some user data to tasks in the future (e.g. bsd info,
 * task names, etc.), so I left it in the formal task interface.
2124 */
2125kern_return_t
2126task_set_info(
2127	task_t		task,
2128	task_flavor_t	flavor,
2129	__unused task_info_t	task_info_in,		/* pointer to IN array */
2130	__unused mach_msg_type_number_t	task_info_count)
2131{
2132	if (task == TASK_NULL)
2133		return(KERN_INVALID_ARGUMENT);
2134
2135	switch (flavor) {
2136	    default:
2137		return (KERN_INVALID_ARGUMENT);
2138	}
2139	return (KERN_SUCCESS);
2140}
2141
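/*
 *	task_info:
 *
 *	Return information about the task in the flavor-specific structure
 *	supplied by the caller.  On entry *task_info_count is the size of
 *	the caller's buffer (in natural_t units); on success it is updated
 *	to the number of units actually filled in.
 *
 *	For illustration only, a user-space query of the MACH_TASK_BASIC_INFO
 *	flavor (via the Mach task_info() call, not this in-kernel routine
 *	directly) might look like:
 *
 *		#include <mach/mach.h>
 *		#include <stdio.h>
 *
 *		mach_task_basic_info_data_t info;
 *		mach_msg_type_number_t count = MACH_TASK_BASIC_INFO_COUNT;
 *		kern_return_t kr = task_info(mach_task_self(),
 *		    MACH_TASK_BASIC_INFO, (task_info_t)&info, &count);
 *		if (kr == KERN_SUCCESS)
 *			printf("resident: %llu bytes\n",
 *			    (unsigned long long)info.resident_size);
 */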
2142kern_return_t
2143task_info(
2144	task_t			task,
2145	task_flavor_t		flavor,
2146	task_info_t		task_info_out,
2147	mach_msg_type_number_t	*task_info_count)
2148{
2149	kern_return_t error = KERN_SUCCESS;
2150
2151	if (task == TASK_NULL)
2152		return (KERN_INVALID_ARGUMENT);
2153
2154	task_lock(task);
2155
2156	if ((task != current_task()) && (!task->active)) {
2157		task_unlock(task);
2158		return (KERN_INVALID_ARGUMENT);
2159	}
2160
2161	switch (flavor) {
2162
2163	case TASK_BASIC_INFO_32:
2164	case TASK_BASIC2_INFO_32:
2165	{
2166		task_basic_info_32_t	basic_info;
2167		vm_map_t				map;
2168		clock_sec_t				secs;
2169		clock_usec_t			usecs;
2170
2171		if (*task_info_count < TASK_BASIC_INFO_32_COUNT) {
2172		    error = KERN_INVALID_ARGUMENT;
2173		    break;
2174		}
2175
2176		basic_info = (task_basic_info_32_t)task_info_out;
2177
2178		map = (task == kernel_task)? kernel_map: task->map;
2179		basic_info->virtual_size = (typeof(basic_info->virtual_size))map->size;
2180		if (flavor == TASK_BASIC2_INFO_32) {
2181			/*
2182			 * The "BASIC2" flavor gets the maximum resident
2183			 * size instead of the current resident size...
2184			 */
2185			basic_info->resident_size = pmap_resident_max(map->pmap);
2186		} else {
2187			basic_info->resident_size = pmap_resident_count(map->pmap);
2188		}
2189		basic_info->resident_size *= PAGE_SIZE;
2190
2191		basic_info->policy = ((task != kernel_task)?
2192										  POLICY_TIMESHARE: POLICY_RR);
2193		basic_info->suspend_count = task->user_stop_count;
2194
2195		absolutetime_to_microtime(task->total_user_time, &secs, &usecs);
2196		basic_info->user_time.seconds =
2197			(typeof(basic_info->user_time.seconds))secs;
2198		basic_info->user_time.microseconds = usecs;
2199
2200		absolutetime_to_microtime(task->total_system_time, &secs, &usecs);
2201		basic_info->system_time.seconds =
2202			(typeof(basic_info->system_time.seconds))secs;
2203		basic_info->system_time.microseconds = usecs;
2204
2205		*task_info_count = TASK_BASIC_INFO_32_COUNT;
2206		break;
2207	}
2208
2209	case TASK_BASIC_INFO_64:
2210	{
2211		task_basic_info_64_t	basic_info;
2212		vm_map_t				map;
2213		clock_sec_t				secs;
2214		clock_usec_t			usecs;
2215
2216		if (*task_info_count < TASK_BASIC_INFO_64_COUNT) {
2217		    error = KERN_INVALID_ARGUMENT;
2218		    break;
2219		}
2220
2221		basic_info = (task_basic_info_64_t)task_info_out;
2222
2223		map = (task == kernel_task)? kernel_map: task->map;
2224		basic_info->virtual_size  = map->size;
2225		basic_info->resident_size =
2226			(mach_vm_size_t)(pmap_resident_count(map->pmap))
2227			* PAGE_SIZE_64;
2228
2229		basic_info->policy = ((task != kernel_task)?
2230										  POLICY_TIMESHARE: POLICY_RR);
2231		basic_info->suspend_count = task->user_stop_count;
2232
2233		absolutetime_to_microtime(task->total_user_time, &secs, &usecs);
2234		basic_info->user_time.seconds =
2235			(typeof(basic_info->user_time.seconds))secs;
2236		basic_info->user_time.microseconds = usecs;
2237
2238		absolutetime_to_microtime(task->total_system_time, &secs, &usecs);
2239		basic_info->system_time.seconds =
2240			(typeof(basic_info->system_time.seconds))secs;
2241		basic_info->system_time.microseconds = usecs;
2242
2243		*task_info_count = TASK_BASIC_INFO_64_COUNT;
2244		break;
2245	}
2246
2247	case MACH_TASK_BASIC_INFO:
2248	{
2249		mach_task_basic_info_t  basic_info;
2250		vm_map_t                map;
2251		clock_sec_t             secs;
2252		clock_usec_t            usecs;
2253
2254		if (*task_info_count < MACH_TASK_BASIC_INFO_COUNT) {
2255		    error = KERN_INVALID_ARGUMENT;
2256		    break;
2257		}
2258
2259		basic_info = (mach_task_basic_info_t)task_info_out;
2260
2261		map = (task == kernel_task) ? kernel_map : task->map;
2262
2263		basic_info->virtual_size  = map->size;
2264
2265		basic_info->resident_size =
2266		    (mach_vm_size_t)(pmap_resident_count(map->pmap));
2267		basic_info->resident_size *= PAGE_SIZE_64;
2268
2269		basic_info->resident_size_max =
2270		    (mach_vm_size_t)(pmap_resident_max(map->pmap));
2271		basic_info->resident_size_max *= PAGE_SIZE_64;
2272
2273		basic_info->policy = ((task != kernel_task) ?
2274		                      POLICY_TIMESHARE : POLICY_RR);
2275
2276		basic_info->suspend_count = task->user_stop_count;
2277
2278		absolutetime_to_microtime(task->total_user_time, &secs, &usecs);
2279		basic_info->user_time.seconds =
2280		    (typeof(basic_info->user_time.seconds))secs;
2281		basic_info->user_time.microseconds = usecs;
2282
2283		absolutetime_to_microtime(task->total_system_time, &secs, &usecs);
2284		basic_info->system_time.seconds =
2285		    (typeof(basic_info->system_time.seconds))secs;
2286		basic_info->system_time.microseconds = usecs;
2287
2288		*task_info_count = MACH_TASK_BASIC_INFO_COUNT;
2289		break;
2290	}
2291
2292	case TASK_THREAD_TIMES_INFO:
2293	{
2294		register task_thread_times_info_t	times_info;
2295		register thread_t					thread;
2296
2297		if (*task_info_count < TASK_THREAD_TIMES_INFO_COUNT) {
2298		    error = KERN_INVALID_ARGUMENT;
2299		    break;
2300		}
2301
2302		times_info = (task_thread_times_info_t) task_info_out;
2303		times_info->user_time.seconds = 0;
2304		times_info->user_time.microseconds = 0;
2305		times_info->system_time.seconds = 0;
2306		times_info->system_time.microseconds = 0;
2307
2308
2309		queue_iterate(&task->threads, thread, thread_t, task_threads) {
2310			time_value_t	user_time, system_time;
2311
2312			if (thread->options & TH_OPT_IDLE_THREAD)
2313				continue;
2314
2315			thread_read_times(thread, &user_time, &system_time);
2316
2317			time_value_add(&times_info->user_time, &user_time);
2318			time_value_add(&times_info->system_time, &system_time);
2319		}
2320
2321		*task_info_count = TASK_THREAD_TIMES_INFO_COUNT;
2322		break;
2323	}
2324
2325	case TASK_ABSOLUTETIME_INFO:
2326	{
2327		task_absolutetime_info_t	info;
2328		register thread_t			thread;
2329
2330		if (*task_info_count < TASK_ABSOLUTETIME_INFO_COUNT) {
2331			error = KERN_INVALID_ARGUMENT;
2332			break;
2333		}
2334
2335		info = (task_absolutetime_info_t)task_info_out;
2336		info->threads_user = info->threads_system = 0;
2337
2338
2339		info->total_user = task->total_user_time;
2340		info->total_system = task->total_system_time;
2341
2342		queue_iterate(&task->threads, thread, thread_t, task_threads) {
2343			uint64_t	tval;
2344			spl_t 		x;
2345
2346			if (thread->options & TH_OPT_IDLE_THREAD)
2347				continue;
2348
2349			x = splsched();
2350			thread_lock(thread);
2351
2352			tval = timer_grab(&thread->user_timer);
2353			info->threads_user += tval;
2354			info->total_user += tval;
2355
2356			tval = timer_grab(&thread->system_timer);
2357			if (thread->precise_user_kernel_time) {
2358				info->threads_system += tval;
2359				info->total_system += tval;
2360			} else {
2361				/* system_timer may represent either sys or user */
2362				info->threads_user += tval;
2363				info->total_user += tval;
2364			}
2365
2366			thread_unlock(thread);
2367			splx(x);
2368		}
2369
2370
2371		*task_info_count = TASK_ABSOLUTETIME_INFO_COUNT;
2372		break;
2373	}
2374
2375	case TASK_DYLD_INFO:
2376	{
2377		task_dyld_info_t info;
2378
2379		/*
2380		 * We added the format field to TASK_DYLD_INFO output.  For
2381		 * temporary backward compatibility, accept the fact that
		 * clients may ask for the old version, distinguished by the
2383		 * size of the expected result structure.
2384		 */
2385#define TASK_LEGACY_DYLD_INFO_COUNT \
2386		offsetof(struct task_dyld_info, all_image_info_format)/sizeof(natural_t)
2387
2388		if (*task_info_count < TASK_LEGACY_DYLD_INFO_COUNT) {
2389			error = KERN_INVALID_ARGUMENT;
2390			break;
2391		}
2392
2393		info = (task_dyld_info_t)task_info_out;
2394		info->all_image_info_addr = task->all_image_info_addr;
2395		info->all_image_info_size = task->all_image_info_size;
2396
2397		/* only set format on output for those expecting it */
2398		if (*task_info_count >= TASK_DYLD_INFO_COUNT) {
2399			info->all_image_info_format = task_has_64BitAddr(task) ?
2400				                 TASK_DYLD_ALL_IMAGE_INFO_64 :
2401				                 TASK_DYLD_ALL_IMAGE_INFO_32 ;
2402			*task_info_count = TASK_DYLD_INFO_COUNT;
2403		} else {
2404			*task_info_count = TASK_LEGACY_DYLD_INFO_COUNT;
2405		}
2406		break;
2407	}
2408
2409	case TASK_EXTMOD_INFO:
2410	{
2411		task_extmod_info_t info;
2412		void *p;
2413
2414		if (*task_info_count < TASK_EXTMOD_INFO_COUNT) {
2415			error = KERN_INVALID_ARGUMENT;
2416			break;
2417		}
2418
2419		info = (task_extmod_info_t)task_info_out;
2420
2421		p = get_bsdtask_info(task);
2422		if (p) {
2423			proc_getexecutableuuid(p, info->task_uuid, sizeof(info->task_uuid));
2424		} else {
2425			bzero(info->task_uuid, sizeof(info->task_uuid));
2426		}
2427		info->extmod_statistics = task->extmod_statistics;
2428		*task_info_count = TASK_EXTMOD_INFO_COUNT;
2429
2430		break;
2431	}
2432
2433	case TASK_KERNELMEMORY_INFO:
2434	{
2435		task_kernelmemory_info_t	tkm_info;
2436		ledger_amount_t			credit, debit;
2437
2438		if (*task_info_count < TASK_KERNELMEMORY_INFO_COUNT) {
2439		   error = KERN_INVALID_ARGUMENT;
2440		   break;
2441		}
2442
2443		tkm_info = (task_kernelmemory_info_t) task_info_out;
2444		tkm_info->total_palloc = 0;
2445		tkm_info->total_pfree = 0;
2446		tkm_info->total_salloc = 0;
2447		tkm_info->total_sfree = 0;
2448
2449		if (task == kernel_task) {
2450			/*
2451			 * All shared allocs/frees from other tasks count against
2452			 * the kernel private memory usage.  If we are looking up
2453			 * info for the kernel task, gather from everywhere.
2454			 */
2455			task_unlock(task);
2456
2457			/* start by accounting for all the terminated tasks against the kernel */
2458			tkm_info->total_palloc = tasks_tkm_private.alloc + tasks_tkm_shared.alloc;
2459			tkm_info->total_pfree = tasks_tkm_private.free + tasks_tkm_shared.free;
2460
2461			/* count all other task/thread shared alloc/free against the kernel */
2462			lck_mtx_lock(&tasks_threads_lock);
2463
2464			/* XXX this really shouldn't be using the function parameter 'task' as a local var! */
2465			queue_iterate(&tasks, task, task_t, tasks) {
2466				if (task == kernel_task) {
2467					if (ledger_get_entries(task->ledger,
2468					    task_ledgers.tkm_private, &credit,
2469					    &debit) == KERN_SUCCESS) {
2470						tkm_info->total_palloc += credit;
2471						tkm_info->total_pfree += debit;
2472					}
2473				}
2474				if (!ledger_get_entries(task->ledger,
2475				    task_ledgers.tkm_shared, &credit, &debit)) {
2476					tkm_info->total_palloc += credit;
2477					tkm_info->total_pfree += debit;
2478				}
2479			}
2480			lck_mtx_unlock(&tasks_threads_lock);
2481		} else {
2482			if (!ledger_get_entries(task->ledger,
2483			    task_ledgers.tkm_private, &credit, &debit)) {
2484				tkm_info->total_palloc = credit;
2485				tkm_info->total_pfree = debit;
2486			}
2487			if (!ledger_get_entries(task->ledger,
2488			    task_ledgers.tkm_shared, &credit, &debit)) {
2489				tkm_info->total_salloc = credit;
2490				tkm_info->total_sfree = debit;
2491			}
2492			task_unlock(task);
2493		}
2494
2495		*task_info_count = TASK_KERNELMEMORY_INFO_COUNT;
2496		return KERN_SUCCESS;
2497	}
2498
2499	/* OBSOLETE */
2500	case TASK_SCHED_FIFO_INFO:
2501	{
2502
2503		if (*task_info_count < POLICY_FIFO_BASE_COUNT) {
2504			error = KERN_INVALID_ARGUMENT;
2505			break;
2506		}
2507
2508		error = KERN_INVALID_POLICY;
2509		break;
2510	}
2511
2512	/* OBSOLETE */
2513	case TASK_SCHED_RR_INFO:
2514	{
2515		register policy_rr_base_t	rr_base;
2516		uint32_t quantum_time;
2517		uint64_t quantum_ns;
2518
2519		if (*task_info_count < POLICY_RR_BASE_COUNT) {
2520			error = KERN_INVALID_ARGUMENT;
2521			break;
2522		}
2523
2524		rr_base = (policy_rr_base_t) task_info_out;
2525
2526		if (task != kernel_task) {
2527			error = KERN_INVALID_POLICY;
2528			break;
2529		}
2530
2531		rr_base->base_priority = task->priority;
2532
2533		quantum_time = SCHED(initial_quantum_size)(THREAD_NULL);
2534		absolutetime_to_nanoseconds(quantum_time, &quantum_ns);
2535
2536		rr_base->quantum = (uint32_t)(quantum_ns / 1000 / 1000);
2537
2538		*task_info_count = POLICY_RR_BASE_COUNT;
2539		break;
2540	}
2541
2542	/* OBSOLETE */
2543	case TASK_SCHED_TIMESHARE_INFO:
2544	{
2545		register policy_timeshare_base_t	ts_base;
2546
2547		if (*task_info_count < POLICY_TIMESHARE_BASE_COUNT) {
2548			error = KERN_INVALID_ARGUMENT;
2549			break;
2550		}
2551
2552		ts_base = (policy_timeshare_base_t) task_info_out;
2553
2554		if (task == kernel_task) {
2555			error = KERN_INVALID_POLICY;
2556			break;
2557		}
2558
2559		ts_base->base_priority = task->priority;
2560
2561		*task_info_count = POLICY_TIMESHARE_BASE_COUNT;
2562		break;
2563	}
2564
2565	case TASK_SECURITY_TOKEN:
2566	{
2567		register security_token_t	*sec_token_p;
2568
2569		if (*task_info_count < TASK_SECURITY_TOKEN_COUNT) {
2570		    error = KERN_INVALID_ARGUMENT;
2571		    break;
2572		}
2573
2574		sec_token_p = (security_token_t *) task_info_out;
2575
2576		*sec_token_p = task->sec_token;
2577
2578		*task_info_count = TASK_SECURITY_TOKEN_COUNT;
2579		break;
2580	}
2581
2582	case TASK_AUDIT_TOKEN:
2583	{
2584		register audit_token_t	*audit_token_p;
2585
2586		if (*task_info_count < TASK_AUDIT_TOKEN_COUNT) {
2587		    error = KERN_INVALID_ARGUMENT;
2588		    break;
2589		}
2590
2591		audit_token_p = (audit_token_t *) task_info_out;
2592
2593		*audit_token_p = task->audit_token;
2594
2595		*task_info_count = TASK_AUDIT_TOKEN_COUNT;
2596		break;
2597	}
2598
2599	case TASK_SCHED_INFO:
2600		error = KERN_INVALID_ARGUMENT;
2601		break;
2602
2603	case TASK_EVENTS_INFO:
2604	{
2605		register task_events_info_t	events_info;
2606		register thread_t			thread;
2607
2608		if (*task_info_count < TASK_EVENTS_INFO_COUNT) {
2609		   error = KERN_INVALID_ARGUMENT;
2610		   break;
2611		}
2612
2613		events_info = (task_events_info_t) task_info_out;
2614
2615
2616		events_info->faults = task->faults;
2617		events_info->pageins = task->pageins;
2618		events_info->cow_faults = task->cow_faults;
2619		events_info->messages_sent = task->messages_sent;
2620		events_info->messages_received = task->messages_received;
2621		events_info->syscalls_mach = task->syscalls_mach;
2622		events_info->syscalls_unix = task->syscalls_unix;
2623
2624		events_info->csw = task->c_switch;
2625
2626		queue_iterate(&task->threads, thread, thread_t, task_threads) {
2627			events_info->csw	   += thread->c_switch;
2628			events_info->syscalls_mach += thread->syscalls_mach;
2629			events_info->syscalls_unix += thread->syscalls_unix;
2630		}
2631
2632
2633		*task_info_count = TASK_EVENTS_INFO_COUNT;
2634		break;
2635	}
2636	case TASK_AFFINITY_TAG_INFO:
2637	{
2638		if (*task_info_count < TASK_AFFINITY_TAG_INFO_COUNT) {
2639		    error = KERN_INVALID_ARGUMENT;
2640		    break;
2641		}
2642
2643		error = task_affinity_info(task, task_info_out, task_info_count);
2644		break;
2645	}
2646	case TASK_POWER_INFO:
2647	{
2648		if (*task_info_count < TASK_POWER_INFO_COUNT) {
2649			error = KERN_INVALID_ARGUMENT;
2650			break;
2651		}
2652
2653		task_power_info_locked(task, (task_power_info_t)task_info_out);
2654		break;
2655	}
2656
2657	case TASK_VM_INFO:
2658	case TASK_VM_INFO_PURGEABLE:
2659	{
2660		task_vm_info_t		vm_info;
2661		vm_map_t		map;
2662
2663		if (*task_info_count < TASK_VM_INFO_COUNT) {
2664		    error = KERN_INVALID_ARGUMENT;
2665		    break;
2666		}
2667
2668		vm_info = (task_vm_info_t)task_info_out;
2669
2670		if (task == kernel_task) {
2671			map = kernel_map;
2672			/* no lock */
2673		} else {
2674			map = task->map;
2675			vm_map_lock_read(map);
2676		}
2677
2678		vm_info->virtual_size = (typeof(vm_info->virtual_size))map->size;
2679		vm_info->region_count = map->hdr.nentries;
2680		vm_info->page_size = vm_map_page_size(map);
2681
2682		vm_info->resident_size = pmap_resident_count(map->pmap);
2683		vm_info->resident_size *= PAGE_SIZE;
2684		vm_info->resident_size_peak = pmap_resident_max(map->pmap);
2685		vm_info->resident_size_peak *= PAGE_SIZE;
2686
2687#define _VM_INFO(_name) \
2688	vm_info->_name = ((mach_vm_size_t) map->pmap->stats._name) * PAGE_SIZE
2689
2690		_VM_INFO(device);
2691		_VM_INFO(device_peak);
2692		_VM_INFO(external);
2693		_VM_INFO(external_peak);
2694		_VM_INFO(internal);
2695		_VM_INFO(internal_peak);
2696		_VM_INFO(reusable);
2697		_VM_INFO(reusable_peak);
2698		_VM_INFO(compressed);
2699		_VM_INFO(compressed_peak);
2700		_VM_INFO(compressed_lifetime);
2701
2702		vm_info->purgeable_volatile_pmap = 0;
2703		vm_info->purgeable_volatile_resident = 0;
2704		vm_info->purgeable_volatile_virtual = 0;
2705		if (task == kernel_task) {
2706			/*
2707			 * We do not maintain the detailed stats for the
2708			 * kernel_pmap, so just count everything as
2709			 * "internal"...
2710			 */
2711			vm_info->internal = vm_info->resident_size;
2712			/*
2713			 * ... but since the memory held by the VM compressor
2714			 * in the kernel address space ought to be attributed
2715			 * to user-space tasks, we subtract it from "internal"
2716			 * to give memory reporting tools a more accurate idea
2717			 * of what the kernel itself is actually using, instead
2718			 * of making it look like the kernel is leaking memory
2719			 * when the system is under memory pressure.
2720			 */
2721			vm_info->internal -= (VM_PAGE_COMPRESSOR_COUNT *
2722					      PAGE_SIZE);
2723		} else {
2724			mach_vm_size_t	volatile_virtual_size;
2725			mach_vm_size_t	volatile_resident_size;
2726			mach_vm_size_t	volatile_pmap_size;
2727			kern_return_t	kr;
2728
2729			if (flavor == TASK_VM_INFO_PURGEABLE) {
2730				kr = vm_map_query_volatile(
2731					map,
2732					&volatile_virtual_size,
2733					&volatile_resident_size,
2734					&volatile_pmap_size);
2735				if (kr == KERN_SUCCESS) {
2736					vm_info->purgeable_volatile_pmap =
2737						volatile_pmap_size;
2738					vm_info->purgeable_volatile_resident =
2739						volatile_resident_size;
2740					vm_info->purgeable_volatile_virtual =
2741						volatile_virtual_size;
2742				}
2743			}
2744			vm_map_unlock_read(map);
2745		}
2746
2747		*task_info_count = TASK_VM_INFO_COUNT;
2748		break;
2749	}
2750
2751	default:
2752		error = KERN_INVALID_ARGUMENT;
2753	}
2754
2755	task_unlock(task);
2756	return (error);
2757}
2758
2759/*
2760 *	task_power_info
2761 *
2762 *	Returns power stats for the task.
2763 *	Note: Called with task locked.
2764 */
2765void
2766task_power_info_locked(
2767	task_t			task,
2768	task_power_info_t	info)
2769{
2770	thread_t		thread;
2771	ledger_amount_t		tmp;
2772
2773	task_lock_assert_owned(task);
2774
2775	ledger_get_entries(task->ledger, task_ledgers.interrupt_wakeups,
2776		(ledger_amount_t *)&info->task_interrupt_wakeups, &tmp);
2777	ledger_get_entries(task->ledger, task_ledgers.platform_idle_wakeups,
2778		(ledger_amount_t *)&info->task_platform_idle_wakeups, &tmp);
2779
2780	info->task_timer_wakeups_bin_1 = task->task_timer_wakeups_bin_1;
2781	info->task_timer_wakeups_bin_2 = task->task_timer_wakeups_bin_2;
2782
2783	info->total_user = task->total_user_time;
2784	info->total_system = task->total_system_time;
2785
2786	queue_iterate(&task->threads, thread, thread_t, task_threads) {
2787		uint64_t	tval;
2788		spl_t 		x;
2789
2790		if (thread->options & TH_OPT_IDLE_THREAD)
2791			continue;
2792
2793		x = splsched();
2794		thread_lock(thread);
2795
2796		info->task_timer_wakeups_bin_1 += thread->thread_timer_wakeups_bin_1;
2797		info->task_timer_wakeups_bin_2 += thread->thread_timer_wakeups_bin_2;
2798
2799		tval = timer_grab(&thread->user_timer);
2800		info->total_user += tval;
2801
2802		tval = timer_grab(&thread->system_timer);
2803		if (thread->precise_user_kernel_time) {
2804			info->total_system += tval;
2805		} else {
2806			/* system_timer may represent either sys or user */
2807			info->total_user += tval;
2808		}
2809
2810		thread_unlock(thread);
2811		splx(x);
2812	}
2813}
2814
2815kern_return_t
2816task_purgable_info(
2817	task_t			task,
2818	task_purgable_info_t	*stats)
2819{
2820	if (task == TASK_NULL || stats == NULL)
2821		return KERN_INVALID_ARGUMENT;
2822	/* Take task reference */
2823	task_reference(task);
2824	vm_purgeable_stats((vm_purgeable_info_t)stats, task);
2825	/* Drop task reference */
2826	task_deallocate(task);
2827	return KERN_SUCCESS;
2828}
2829
2830void
2831task_vtimer_set(
2832	task_t		task,
2833	integer_t	which)
2834{
2835	thread_t	thread;
2836	spl_t		x;
2837
2838	/* assert(task == current_task()); */ /* bogus assert 4803227 4807483 */
2839
2840	task_lock(task);
2841
2842	task->vtimers |= which;
2843
2844	switch (which) {
2845
2846	case TASK_VTIMER_USER:
2847		queue_iterate(&task->threads, thread, thread_t, task_threads) {
2848			x = splsched();
2849			thread_lock(thread);
2850			if (thread->precise_user_kernel_time)
2851				thread->vtimer_user_save = timer_grab(&thread->user_timer);
2852			else
2853				thread->vtimer_user_save = timer_grab(&thread->system_timer);
2854			thread_unlock(thread);
2855			splx(x);
2856		}
2857		break;
2858
2859	case TASK_VTIMER_PROF:
2860		queue_iterate(&task->threads, thread, thread_t, task_threads) {
2861			x = splsched();
2862			thread_lock(thread);
2863			thread->vtimer_prof_save = timer_grab(&thread->user_timer);
2864			thread->vtimer_prof_save += timer_grab(&thread->system_timer);
2865			thread_unlock(thread);
2866			splx(x);
2867		}
2868		break;
2869
2870	case TASK_VTIMER_RLIM:
2871		queue_iterate(&task->threads, thread, thread_t, task_threads) {
2872			x = splsched();
2873			thread_lock(thread);
2874			thread->vtimer_rlim_save = timer_grab(&thread->user_timer);
2875			thread->vtimer_rlim_save += timer_grab(&thread->system_timer);
2876			thread_unlock(thread);
2877			splx(x);
2878		}
2879		break;
2880	}
2881
2882	task_unlock(task);
2883}
2884
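/*
 *	task_vtimer_set:
 *
 *	Enable a virtual timer (TASK_VTIMER_USER, _PROF or _RLIM) on the
 *	task and snapshot each thread's user/system timers so later
 *	updates can be computed as deltas from this point.
 */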
2885void
2886task_vtimer_clear(
2887	task_t		task,
2888	integer_t	which)
2889{
2890	assert(task == current_task());
2891
2892	task_lock(task);
2893
2894	task->vtimers &= ~which;
2895
2896	task_unlock(task);
2897}
2898
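/*
 *	task_vtimer_update:
 *
 *	Report, for the current thread, the microseconds accrued on the
 *	given virtual timer since the last snapshot.  The RLIM snapshot is
 *	advanced on every call; the PROF snapshot advances only when at
 *	least a microsecond has accrued.
 */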
2899void
2900task_vtimer_update(
2901__unused
2902	task_t		task,
2903	integer_t	which,
2904	uint32_t	*microsecs)
2905{
2906	thread_t	thread = current_thread();
2907	uint32_t	tdelt;
2908	clock_sec_t	secs;
2909	uint64_t	tsum;
2910
2911	assert(task == current_task());
2912
2913	assert(task->vtimers & which);
2914
2915	secs = tdelt = 0;
2916
2917	switch (which) {
2918
2919	case TASK_VTIMER_USER:
2920		if (thread->precise_user_kernel_time) {
2921			tdelt = (uint32_t)timer_delta(&thread->user_timer,
2922								&thread->vtimer_user_save);
2923		} else {
2924			tdelt = (uint32_t)timer_delta(&thread->system_timer,
2925								&thread->vtimer_user_save);
2926		}
2927		absolutetime_to_microtime(tdelt, &secs, microsecs);
2928		break;
2929
2930	case TASK_VTIMER_PROF:
2931		tsum = timer_grab(&thread->user_timer);
2932		tsum += timer_grab(&thread->system_timer);
2933		tdelt = (uint32_t)(tsum - thread->vtimer_prof_save);
2934		absolutetime_to_microtime(tdelt, &secs, microsecs);
2935		/* if the time delta is smaller than a usec, ignore */
2936		if (*microsecs != 0)
2937			thread->vtimer_prof_save = tsum;
2938		break;
2939
2940	case TASK_VTIMER_RLIM:
2941		tsum = timer_grab(&thread->user_timer);
2942		tsum += timer_grab(&thread->system_timer);
2943		tdelt = (uint32_t)(tsum - thread->vtimer_rlim_save);
2944		thread->vtimer_rlim_save = tsum;
2945		absolutetime_to_microtime(tdelt, &secs, microsecs);
2946		break;
2947	}
2948
2949}
2950
2951/*
2952 *	task_assign:
2953 *
2954 *	Change the assigned processor set for the task
2955 */
2956kern_return_t
2957task_assign(
2958	__unused task_t		task,
2959	__unused processor_set_t	new_pset,
2960	__unused boolean_t	assign_threads)
2961{
2962	return(KERN_FAILURE);
2963}
2964
2965/*
2966 *	task_assign_default:
2967 *
2968 *	Version of task_assign to assign to default processor set.
2969 */
2970kern_return_t
2971task_assign_default(
2972	task_t		task,
2973	boolean_t	assign_threads)
2974{
2975    return (task_assign(task, &pset0, assign_threads));
2976}
2977
2978/*
2979 *	task_get_assignment
2980 *
2981 *	Return name of processor set that task is assigned to.
2982 */
2983kern_return_t
2984task_get_assignment(
2985	task_t		task,
2986	processor_set_t	*pset)
2987{
2988	if (!task->active)
2989		return(KERN_FAILURE);
2990
2991	*pset = &pset0;
2992
2993	return (KERN_SUCCESS);
2994}
2995
2996
2997/*
2998 * 	task_policy
2999 *
3000 *	Set scheduling policy and parameters, both base and limit, for
3001 *	the given task. Policy must be a policy which is enabled for the
3002 *	processor set. Change contained threads if requested.
3003 */
3004kern_return_t
3005task_policy(
3006	__unused task_t			task,
3007	__unused policy_t			policy_id,
3008	__unused policy_base_t		base,
3009	__unused mach_msg_type_number_t	count,
3010	__unused boolean_t			set_limit,
3011	__unused boolean_t			change)
3012{
3013	return(KERN_FAILURE);
3014}
3015
3016/*
3017 *	task_set_policy
3018 *
3019 *	Set scheduling policy and parameters, both base and limit, for
3020 *	the given task. Policy can be any policy implemented by the
3021 *	processor set, whether enabled or not. Change contained threads
3022 *	if requested.
3023 */
3024kern_return_t
3025task_set_policy(
3026	__unused task_t			task,
3027	__unused processor_set_t		pset,
3028	__unused policy_t			policy_id,
3029	__unused policy_base_t		base,
3030	__unused mach_msg_type_number_t	base_count,
3031	__unused policy_limit_t		limit,
3032	__unused mach_msg_type_number_t	limit_count,
3033	__unused boolean_t			change)
3034{
3035	return(KERN_FAILURE);
3036}
3037
3038#if	FAST_TAS
3039kern_return_t
3040task_set_ras_pc(
3041 	task_t		task,
3042 	vm_offset_t	pc,
3043 	vm_offset_t	endpc)
3044{
3045	extern int fast_tas_debug;
3046
3047	if (fast_tas_debug) {
3048		printf("task 0x%x: setting fast_tas to [0x%x, 0x%x]\n",
3049		       task, pc, endpc);
3050	}
3051	task_lock(task);
3052	task->fast_tas_base = pc;
3053	task->fast_tas_end =  endpc;
3054	task_unlock(task);
3055	return KERN_SUCCESS;
3056}
3057#else	/* FAST_TAS */
3058kern_return_t
3059task_set_ras_pc(
3060 	__unused task_t	task,
3061 	__unused vm_offset_t	pc,
3062 	__unused vm_offset_t	endpc)
3063{
3064	return KERN_FAILURE;
3065}
3066#endif	/* FAST_TAS */
3067
3068void
3069task_synchronizer_destroy_all(task_t task)
3070{
3071	semaphore_t	semaphore;
3072
3073	/*
3074	 *  Destroy owned semaphores
3075	 */
3076
3077	while (!queue_empty(&task->semaphore_list)) {
3078		semaphore = (semaphore_t) queue_first(&task->semaphore_list);
3079		(void) semaphore_destroy(task, semaphore);
3080	}
3081}
3082
3083/*
3084 * Install default (machine-dependent) initial thread state
3085 * on the task.  Subsequent thread creation will have this initial
3086 * state set on the thread by machine_thread_inherit_taskwide().
 * Flavors and structures are exactly the same as those passed to thread_set_state().
3088 */
3089kern_return_t
3090task_set_state(
3091	task_t task,
3092	int flavor,
3093	thread_state_t state,
3094	mach_msg_type_number_t state_count)
3095{
3096	kern_return_t ret;
3097
3098	if (task == TASK_NULL) {
3099		return (KERN_INVALID_ARGUMENT);
3100	}
3101
3102	task_lock(task);
3103
3104	if (!task->active) {
3105		task_unlock(task);
3106		return (KERN_FAILURE);
3107	}
3108
3109	ret = machine_task_set_state(task, flavor, state, state_count);
3110
3111	task_unlock(task);
3112	return ret;
3113}
3114
3115/*
3116 * Examine the default (machine-dependent) initial thread state
3117 * on the task, as set by task_set_state().  Flavors and structures
3118 * are exactly the same as those passed to thread_get_state().
3119 */
3120kern_return_t
3121task_get_state(
3122	task_t 	task,
3123	int	flavor,
3124	thread_state_t state,
3125	mach_msg_type_number_t *state_count)
3126{
3127	kern_return_t ret;
3128
3129	if (task == TASK_NULL) {
3130		return (KERN_INVALID_ARGUMENT);
3131	}
3132
3133	task_lock(task);
3134
3135	if (!task->active) {
3136		task_unlock(task);
3137		return (KERN_FAILURE);
3138	}
3139
3140	ret = machine_task_get_state(task, flavor, state, state_count);
3141
3142	task_unlock(task);
3143	return ret;
3144}
3145
3146#if CONFIG_JETSAM
3147#define HWM_USERCORE_MINSPACE 250 // free space (in MB) required *after* core file creation
3148
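/*
 * Called in the context of a task that has crossed its physical
 * footprint (memory high watermark) limit: optionally take a core dump
 * of the process (hwm_user_cores), then raise a non-fatal EXC_RESOURCE
 * exception encoding the limit, unless exception delivery has been
 * suppressed by the disable_exc_resource boot-arg.
 */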
3149void __attribute__((noinline))
3150THIS_PROCESS_CROSSED_HIGH_WATERMARK__SENDING_EXC_RESOURCE(int max_footprint_mb)
3151{
3152	task_t						task 		= current_task();
3153	int							pid         = 0;
3154	char        				*procname 	= (char *) "unknown";
3155	mach_exception_data_type_t	code[EXCEPTION_CODE_MAX];
3156
3157#ifdef MACH_BSD
3158	pid = proc_selfpid();
3159	if (task->bsd_info != NULL)
3160		procname = proc_name_address(current_task()->bsd_info);
3161#endif
3162
3163	if (hwm_user_cores) {
3164		int				error;
3165		uint64_t		starttime, end;
3166		clock_sec_t		secs = 0;
3167		uint32_t		microsecs = 0;
3168
3169		starttime = mach_absolute_time();
3170		/*
3171		 * Trigger a coredump of this process. Don't proceed unless we know we won't
3172		 * be filling up the disk; and ignore the core size resource limit for this
3173		 * core file.
3174		 */
3175		if ((error = coredump(current_task()->bsd_info, HWM_USERCORE_MINSPACE, 1)) != 0) {
3176			printf("couldn't take coredump of %s[%d]: %d\n", procname, pid, error);
3177		}
		/*
		 * coredump() leaves the task suspended.
		 */
3181		task_resume_internal(current_task());
3182
3183		end = mach_absolute_time();
3184		absolutetime_to_microtime(end - starttime, &secs, &microsecs);
3185		printf("coredump of %s[%d] taken in %d secs %d microsecs\n",
3186		       proc_name_address(current_task()->bsd_info), pid, (int)secs, microsecs);
3187	}
3188
3189	if (disable_exc_resource) {
3190		printf("process %s[%d] crossed memory high watermark (%d MB); EXC_RESOURCE "
			"suppressed by a boot-arg.\n", procname, pid, max_footprint_mb);
3192		return;
3193	}
3194
3195	printf("process %s[%d] crossed memory high watermark (%d MB); sending "
3196		"EXC_RESOURCE.\n", procname, pid, max_footprint_mb);
3197
3198	code[0] = code[1] = 0;
3199	EXC_RESOURCE_ENCODE_TYPE(code[0], RESOURCE_TYPE_MEMORY);
3200	EXC_RESOURCE_ENCODE_FLAVOR(code[0], FLAVOR_HIGH_WATERMARK);
3201	EXC_RESOURCE_HWM_ENCODE_LIMIT(code[0], max_footprint_mb);
3202	exception_triage(EXC_RESOURCE, code, EXCEPTION_CODE_MAX);
3203}
3204
3205/*
3206 * Callback invoked when a task exceeds its physical footprint limit.
3207 */
3208void
3209task_footprint_exceeded(int warning, __unused const void *param0, __unused const void *param1)
3210{
3211	ledger_amount_t max_footprint_mb;
3212
3213	if (warning == LEDGER_WARNING_DIPPED_BELOW) {
3214		/*
3215		 * Task memory limits only provide a warning on the way up.
3216		 */
3217		return;
3218	}
3219
3220	ledger_get_limit(current_task()->ledger, task_ledgers.phys_footprint, &max_footprint_mb);
3221	max_footprint_mb >>= 20;
3222
3223	/*
	 * If this is an actual violation (not a warning),
3225	 * generate a non-fatal high watermark EXC_RESOURCE.
3226	 */
3227	if ((warning == 0) && (current_task()->rusage_cpu_flags & TASK_RUSECPU_FLAGS_PHYS_FOOTPRINT_EXCEPTION)) {
3228		THIS_PROCESS_CROSSED_HIGH_WATERMARK__SENDING_EXC_RESOURCE((int)max_footprint_mb);
3229	}
3230
3231	memorystatus_on_ledger_footprint_exceeded((warning == LEDGER_WARNING_ROSE_ABOVE) ? TRUE : FALSE,
3232		(int)max_footprint_mb);
3233}
3234
3235extern int proc_check_footprint_priv(void);
3236
3237kern_return_t
3238task_set_phys_footprint_limit(
3239	task_t task,
3240	int new_limit_mb,
3241	int *old_limit_mb)
3242{
3243	kern_return_t error;
3244
3245	if ((error = proc_check_footprint_priv())) {
3246		return (KERN_NO_ACCESS);
3247	}
3248
3249	return task_set_phys_footprint_limit_internal(task, new_limit_mb, old_limit_mb, FALSE);
3250}
3251
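/*
 * task_set_phys_footprint_limit_internal:
 *
 * Set (or, with new_limit_mb == -1, remove) the limit on the task's
 * phys_footprint ledger entry, returning the previous limit in MB via
 * old_limit_mb if requested.  When trigger_exception is TRUE the task
 * is flagged so that crossing the limit raises EXC_RESOURCE in addition
 * to notifying the memorystatus subsystem; e.g.
 * task_set_phys_footprint_limit_internal(task, 512, NULL, TRUE) caps
 * the task at 512 MB and arms the exception.
 */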
3252kern_return_t
3253task_set_phys_footprint_limit_internal(
3254	task_t task,
3255	int new_limit_mb,
3256	int *old_limit_mb,
3257	boolean_t trigger_exception)
3258{
3259	ledger_amount_t	old;
3260
3261	ledger_get_limit(task->ledger, task_ledgers.phys_footprint, &old);
3262
3263	if (old_limit_mb) {
3264		*old_limit_mb = old >> 20;
3265	}
3266
3267	if (new_limit_mb == -1) {
3268		/*
3269		 * Caller wishes to remove the limit.
3270		 */
3271		ledger_set_limit(task->ledger, task_ledgers.phys_footprint,
3272		                 max_task_footprint ? max_task_footprint : LEDGER_LIMIT_INFINITY,
3273		                 max_task_footprint ? PHYS_FOOTPRINT_WARNING_LEVEL : 0);
3274		return (KERN_SUCCESS);
3275	}
3276
3277#ifdef CONFIG_NOMONITORS
3278	return (KERN_SUCCESS);
3279#endif /* CONFIG_NOMONITORS */
3280
3281	task_lock(task);
3282
3283	if (trigger_exception) {
3284		task->rusage_cpu_flags |= TASK_RUSECPU_FLAGS_PHYS_FOOTPRINT_EXCEPTION;
3285	} else {
3286		task->rusage_cpu_flags &= ~TASK_RUSECPU_FLAGS_PHYS_FOOTPRINT_EXCEPTION;
3287	}
3288
3289	ledger_set_limit(task->ledger, task_ledgers.phys_footprint,
3290		(ledger_amount_t)new_limit_mb << 20, PHYS_FOOTPRINT_WARNING_LEVEL);
3291
3292	task_unlock(task);
3293
3294	return (KERN_SUCCESS);
3295}
3296
3297kern_return_t
3298task_get_phys_footprint_limit(
3299	task_t task,
3300	int *limit_mb)
3301{
3302	ledger_amount_t	limit;
3303
3304	ledger_get_limit(task->ledger, task_ledgers.phys_footprint, &limit);
3305	*limit_mb = limit >> 20;
3306
3307	return (KERN_SUCCESS);
3308}
3309#else /* CONFIG_JETSAM */
3310kern_return_t
3311task_set_phys_footprint_limit(
3312	__unused task_t task,
3313	__unused int new_limit_mb,
3314	__unused int *old_limit_mb)
3315{
3316	return (KERN_FAILURE);
3317}
3318
3319kern_return_t
3320task_get_phys_footprint_limit(
3321	__unused task_t task,
3322	__unused int *limit_mb)
3323{
3324	return (KERN_FAILURE);
3325}
3326#endif /* CONFIG_JETSAM */
3327
3328/*
3329 * We need to export some functions to other components that
3330 * are currently implemented in macros within the osfmk
3331 * component.  Just export them as functions of the same name.
3332 */
3333boolean_t is_kerneltask(task_t t)
3334{
3335	if (t == kernel_task)
3336		return (TRUE);
3337
3338	return (FALSE);
3339}
3340
3341int
3342check_for_tasksuspend(task_t task)
3343{
3344
3345	if (task == TASK_NULL)
3346		return (0);
3347
3348	return (task->suspend_count > 0);
3349}
3350
3351#undef current_task
3352task_t current_task(void);
3353task_t current_task(void)
3354{
3355	return (current_task_fast());
3356}
3357
3358#undef task_reference
3359void task_reference(task_t task);
3360void
3361task_reference(
3362	task_t		task)
3363{
3364	if (task != TASK_NULL)
3365		task_reference_internal(task);
3366}
3367
3368/*
 * This routine is always called with the task lock held.
 * It returns a thread handle without taking a reference, since the
 * caller operates on the thread while still holding the task lock.
3372 */
3373thread_t
3374task_findtid(task_t task, uint64_t tid)
3375{
3376	thread_t thread= THREAD_NULL;
3377
3378	queue_iterate(&task->threads, thread, thread_t, task_threads) {
3379			if (thread->thread_id == tid)
3380				return(thread);
3381	}
3382	return(THREAD_NULL);
3383}
3384
3385
3386#if CONFIG_MACF_MACH
3387/*
 * Protect two task labels against modification by adding a reference on
 * both label handles.  The locks do not actually have to be held while
 * using the labels, as only labels with a single reference can be
 * modified in place.
3392 */
3393
3394void
3395tasklabel_lock2(
3396	task_t a,
3397	task_t b)
3398{
3399	labelh_reference(a->label);
3400	labelh_reference(b->label);
3401}
3402
3403void
3404tasklabel_unlock2(
3405	task_t a,
3406	task_t b)
3407{
3408	labelh_release(a->label);
3409	labelh_release(b->label);
3410}
3411
3412void
3413mac_task_label_update_internal(
3414	struct label	*pl,
3415	struct task	*task)
3416{
3417
3418	tasklabel_lock(task);
3419	task->label = labelh_modify(task->label);
3420	mac_task_label_update(pl, &task->maclabel);
3421	tasklabel_unlock(task);
3422	ip_lock(task->itk_self);
3423	mac_port_label_update_cred(pl, &task->itk_self->ip_label);
3424	ip_unlock(task->itk_self);
3425}
3426
3427void
3428mac_task_label_modify(
3429	struct task	*task,
3430	void		*arg,
3431	void (*f)	(struct label *l, void *arg))
3432{
3433
3434	tasklabel_lock(task);
3435	task->label = labelh_modify(task->label);
3436	(*f)(&task->maclabel, arg);
3437	tasklabel_unlock(task);
3438}
3439
3440struct label *
3441mac_task_get_label(struct task *task)
3442{
3443	return (&task->maclabel);
3444}
3445#endif
3446
3447/*
3448 * Control the CPU usage monitor for a task.
3449 */
3450kern_return_t
3451task_cpu_usage_monitor_ctl(task_t task, uint32_t *flags)
3452{
3453	int error = KERN_SUCCESS;
3454
3455	if (*flags & CPUMON_MAKE_FATAL) {
3456		task->rusage_cpu_flags |= TASK_RUSECPU_FLAGS_FATAL_CPUMON;
3457	} else {
3458		error = KERN_INVALID_ARGUMENT;
3459	}
3460
3461	return error;
3462}
3463
3464/*
3465 * Control the wakeups monitor for a task.
3466 */
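/*
 * The flags word selects the operation: WAKEMON_GET_PARAMS reports the
 * currently configured rate (or WAKEMON_DISABLE if no limit is set) and
 * ignores all other flags; WAKEMON_ENABLE installs a ledger limit of
 * rate_hz wakeups per second (WAKEMON_SET_DEFAULTS substitutes the
 * system default rate, WAKEMON_MAKE_FATAL marks violations fatal); and
 * WAKEMON_DISABLE removes the limit and its callback.
 *
 * For illustration, enabling the monitor at the system default rate
 * might look like:
 *
 *	uint32_t flags = WAKEMON_ENABLE | WAKEMON_SET_DEFAULTS;
 *	int32_t rate = -1;
 *	(void) task_wakeups_monitor_ctl(task, &flags, &rate);
 */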
3467kern_return_t
3468task_wakeups_monitor_ctl(task_t task, uint32_t *flags, int32_t *rate_hz)
3469{
3470	ledger_t ledger = task->ledger;
3471
3472	task_lock(task);
3473	if (*flags & WAKEMON_GET_PARAMS) {
3474		ledger_amount_t	limit;
3475		uint64_t		period;
3476
3477		ledger_get_limit(ledger, task_ledgers.interrupt_wakeups, &limit);
3478		ledger_get_period(ledger, task_ledgers.interrupt_wakeups, &period);
3479
3480		if (limit != LEDGER_LIMIT_INFINITY) {
3481			/*
3482			 * An active limit means the wakeups monitor is enabled.
3483			 */
3484			*rate_hz = (int32_t)(limit / (int64_t)(period / NSEC_PER_SEC));
3485			*flags = WAKEMON_ENABLE;
3486			if (task->rusage_cpu_flags & TASK_RUSECPU_FLAGS_FATAL_WAKEUPSMON) {
3487				*flags |= WAKEMON_MAKE_FATAL;
3488			}
3489		} else {
3490			*flags = WAKEMON_DISABLE;
3491			*rate_hz = -1;
3492		}
3493
3494		/*
3495		 * If WAKEMON_GET_PARAMS is present in flags, all other flags are ignored.
3496		 */
		task_unlock(task);
3498		return KERN_SUCCESS;
3499	}
3500
3501	if (*flags & WAKEMON_ENABLE) {
3502		if (*flags & WAKEMON_SET_DEFAULTS) {
3503			*rate_hz = task_wakeups_monitor_rate;
3504		}
3505
3506#ifndef CONFIG_NOMONITORS
3507		if (*flags & WAKEMON_MAKE_FATAL) {
3508			task->rusage_cpu_flags |= TASK_RUSECPU_FLAGS_FATAL_WAKEUPSMON;
3509		}
3510#endif /* CONFIG_NOMONITORS */
3511
3512		if (*rate_hz < 0) {
3513			task_unlock(task);
3514			return KERN_INVALID_ARGUMENT;
3515		}
3516
3517#ifndef CONFIG_NOMONITORS
3518		ledger_set_limit(ledger, task_ledgers.interrupt_wakeups, *rate_hz * task_wakeups_monitor_interval,
3519			task_wakeups_monitor_ustackshots_trigger_pct);
3520		ledger_set_period(ledger, task_ledgers.interrupt_wakeups, task_wakeups_monitor_interval * NSEC_PER_SEC);
3521		ledger_enable_callback(ledger, task_ledgers.interrupt_wakeups);
3522#endif /* CONFIG_NOMONITORS */
3523	} else if (*flags & WAKEMON_DISABLE) {
3524		/*
3525		 * Caller wishes to disable wakeups monitor on the task.
3526		 *
3527		 * Disable telemetry if it was triggered by the wakeups monitor, and
3528		 * remove the limit & callback on the wakeups ledger entry.
3529		 */
3530#if CONFIG_TELEMETRY
3531		telemetry_task_ctl_locked(current_task(), TF_WAKEMON_WARNING, 0);
3532#endif
3533		ledger_disable_refill(ledger, task_ledgers.interrupt_wakeups);
3534		ledger_disable_callback(ledger, task_ledgers.interrupt_wakeups);
3535	}
3536
3537	task_unlock(task);
3538	return KERN_SUCCESS;
3539}
3540
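/*
 * Ledger callback for the interrupt_wakeups entry.  A warning that the
 * balance rose above the threshold turns on wakeups telemetry for the
 * task; dipping back below the threshold (or an outright limit
 * violation) turns it off, and a violation additionally sends
 * EXC_RESOURCE.
 */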
3541void
3542task_wakeups_rate_exceeded(int warning, __unused const void *param0, __unused const void *param1)
3543{
3544	if (warning == LEDGER_WARNING_ROSE_ABOVE) {
3545#if CONFIG_TELEMETRY
3546		/*
3547		 * This task is in danger of violating the wakeups monitor. Enable telemetry on this task
3548		 * so there are micro-stackshots available if and when EXC_RESOURCE is triggered.
3549		 */
3550		telemetry_task_ctl(current_task(), TF_WAKEMON_WARNING, 1);
3551#endif
3552		return;
3553	}
3554
3555#if CONFIG_TELEMETRY
3556	/*
3557	 * If the balance has dipped below the warning level (LEDGER_WARNING_DIPPED_BELOW) or
3558	 * exceeded the limit, turn telemetry off for the task.
3559	 */
3560	telemetry_task_ctl(current_task(), TF_WAKEMON_WARNING, 0);
3561#endif
3562
3563	if (warning == 0) {
3564		THIS_PROCESS_IS_CAUSING_TOO_MANY_WAKEUPS__SENDING_EXC_RESOURCE();
3565	}
3566}
3567
3568void __attribute__((noinline))
3569THIS_PROCESS_IS_CAUSING_TOO_MANY_WAKEUPS__SENDING_EXC_RESOURCE(void)
3570{
3571	task_t						task 		= current_task();
3572	int							pid         = 0;
3573	char        				*procname 	= (char *) "unknown";
3574	uint64_t					observed_wakeups_rate;
3575	uint64_t					permitted_wakeups_rate;
3576	uint64_t					observation_interval;
3577	mach_exception_data_type_t	code[EXCEPTION_CODE_MAX];
3578	struct ledger_entry_info	lei;
3579
3580#ifdef MACH_BSD
3581	pid = proc_selfpid();
3582	if (task->bsd_info != NULL)
3583		procname = proc_name_address(current_task()->bsd_info);
3584#endif
3585
3586	ledger_get_entry_info(task->ledger, task_ledgers.interrupt_wakeups, &lei);
3587
3588	/*
3589	 * Disable the exception notification so we don't overwhelm
3590	 * the listener with an endless stream of redundant exceptions.
3591	 */
3592	uint32_t flags = WAKEMON_DISABLE;
3593	task_wakeups_monitor_ctl(task, &flags, NULL);
3594
3595	observed_wakeups_rate = (lei.lei_balance * (int64_t)NSEC_PER_SEC) / lei.lei_last_refill;
3596	permitted_wakeups_rate = lei.lei_limit / task_wakeups_monitor_interval;
3597	observation_interval = lei.lei_refill_period / NSEC_PER_SEC;
3598
3599	if (disable_exc_resource) {
3600		printf("process %s[%d] caught causing excessive wakeups. EXC_RESOURCE "
			"suppressed by a boot-arg\n", procname, pid);
3602		return;
3603	}
3604	if (audio_active) {
3605		printf("process %s[%d] caught causing excessive wakeups. EXC_RESOURCE "
		       "suppressed due to audio playback\n", procname, pid);
3607		return;
3608	}
3609	printf("process %s[%d] caught causing excessive wakeups. Observed wakeups rate "
3610		"(per sec): %lld; Maximum permitted wakeups rate (per sec): %lld; Observation "
3611		"period: %lld seconds; Task lifetime number of wakeups: %lld\n",
3612		procname, pid, observed_wakeups_rate, permitted_wakeups_rate,
3613		observation_interval, lei.lei_credit);
3614
3615	code[0] = code[1] = 0;
3616	EXC_RESOURCE_ENCODE_TYPE(code[0], RESOURCE_TYPE_WAKEUPS);
3617	EXC_RESOURCE_ENCODE_FLAVOR(code[0], FLAVOR_WAKEUPS_MONITOR);
3618	EXC_RESOURCE_CPUMONITOR_ENCODE_WAKEUPS_PERMITTED(code[0], task_wakeups_monitor_rate);
3619	EXC_RESOURCE_CPUMONITOR_ENCODE_OBSERVATION_INTERVAL(code[0], observation_interval);
3620	EXC_RESOURCE_CPUMONITOR_ENCODE_WAKEUPS_OBSERVED(code[1], lei.lei_balance * (int64_t)NSEC_PER_SEC / lei.lei_last_refill);
3621	exception_triage(EXC_RESOURCE, code, EXCEPTION_CODE_MAX);
3622
3623	if (task->rusage_cpu_flags & TASK_RUSECPU_FLAGS_FATAL_WAKEUPSMON) {
3624		task_terminate_internal(task);
3625	}
3626}
3627