1/*
2 * Copyright (c) 2013 Apple Computer, Inc. All rights reserved.
3 *
4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5 *
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. The rights granted to you under the License
10 * may not be used to create, or enable the creation or redistribution of,
11 * unlawful or unlicensed copies of an Apple operating system, or to
12 * circumvent, violate, or enable the circumvention or violation of, any
13 * terms of an Apple operating system software license agreement.
14 *
15 * Please obtain a copy of the License at
16 * http://www.opensource.apple.com/apsl/ and read it before using this file.
17 *
18 * The Original Code and all software distributed under the License are
19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23 * Please see the License for the specific language governing rights and
24 * limitations under the License.
25 *
26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27 */
28
29#include <kern/kern_types.h>
30#include <mach/mach_types.h>
31#include <mach/boolean.h>
32
33#include <kern/coalition.h>
34#include <kern/host.h>
35#include <kern/ledger.h>
36#include <kern/kalloc.h>
37#include <kern/mach_param.h> /* for TASK_CHUNK */
38#include <kern/task.h>
39#include <kern/zalloc.h>
40
41#include <libkern/OSAtomic.h>
42
43#include <mach/coalition_notification_server.h>
44#include <mach/host_priv.h>
45#include <mach/host_special_ports.h>
46
47#include <sys/errno.h>
48
49/* defined in task.c */
50extern ledger_template_t task_ledger_template;
51
52/*
53 * Coalition zone needs limits. We expect there will be as many coalitions as
54 * tasks (same order of magnitude), so use the task zone's limits.
55 * */
56#define CONFIG_COALITION_MAX CONFIG_TASK_MAX
57#define COALITION_CHUNK TASK_CHUNK
58
59int unrestrict_coalition_syscalls;
60
61lck_attr_t coalitions_lck_attr;
62lck_grp_t coalitions_lck_grp;
63lck_grp_attr_t coalitions_lck_grp_attr;
64
65/* coalitions_list_lock protects coalition_count, coalitions queue, next_coalition_id. */
66decl_lck_mtx_data(static,coalitions_list_lock);
67static uint64_t coalition_count;
68static uint64_t coalition_next_id = 1;
69static queue_head_t coalitions;
70
71coalition_t default_coalition;
72
73zone_t coalition_zone;
74
75struct coalition {
76	uint64_t id;			/* monotonically increasing */
77
78	ledger_t ledger;
79	uint64_t bytesread;
80	uint64_t byteswritten;
81	uint64_t gpu_time;
82
83	/*
84	 * Count the length of time this coalition had at least one active task.
85	 * This can be a 'denominator' to turn e.g. cpu_time to %cpu.
86	 * */
87	uint64_t last_became_nonempty_time;
88	uint64_t time_nonempty;
89
90	uint64_t task_count;		/* Count of tasks that have started in this coalition */
91	uint64_t dead_task_count;	/* Count of tasks that have exited in this coalition; subtract from task_count to get count of "active" */
92	queue_head_t tasks;		/* List of active tasks in the coalition */
93
94	queue_chain_t coalitions;	/* global list of coalitions */
95
96	decl_lck_mtx_data(,lock)	/* Coalition lock. */
97
98	uint32_t ref_count;		/* Number of references to the memory containing this struct */
99	uint32_t active_count;		/* Number of members of (tasks in) the coalition, plus vouchers referring to the coalition */
100
101	unsigned int privileged : 1;	/* Members of this coalition may create and manage coalitions and may posix_spawn processes into selected coalitions */
102
103	/* ast? */
104
105	/* voucher */
106
107	/* state of the coalition */
108	unsigned int termrequested : 1;		/* launchd has requested termination when coalition becomes empty */
109	unsigned int terminated : 1;		/* coalition became empty and spawns are now forbidden */
110	unsigned int reaped : 1;		/* reaped, invisible to userspace, but waiting for ref_count to go to zero */
111	unsigned int notified : 1;		/* no-more-processes notification was sent via special port */
112};
113
/*
 * Acquire / release the per-coalition mutex.
 * The argument is parenthesized so that any pointer-valued expression (not
 * only a plain identifier) expands with the intended precedence.
 */
#define coalition_lock(c) do{ lck_mtx_lock(&(c)->lock); }while(0)
#define coalition_unlock(c) do{ lck_mtx_unlock(&(c)->lock); }while(0)
116
117static void
118coalition_notify_user(uint64_t id, uint32_t flags)
119{
120	mach_port_t user_port;
121	kern_return_t kr;
122
123	kr = host_get_coalition_port(host_priv_self(), &user_port);
124	if ((kr != KERN_SUCCESS) || !IPC_PORT_VALID(user_port)) {
125		return;
126	}
127
128	coalition_notification(user_port, id, flags);
129}
130
131/*
132 * coalition_find_by_id_internal
133 * Returns: Coalition object with specified id, NOT referenced.
134 *          If not found, returns COALITION_NULL.
135 * Condition: coalitions_list_lock must be LOCKED.
136 */
137static coalition_t
138coalition_find_by_id_internal(uint64_t coal_id)
139{
140	if (coal_id == 0) {
141		return COALITION_NULL;
142	}
143
144	lck_mtx_assert(&coalitions_list_lock, LCK_MTX_ASSERT_OWNED);
145	coalition_t coal;
146	queue_iterate(&coalitions, coal, coalition_t, coalitions) {
147		if (coal->id == coal_id) {
148			return coal;
149		}
150	}
151	return COALITION_NULL;
152}
153
154kern_return_t
155coalition_resource_usage_internal(coalition_t coal, struct coalition_resource_usage *cru_out)
156{
157	kern_return_t kr;
158	ledger_amount_t credit, debit;
159
160	ledger_t sum_ledger = ledger_instantiate(task_ledger_template, LEDGER_CREATE_ACTIVE_ENTRIES);
161	if (sum_ledger == LEDGER_NULL) {
162		return KERN_RESOURCE_SHORTAGE;
163	}
164
165	coalition_lock(coal);
166
167	/*
168	 * Start with the coalition's ledger, which holds the totals from all
169	 * the dead tasks.
170	 */
171	ledger_rollup(sum_ledger, coal->ledger);
172	uint64_t bytesread = coal->bytesread;
173	uint64_t byteswritten = coal->byteswritten;
174	uint64_t gpu_time = coal->gpu_time;
175
176	/*
177	 * Add to that all the active tasks' ledgers. Tasks cannot deallocate
178	 * out from under us, since we hold the coalition lock.
179	 */
180	task_t task;
181	queue_iterate(&coal->tasks, task, task_t, coalition_tasks) {
182		ledger_rollup(sum_ledger, task->ledger);
183		bytesread += task->task_io_stats->disk_reads.size;
184		byteswritten += task->task_io_stats->total_io.size - task->task_io_stats->disk_reads.size;
185		gpu_time += task_gpu_utilisation(task);
186	}
187
188	/* collect information from the coalition itself */
189	cru_out->tasks_started = coal->task_count;
190	cru_out->tasks_exited = coal->dead_task_count;
191
192	uint64_t time_nonempty = coal->time_nonempty;
193	uint64_t last_became_nonempty_time = coal->last_became_nonempty_time;
194
195	coalition_unlock(coal);
196
197	/* Copy the totals out of sum_ledger */
198	kr = ledger_get_entries(sum_ledger, task_ledgers.cpu_time,
199			&credit, &debit);
200	if (kr != KERN_SUCCESS) {
201		credit = 0;
202	}
203	cru_out->cpu_time = credit;
204
205	kr = ledger_get_entries(sum_ledger, task_ledgers.interrupt_wakeups,
206			&credit, &debit);
207	if (kr != KERN_SUCCESS) {
208		credit = 0;
209	}
210	cru_out->interrupt_wakeups = credit;
211
212	kr = ledger_get_entries(sum_ledger, task_ledgers.platform_idle_wakeups,
213			&credit, &debit);
214	if (kr != KERN_SUCCESS) {
215		credit = 0;
216	}
217	cru_out->platform_idle_wakeups = credit;
218
219	cru_out->bytesread = bytesread;
220	cru_out->byteswritten = byteswritten;
221	cru_out->gpu_time = gpu_time;
222
223	ledger_dereference(sum_ledger);
224	sum_ledger = LEDGER_NULL;
225
226	if (last_became_nonempty_time) {
227		time_nonempty += mach_absolute_time() - last_became_nonempty_time;
228	}
229	absolutetime_to_nanoseconds(time_nonempty, &cru_out->time_nonempty);
230
231	return KERN_SUCCESS;
232}
233
234/*
235 * coalition_create_internal
236 * Returns: New coalition object, referenced for the caller and unlocked.
237 * Condition: coalitions_list_lock must be UNLOCKED.
238 */
239kern_return_t
240coalition_create_internal(coalition_t *out, boolean_t privileged)
241{
242	struct coalition *new_coal = (struct coalition *)zalloc(coalition_zone);
243	if (new_coal == COALITION_NULL) {
244		return KERN_RESOURCE_SHORTAGE;
245	}
246	bzero(new_coal, sizeof(*new_coal));
247
248	new_coal->ledger = ledger_instantiate(task_ledger_template, LEDGER_CREATE_ACTIVE_ENTRIES);
249	if (new_coal->ledger == NULL) {
250		zfree(coalition_zone, new_coal);
251		return KERN_RESOURCE_SHORTAGE;
252	}
253
254	/* One for caller, one for coalitions list */
255	new_coal->ref_count = 2;
256
257	new_coal->privileged = privileged ? TRUE : FALSE;
258
259	lck_mtx_init(&new_coal->lock, &coalitions_lck_grp, &coalitions_lck_attr);
260	queue_init(&new_coal->tasks);
261
262	lck_mtx_lock(&coalitions_list_lock);
263	new_coal->id = coalition_next_id++;
264	coalition_count++;
265	queue_enter(&coalitions, new_coal, coalition_t, coalitions);
266	lck_mtx_unlock(&coalitions_list_lock);
267
268#if COALITION_DEBUG
269	printf("%s: new coal id %llu\n", __func__, new_coal->id);
270#endif
271
272	*out = new_coal;
273	return KERN_SUCCESS;
274}
275
276/*
277 * coalition_release
278 * Condition: coalition must be UNLOCKED.
279 * */
280void
281coalition_release(coalition_t coal)
282{
283	boolean_t do_dealloc = FALSE;
284
285	/* TODO: This can be done with atomics. */
286	coalition_lock(coal);
287	coal->ref_count--;
288	if (coal->ref_count == 0) {
289		do_dealloc = TRUE;
290	}
291#if COALITION_DEBUG
292	uint32_t rc = coal->ref_count;
293#endif /* COALITION_DEBUG */
294
295	coalition_unlock(coal);
296
297#if COALITION_DEBUG
298	printf("%s: coal %llu ref_count-- -> %u%s\n", __func__, coal->id, rc,
299			do_dealloc ? ", will deallocate now" : "");
300#endif /* COALITION_DEBUG */
301
302	if (do_dealloc) {
303		assert(coal->termrequested);
304		assert(coal->terminated);
305		assert(coal->active_count == 0);
306		assert(coal->reaped);
307
308		ledger_dereference(coal->ledger);
309		lck_mtx_destroy(&coal->lock, &coalitions_lck_grp);
310		zfree(coalition_zone, coal);
311	}
312}
313
314/*
315 * coalition_find_by_id
316 * Returns: Coalition object with specified id, referenced.
317 * Condition: coalitions_list_lock must be UNLOCKED.
318 */
319coalition_t
320coalition_find_by_id(uint64_t cid)
321{
322	if (cid == 0) {
323		return COALITION_NULL;
324	}
325
326	lck_mtx_lock(&coalitions_list_lock);
327
328	coalition_t coal = coalition_find_by_id_internal(cid);
329	if (coal == COALITION_NULL) {
330		lck_mtx_unlock(&coalitions_list_lock);
331		return COALITION_NULL;
332	}
333
334	coalition_lock(coal);
335
336	if (coal->reaped) {
337		coalition_unlock(coal);
338		lck_mtx_unlock(&coalitions_list_lock);
339		return COALITION_NULL;
340	}
341
342	if (coal->ref_count == 0) {
343		panic("resurrecting coalition %p id %llu, active_count = %u\n",
344				coal, coal->id, coal->active_count);
345	}
346	coal->ref_count++;
347#if COALITION_DEBUG
348	uint32_t rc = coal->ref_count;
349#endif
350
351	coalition_unlock(coal);
352	lck_mtx_unlock(&coalitions_list_lock);
353
354#if COALITION_DEBUG
355	printf("%s: coal %llu ref_count++ -> %u\n", __func__, coal->id, rc);
356#endif
357	return coal;
358}
359
360/*
361 * coalition_find_and_activate_by_id
362 * Returns: Coalition object with specified id, referenced, and activated.
363 * Condition: coalitions_list_lock must be UNLOCKED.
364 * This is the function to use when putting a 'new' thing into a coalition,
365 * like posix_spawn of an XPC service by launchd.
366 * See also coalition_extend_active.
367 */
368coalition_t
369coalition_find_and_activate_by_id(uint64_t cid)
370{
371	if (cid == 0) {
372		return COALITION_NULL;
373	}
374
375	lck_mtx_lock(&coalitions_list_lock);
376
377	coalition_t coal = coalition_find_by_id_internal(cid);
378	if (coal == COALITION_NULL) {
379		lck_mtx_unlock(&coalitions_list_lock);
380		return COALITION_NULL;
381	}
382
383	coalition_lock(coal);
384
385	if (coal->reaped || coal->terminated) {
386		/* Too late to put something new into this coalition, it's
387		 * already on its way out the door */
388		coalition_unlock(coal);
389		lck_mtx_unlock(&coalitions_list_lock);
390		return COALITION_NULL;
391	}
392
393	if (coal->ref_count == 0) {
394		panic("resurrecting coalition %p id %llu, active_count = %u\n",
395				coal, coal->id, coal->active_count);
396	}
397
398	coal->ref_count++;
399	coal->active_count++;
400
401#if COALITION_DEBUG
402	uint32_t rc = coal->ref_count;
403	uint32_t ac = coal->active_count;
404#endif
405
406	coalition_unlock(coal);
407	lck_mtx_unlock(&coalitions_list_lock);
408
409#if COALITION_DEBUG
410	printf("%s: coal %llu ref_count++ -> %u, active_count++ -> %u\n",
411			__func__, coal->id, rc, ac);
412#endif
413	return coal;
414}
415
416uint64_t
417coalition_id(coalition_t coal)
418{
419	return coal->id;
420}
421
422uint64_t
423task_coalition_id(task_t task)
424{
425	return task->coalition->id;
426}
427
428boolean_t
429coalition_is_privileged(coalition_t coal)
430{
431	return coal->privileged || unrestrict_coalition_syscalls;
432}
433
434boolean_t
435task_is_in_privileged_coalition(task_t task)
436{
437	return task->coalition->privileged || unrestrict_coalition_syscalls;
438}
439
440/*
441 * coalition_get_ledger
442 * Returns: Coalition's ledger, NOT referenced.
443 * Condition: Caller must have a coalition reference.
444 */
445ledger_t
446coalition_get_ledger(coalition_t coal)
447{
448	return coal->ledger;
449}
450
451/*
452 * This is the function to use when you already hold an activation on the
453 * coalition, and want to extend it to a second activation owned by a new
454 * object, like when a task in the coalition calls fork(). This is analogous
455 * to taking a second reference when you already hold one.
456 * See also coalition_find_and_activate_by_id.
457 */
458kern_return_t
459coalition_extend_active(coalition_t coal)
460{
461	coalition_lock(coal);
462
463	if (coal->reaped) {
464		panic("cannot make a reaped coalition active again");
465	}
466
467	if (coal->terminated) {
468		coalition_unlock(coal);
469		return KERN_TERMINATED;
470	}
471
472	assert(coal->active_count > 0);
473	coal->active_count++;
474
475	coalition_unlock(coal);
476	return KERN_SUCCESS;
477}
478
479void
480coalition_remove_active(coalition_t coal)
481{
482	coalition_lock(coal);
483
484	assert(!coal->reaped);
485	assert(coal->active_count > 0);
486
487	coal->active_count--;
488
489	boolean_t do_notify = FALSE;
490	uint64_t notify_id = 0;
491	uint32_t notify_flags = 0;
492	if (coal->termrequested && coal->active_count == 0) {
493		/* We only notify once, when active_count reaches zero.
494		 * We just decremented, so if it reached zero, we mustn't have
495		 * notified already.
496		 */
497		assert(!coal->terminated);
498		coal->terminated = TRUE;
499
500		assert(!coal->notified);
501
502		coal->notified = TRUE;
503		do_notify = TRUE;
504		notify_id = coal->id;
505		notify_flags = 0;
506	}
507
508	coalition_unlock(coal);
509
510	if (do_notify) {
511		coalition_notify_user(notify_id, notify_flags);
512	}
513}
514
515/* Used for kernel_task, launchd, launchd's early boot tasks... */
516kern_return_t
517coalition_default_adopt_task(task_t task)
518{
519	kern_return_t kr;
520	kr = coalition_adopt_task(default_coalition, task);
521	if (kr != KERN_SUCCESS) {
522		panic("failed to adopt task %p into default coalition: %d", task, kr);
523	}
524	return kr;
525}
526
527/*
528 * coalition_adopt_task
529 * Condition: Coalition must be referenced and unlocked. Will fail if coalition
530 * is already terminated.
531 */
532kern_return_t
533coalition_adopt_task(coalition_t coal, task_t task)
534{
535	if (task->coalition) {
536		return KERN_ALREADY_IN_SET;
537	}
538
539	coalition_lock(coal);
540
541	if (coal->reaped || coal->terminated) {
542		coalition_unlock(coal);
543		return KERN_TERMINATED;
544	}
545
546	coal->active_count++;
547
548	coal->ref_count++;
549	task->coalition = coal;
550
551	queue_enter(&coal->tasks, task, task_t, coalition_tasks);
552	coal->task_count++;
553
554	if(coal->task_count < coal->dead_task_count) {
555		panic("%s: coalition %p id %llu task_count < dead_task_count", __func__, coal, coal->id);
556	}
557
558	/* If moving from 0->1 active tasks */
559	if (coal->task_count - coal->dead_task_count == 1) {
560		coal->last_became_nonempty_time = mach_absolute_time();
561	}
562
563#if COALITION_DEBUG
564	uint32_t rc = coal->ref_count;
565#endif
566
567	coalition_unlock(coal);
568
569#if COALITION_DEBUG
570	if (rc) {
571		printf("%s: coal %llu ref_count++ -> %u\n", __func__, coal->id, rc);
572	}
573#endif
574	return KERN_SUCCESS;
575}
576
577/*
578 * coalition_remove_task
579 * Condition: task must be referenced and UNLOCKED; task's coalition must be UNLOCKED
580 */
581kern_return_t
582coalition_remove_task(task_t task)
583{
584	coalition_t coal = task->coalition;
585	assert(coal);
586
587	coalition_lock(coal);
588
589	queue_remove(&coal->tasks, task, task_t, coalition_tasks);
590	coal->dead_task_count++;
591
592	if(coal->task_count < coal->dead_task_count) {
593		panic("%s: coalition %p id %llu task_count < dead_task_count", __func__, coal, coal->id);
594	}
595
596	/* If moving from 1->0 active tasks */
597	if (coal->task_count - coal->dead_task_count == 0) {
598		uint64_t last_time_nonempty = mach_absolute_time() - coal->last_became_nonempty_time;
599		coal->last_became_nonempty_time = 0;
600		coal->time_nonempty += last_time_nonempty;
601	}
602
603	ledger_rollup(coal->ledger, task->ledger);
604	coal->bytesread += task->task_io_stats->disk_reads.size;
605	coal->byteswritten += task->task_io_stats->total_io.size - task->task_io_stats->disk_reads.size;
606	coal->gpu_time += task_gpu_utilisation(task);
607
608	coalition_unlock(coal);
609
610	coalition_remove_active(coal);
611	return KERN_SUCCESS;
612}
613
614/*
615 * coalition_terminate_internal
616 * Condition: Coalition must be referenced and UNLOCKED.
617 */
618kern_return_t
619coalition_request_terminate_internal(coalition_t coal)
620{
621	if (coal == default_coalition) {
622		return KERN_DEFAULT_SET;
623	}
624
625	coalition_lock(coal);
626
627	if (coal->reaped) {
628		coalition_unlock(coal);
629		return KERN_INVALID_NAME;
630	}
631
632	if (coal->terminated || coal->termrequested) {
633		coalition_unlock(coal);
634		return KERN_TERMINATED;
635	}
636
637	coal->termrequested = TRUE;
638
639	boolean_t do_notify = FALSE;
640	uint64_t note_id = 0;
641	uint32_t note_flags = 0;
642
643	if (coal->active_count == 0) {
644		/*
645		 * We only notify once, when active_count reaches zero.
646		 * We just decremented, so if it reached zero, we mustn't have
647		 * notified already.
648		 */
649		assert(!coal->terminated);
650		coal->terminated = TRUE;
651
652		assert(!coal->notified);
653
654		coal->notified = TRUE;
655		do_notify = TRUE;
656		note_id = coal->id;
657		note_flags = 0;
658	}
659
660	coalition_unlock(coal);
661
662	if (do_notify) {
663		coalition_notify_user(note_id, note_flags);
664	}
665
666	return KERN_SUCCESS;
667}
668
669/*
670 * coalition_reap_internal
671 * Condition: Coalition must be referenced and UNLOCKED.
672 */
673kern_return_t
674coalition_reap_internal(coalition_t coal)
675{
676	if (coal == default_coalition) {
677		return KERN_DEFAULT_SET;
678	}
679
680	coalition_lock(coal);
681	if (coal->reaped) {
682		coalition_unlock(coal);
683		return KERN_TERMINATED;
684	}
685	if (!coal->terminated) {
686		coalition_unlock(coal);
687		return KERN_FAILURE;
688	}
689	assert(coal->termrequested);
690	if (coal->active_count > 0) {
691		coalition_unlock(coal);
692		return KERN_FAILURE;
693	}
694
695	coal->reaped = TRUE;
696
697	/* Caller, launchd, and coalitions list should each have a reference */
698	assert(coal->ref_count > 2);
699
700	coalition_unlock(coal);
701
702	lck_mtx_lock(&coalitions_list_lock);
703	coalition_count--;
704	queue_remove(&coalitions, coal, coalition_t, coalitions);
705	lck_mtx_unlock(&coalitions_list_lock);
706
707	/* Release the list's reference and launchd's reference. */
708	coalition_release(coal);
709	coalition_release(coal);
710
711	return KERN_SUCCESS;
712}
713
714void
715coalition_init(void)
716{
717	coalition_zone = zinit(
718			sizeof(struct coalition),
719			CONFIG_COALITION_MAX * sizeof(struct coalition),
720			COALITION_CHUNK * sizeof(struct coalition),
721			"coalitions");
722	zone_change(coalition_zone, Z_NOENCRYPT, TRUE);
723	queue_init(&coalitions);
724
725	if (!PE_parse_boot_argn("unrestrict_coalition_syscalls", &unrestrict_coalition_syscalls,
726		sizeof (unrestrict_coalition_syscalls))) {
727		unrestrict_coalition_syscalls = 0;
728	}
729
730	lck_grp_attr_setdefault(&coalitions_lck_grp_attr);
731	lck_grp_init(&coalitions_lck_grp, "coalition", &coalitions_lck_grp_attr);
732	lck_attr_setdefault(&coalitions_lck_attr);
733	lck_mtx_init(&coalitions_list_lock, &coalitions_lck_grp, &coalitions_lck_attr);
734
735	init_task_ledgers();
736
737	kern_return_t kr = coalition_create_internal(&default_coalition, TRUE);
738	if (kr != KERN_SUCCESS) {
739		panic("%s: could not create default coalition: %d", __func__, kr);
740	}
741	/* "Leak" our reference to the global object */
742}
743
744