1/*
2 * Copyright (c) 2007 Apple Inc. All rights reserved.
3 *
4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5 *
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. The rights granted to you under the License
10 * may not be used to create, or enable the creation or redistribution of,
11 * unlawful or unlicensed copies of an Apple operating system, or to
12 * circumvent, violate, or enable the circumvention or violation of, any
13 * terms of an Apple operating system software license agreement.
14 *
15 * Please obtain a copy of the License at
16 * http://www.opensource.apple.com/apsl/ and read it before using this file.
17 *
18 * The Original Code and all software distributed under the License are
19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23 * Please see the License for the specific language governing rights and
24 * limitations under the License.
25 *
26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27 */
28
29#include <kern/affinity.h>
30#include <kern/task.h>
31#include <kern/kalloc.h>
32#include <machine/cpu_affinity.h>
33
34/*
35 * Affinity involves 2 objects:
36 * - affinity namespace:
37 *	shared by a task family, this controls affinity tag lookup and
38 *	allocation; it anchors all affinity sets in one namespace
39 * - affinity set:
40 * 	anchors all threads with membership of this affinity set
41 *	and which share an affinity tag in the owning namespace.
42 *
43 * Locking:
44 * - The task lock protects the creation of an affinity namespace.
45 * - The affinity namespace mutex protects the inheritance of a namespace
46 *   and its thread membership. This includes its destruction when the task
47 *   reference count goes to zero.
 * - The thread mutex protects a thread's affinity set membership, but in
 *   addition, the thread_lock is taken to write thread->affinity_set since this
 *   field (representing the active affinity set) is read by the scheduler.
51 *
52 * The lock ordering is: task lock, thread mutex, namespace mutex, thread lock.
53 */
54
/*
 * Debug tracing.
 * DBG() forwards its arguments to kprintf() behind a "DBG: " prefix when
 * AFFINITY_DEBUG is non-zero; otherwise it expands to nothing, so its
 * arguments are not evaluated.
 */
#if !AFFINITY_DEBUG
#define DBG(x...)
#else
#define DBG(x...)	kprintf("DBG: " x)
#endif
60
61struct affinity_space {
62	lck_mtx_t		aspc_lock;
63	uint32_t		aspc_task_count;
64	queue_head_t	aspc_affinities;
65};
66typedef struct affinity_space *affinity_space_t;
67
68static affinity_space_t affinity_space_alloc(void);
69static void affinity_space_free(affinity_space_t aspc);
70static affinity_set_t affinity_set_alloc(void);
71static void affinity_set_free(affinity_set_t aset);
72static affinity_set_t affinity_set_find(affinity_space_t aspc, uint32_t tag);
73static void affinity_set_place(affinity_space_t aspc, affinity_set_t aset);
74static void affinity_set_add(affinity_set_t aset, thread_t thread);
75static affinity_set_t affinity_set_remove(affinity_set_t aset, thread_t thread);
76
77/*
78 * The following globals may be modified by the sysctls
79 *   kern.affinity_sets_enabled	- disables hinting if cleared
80 *   kern.affinity_sets_mapping	- controls cache distribution policy
81 * See bsd/kern_sysctl.c
82 */
83boolean_t	affinity_sets_enabled = TRUE;
84int		affinity_sets_mapping = 1;
85
86boolean_t
87thread_affinity_is_supported(void)
88{
89	return (ml_get_max_affinity_sets() != 0);
90}
91
92
93/*
94 * thread_affinity_get()
95 * Return the affinity tag for a thread.
96 * Called with the thread mutex held.
97 */
98uint32_t
99thread_affinity_get(thread_t thread)
100{
101	uint32_t tag;
102
103	if (thread->affinity_set != NULL)
104		tag = thread->affinity_set->aset_tag;
105	else
106		tag = THREAD_AFFINITY_TAG_NULL;
107
108	return tag;
109}
110
111
112/*
113 * thread_affinity_set()
114 * Place a thread in an affinity set identified by a tag.
115 * Called with thread referenced but not locked.
116 */
117kern_return_t
118thread_affinity_set(thread_t thread, uint32_t tag)
119{
120	affinity_set_t		aset;
121	affinity_set_t		empty_aset = NULL;
122	affinity_space_t	aspc;
123	affinity_space_t	new_aspc = NULL;
124
125	DBG("thread_affinity_set(%p,%u)\n", thread, tag);
126
127	task_lock(thread->task);
128	aspc = thread->task->affinity_space;
129	if (aspc == NULL) {
130		task_unlock(thread->task);
131		new_aspc = affinity_space_alloc();
132		if (new_aspc == NULL)
133			return KERN_RESOURCE_SHORTAGE;
134		task_lock(thread->task);
135		if (thread->task->affinity_space == NULL) {
136			thread->task->affinity_space = new_aspc;
137			new_aspc = NULL;
138		}
139		aspc = thread->task->affinity_space;
140	}
141	task_unlock(thread->task);
142	if (new_aspc)
143		affinity_space_free(new_aspc);
144
145	thread_mtx_lock(thread);
146	if (!thread->active) {
147		/* Beaten to lock and the thread is dead */
148		thread_mtx_unlock(thread);
149		return KERN_TERMINATED;
150	}
151
152	lck_mtx_lock(&aspc->aspc_lock);
153	aset = thread->affinity_set;
154	if (aset != NULL) {
155		/*
156		 * Remove thread from current affinity set
157		 */
158		DBG("thread_affinity_set(%p,%u) removing from aset %p\n",
159			thread, tag, aset);
160		empty_aset = affinity_set_remove(aset, thread);
161	}
162
163	if (tag != THREAD_AFFINITY_TAG_NULL) {
164		aset = affinity_set_find(aspc, tag);
165		if (aset != NULL) {
166			/*
167			 * Add thread to existing affinity set
168			 */
169			DBG("thread_affinity_set(%p,%u) found aset %p\n",
170				thread, tag, aset);
171		} else {
172			/*
173			 * Use the new affinity set, add this thread
174			 * and place it in a suitable processor set.
175			 */
176			if (empty_aset != NULL) {
177				aset = empty_aset;
178				empty_aset = NULL;
179			} else {
180				aset = affinity_set_alloc();
181				if (aset == NULL) {
182					lck_mtx_unlock(&aspc->aspc_lock);
183					thread_mtx_unlock(thread);
184					return KERN_RESOURCE_SHORTAGE;
185				}
186			}
187			DBG("thread_affinity_set(%p,%u) (re-)using aset %p\n",
188				thread, tag, aset);
189			aset->aset_tag = tag;
190			affinity_set_place(aspc, aset);
191		}
192		affinity_set_add(aset, thread);
193	}
194
195	lck_mtx_unlock(&aspc->aspc_lock);
196	thread_mtx_unlock(thread);
197
198	/*
199	 * If we wound up not using an empty aset we created,
200	 * free it here.
201	 */
202	if (empty_aset != NULL)
203		affinity_set_free(empty_aset);
204
205	if (thread == current_thread())
206	        thread_block(THREAD_CONTINUE_NULL);
207
208	return KERN_SUCCESS;
209}
210
211/*
212 * task_affinity_create()
213 * Called from task create.
214 */
215void
216task_affinity_create(task_t parent_task, task_t child_task)
217{
218	affinity_space_t	aspc = parent_task->affinity_space;
219
220	DBG("task_affinity_create(%p,%p)\n", parent_task, child_task);
221
222	assert(aspc);
223
224	/*
225	 * Bump the task reference count on the shared namespace and
226	 * give it to the child.
227	 */
228	lck_mtx_lock(&aspc->aspc_lock);
229	aspc->aspc_task_count++;
230	child_task->affinity_space = aspc;
231	lck_mtx_unlock(&aspc->aspc_lock);
232}
233
234/*
235 * task_affinity_deallocate()
236 * Called from task_deallocate() when there's a namespace to dereference.
237 */
238void
239task_affinity_deallocate(task_t	task)
240{
241	affinity_space_t	aspc = task->affinity_space;
242
243	DBG("task_affinity_deallocate(%p) aspc %p task_count %d\n",
244		task, aspc, aspc->aspc_task_count);
245
246	lck_mtx_lock(&aspc->aspc_lock);
247	if (--(aspc->aspc_task_count) == 0) {
248		assert(queue_empty(&aspc->aspc_affinities));
249		lck_mtx_unlock(&aspc->aspc_lock);
250		affinity_space_free(aspc);
251	} else {
252		lck_mtx_unlock(&aspc->aspc_lock);
253	}
254}
255
256/*
257 * task_affinity_info()
258 * Return affinity tag info (number, min, max) for the task.
259 *
260 * Conditions: task is locked.
261 */
262kern_return_t
263task_affinity_info(
264	task_t			task,
265	task_info_t		task_info_out,
266	mach_msg_type_number_t	*task_info_count)
267{
268	affinity_set_t			aset;
269	affinity_space_t		aspc;
270	task_affinity_tag_info_t	info;
271
272	*task_info_count = TASK_AFFINITY_TAG_INFO_COUNT;
273	info = (task_affinity_tag_info_t) task_info_out;
274	info->set_count = 0;
275	info->task_count = 0;
276	info->min = THREAD_AFFINITY_TAG_NULL;
277	info->max = THREAD_AFFINITY_TAG_NULL;
278
279	aspc = task->affinity_space;
280	if (aspc) {
281		lck_mtx_lock(&aspc->aspc_lock);
282		queue_iterate(&aspc->aspc_affinities,
283				 aset, affinity_set_t, aset_affinities) {
284			info->set_count++;
285			if (info->min == THREAD_AFFINITY_TAG_NULL ||
286			    aset->aset_tag < (uint32_t) info->min)
287				info->min = aset->aset_tag;
288			if (info->max == THREAD_AFFINITY_TAG_NULL ||
289			    aset->aset_tag > (uint32_t) info->max)
290				info->max = aset->aset_tag;
291		}
292		info->task_count = aspc->aspc_task_count;
293		lck_mtx_unlock(&aspc->aspc_lock);
294	}
295	return KERN_SUCCESS;
296}
297
298/*
299 * Called from thread_dup() during fork() with child's mutex held.
300 * Set the child into the parent's affinity set.
301 * Note the affinity space is shared.
302 */
303void
304thread_affinity_dup(thread_t parent, thread_t child)
305{
306	affinity_set_t			aset;
307	affinity_space_t		aspc;
308
309	thread_mtx_lock(parent);
310	aset = parent->affinity_set;
311	DBG("thread_affinity_dup(%p,%p) aset %p\n", parent, child, aset);
312	if (aset == NULL) {
313		thread_mtx_unlock(parent);
314		return;
315	}
316
317	aspc = aset->aset_space;
318	assert(aspc == parent->task->affinity_space);
319	assert(aspc == child->task->affinity_space);
320
321	lck_mtx_lock(&aspc->aspc_lock);
322	affinity_set_add(aset, child);
323	lck_mtx_unlock(&aspc->aspc_lock);
324
325	thread_mtx_unlock(parent);
326}
327
328/*
329 * thread_affinity_terminate()
330 * Remove thread from any affinity set.
331 * Called with the thread mutex locked.
332 */
333void
334thread_affinity_terminate(thread_t thread)
335{
336	affinity_set_t		aset = thread->affinity_set;
337	affinity_space_t	aspc;
338
339	DBG("thread_affinity_terminate(%p)\n", thread);
340
341	aspc = aset->aset_space;
342	lck_mtx_lock(&aspc->aspc_lock);
343	if (affinity_set_remove(aset, thread)) {
344		affinity_set_free(aset);
345	}
346	lck_mtx_unlock(&aspc->aspc_lock);
347}
348
349/*
350 * thread_affinity_exec()
351 * Called from execve() to cancel any current affinity - a new image implies
352 * the calling thread terminates any expressed or inherited affinity.
353 */
354void
355thread_affinity_exec(thread_t thread)
356{
357	if (thread->affinity_set != AFFINITY_SET_NULL)
358		thread_affinity_terminate(thread);
359}
360
361/*
362 * Create an empty affinity namespace data structure.
363 */
364static affinity_space_t
365affinity_space_alloc(void)
366{
367	affinity_space_t	aspc;
368
369	aspc = (affinity_space_t) kalloc(sizeof(struct affinity_space));
370	if (aspc == NULL)
371		return NULL;
372
373	lck_mtx_init(&aspc->aspc_lock, &task_lck_grp, &task_lck_attr);
374	queue_init(&aspc->aspc_affinities);
375	aspc->aspc_task_count = 1;
376
377	DBG("affinity_space_create() returns %p\n", aspc);
378	return aspc;
379}
380
381/*
382 * Destroy the given empty affinity namespace data structure.
383 */
384static void
385affinity_space_free(affinity_space_t aspc)
386{
387	assert(queue_empty(&aspc->aspc_affinities));
388
389	DBG("affinity_space_free(%p)\n", aspc);
390	kfree(aspc, sizeof(struct affinity_space));
391}
392
393
394/*
395 * Create an empty affinity set data structure
396 * entering it into a list anchored by the owning task.
397 */
398static affinity_set_t
399affinity_set_alloc(void)
400{
401	affinity_set_t	aset;
402
403	aset = (affinity_set_t) kalloc(sizeof(struct affinity_set));
404	if (aset == NULL)
405		return NULL;
406
407	aset->aset_thread_count = 0;
408	queue_init(&aset->aset_affinities);
409	queue_init(&aset->aset_threads);
410	aset->aset_num = 0;
411	aset->aset_pset = PROCESSOR_SET_NULL;
412	aset->aset_space = NULL;
413
414	DBG("affinity_set_create() returns %p\n", aset);
415	return aset;
416}
417
418/*
419 * Destroy the given empty affinity set data structure
420 * after removing it from the parent task.
421 */
422static void
423affinity_set_free(affinity_set_t aset)
424{
425	assert(queue_empty(&aset->aset_threads));
426
427	DBG("affinity_set_free(%p)\n", aset);
428	kfree(aset, sizeof(struct affinity_set));
429}
430
431/*
432 * Add a thread to an affinity set.
433 * The caller must have the thread mutex and space locked.
434 */
435static void
436affinity_set_add(affinity_set_t aset, thread_t thread)
437{
438	spl_t	s;
439
440	DBG("affinity_set_add(%p,%p)\n", aset, thread);
441	queue_enter(&aset->aset_threads,
442		thread, thread_t, affinity_threads);
443	aset->aset_thread_count++;
444	s = splsched();
445	thread_lock(thread);
446	thread->affinity_set = affinity_sets_enabled ? aset : NULL;
447	thread_unlock(thread);
448	splx(s);
449}
450
451/*
452 * Remove a thread from an affinity set returning the set if now empty.
453 * The caller must have the thread mutex and space locked.
454 */
455static affinity_set_t
456affinity_set_remove(affinity_set_t aset, thread_t thread)
457{
458	spl_t	s;
459
460	s = splsched();
461	thread_lock(thread);
462	thread->affinity_set = NULL;
463	thread_unlock(thread);
464	splx(s);
465
466	aset->aset_thread_count--;
467	queue_remove(&aset->aset_threads,
468		thread, thread_t, affinity_threads);
469	if (queue_empty(&aset->aset_threads)) {
470		queue_remove(&aset->aset_space->aspc_affinities,
471				aset, affinity_set_t, aset_affinities);
472		assert(aset->aset_thread_count == 0);
473		aset->aset_tag = THREAD_AFFINITY_TAG_NULL;
474		aset->aset_num = 0;
475		aset->aset_pset = PROCESSOR_SET_NULL;
476		aset->aset_space = NULL;
477		DBG("affinity_set_remove(%p,%p) set now empty\n", aset, thread);
478		return aset;
479	} else {
480		DBG("affinity_set_remove(%p,%p)\n", aset, thread);
481		return NULL;
482	}
483}
484
485/*
486 * Find an affinity set in the parent task with the given affinity tag.
487 * The caller must have the space locked.
488 */
489static affinity_set_t
490affinity_set_find(affinity_space_t space, uint32_t tag)
491{
492	affinity_set_t	aset;
493
494	queue_iterate(&space->aspc_affinities,
495			 aset, affinity_set_t, aset_affinities) {
496		if (aset->aset_tag == tag) {
497			DBG("affinity_set_find(%p,%u) finds %p\n",
498		 	    space, tag, aset);
499			return aset;
500		}
501	}
502	DBG("affinity_set_find(%p,%u) not found\n", space, tag);
503	return NULL;
504}
505
506/*
507 * affinity_set_place() assigns an affinity set to a suitable processor_set.
508 * The selection criteria is:
509 *  - the set currently occupied by the least number of affinities
510 *    belonging to the owning the task.
511 * The caller must have the space locked.
512 */
513static void
514affinity_set_place(affinity_space_t aspc, affinity_set_t new_aset)
515{
516	unsigned int	num_cpu_asets = ml_get_max_affinity_sets();
517	unsigned int	set_occupancy[num_cpu_asets];
518	unsigned int	i;
519	unsigned int	i_least_occupied;
520	affinity_set_t	aset;
521
522	for (i = 0; i < num_cpu_asets; i++)
523		set_occupancy[i] = 0;
524
525	/*
526	 * Scan the affinity sets calculating the number of sets
527	 * occupy the available physical affinities.
528	 */
529	queue_iterate(&aspc->aspc_affinities,
530			 aset, affinity_set_t, aset_affinities) {
531		if(aset->aset_num < num_cpu_asets)
532			set_occupancy[aset->aset_num]++;
533		else
534			panic("aset_num = %d in %s\n", aset->aset_num, __FUNCTION__);
535	}
536
537	/*
538	 * Find the least occupied set (or the first empty set).
539	 * To distribute placements somewhat, start searching from
540	 * a cpu affinity chosen randomly per namespace:
541	 *   [(unsigned int)aspc % 127] % num_cpu_asets
542	 * unless this mapping policy is overridden.
543	 */
544	if (affinity_sets_mapping == 0)
545		i_least_occupied = 0;
546	else
547		i_least_occupied = (unsigned int)(((uintptr_t)aspc % 127) % num_cpu_asets);
548	for (i = 0; i < num_cpu_asets; i++) {
549		unsigned int	j = (i_least_occupied + i) % num_cpu_asets;
550		if (set_occupancy[j] == 0) {
551			i_least_occupied = j;
552			break;
553		}
554		if (set_occupancy[j] < set_occupancy[i_least_occupied])
555			i_least_occupied = j;
556	}
557	new_aset->aset_num = i_least_occupied;
558	new_aset->aset_pset = ml_affinity_to_pset(i_least_occupied);
559
560	/* Add the new affinity set to the group */
561	new_aset->aset_space = aspc;
562	queue_enter(&aspc->aspc_affinities,
563			new_aset, affinity_set_t, aset_affinities);
564
565	DBG("affinity_set_place(%p,%p) selected affinity %u pset %p\n",
566	    aspc, new_aset, new_aset->aset_num, new_aset->aset_pset);
567}
568