/*
 * Copyright (c) 2007 Apple Inc. All rights reserved.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
 *
 * This file contains Original Code and/or Modifications of Original Code
 * as defined in and that are subject to the Apple Public Source License
 * Version 2.0 (the 'License'). You may not use this file except in
 * compliance with the License. The rights granted to you under the License
 * may not be used to create, or enable the creation or redistribution of,
 * unlawful or unlicensed copies of an Apple operating system, or to
 * circumvent, violate, or enable the circumvention or violation of, any
 * terms of an Apple operating system software license agreement.
 *
 * Please obtain a copy of the License at
 * http://www.opensource.apple.com/apsl/ and read it before using this file.
 *
 * The Original Code and all software distributed under the License are
 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
 * Please see the License for the specific language governing rights and
 * limitations under the License.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
 */

#include <kern/affinity.h>
#include <kern/task.h>
#include <kern/kalloc.h>
#include <machine/cpu_affinity.h>

/*
 * Affinity involves 2 objects:
 * - affinity namespace:
 *	shared by a task family, this controls affinity tag lookup and
 *	allocation; it anchors all affinity sets in one namespace
 * - affinity set:
 *	anchors all threads that are members of this affinity set
 *	and hence share an affinity tag in the owning namespace.
 *
 * Locking:
 * - The task lock protects the creation of an affinity namespace.
 * - The affinity namespace mutex protects the inheritance of a namespace
 *   and its thread membership. This includes its destruction when the task
 *   reference count goes to zero.
 * - The thread mutex protects a thread's affinity set membership, but in
 *   addition, the thread_lock is taken to write thread->affinity_set since this
 *   field (representing the active affinity set) is read by the scheduler.
 *
 * The lock ordering is: task lock, thread mutex, namespace mutex, thread lock.
 */
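
/*
 * Illustrative usage (not part of this file): a user thread ordinarily
 * reaches thread_affinity_set() below through the THREAD_AFFINITY_POLICY
 * thread policy, e.g.:
 *
 *	thread_affinity_policy_data_t policy = { .affinity_tag = 1 };
 *	thread_policy_set(mach_thread_self(), THREAD_AFFINITY_POLICY,
 *	    (thread_policy_t)&policy, THREAD_AFFINITY_POLICY_COUNT);
 *
 * Threads in a task family sharing a non-null tag are hinted to run on
 * processors sharing a cache where possible.
 */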

#if AFFINITY_DEBUG
#define DBG(x...)	kprintf("DBG: " x)
#else
#define DBG(x...)
#endif

struct affinity_space {
	lck_mtx_t		aspc_lock;
	uint32_t		aspc_task_count;
	queue_head_t	aspc_affinities;
};
typedef struct affinity_space *affinity_space_t;
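
/*
 * The affinity set structure itself (struct affinity_set) is declared in
 * kern/affinity.h. As used below, it carries the tag (aset_tag), the chosen
 * physical affinity number and pset (aset_num, aset_pset), the owning
 * namespace (aset_space), the member threads (aset_threads,
 * aset_thread_count), and its linkage in the namespace (aset_affinities).
 */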

static affinity_space_t affinity_space_alloc(void);
static void affinity_space_free(affinity_space_t aspc);
static affinity_set_t affinity_set_alloc(void);
static void affinity_set_free(affinity_set_t aset);
static affinity_set_t affinity_set_find(affinity_space_t aspc, uint32_t tag);
static void affinity_set_place(affinity_space_t aspc, affinity_set_t aset);
static void affinity_set_add(affinity_set_t aset, thread_t thread);
static affinity_set_t affinity_set_remove(affinity_set_t aset, thread_t thread);

/*
 * The following globals may be modified by the sysctls
 *   kern.affinity_sets_enabled	- disables hinting if cleared
 *   kern.affinity_sets_mapping	- controls cache distribution policy
 * See bsd/kern_sysctl.c
 *
 * Affinity sets are not used on embedded platforms, which typically have
 * only a single pset and where last-processor affinity matters more than
 * pset affinity.
 */
boolean_t	affinity_sets_enabled = TRUE;
int		affinity_sets_mapping = 1;
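
/*
 * For example, hinting can be disabled system-wide from user space
 * (illustrative; requires the sysctl to be present in the build):
 *
 *	sysctl -w kern.affinity_sets_enabled=0
 */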

boolean_t
thread_affinity_is_supported(void)
{
	return (ml_get_max_affinity_sets() != 0);
}


/*
 * thread_affinity_get()
 * Return the affinity tag for a thread.
 * Called with the thread mutex held.
 */
uint32_t
thread_affinity_get(thread_t thread)
{
	uint32_t tag;

	if (thread->affinity_set != NULL)
		tag = thread->affinity_set->aset_tag;
	else
		tag = THREAD_AFFINITY_TAG_NULL;

	return tag;
}


/*
 * thread_affinity_set()
 * Place a thread in an affinity set identified by a tag.
 * Called with thread referenced but not locked.
 */
kern_return_t
thread_affinity_set(thread_t thread, uint32_t tag)
{
	affinity_set_t		aset;
	affinity_set_t		empty_aset = NULL;
	affinity_space_t	aspc;
	affinity_space_t	new_aspc = NULL;

	DBG("thread_affinity_set(%p,%u)\n", thread, tag);

	task_lock(thread->task);
	aspc = thread->task->affinity_space;
	if (aspc == NULL) {
		task_unlock(thread->task);
		new_aspc = affinity_space_alloc();
		if (new_aspc == NULL)
			return KERN_RESOURCE_SHORTAGE;
		task_lock(thread->task);
		if (thread->task->affinity_space == NULL) {
			thread->task->affinity_space = new_aspc;
			new_aspc = NULL;
		}
		aspc = thread->task->affinity_space;
	}
	task_unlock(thread->task);
	if (new_aspc)
		affinity_space_free(new_aspc);

	thread_mtx_lock(thread);
	if (!thread->active) {
		/* Another thread beat us to the mutex and this thread is dead */
		thread_mtx_unlock(thread);
		return KERN_TERMINATED;
	}

	lck_mtx_lock(&aspc->aspc_lock);
	aset = thread->affinity_set;
	if (aset != NULL) {
		/*
		 * Remove thread from current affinity set
		 */
		DBG("thread_affinity_set(%p,%u) removing from aset %p\n",
			thread, tag, aset);
		empty_aset = affinity_set_remove(aset, thread);
	}

	if (tag != THREAD_AFFINITY_TAG_NULL) {
		aset = affinity_set_find(aspc, tag);
		if (aset != NULL) {
			/*
			 * Add thread to existing affinity set
			 */
			DBG("thread_affinity_set(%p,%u) found aset %p\n",
				thread, tag, aset);
		} else {
			/*
			 * Create or reuse an affinity set, add this thread
			 * to it and place it in a suitable processor set.
			 */
			if (empty_aset != NULL) {
				aset = empty_aset;
				empty_aset = NULL;
			} else {
				aset = affinity_set_alloc();
				if (aset == NULL) {
					lck_mtx_unlock(&aspc->aspc_lock);
					thread_mtx_unlock(thread);
					return KERN_RESOURCE_SHORTAGE;
				}
			}
			DBG("thread_affinity_set(%p,%u) (re-)using aset %p\n",
				thread, tag, aset);
			aset->aset_tag = tag;
			affinity_set_place(aspc, aset);
		}
		affinity_set_add(aset, thread);
	}

	lck_mtx_unlock(&aspc->aspc_lock);
	thread_mtx_unlock(thread);

	/*
	 * If we wound up not using an empty aset we created,
	 * free it here.
	 */
	if (empty_aset != NULL)
		affinity_set_free(empty_aset);

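	/*
	 * If the calling thread changed its own affinity, block so that
	 * the scheduler re-evaluates its placement against the new
	 * affinity set.
	 */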
	if (thread == current_thread())
		thread_block(THREAD_CONTINUE_NULL);

	return KERN_SUCCESS;
}

/*
 * task_affinity_create()
 * Called from task create.
 */
void
task_affinity_create(task_t parent_task, task_t child_task)
{
	affinity_space_t	aspc = parent_task->affinity_space;

	DBG("task_affinity_create(%p,%p)\n", parent_task, child_task);

	assert(aspc);

	/*
	 * Bump the task reference count on the shared namespace and
	 * give it to the child.
	 */
	lck_mtx_lock(&aspc->aspc_lock);
	aspc->aspc_task_count++;
	child_task->affinity_space = aspc;
	lck_mtx_unlock(&aspc->aspc_lock);
}

/*
 * task_affinity_deallocate()
 * Called from task_deallocate() when there's a namespace to dereference.
 */
void
task_affinity_deallocate(task_t task)
{
	affinity_space_t	aspc = task->affinity_space;

	DBG("task_affinity_deallocate(%p) aspc %p task_count %d\n",
		task, aspc, aspc->aspc_task_count);

	lck_mtx_lock(&aspc->aspc_lock);
	if (--(aspc->aspc_task_count) == 0) {
		assert(queue_empty(&aspc->aspc_affinities));
		lck_mtx_unlock(&aspc->aspc_lock);
		affinity_space_free(aspc);
	} else {
		lck_mtx_unlock(&aspc->aspc_lock);
	}
}

/*
 * task_affinity_info()
 * Return affinity tag info (number, min, max) for the task.
 *
 * Conditions: task is locked.
 */
kern_return_t
task_affinity_info(
	task_t			task,
	task_info_t		task_info_out,
	mach_msg_type_number_t	*task_info_count)
{
	affinity_set_t			aset;
	affinity_space_t		aspc;
	task_affinity_tag_info_t	info;

	*task_info_count = TASK_AFFINITY_TAG_INFO_COUNT;
	info = (task_affinity_tag_info_t) task_info_out;
	info->set_count = 0;
	info->task_count = 0;
	info->min = THREAD_AFFINITY_TAG_NULL;
	info->max = THREAD_AFFINITY_TAG_NULL;

	aspc = task->affinity_space;
	if (aspc) {
		lck_mtx_lock(&aspc->aspc_lock);
		queue_iterate(&aspc->aspc_affinities,
				aset, affinity_set_t, aset_affinities) {
			info->set_count++;
			if (info->min == THREAD_AFFINITY_TAG_NULL ||
			    aset->aset_tag < (uint32_t) info->min)
				info->min = aset->aset_tag;
			if (info->max == THREAD_AFFINITY_TAG_NULL ||
			    aset->aset_tag > (uint32_t) info->max)
				info->max = aset->aset_tag;
		}
		info->task_count = aspc->aspc_task_count;
		lck_mtx_unlock(&aspc->aspc_lock);
	}
	return KERN_SUCCESS;
}

/*
 * thread_affinity_dup()
 * Called from thread_dup() during fork() with child's mutex held.
 * Set the child into the parent's affinity set.
 * Note the affinity space is shared.
 */
void
thread_affinity_dup(thread_t parent, thread_t child)
{
	affinity_set_t			aset;
	affinity_space_t		aspc;

	thread_mtx_lock(parent);
	aset = parent->affinity_set;
	DBG("thread_affinity_dup(%p,%p) aset %p\n", parent, child, aset);
	if (aset == NULL) {
		thread_mtx_unlock(parent);
		return;
	}

	aspc = aset->aset_space;
	assert(aspc == parent->task->affinity_space);
	assert(aspc == child->task->affinity_space);

	lck_mtx_lock(&aspc->aspc_lock);
	affinity_set_add(aset, child);
	lck_mtx_unlock(&aspc->aspc_lock);

	thread_mtx_unlock(parent);
}

/*
 * thread_affinity_terminate()
 * Remove thread from any affinity set.
 * Called with the thread mutex locked.
 */
void
thread_affinity_terminate(thread_t thread)
{
	affinity_set_t		aset = thread->affinity_set;
	affinity_space_t	aspc;

	DBG("thread_affinity_terminate(%p)\n", thread);

	aspc = aset->aset_space;
	lck_mtx_lock(&aspc->aspc_lock);
	if (affinity_set_remove(aset, thread)) {
		affinity_set_free(aset);
	}
	lck_mtx_unlock(&aspc->aspc_lock);
}

/*
 * thread_affinity_exec()
 * Called from execve() to cancel any current affinity - a new image implies
 * the calling thread terminates any expressed or inherited affinity.
 */
void
thread_affinity_exec(thread_t thread)
{
	if (thread->affinity_set != AFFINITY_SET_NULL)
		thread_affinity_terminate(thread);
}

/*
 * Create an empty affinity namespace data structure.
 */
static affinity_space_t
affinity_space_alloc(void)
{
	affinity_space_t	aspc;

	aspc = (affinity_space_t) kalloc(sizeof(struct affinity_space));
	if (aspc == NULL)
		return NULL;

	lck_mtx_init(&aspc->aspc_lock, &task_lck_grp, &task_lck_attr);
	queue_init(&aspc->aspc_affinities);
	aspc->aspc_task_count = 1;

	DBG("affinity_space_alloc() returns %p\n", aspc);
	return aspc;
}

/*
 * Destroy the given empty affinity namespace data structure.
 */
static void
affinity_space_free(affinity_space_t aspc)
{
	assert(queue_empty(&aspc->aspc_affinities));

	lck_mtx_destroy(&aspc->aspc_lock, &task_lck_grp);
	DBG("affinity_space_free(%p)\n", aspc);
	kfree(aspc, sizeof(struct affinity_space));
}


/*
 * Create an empty affinity set data structure;
 * affinity_set_place() later enters it into the namespace's list.
 */
static affinity_set_t
affinity_set_alloc(void)
{
	affinity_set_t	aset;

	aset = (affinity_set_t) kalloc(sizeof(struct affinity_set));
	if (aset == NULL)
		return NULL;

	aset->aset_thread_count = 0;
	queue_init(&aset->aset_affinities);
	queue_init(&aset->aset_threads);
	aset->aset_num = 0;
	aset->aset_pset = PROCESSOR_SET_NULL;
	aset->aset_space = NULL;

	DBG("affinity_set_alloc() returns %p\n", aset);
	return aset;
}

/*
 * Destroy the given empty affinity set data structure;
 * the caller (via affinity_set_remove()) has already unlinked it
 * from its namespace.
 */
static void
affinity_set_free(affinity_set_t aset)
{
	assert(queue_empty(&aset->aset_threads));

	DBG("affinity_set_free(%p)\n", aset);
	kfree(aset, sizeof(struct affinity_set));
}

/*
 * Add a thread to an affinity set.
 * The caller must have the thread mutex and space locked.
 */
static void
affinity_set_add(affinity_set_t aset, thread_t thread)
{
	spl_t	s;

	DBG("affinity_set_add(%p,%p)\n", aset, thread);
	queue_enter(&aset->aset_threads,
		thread, thread_t, affinity_threads);
	aset->aset_thread_count++;
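	/*
	 * Publish the new affinity set under the thread lock at splsched:
	 * the scheduler reads thread->affinity_set from interrupt-disabled
	 * context (see the locking notes at the top of this file).
	 */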
	s = splsched();
	thread_lock(thread);
	thread->affinity_set = affinity_sets_enabled ? aset : NULL;
	thread_unlock(thread);
	splx(s);
}

/*
 * Remove a thread from an affinity set returning the set if now empty.
 * The caller must have the thread mutex and space locked.
 */
static affinity_set_t
affinity_set_remove(affinity_set_t aset, thread_t thread)
{
	spl_t	s;

	s = splsched();
	thread_lock(thread);
	thread->affinity_set = NULL;
	thread_unlock(thread);
	splx(s);

	aset->aset_thread_count--;
	queue_remove(&aset->aset_threads,
		thread, thread_t, affinity_threads);
	if (queue_empty(&aset->aset_threads)) {
		queue_remove(&aset->aset_space->aspc_affinities,
				aset, affinity_set_t, aset_affinities);
		assert(aset->aset_thread_count == 0);
		aset->aset_tag = THREAD_AFFINITY_TAG_NULL;
		aset->aset_num = 0;
		aset->aset_pset = PROCESSOR_SET_NULL;
		aset->aset_space = NULL;
		DBG("affinity_set_remove(%p,%p) set now empty\n", aset, thread);
		return aset;
	} else {
		DBG("affinity_set_remove(%p,%p)\n", aset, thread);
		return NULL;
	}
}

/*
 * Find an affinity set in the parent task with the given affinity tag.
 * The caller must have the space locked.
 */
static affinity_set_t
affinity_set_find(affinity_space_t space, uint32_t tag)
{
	affinity_set_t	aset;

	queue_iterate(&space->aspc_affinities,
			aset, affinity_set_t, aset_affinities) {
		if (aset->aset_tag == tag) {
			DBG("affinity_set_find(%p,%u) finds %p\n",
			    space, tag, aset);
			return aset;
		}
	}
	DBG("affinity_set_find(%p,%u) not found\n", space, tag);
	return NULL;
}

/*
 * affinity_set_place() assigns an affinity set to a suitable processor_set.
 * The selection criterion is the physical affinity currently occupied by
 * the fewest affinity sets belonging to the owning task.
 * The caller must have the space locked.
 */
static void
affinity_set_place(affinity_space_t aspc, affinity_set_t new_aset)
{
	unsigned int	num_cpu_asets = ml_get_max_affinity_sets();
	unsigned int	set_occupancy[num_cpu_asets];
	unsigned int	i;
	unsigned int	i_least_occupied;
	affinity_set_t	aset;

	for (i = 0; i < num_cpu_asets; i++)
		set_occupancy[i] = 0;

	/*
	 * Scan the namespace's affinity sets, counting how many of them
	 * occupy each of the available physical affinities.
	 */
	queue_iterate(&aspc->aspc_affinities,
			aset, affinity_set_t, aset_affinities) {
		if (aset->aset_num < num_cpu_asets)
			set_occupancy[aset->aset_num]++;
		else
			panic("aset_num = %u in %s\n", aset->aset_num, __FUNCTION__);
	}

	/*
	 * Find the least occupied set (or the first empty set).
	 * To distribute placements somewhat, start searching from
	 * a cpu affinity chosen pseudo-randomly per namespace:
	 *   ((uintptr_t)aspc % 127) % num_cpu_asets
	 * unless this mapping policy is overridden.
	 */
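	/*
	 * For illustration (hypothetical numbers): with num_cpu_asets = 4,
	 * occupancy {1, 0, 2, 1} and a start index of 2, the cyclic scan
	 * visits 2 (occupancy 2), then 3 (occupancy 1, new minimum), then
	 * 0 (tie, ignored), then 1 (empty, chosen immediately): index 1 wins.
	 */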
	if (affinity_sets_mapping == 0)
		i_least_occupied = 0;
	else
		i_least_occupied = (unsigned int)(((uintptr_t)aspc % 127) % num_cpu_asets);
	for (i = 0; i < num_cpu_asets; i++) {
		unsigned int	j = (i_least_occupied + i) % num_cpu_asets;
		if (set_occupancy[j] == 0) {
			i_least_occupied = j;
			break;
		}
		if (set_occupancy[j] < set_occupancy[i_least_occupied])
			i_least_occupied = j;
	}
	new_aset->aset_num = i_least_occupied;
	new_aset->aset_pset = ml_affinity_to_pset(i_least_occupied);

	/* Add the new affinity set to the group */
	new_aset->aset_space = aspc;
	queue_enter(&aspc->aspc_affinities,
			new_aset, affinity_set_t, aset_affinities);

	DBG("affinity_set_place(%p,%p) selected affinity %u pset %p\n",
	    aspc, new_aset, new_aset->aset_num, new_aset->aset_pset);
}