1/* 2 * Copyright (c) 2007 Apple Inc. All rights reserved. 3 * 4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ 5 * 6 * This file contains Original Code and/or Modifications of Original Code 7 * as defined in and that are subject to the Apple Public Source License 8 * Version 2.0 (the 'License'). You may not use this file except in 9 * compliance with the License. The rights granted to you under the License 10 * may not be used to create, or enable the creation or redistribution of, 11 * unlawful or unlicensed copies of an Apple operating system, or to 12 * circumvent, violate, or enable the circumvention or violation of, any 13 * terms of an Apple operating system software license agreement. 14 * 15 * Please obtain a copy of the License at 16 * http://www.opensource.apple.com/apsl/ and read it before using this file. 17 * 18 * The Original Code and all software distributed under the License are 19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER 20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, 21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, 22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. 23 * Please see the License for the specific language governing rights and 24 * limitations under the License. 25 * 26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ 27 */ 28 29#include <kern/affinity.h> 30#include <kern/task.h> 31#include <kern/kalloc.h> 32#include <machine/cpu_affinity.h> 33 34/* 35 * Affinity involves 2 objects: 36 * - affinity namespace: 37 * shared by a task family, this controls affinity tag lookup and 38 * allocation; it anchors all affinity sets in one namespace 39 * - affinity set: 40 * anchors all threads with membership of this affinity set 41 * and which share an affinity tag in the owning namespace. 42 * 43 * Locking: 44 * - The task lock protects the creation of an affinity namespace. 45 * - The affinity namespace mutex protects the inheritance of a namespace 46 * and its thread membership. This includes its destruction when the task 47 * reference count goes to zero. 48 * - The thread mutex protects a thread's affinity set membership, but in 49 * addition, the thread_lock is taken to write thread->affinity_set since this 50 * field (representng the active affinity set) is read by the scheduler. 51 * 52 * The lock ordering is: task lock, thread mutex, namespace mutex, thread lock. 53 */ 54 55#if AFFINITY_DEBUG 56#define DBG(x...) kprintf("DBG: " x) 57#else 58#define DBG(x...) 59#endif 60 61struct affinity_space { 62 lck_mtx_t aspc_lock; 63 uint32_t aspc_task_count; 64 queue_head_t aspc_affinities; 65}; 66typedef struct affinity_space *affinity_space_t; 67 68static affinity_space_t affinity_space_alloc(void); 69static void affinity_space_free(affinity_space_t aspc); 70static affinity_set_t affinity_set_alloc(void); 71static void affinity_set_free(affinity_set_t aset); 72static affinity_set_t affinity_set_find(affinity_space_t aspc, uint32_t tag); 73static void affinity_set_place(affinity_space_t aspc, affinity_set_t aset); 74static void affinity_set_add(affinity_set_t aset, thread_t thread); 75static affinity_set_t affinity_set_remove(affinity_set_t aset, thread_t thread); 76 77/* 78 * The following globals may be modified by the sysctls 79 * kern.affinity_sets_enabled - disables hinting if cleared 80 * kern.affinity_sets_mapping - controls cache distribution policy 81 * See bsd/kern_sysctl.c 82 */ 83boolean_t affinity_sets_enabled = TRUE; 84int affinity_sets_mapping = 1; 85 86boolean_t 87thread_affinity_is_supported(void) 88{ 89 return (ml_get_max_affinity_sets() != 0); 90} 91 92 93/* 94 * thread_affinity_get() 95 * Return the affinity tag for a thread. 96 * Called with the thread mutex held. 97 */ 98uint32_t 99thread_affinity_get(thread_t thread) 100{ 101 uint32_t tag; 102 103 if (thread->affinity_set != NULL) 104 tag = thread->affinity_set->aset_tag; 105 else 106 tag = THREAD_AFFINITY_TAG_NULL; 107 108 return tag; 109} 110 111 112/* 113 * thread_affinity_set() 114 * Place a thread in an affinity set identified by a tag. 115 * Called with thread referenced but not locked. 116 */ 117kern_return_t 118thread_affinity_set(thread_t thread, uint32_t tag) 119{ 120 affinity_set_t aset; 121 affinity_set_t empty_aset = NULL; 122 affinity_space_t aspc; 123 affinity_space_t new_aspc = NULL; 124 125 DBG("thread_affinity_set(%p,%u)\n", thread, tag); 126 127 task_lock(thread->task); 128 aspc = thread->task->affinity_space; 129 if (aspc == NULL) { 130 task_unlock(thread->task); 131 new_aspc = affinity_space_alloc(); 132 if (new_aspc == NULL) 133 return KERN_RESOURCE_SHORTAGE; 134 task_lock(thread->task); 135 if (thread->task->affinity_space == NULL) { 136 thread->task->affinity_space = new_aspc; 137 new_aspc = NULL; 138 } 139 aspc = thread->task->affinity_space; 140 } 141 task_unlock(thread->task); 142 if (new_aspc) 143 affinity_space_free(new_aspc); 144 145 thread_mtx_lock(thread); 146 if (!thread->active) { 147 /* Beaten to lock and the thread is dead */ 148 thread_mtx_unlock(thread); 149 return KERN_TERMINATED; 150 } 151 152 lck_mtx_lock(&aspc->aspc_lock); 153 aset = thread->affinity_set; 154 if (aset != NULL) { 155 /* 156 * Remove thread from current affinity set 157 */ 158 DBG("thread_affinity_set(%p,%u) removing from aset %p\n", 159 thread, tag, aset); 160 empty_aset = affinity_set_remove(aset, thread); 161 } 162 163 if (tag != THREAD_AFFINITY_TAG_NULL) { 164 aset = affinity_set_find(aspc, tag); 165 if (aset != NULL) { 166 /* 167 * Add thread to existing affinity set 168 */ 169 DBG("thread_affinity_set(%p,%u) found aset %p\n", 170 thread, tag, aset); 171 } else { 172 /* 173 * Use the new affinity set, add this thread 174 * and place it in a suitable processor set. 175 */ 176 if (empty_aset != NULL) { 177 aset = empty_aset; 178 empty_aset = NULL; 179 } else { 180 aset = affinity_set_alloc(); 181 if (aset == NULL) { 182 lck_mtx_unlock(&aspc->aspc_lock); 183 thread_mtx_unlock(thread); 184 return KERN_RESOURCE_SHORTAGE; 185 } 186 } 187 DBG("thread_affinity_set(%p,%u) (re-)using aset %p\n", 188 thread, tag, aset); 189 aset->aset_tag = tag; 190 affinity_set_place(aspc, aset); 191 } 192 affinity_set_add(aset, thread); 193 } 194 195 lck_mtx_unlock(&aspc->aspc_lock); 196 thread_mtx_unlock(thread); 197 198 /* 199 * If we wound up not using an empty aset we created, 200 * free it here. 201 */ 202 if (empty_aset != NULL) 203 affinity_set_free(empty_aset); 204 205 if (thread == current_thread()) 206 thread_block(THREAD_CONTINUE_NULL); 207 208 return KERN_SUCCESS; 209} 210 211/* 212 * task_affinity_create() 213 * Called from task create. 214 */ 215void 216task_affinity_create(task_t parent_task, task_t child_task) 217{ 218 affinity_space_t aspc = parent_task->affinity_space; 219 220 DBG("task_affinity_create(%p,%p)\n", parent_task, child_task); 221 222 assert(aspc); 223 224 /* 225 * Bump the task reference count on the shared namespace and 226 * give it to the child. 227 */ 228 lck_mtx_lock(&aspc->aspc_lock); 229 aspc->aspc_task_count++; 230 child_task->affinity_space = aspc; 231 lck_mtx_unlock(&aspc->aspc_lock); 232} 233 234/* 235 * task_affinity_deallocate() 236 * Called from task_deallocate() when there's a namespace to dereference. 237 */ 238void 239task_affinity_deallocate(task_t task) 240{ 241 affinity_space_t aspc = task->affinity_space; 242 243 DBG("task_affinity_deallocate(%p) aspc %p task_count %d\n", 244 task, aspc, aspc->aspc_task_count); 245 246 lck_mtx_lock(&aspc->aspc_lock); 247 if (--(aspc->aspc_task_count) == 0) { 248 assert(queue_empty(&aspc->aspc_affinities)); 249 lck_mtx_unlock(&aspc->aspc_lock); 250 affinity_space_free(aspc); 251 } else { 252 lck_mtx_unlock(&aspc->aspc_lock); 253 } 254} 255 256/* 257 * task_affinity_info() 258 * Return affinity tag info (number, min, max) for the task. 259 * 260 * Conditions: task is locked. 261 */ 262kern_return_t 263task_affinity_info( 264 task_t task, 265 task_info_t task_info_out, 266 mach_msg_type_number_t *task_info_count) 267{ 268 affinity_set_t aset; 269 affinity_space_t aspc; 270 task_affinity_tag_info_t info; 271 272 *task_info_count = TASK_AFFINITY_TAG_INFO_COUNT; 273 info = (task_affinity_tag_info_t) task_info_out; 274 info->set_count = 0; 275 info->task_count = 0; 276 info->min = THREAD_AFFINITY_TAG_NULL; 277 info->max = THREAD_AFFINITY_TAG_NULL; 278 279 aspc = task->affinity_space; 280 if (aspc) { 281 lck_mtx_lock(&aspc->aspc_lock); 282 queue_iterate(&aspc->aspc_affinities, 283 aset, affinity_set_t, aset_affinities) { 284 info->set_count++; 285 if (info->min == THREAD_AFFINITY_TAG_NULL || 286 aset->aset_tag < (uint32_t) info->min) 287 info->min = aset->aset_tag; 288 if (info->max == THREAD_AFFINITY_TAG_NULL || 289 aset->aset_tag > (uint32_t) info->max) 290 info->max = aset->aset_tag; 291 } 292 info->task_count = aspc->aspc_task_count; 293 lck_mtx_unlock(&aspc->aspc_lock); 294 } 295 return KERN_SUCCESS; 296} 297 298/* 299 * Called from thread_dup() during fork() with child's mutex held. 300 * Set the child into the parent's affinity set. 301 * Note the affinity space is shared. 302 */ 303void 304thread_affinity_dup(thread_t parent, thread_t child) 305{ 306 affinity_set_t aset; 307 affinity_space_t aspc; 308 309 thread_mtx_lock(parent); 310 aset = parent->affinity_set; 311 DBG("thread_affinity_dup(%p,%p) aset %p\n", parent, child, aset); 312 if (aset == NULL) { 313 thread_mtx_unlock(parent); 314 return; 315 } 316 317 aspc = aset->aset_space; 318 assert(aspc == parent->task->affinity_space); 319 assert(aspc == child->task->affinity_space); 320 321 lck_mtx_lock(&aspc->aspc_lock); 322 affinity_set_add(aset, child); 323 lck_mtx_unlock(&aspc->aspc_lock); 324 325 thread_mtx_unlock(parent); 326} 327 328/* 329 * thread_affinity_terminate() 330 * Remove thread from any affinity set. 331 * Called with the thread mutex locked. 332 */ 333void 334thread_affinity_terminate(thread_t thread) 335{ 336 affinity_set_t aset = thread->affinity_set; 337 affinity_space_t aspc; 338 339 DBG("thread_affinity_terminate(%p)\n", thread); 340 341 aspc = aset->aset_space; 342 lck_mtx_lock(&aspc->aspc_lock); 343 if (affinity_set_remove(aset, thread)) { 344 affinity_set_free(aset); 345 } 346 lck_mtx_unlock(&aspc->aspc_lock); 347} 348 349/* 350 * thread_affinity_exec() 351 * Called from execve() to cancel any current affinity - a new image implies 352 * the calling thread terminates any expressed or inherited affinity. 353 */ 354void 355thread_affinity_exec(thread_t thread) 356{ 357 if (thread->affinity_set != AFFINITY_SET_NULL) 358 thread_affinity_terminate(thread); 359} 360 361/* 362 * Create an empty affinity namespace data structure. 363 */ 364static affinity_space_t 365affinity_space_alloc(void) 366{ 367 affinity_space_t aspc; 368 369 aspc = (affinity_space_t) kalloc(sizeof(struct affinity_space)); 370 if (aspc == NULL) 371 return NULL; 372 373 lck_mtx_init(&aspc->aspc_lock, &task_lck_grp, &task_lck_attr); 374 queue_init(&aspc->aspc_affinities); 375 aspc->aspc_task_count = 1; 376 377 DBG("affinity_space_create() returns %p\n", aspc); 378 return aspc; 379} 380 381/* 382 * Destroy the given empty affinity namespace data structure. 383 */ 384static void 385affinity_space_free(affinity_space_t aspc) 386{ 387 assert(queue_empty(&aspc->aspc_affinities)); 388 389 DBG("affinity_space_free(%p)\n", aspc); 390 kfree(aspc, sizeof(struct affinity_space)); 391} 392 393 394/* 395 * Create an empty affinity set data structure 396 * entering it into a list anchored by the owning task. 397 */ 398static affinity_set_t 399affinity_set_alloc(void) 400{ 401 affinity_set_t aset; 402 403 aset = (affinity_set_t) kalloc(sizeof(struct affinity_set)); 404 if (aset == NULL) 405 return NULL; 406 407 aset->aset_thread_count = 0; 408 queue_init(&aset->aset_affinities); 409 queue_init(&aset->aset_threads); 410 aset->aset_num = 0; 411 aset->aset_pset = PROCESSOR_SET_NULL; 412 aset->aset_space = NULL; 413 414 DBG("affinity_set_create() returns %p\n", aset); 415 return aset; 416} 417 418/* 419 * Destroy the given empty affinity set data structure 420 * after removing it from the parent task. 421 */ 422static void 423affinity_set_free(affinity_set_t aset) 424{ 425 assert(queue_empty(&aset->aset_threads)); 426 427 DBG("affinity_set_free(%p)\n", aset); 428 kfree(aset, sizeof(struct affinity_set)); 429} 430 431/* 432 * Add a thread to an affinity set. 433 * The caller must have the thread mutex and space locked. 434 */ 435static void 436affinity_set_add(affinity_set_t aset, thread_t thread) 437{ 438 spl_t s; 439 440 DBG("affinity_set_add(%p,%p)\n", aset, thread); 441 queue_enter(&aset->aset_threads, 442 thread, thread_t, affinity_threads); 443 aset->aset_thread_count++; 444 s = splsched(); 445 thread_lock(thread); 446 thread->affinity_set = affinity_sets_enabled ? aset : NULL; 447 thread_unlock(thread); 448 splx(s); 449} 450 451/* 452 * Remove a thread from an affinity set returning the set if now empty. 453 * The caller must have the thread mutex and space locked. 454 */ 455static affinity_set_t 456affinity_set_remove(affinity_set_t aset, thread_t thread) 457{ 458 spl_t s; 459 460 s = splsched(); 461 thread_lock(thread); 462 thread->affinity_set = NULL; 463 thread_unlock(thread); 464 splx(s); 465 466 aset->aset_thread_count--; 467 queue_remove(&aset->aset_threads, 468 thread, thread_t, affinity_threads); 469 if (queue_empty(&aset->aset_threads)) { 470 queue_remove(&aset->aset_space->aspc_affinities, 471 aset, affinity_set_t, aset_affinities); 472 assert(aset->aset_thread_count == 0); 473 aset->aset_tag = THREAD_AFFINITY_TAG_NULL; 474 aset->aset_num = 0; 475 aset->aset_pset = PROCESSOR_SET_NULL; 476 aset->aset_space = NULL; 477 DBG("affinity_set_remove(%p,%p) set now empty\n", aset, thread); 478 return aset; 479 } else { 480 DBG("affinity_set_remove(%p,%p)\n", aset, thread); 481 return NULL; 482 } 483} 484 485/* 486 * Find an affinity set in the parent task with the given affinity tag. 487 * The caller must have the space locked. 488 */ 489static affinity_set_t 490affinity_set_find(affinity_space_t space, uint32_t tag) 491{ 492 affinity_set_t aset; 493 494 queue_iterate(&space->aspc_affinities, 495 aset, affinity_set_t, aset_affinities) { 496 if (aset->aset_tag == tag) { 497 DBG("affinity_set_find(%p,%u) finds %p\n", 498 space, tag, aset); 499 return aset; 500 } 501 } 502 DBG("affinity_set_find(%p,%u) not found\n", space, tag); 503 return NULL; 504} 505 506/* 507 * affinity_set_place() assigns an affinity set to a suitable processor_set. 508 * The selection criteria is: 509 * - the set currently occupied by the least number of affinities 510 * belonging to the owning the task. 511 * The caller must have the space locked. 512 */ 513static void 514affinity_set_place(affinity_space_t aspc, affinity_set_t new_aset) 515{ 516 unsigned int num_cpu_asets = ml_get_max_affinity_sets(); 517 unsigned int set_occupancy[num_cpu_asets]; 518 unsigned int i; 519 unsigned int i_least_occupied; 520 affinity_set_t aset; 521 522 for (i = 0; i < num_cpu_asets; i++) 523 set_occupancy[i] = 0; 524 525 /* 526 * Scan the affinity sets calculating the number of sets 527 * occupy the available physical affinities. 528 */ 529 queue_iterate(&aspc->aspc_affinities, 530 aset, affinity_set_t, aset_affinities) { 531 if(aset->aset_num < num_cpu_asets) 532 set_occupancy[aset->aset_num]++; 533 else 534 panic("aset_num = %d in %s\n", aset->aset_num, __FUNCTION__); 535 } 536 537 /* 538 * Find the least occupied set (or the first empty set). 539 * To distribute placements somewhat, start searching from 540 * a cpu affinity chosen randomly per namespace: 541 * [(unsigned int)aspc % 127] % num_cpu_asets 542 * unless this mapping policy is overridden. 543 */ 544 if (affinity_sets_mapping == 0) 545 i_least_occupied = 0; 546 else 547 i_least_occupied = (unsigned int)(((uintptr_t)aspc % 127) % num_cpu_asets); 548 for (i = 0; i < num_cpu_asets; i++) { 549 unsigned int j = (i_least_occupied + i) % num_cpu_asets; 550 if (set_occupancy[j] == 0) { 551 i_least_occupied = j; 552 break; 553 } 554 if (set_occupancy[j] < set_occupancy[i_least_occupied]) 555 i_least_occupied = j; 556 } 557 new_aset->aset_num = i_least_occupied; 558 new_aset->aset_pset = ml_affinity_to_pset(i_least_occupied); 559 560 /* Add the new affinity set to the group */ 561 new_aset->aset_space = aspc; 562 queue_enter(&aspc->aspc_affinities, 563 new_aset, affinity_set_t, aset_affinities); 564 565 DBG("affinity_set_place(%p,%p) selected affinity %u pset %p\n", 566 aspc, new_aset, new_aset->aset_num, new_aset->aset_pset); 567} 568