1/* 2 * Copyright (c) 2007 Apple Inc. All rights reserved. 3 * 4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ 5 * 6 * This file contains Original Code and/or Modifications of Original Code 7 * as defined in and that are subject to the Apple Public Source License 8 * Version 2.0 (the 'License'). You may not use this file except in 9 * compliance with the License. The rights granted to you under the License 10 * may not be used to create, or enable the creation or redistribution of, 11 * unlawful or unlicensed copies of an Apple operating system, or to 12 * circumvent, violate, or enable the circumvention or violation of, any 13 * terms of an Apple operating system software license agreement. 14 * 15 * Please obtain a copy of the License at 16 * http://www.opensource.apple.com/apsl/ and read it before using this file. 17 * 18 * The Original Code and all software distributed under the License are 19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER 20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, 21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, 22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. 23 * Please see the License for the specific language governing rights and 24 * limitations under the License. 25 * 26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ 27 */ 28 29#include <kern/affinity.h> 30#include <kern/task.h> 31#include <kern/kalloc.h> 32#include <machine/cpu_affinity.h> 33 34/* 35 * Affinity involves 2 objects: 36 * - affinity namespace: 37 * shared by a task family, this controls affinity tag lookup and 38 * allocation; it anchors all affinity sets in one namespace 39 * - affinity set: 40 * anchors all threads with membership of this affinity set 41 * and which share an affinity tag in the owning namespace. 42 * 43 * Locking: 44 * - The task lock protects the creation of an affinity namespace. 
 * - The affinity namespace mutex protects the inheritance of a namespace
 *   and its thread membership. This includes its destruction when the task
 *   reference count goes to zero.
 * - The thread mutex protects a thread's affinity set membership, but in
 *   addition, the thread_lock is taken to write thread->affinity_set since this
 *   field (representing the active affinity set) is read by the scheduler.
 *
 * The lock ordering is: task lock, thread mutex, namespace mutex, thread lock.
 */

#if AFFINITY_DEBUG
#define DBG(x...)	kprintf("DBG: " x)
#else
#define DBG(x...)
#endif

/*
 * An affinity namespace: shared by a task family, it anchors every
 * affinity set created under it and scopes tag lookup/allocation.
 */
struct affinity_space {
	lck_mtx_t	aspc_lock;		/* protects the fields below and set membership */
	uint32_t	aspc_task_count;	/* number of tasks referencing this namespace */
	queue_head_t	aspc_affinities;	/* all affinity sets in this namespace */
};
typedef struct affinity_space *affinity_space_t;

/* Internal helpers; all set-manipulation helpers require aspc_lock held. */
static affinity_space_t affinity_space_alloc(void);
static void affinity_space_free(affinity_space_t aspc);
static affinity_set_t affinity_set_alloc(void);
static void affinity_set_free(affinity_set_t aset);
static affinity_set_t affinity_set_find(affinity_space_t aspc, uint32_t tag);
static void affinity_set_place(affinity_space_t aspc, affinity_set_t aset);
static void affinity_set_add(affinity_set_t aset, thread_t thread);
static affinity_set_t affinity_set_remove(affinity_set_t aset, thread_t thread);

/*
 * The following globals may be modified by the sysctls
 *   kern.affinity_sets_enabled	- disables hinting if cleared
 *   kern.affinity_sets_mapping	- controls cache distribution policy
 * See bsd/kern_sysctl.c
 *
 * Affinity sets are not used on embedded, which typically only
 * has a single pset, and last-processor affinity is
 * more important than pset affinity.
 */
boolean_t	affinity_sets_enabled = TRUE;
int		affinity_sets_mapping = 1;

/*
 * thread_affinity_is_supported()
 *	Return TRUE when the platform exposes at least one hardware
 *	affinity set (ml_get_max_affinity_sets() != 0).
 */
boolean_t
thread_affinity_is_supported(void)
{
	return (ml_get_max_affinity_sets() != 0);
}


/*
 * thread_affinity_get()
 * Return the affinity tag for a thread.
 * Called with the thread mutex held.
 */
uint32_t
thread_affinity_get(thread_t thread)
{
	uint32_t tag;

	/* The active set, if any, supplies the tag; else "no affinity". */
	if (thread->affinity_set != NULL)
		tag = thread->affinity_set->aset_tag;
	else
		tag = THREAD_AFFINITY_TAG_NULL;

	return tag;
}


/*
 * thread_affinity_set()
 * Place a thread in an affinity set identified by a tag.
 * Called with thread referenced but not locked.
 *
 * Returns:
 *	KERN_SUCCESS		thread is (re)tagged, or untagged when
 *				tag == THREAD_AFFINITY_TAG_NULL
 *	KERN_TERMINATED		the thread died before its mutex was taken
 *	KERN_RESOURCE_SHORTAGE	namespace or set allocation failed
 */
kern_return_t
thread_affinity_set(thread_t thread, uint32_t tag)
{
	affinity_set_t aset;
	affinity_set_t empty_aset = NULL;
	affinity_space_t aspc;
	affinity_space_t new_aspc = NULL;

	DBG("thread_affinity_set(%p,%u)\n", thread, tag);

	/*
	 * Ensure the task has an affinity namespace. Allocation cannot be
	 * done under the task lock, so drop it, allocate, retake it and
	 * re-check: another thread may have installed a namespace in the
	 * meantime, in which case our allocation is freed just below.
	 */
	task_lock(thread->task);
	aspc = thread->task->affinity_space;
	if (aspc == NULL) {
		task_unlock(thread->task);
		new_aspc = affinity_space_alloc();
		if (new_aspc == NULL)
			return KERN_RESOURCE_SHORTAGE;
		task_lock(thread->task);
		if (thread->task->affinity_space == NULL) {
			thread->task->affinity_space = new_aspc;
			new_aspc = NULL;
		}
		aspc = thread->task->affinity_space;
	}
	task_unlock(thread->task);
	if (new_aspc)
		affinity_space_free(new_aspc);	/* lost the install race */

	thread_mtx_lock(thread);
	if (!thread->active) {
		/* Beaten to lock and the thread is dead */
		thread_mtx_unlock(thread);
		return KERN_TERMINATED;
	}

	lck_mtx_lock(&aspc->aspc_lock);
	aset = thread->affinity_set;
	if (aset != NULL) {
		/*
		 * Remove thread from current affinity set.
		 * If the set becomes empty it is returned (not freed)
		 * so it can be recycled for the new tag below.
		 */
		DBG("thread_affinity_set(%p,%u) removing from aset %p\n",
			thread, tag, aset);
		empty_aset = affinity_set_remove(aset, thread);
	}

	if (tag != THREAD_AFFINITY_TAG_NULL) {
		aset = affinity_set_find(aspc, tag);
		if (aset != NULL) {
			/*
			 * Add thread to existing affinity set
			 */
			DBG("thread_affinity_set(%p,%u) found aset %p\n",
				thread, tag, aset);
		} else {
			/*
			 * Use the new affinity set, add this thread
			 * and place it in a suitable processor set.
			 */
			if (empty_aset != NULL) {
				/* Recycle the set we just emptied. */
				aset = empty_aset;
				empty_aset = NULL;
			} else {
				aset = affinity_set_alloc();
				if (aset == NULL) {
					lck_mtx_unlock(&aspc->aspc_lock);
					thread_mtx_unlock(thread);
					return KERN_RESOURCE_SHORTAGE;
				}
			}
			DBG("thread_affinity_set(%p,%u) (re-)using aset %p\n",
				thread, tag, aset);
			aset->aset_tag = tag;
			affinity_set_place(aspc, aset);
		}
		affinity_set_add(aset, thread);
	}

	lck_mtx_unlock(&aspc->aspc_lock);
	thread_mtx_unlock(thread);

	/*
	 * If we wound up not using an empty aset we created,
	 * free it here.
	 */
	if (empty_aset != NULL)
		affinity_set_free(empty_aset);

	/*
	 * NOTE(review): presumably this forces a reschedule so the new
	 * affinity takes effect for the calling thread immediately —
	 * confirm against scheduler behavior.
	 */
	if (thread == current_thread())
		thread_block(THREAD_CONTINUE_NULL);

	return KERN_SUCCESS;
}

/*
 * task_affinity_create()
 * Called from task create.
 * Parent and child share the parent's affinity namespace.
 */
void
task_affinity_create(task_t parent_task, task_t child_task)
{
	affinity_space_t aspc = parent_task->affinity_space;

	DBG("task_affinity_create(%p,%p)\n", parent_task, child_task);

	assert(aspc);

	/*
	 * Bump the task reference count on the shared namespace and
	 * give it to the child.
	 */
	lck_mtx_lock(&aspc->aspc_lock);
	aspc->aspc_task_count++;
	child_task->affinity_space = aspc;
	lck_mtx_unlock(&aspc->aspc_lock);
}

/*
 * task_affinity_deallocate()
 * Called from task_deallocate() when there's a namespace to dereference.
241 */ 242void 243task_affinity_deallocate(task_t task) 244{ 245 affinity_space_t aspc = task->affinity_space; 246 247 DBG("task_affinity_deallocate(%p) aspc %p task_count %d\n", 248 task, aspc, aspc->aspc_task_count); 249 250 lck_mtx_lock(&aspc->aspc_lock); 251 if (--(aspc->aspc_task_count) == 0) { 252 assert(queue_empty(&aspc->aspc_affinities)); 253 lck_mtx_unlock(&aspc->aspc_lock); 254 affinity_space_free(aspc); 255 } else { 256 lck_mtx_unlock(&aspc->aspc_lock); 257 } 258} 259 260/* 261 * task_affinity_info() 262 * Return affinity tag info (number, min, max) for the task. 263 * 264 * Conditions: task is locked. 265 */ 266kern_return_t 267task_affinity_info( 268 task_t task, 269 task_info_t task_info_out, 270 mach_msg_type_number_t *task_info_count) 271{ 272 affinity_set_t aset; 273 affinity_space_t aspc; 274 task_affinity_tag_info_t info; 275 276 *task_info_count = TASK_AFFINITY_TAG_INFO_COUNT; 277 info = (task_affinity_tag_info_t) task_info_out; 278 info->set_count = 0; 279 info->task_count = 0; 280 info->min = THREAD_AFFINITY_TAG_NULL; 281 info->max = THREAD_AFFINITY_TAG_NULL; 282 283 aspc = task->affinity_space; 284 if (aspc) { 285 lck_mtx_lock(&aspc->aspc_lock); 286 queue_iterate(&aspc->aspc_affinities, 287 aset, affinity_set_t, aset_affinities) { 288 info->set_count++; 289 if (info->min == THREAD_AFFINITY_TAG_NULL || 290 aset->aset_tag < (uint32_t) info->min) 291 info->min = aset->aset_tag; 292 if (info->max == THREAD_AFFINITY_TAG_NULL || 293 aset->aset_tag > (uint32_t) info->max) 294 info->max = aset->aset_tag; 295 } 296 info->task_count = aspc->aspc_task_count; 297 lck_mtx_unlock(&aspc->aspc_lock); 298 } 299 return KERN_SUCCESS; 300} 301 302/* 303 * Called from thread_dup() during fork() with child's mutex held. 304 * Set the child into the parent's affinity set. 305 * Note the affinity space is shared. 
306 */ 307void 308thread_affinity_dup(thread_t parent, thread_t child) 309{ 310 affinity_set_t aset; 311 affinity_space_t aspc; 312 313 thread_mtx_lock(parent); 314 aset = parent->affinity_set; 315 DBG("thread_affinity_dup(%p,%p) aset %p\n", parent, child, aset); 316 if (aset == NULL) { 317 thread_mtx_unlock(parent); 318 return; 319 } 320 321 aspc = aset->aset_space; 322 assert(aspc == parent->task->affinity_space); 323 assert(aspc == child->task->affinity_space); 324 325 lck_mtx_lock(&aspc->aspc_lock); 326 affinity_set_add(aset, child); 327 lck_mtx_unlock(&aspc->aspc_lock); 328 329 thread_mtx_unlock(parent); 330} 331 332/* 333 * thread_affinity_terminate() 334 * Remove thread from any affinity set. 335 * Called with the thread mutex locked. 336 */ 337void 338thread_affinity_terminate(thread_t thread) 339{ 340 affinity_set_t aset = thread->affinity_set; 341 affinity_space_t aspc; 342 343 DBG("thread_affinity_terminate(%p)\n", thread); 344 345 aspc = aset->aset_space; 346 lck_mtx_lock(&aspc->aspc_lock); 347 if (affinity_set_remove(aset, thread)) { 348 affinity_set_free(aset); 349 } 350 lck_mtx_unlock(&aspc->aspc_lock); 351} 352 353/* 354 * thread_affinity_exec() 355 * Called from execve() to cancel any current affinity - a new image implies 356 * the calling thread terminates any expressed or inherited affinity. 357 */ 358void 359thread_affinity_exec(thread_t thread) 360{ 361 if (thread->affinity_set != AFFINITY_SET_NULL) 362 thread_affinity_terminate(thread); 363} 364 365/* 366 * Create an empty affinity namespace data structure. 
367 */ 368static affinity_space_t 369affinity_space_alloc(void) 370{ 371 affinity_space_t aspc; 372 373 aspc = (affinity_space_t) kalloc(sizeof(struct affinity_space)); 374 if (aspc == NULL) 375 return NULL; 376 377 lck_mtx_init(&aspc->aspc_lock, &task_lck_grp, &task_lck_attr); 378 queue_init(&aspc->aspc_affinities); 379 aspc->aspc_task_count = 1; 380 381 DBG("affinity_space_create() returns %p\n", aspc); 382 return aspc; 383} 384 385/* 386 * Destroy the given empty affinity namespace data structure. 387 */ 388static void 389affinity_space_free(affinity_space_t aspc) 390{ 391 assert(queue_empty(&aspc->aspc_affinities)); 392 393 lck_mtx_destroy(&aspc->aspc_lock, &task_lck_grp); 394 DBG("affinity_space_free(%p)\n", aspc); 395 kfree(aspc, sizeof(struct affinity_space)); 396} 397 398 399/* 400 * Create an empty affinity set data structure 401 * entering it into a list anchored by the owning task. 402 */ 403static affinity_set_t 404affinity_set_alloc(void) 405{ 406 affinity_set_t aset; 407 408 aset = (affinity_set_t) kalloc(sizeof(struct affinity_set)); 409 if (aset == NULL) 410 return NULL; 411 412 aset->aset_thread_count = 0; 413 queue_init(&aset->aset_affinities); 414 queue_init(&aset->aset_threads); 415 aset->aset_num = 0; 416 aset->aset_pset = PROCESSOR_SET_NULL; 417 aset->aset_space = NULL; 418 419 DBG("affinity_set_create() returns %p\n", aset); 420 return aset; 421} 422 423/* 424 * Destroy the given empty affinity set data structure 425 * after removing it from the parent task. 426 */ 427static void 428affinity_set_free(affinity_set_t aset) 429{ 430 assert(queue_empty(&aset->aset_threads)); 431 432 DBG("affinity_set_free(%p)\n", aset); 433 kfree(aset, sizeof(struct affinity_set)); 434} 435 436/* 437 * Add a thread to an affinity set. 438 * The caller must have the thread mutex and space locked. 
439 */ 440static void 441affinity_set_add(affinity_set_t aset, thread_t thread) 442{ 443 spl_t s; 444 445 DBG("affinity_set_add(%p,%p)\n", aset, thread); 446 queue_enter(&aset->aset_threads, 447 thread, thread_t, affinity_threads); 448 aset->aset_thread_count++; 449 s = splsched(); 450 thread_lock(thread); 451 thread->affinity_set = affinity_sets_enabled ? aset : NULL; 452 thread_unlock(thread); 453 splx(s); 454} 455 456/* 457 * Remove a thread from an affinity set returning the set if now empty. 458 * The caller must have the thread mutex and space locked. 459 */ 460static affinity_set_t 461affinity_set_remove(affinity_set_t aset, thread_t thread) 462{ 463 spl_t s; 464 465 s = splsched(); 466 thread_lock(thread); 467 thread->affinity_set = NULL; 468 thread_unlock(thread); 469 splx(s); 470 471 aset->aset_thread_count--; 472 queue_remove(&aset->aset_threads, 473 thread, thread_t, affinity_threads); 474 if (queue_empty(&aset->aset_threads)) { 475 queue_remove(&aset->aset_space->aspc_affinities, 476 aset, affinity_set_t, aset_affinities); 477 assert(aset->aset_thread_count == 0); 478 aset->aset_tag = THREAD_AFFINITY_TAG_NULL; 479 aset->aset_num = 0; 480 aset->aset_pset = PROCESSOR_SET_NULL; 481 aset->aset_space = NULL; 482 DBG("affinity_set_remove(%p,%p) set now empty\n", aset, thread); 483 return aset; 484 } else { 485 DBG("affinity_set_remove(%p,%p)\n", aset, thread); 486 return NULL; 487 } 488} 489 490/* 491 * Find an affinity set in the parent task with the given affinity tag. 492 * The caller must have the space locked. 
493 */ 494static affinity_set_t 495affinity_set_find(affinity_space_t space, uint32_t tag) 496{ 497 affinity_set_t aset; 498 499 queue_iterate(&space->aspc_affinities, 500 aset, affinity_set_t, aset_affinities) { 501 if (aset->aset_tag == tag) { 502 DBG("affinity_set_find(%p,%u) finds %p\n", 503 space, tag, aset); 504 return aset; 505 } 506 } 507 DBG("affinity_set_find(%p,%u) not found\n", space, tag); 508 return NULL; 509} 510 511/* 512 * affinity_set_place() assigns an affinity set to a suitable processor_set. 513 * The selection criteria is: 514 * - the set currently occupied by the least number of affinities 515 * belonging to the owning the task. 516 * The caller must have the space locked. 517 */ 518static void 519affinity_set_place(affinity_space_t aspc, affinity_set_t new_aset) 520{ 521 unsigned int num_cpu_asets = ml_get_max_affinity_sets(); 522 unsigned int set_occupancy[num_cpu_asets]; 523 unsigned int i; 524 unsigned int i_least_occupied; 525 affinity_set_t aset; 526 527 for (i = 0; i < num_cpu_asets; i++) 528 set_occupancy[i] = 0; 529 530 /* 531 * Scan the affinity sets calculating the number of sets 532 * occupy the available physical affinities. 533 */ 534 queue_iterate(&aspc->aspc_affinities, 535 aset, affinity_set_t, aset_affinities) { 536 if(aset->aset_num < num_cpu_asets) 537 set_occupancy[aset->aset_num]++; 538 else 539 panic("aset_num = %d in %s\n", aset->aset_num, __FUNCTION__); 540 } 541 542 /* 543 * Find the least occupied set (or the first empty set). 544 * To distribute placements somewhat, start searching from 545 * a cpu affinity chosen randomly per namespace: 546 * [(unsigned int)aspc % 127] % num_cpu_asets 547 * unless this mapping policy is overridden. 
548 */ 549 if (affinity_sets_mapping == 0) 550 i_least_occupied = 0; 551 else 552 i_least_occupied = (unsigned int)(((uintptr_t)aspc % 127) % num_cpu_asets); 553 for (i = 0; i < num_cpu_asets; i++) { 554 unsigned int j = (i_least_occupied + i) % num_cpu_asets; 555 if (set_occupancy[j] == 0) { 556 i_least_occupied = j; 557 break; 558 } 559 if (set_occupancy[j] < set_occupancy[i_least_occupied]) 560 i_least_occupied = j; 561 } 562 new_aset->aset_num = i_least_occupied; 563 new_aset->aset_pset = ml_affinity_to_pset(i_least_occupied); 564 565 /* Add the new affinity set to the group */ 566 new_aset->aset_space = aspc; 567 queue_enter(&aspc->aspc_affinities, 568 new_aset, affinity_set_t, aset_affinities); 569 570 DBG("affinity_set_place(%p,%p) selected affinity %u pset %p\n", 571 aspc, new_aset, new_aset->aset_num, new_aset->aset_pset); 572} 573