/*
 * Copyright (c) 2009 Apple Inc. All rights reserved.
 *
 * @APPLE_LICENSE_HEADER_START@
 *
 * This file contains Original Code and/or Modifications of Original Code
 * as defined in and that are subject to the Apple Public Source License
 * Version 2.0 (the 'License'). You may not use this file except in
 * compliance with the License. Please obtain a copy of the License at
 * http://www.opensource.apple.com/apsl/ and read it before using this
 * file.
 *
 * The Original Code and all software distributed under the License are
 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
 * Please see the License for the specific language governing rights and
 * limitations under the License.
 *
 * @APPLE_LICENSE_HEADER_END@
 */

#include <kern/kalloc.h>
#include <kern/kern_types.h>
#include <kern/locks.h>
#include <kern/misc_protos.h>
#include <kern/task.h>
#include <kern/thread.h>
#include <kern/zalloc.h>
#include <machine/machine_cpu.h>

#include <pmc/pmc.h>

#include <libkern/OSAtomic.h>

#if defined(__i386__) || defined(__x86_64__)
#include <i386/mp.h>
#endif

#if CONFIG_COUNTERS

/* various debug logging enable */
#undef DEBUG_COUNTERS

typedef uint8_t pmc_state_event_t;

#define PMC_STATE_EVENT_START            0
#define PMC_STATE_EVENT_STOP             1
#define PMC_STATE_EVENT_FREE             2
#define PMC_STATE_EVENT_INTERRUPT        3
#define PMC_STATE_EVENT_END_OF_INTERRUPT 4
#define PMC_STATE_EVENT_CONTEXT_IN       5
#define PMC_STATE_EVENT_CONTEXT_OUT      6
#define PMC_STATE_EVENT_LOAD_FINISHED    7
#define PMC_STATE_EVENT_STORE_FINISHED   8

/* PMC spin timeouts */
#define PMC_SPIN_THRESHOLD  10  /* Number of spins to allow before checking mach_absolute_time() */
#define PMC_SPIN_TIMEOUT_US 10  /* Time in microseconds before the spin causes an assert */

uint64_t pmc_spin_timeout_count = 0;    /* Number of times a PMC spin loop has timed out */
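
/*
 * Sketch of the intended spin pattern for the two constants above (this
 * mirrors the wait loop in pmc_reservation_interrupt() later in this file;
 * "condition" stands in for whatever state is being waited on): spin up to
 * PMC_SPIN_THRESHOLD iterations cheaply, then start comparing
 * mach_absolute_time() against a deadline derived from PMC_SPIN_TIMEOUT_US.
 *
 *	uint64_t timeout;
 *	uint32_t spins = 0;
 *	nanoseconds_to_absolutetime(PMC_SPIN_TIMEOUT_US * 1000, &timeout);
 *	timeout += mach_absolute_time();
 *	while (!condition) {
 *		if (++spins > PMC_SPIN_THRESHOLD && mach_absolute_time() > timeout) {
 *			pmc_spin_timeout_count++;
 *			assert(0);
 *		}
 *		cpu_pause();
 *	}
 */
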
#ifdef DEBUG_COUNTERS
#   include <pexpert/pexpert.h>
#   define COUNTER_DEBUG(...) \
        do { \
            kprintf("[%s:%s][%u] ", __FILE__, __PRETTY_FUNCTION__, cpu_number()); \
            kprintf(__VA_ARGS__); \
        } while(0)

#   define PRINT_PERF_MON(x) \
        do { \
            kprintf("perfmon: %p (obj: %p refCt: %u switchable: %u)\n", \
                x, x->object, x->useCount, \
                (x->methods.flags & PERFMON_FLAG_SUPPORTS_CONTEXT_SWITCHING) ? \
                1 : 0); \
        } while(0)

static const char *pmc_state_state_name(pmc_state_t state) {
    switch (PMC_STATE_STATE(state)) {
        case PMC_STATE_STATE_INVALID:
            return "INVALID";
        case PMC_STATE_STATE_STOP:
            return "STOP";
        case PMC_STATE_STATE_CAN_RUN:
            return "CAN_RUN";
        case PMC_STATE_STATE_LOAD:
            return "LOAD";
        case PMC_STATE_STATE_RUN:
            return "RUN";
        case PMC_STATE_STATE_STORE:
            return "STORE";
        case PMC_STATE_STATE_INTERRUPT:
            return "INTERRUPT";
        case PMC_STATE_STATE_DEALLOC:
            return "DEALLOC";
        default:
            return "UNKNOWN";
    }
}

static const char *pmc_state_event_name(pmc_state_event_t event) {
    switch (event) {
        case PMC_STATE_EVENT_START:
            return "START";
        case PMC_STATE_EVENT_STOP:
            return "STOP";
        case PMC_STATE_EVENT_FREE:
            return "FREE";
        case PMC_STATE_EVENT_INTERRUPT:
            return "INTERRUPT";
        case PMC_STATE_EVENT_END_OF_INTERRUPT:
            return "END OF INTERRUPT";
        case PMC_STATE_EVENT_CONTEXT_IN:
            return "CONTEXT IN";
        case PMC_STATE_EVENT_CONTEXT_OUT:
            return "CONTEXT OUT";
        case PMC_STATE_EVENT_LOAD_FINISHED:
            return "LOAD_FINISHED";
        case PMC_STATE_EVENT_STORE_FINISHED:
            return "STORE_FINISHED";
        default:
            return "UNKNOWN";
    }
}

#   define PMC_STATE_FORMAT "<%s, %u, %s%s%s>"
#   define PMC_STATE_ARGS(x) pmc_state_state_name(x), PMC_STATE_CONTEXT_COUNT(x), ((PMC_STATE_FLAGS(x) & PMC_STATE_FLAGS_INTERRUPTING) ? "I" : ""), \
        ((PMC_STATE_FLAGS(x) & PMC_STATE_FLAGS_STOPPING) ? "S" : ""), ((PMC_STATE_FLAGS(x) & PMC_STATE_FLAGS_DEALLOCING) ? "D" : "")
#else
#   define COUNTER_DEBUG(...)
#   define PRINT_PERF_MON(x)
#   define PMC_STATE_FORMAT
#   define PMC_STATE_ARGS(x)
#endif

/*!struct
 * pmc_config is the data behind a pmc_config_t.
 * @member object A pointer to an instance of IOPerformanceCounterConfiguration
 * @member method A pointer to a method to call to handle PMI.
 * @member interrupt_after_value Cause a PMI after the counter counts this many
 * events.
 * @member refCon Passed to the @method method as the refCon argument.
 */
struct pmc_config {
    pmc_config_object_t object;
    volatile pmc_interrupt_method_t method;
    uint64_t interrupt_after_value;
    void *refCon;
};
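
/*
 * Illustrative shape of a PMI handler (a sketch, not part of this file):
 * pmc_reservation_interrupt() below invokes @method with the reservation as
 * its first argument and @refCon as its second; the exact
 * pmc_interrupt_method_t typedef lives in pmc.h. "my_pmi_handler" is a
 * hypothetical driver-side stand-in:
 *
 *	static void my_pmi_handler(void *target, void *refCon) {
 *		pmc_reservation_t resv = (pmc_reservation_t)target;
 *		// e.g. write 0 back to the counter here so the reservation
 *		// restarts counting from scratch after the interrupt ends.
 *		(void)resv;
 *		(void)refCon;
 *	}
 */
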
/*
 * Allocation Zones
 *
 * Two allocation zones - Perf zone small and Perf zone big.
 * Each zone has associated maximums, defined below.
 * The small zone is the max of the smallest allocation objects (all sizes on
 * K64):
 *	perf_monitor_t - 48 bytes
 *	perf_monitor_methods_t - 28 bytes
 *	pmc_reservation_t - 48 bytes
 *	pmc_config_t - 32 bytes
 * perf_small_zone unit size is (on K64) 48 bytes
 * perf_small_zone max count must be max number of perf monitors, plus (max
 * number of reservations * 2). The "*2" is because each reservation has a
 * pmc_config_t within.
 *
 * Big zone is max of the larger allocation units
 *	pmc_t - 144 bytes
 *	pmc_methods_t - 116 bytes
 * perf_big_zone unit size is (on K64) 144 bytes
 * perf_big_zone max count is the max number of PMCs we support.
 */

static zone_t perf_small_zone = NULL;
#define MAX_PERF_SMALLS     (256 + 8196 + 8196)
#define PERF_SMALL_UNIT_SZ  (MAX(MAX(sizeof(struct perf_monitor), \
    sizeof(struct pmc_reservation)), sizeof(struct pmc_config)))

static zone_t perf_big_zone = NULL;
#define MAX_PERF_BIGS       (1024)
#define PERF_BIG_UNIT_SZ    (sizeof(struct pmc))

/*
 * Locks and Lock groups
 */
static lck_grp_t *pmc_lock_grp = LCK_GRP_NULL;
static lck_grp_attr_t *pmc_lock_grp_attr;
static lck_attr_t *pmc_lock_attr;

/* PMC tracking queue locks */

static lck_mtx_t  cpu_monitor_queue_mutex;   /* protects per-cpu queues at initialisation time */
static lck_spin_t perf_monitor_queue_spin;   /* protects adding and removing from queue */
static lck_spin_t perf_counters_queue_spin;  /* protects adding and removing from queue */

/* Reservation tracking queues lock */
static lck_spin_t reservations_spin;

/*
 * Tracking queues
 *
 * Keeps track of registered perf monitors and perf counters
 */

static queue_head_t **cpu_monitor_queues = NULL;

static queue_head_t *perf_monitors_queue = NULL;
static volatile uint32_t perf_monitors_count = 0U;

static queue_head_t *perf_counters_queue = NULL;
static volatile uint32_t perf_counters_count = 0U;

/*
 * Reservation queues
 *
 * Keeps track of all system, task, and thread-level reservations (both active
 * and inactive).
 *
 * We track them all here (rather than in their respective task or thread only)
 * so that we can inspect our tracking data directly (rather than peeking at
 * every task and thread) to determine if/when a new reservation would
 * constitute a conflict.
 */

static queue_head_t *system_reservations = NULL;
static volatile uint32_t system_reservation_count = 0U;

static queue_head_t *task_reservations = NULL;
static volatile uint32_t task_reservation_count = 0U;

static queue_head_t *thread_reservations = NULL;
static volatile uint32_t thread_reservation_count = 0U;

#if XNU_KERNEL_PRIVATE

/*
 * init_pmc_locks creates and initializes all the locks and lock groups and
 * lock attributes required for the pmc sub-system.
 */
static void init_pmc_locks(void) {
    pmc_lock_attr = lck_attr_alloc_init();
    assert(pmc_lock_attr);

    pmc_lock_grp_attr = lck_grp_attr_alloc_init();
    assert(pmc_lock_grp_attr);

    pmc_lock_grp = lck_grp_alloc_init("pmc", pmc_lock_grp_attr);
    assert(pmc_lock_grp);

    lck_spin_init(&perf_monitor_queue_spin, pmc_lock_grp, pmc_lock_attr);
    lck_spin_init(&perf_counters_queue_spin, pmc_lock_grp, pmc_lock_attr);

    lck_spin_init(&reservations_spin, pmc_lock_grp, pmc_lock_attr);

    lck_mtx_init(&cpu_monitor_queue_mutex, pmc_lock_grp, pmc_lock_attr);
}

/*
 * init_pmc_zones initializes the allocation zones used by the pmc subsystem
 */
static void init_pmc_zones(void) {
    perf_small_zone = zinit(PERF_SMALL_UNIT_SZ,
        MAX_PERF_SMALLS * PERF_SMALL_UNIT_SZ, MAX_PERF_SMALLS,
        "pmc.small zone");

    assert(perf_small_zone);

    perf_big_zone = zinit(PERF_BIG_UNIT_SZ,
        MAX_PERF_BIGS * PERF_BIG_UNIT_SZ, MAX_PERF_BIGS,
        "pmc.big zone");

    assert(perf_big_zone);
}

/*
 * init_pmc_queues allocates and initializes the tracking queues for
 * registering and reserving individual pmcs and perf monitors.
 */
static void init_pmc_queues(void) {

    perf_monitors_queue = (queue_head_t*)kalloc(sizeof(queue_head_t));
    assert(perf_monitors_queue);

    queue_init(perf_monitors_queue);

    perf_counters_queue = (queue_head_t*)kalloc(sizeof(queue_head_t));
    assert(perf_counters_queue);

    queue_init(perf_counters_queue);

    system_reservations = (queue_head_t*)kalloc(sizeof(queue_head_t));
    assert(system_reservations);

    queue_init(system_reservations);

    task_reservations = (queue_head_t*)kalloc(sizeof(queue_head_t));
    assert(task_reservations);

    queue_init(task_reservations);

    thread_reservations = (queue_head_t*)kalloc(sizeof(queue_head_t));
    assert(thread_reservations);

    queue_init(thread_reservations);
}

/*
 * pmc_bootstrap brings up all the necessary infrastructure required to use the
 * pmc sub-system.
 */
__private_extern__
void pmc_bootstrap(void) {
    /* build our alloc zones */
    init_pmc_zones();

    /* build the locks */
    init_pmc_locks();

    /* build our tracking queues */
    init_pmc_queues();
}

#endif /* XNU_KERNEL_PRIVATE */

/*
 * Perf Monitor Internals
 */

static perf_monitor_t perf_monitor_alloc(void) {
    /* perf monitors come from the perf small zone */
    return (perf_monitor_t)zalloc(perf_small_zone);
}

static void perf_monitor_free(void *pm) {
    zfree(perf_small_zone, pm);
}

static void perf_monitor_init(perf_monitor_t pm, int cpu) {
    assert(pm);

    pm->object = NULL;

    bzero(&(pm->methods), sizeof(perf_monitor_methods_t));

    pm->useCount = 1;   /* initial retain count of 1, for caller */

    pm->reservedCounters = 0;

    pm->cpu = cpu;

    pm->link.next = pm->link.prev = (queue_entry_t)NULL;
    pm->cpu_link.next = pm->cpu_link.prev = (queue_entry_t)NULL;
}

/*
 * perf_monitor_dequeue removes the given perf_monitor_t from the
 * perf_monitor_queue, thereby unregistering it with the system.
 */
static void perf_monitor_dequeue(perf_monitor_t pm) {
    lck_spin_lock(&perf_monitor_queue_spin);

    if (pm->methods.flags & PERFMON_FLAG_REQUIRES_IDLE_NOTIFICATIONS) {
        /* If this flag is set, the monitor is already validated to be
         * accessible from a single cpu only.
         */
        queue_remove(cpu_monitor_queues[pm->cpu], pm, perf_monitor_t, cpu_link);
    }

    /*
     * remove the @pm object from the @perf_monitor_queue queue (it is of type
     * <perf_monitor_t> and has a field called @link that is the queue_link_t
     */
    queue_remove(perf_monitors_queue, pm, perf_monitor_t, link);

    perf_monitors_count--;

    lck_spin_unlock(&perf_monitor_queue_spin);
}

/*
 * perf_monitor_enqueue adds the given perf_monitor_t to the perf_monitor_queue,
 * thereby registering it for use with the system.
 */
static void perf_monitor_enqueue(perf_monitor_t pm) {

    lck_mtx_lock(&cpu_monitor_queue_mutex);
    lck_spin_lock(&perf_monitor_queue_spin);

    if (pm->cpu >= 0) {
        /* Deferred initialisation; saves memory and permits ml_get_max_cpus()
         * to block until cpu initialisation is complete.
         */
        if (!cpu_monitor_queues) {
            uint32_t max_cpus;
            queue_head_t **queues;
            uint32_t i;

            lck_spin_unlock(&perf_monitor_queue_spin);

            max_cpus = ml_get_max_cpus();

            queues = (queue_head_t**)kalloc(sizeof(queue_head_t*) * max_cpus);
            assert(queues);
            for (i = 0; i < max_cpus; i++) {
                queue_head_t *queue = (queue_head_t*)kalloc(sizeof(queue_head_t));
                assert(queue);
                queue_init(queue);
                queues[i] = queue;
            }

            lck_spin_lock(&perf_monitor_queue_spin);

            cpu_monitor_queues = queues;
        }

        queue_enter(cpu_monitor_queues[pm->cpu], pm, perf_monitor_t, cpu_link);
    }

    queue_enter(perf_monitors_queue, pm, perf_monitor_t, link);
    perf_monitors_count++;

    lck_spin_unlock(&perf_monitor_queue_spin);
    lck_mtx_unlock(&cpu_monitor_queue_mutex);
}

/*
 * perf_monitor_reference increments the reference count for the given
 * perf_monitor_t.
 */
static void perf_monitor_reference(perf_monitor_t pm) {
    assert(pm);

    OSIncrementAtomic(&(pm->useCount));
}

/*
 * perf_monitor_deallocate decrements the reference count for the given
 * perf_monitor_t. If the reference count hits 0, the object is released back
 * to the perf_small_zone via a call to perf_monitor_free().
 */
static void perf_monitor_deallocate(perf_monitor_t pm) {
    assert(pm);

    /* If we just removed the last reference count */
    if(1 == OSDecrementAtomic(&(pm->useCount))) {
        /* Free the object */
        perf_monitor_free(pm);
    }
}

/*
 * perf_monitor_find attempts to find a perf_monitor_t that corresponds to the
 * given C++ object pointer that was used when registering with the subsystem.
 *
 * If found, the method returns the perf_monitor_t with an extra reference
 * placed on the object; if not found, it returns NULL.
 *
 * NOTE: Caller must use perf_monitor_deallocate to remove the extra reference
 * after calling perf_monitor_find.
 */
static perf_monitor_t perf_monitor_find(perf_monitor_object_t monitor) {
    assert(monitor);
    perf_monitor_t element = NULL;
    perf_monitor_t found = NULL;

    lck_spin_lock(&perf_monitor_queue_spin);

    queue_iterate(perf_monitors_queue, element, perf_monitor_t, link) {
        if(element->object == monitor) {
            perf_monitor_reference(element);
            found = element;
            break;
        }
    }

    lck_spin_unlock(&perf_monitor_queue_spin);

    return found;
}
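
/*
 * Usage sketch for the find/deallocate pattern (illustrative): both
 * perf_monitor_find() above and pmc_find() below hand back an extra
 * reference that the caller must drop when done.
 *
 *	perf_monitor_t pm = perf_monitor_find(monitor);
 *	if (pm) {
 *		... use pm ...
 *		perf_monitor_deallocate(pm);	// drop the reference from find
 *	}
 */
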
/*
 * perf_monitor_add_pmc adds a newly registered PMC to the perf monitor it is
 * associated with.
 */

static void perf_monitor_add_pmc(perf_monitor_t pm, pmc_t pmc __unused) {
    assert(pm);
    assert(pmc);

    /* Today, we merely add a reference count now that a new pmc is attached */
    perf_monitor_reference(pm);
}

/*
 * perf_monitor_remove_pmc removes a newly *un*registered PMC from the perf
 * monitor it is associated with.
 */
static void perf_monitor_remove_pmc(perf_monitor_t pm, pmc_t pmc __unused) {
    assert(pm);
    assert(pmc);

    /* Today, we merely remove a reference count now that the pmc is detached */
    perf_monitor_deallocate(pm);
}

/*
 * Perf Counter internals
 */

static pmc_t pmc_alloc(void) {
    return (pmc_t)zalloc(perf_big_zone);
}

static void pmc_free(void *pmc) {
    zfree(perf_big_zone, pmc);
}

/*
 * pmc_init initializes a newly allocated pmc_t
 */
static void pmc_init(pmc_t pmc) {
    assert(pmc);

    pmc->object = NULL;
    pmc->monitor = NULL;

    bzero(&pmc->methods, sizeof(pmc_methods_t));

    /* One reference for the caller */
    pmc->useCount = 1;
}

/*
 * pmc_reference increments the reference count of the given pmc_t
 */
static void pmc_reference(pmc_t pmc) {
    assert(pmc);

    OSIncrementAtomic(&(pmc->useCount));
}

/*
 * pmc_deallocate decrements the reference count of the given pmc_t. If the
 * reference count hits zero, the given pmc_t is deallocated and released back
 * to the allocation zone.
 */
static void pmc_deallocate(pmc_t pmc) {
    assert(pmc);

    /* If we just removed the last reference count */
    if(1 == OSDecrementAtomic(&(pmc->useCount))) {
        /* Free the pmc */
        pmc_free(pmc);
    }
}

/*
 * pmc_dequeue removes the given, newly *un*registered pmc from the
 * perf_counters_queue.
 */
static void pmc_dequeue(pmc_t pmc) {
    lck_spin_lock(&perf_counters_queue_spin);

    queue_remove(perf_counters_queue, pmc, pmc_t, link);

    perf_counters_count--;

    lck_spin_unlock(&perf_counters_queue_spin);
}

/*
 * pmc_enqueue adds the given, newly registered pmc to the perf_counters_queue
 */
static void pmc_enqueue(pmc_t pmc) {
    lck_spin_lock(&perf_counters_queue_spin);

    queue_enter(perf_counters_queue, pmc, pmc_t, link);

    perf_counters_count++;

    lck_spin_unlock(&perf_counters_queue_spin);
}

/*
 * pmc_find attempts to locate a pmc_t that was registered with the given
 * pmc_object_t pointer. If found, it returns the pmc_t with an extra reference
 * which must be dropped by the caller by calling pmc_deallocate().
 */
static pmc_t pmc_find(pmc_object_t object) {
    assert(object);

    lck_spin_lock(&perf_counters_queue_spin);

    pmc_t element = NULL;
    pmc_t found = NULL;

    queue_iterate(perf_counters_queue, element, pmc_t, link) {
        if(element->object == object) {
            pmc_reference(element);
            found = element;
            break;
        }
    }

    lck_spin_unlock(&perf_counters_queue_spin);

    return found;
}

/*
 * Config internals
 */

/* Allocate a pmc_config_t */
static pmc_config_t pmc_config_alloc(pmc_t pmc __unused) {
    return (pmc_config_t)zalloc(perf_small_zone);
}

/* Free a pmc_config_t, and underlying pmc_config_object_t (if needed) */
static void pmc_config_free(pmc_t pmc, pmc_config_t config) {
    assert(pmc);
    assert(config);

    if(config->object) {
        pmc->methods.free_config(pmc->object, config->object);
        config->object = NULL;
    }

    zfree(perf_small_zone, config);
}

static kern_return_t pmc_open(pmc_t pmc) {
    assert(pmc);
    assert(pmc->object);
    assert(pmc->open_object);

    return pmc->methods.open(pmc->object, pmc->open_object);
}

static kern_return_t pmc_close(pmc_t pmc) {
    assert(pmc);
    assert(pmc->object);
    assert(pmc->open_object);

    return pmc->methods.close(pmc->object, pmc->open_object);
}

/*
 * Reservation Internals
 */

static kern_return_t pmc_internal_reservation_set_pmc(pmc_reservation_t resv, pmc_t pmc);
static void pmc_internal_reservation_store(pmc_reservation_t reservation);
static void pmc_internal_reservation_load(pmc_reservation_t reservation);

static pmc_reservation_t reservation_alloc(void) {
    /* pmc reservations come from the perf small zone */
    return (pmc_reservation_t)zalloc(perf_small_zone);
}

/*
 * reservation_free deallocates and releases all resources associated with the
 * given pmc_reservation_t. This includes freeing the config used to create the
 * reservation, decrementing the reference count for the pmc used to create the
 * reservation, and deallocating the reservation's memory.
 */
static void reservation_free(pmc_reservation_t resv) {
    /* Free config */
    if(resv->config) {
        assert(resv->pmc);

        pmc_free_config(resv->pmc, resv->config);

        resv->config = NULL;
    }

    /* release PMC */
    (void)pmc_internal_reservation_set_pmc(resv, NULL);

    /* Free reservation */
    zfree(perf_small_zone, resv);
}

/*
 * reservation_init initializes a newly created reservation.
 */
static void reservation_init(pmc_reservation_t resv) {
    assert(resv);

    resv->pmc = NULL;
    resv->config = NULL;
    resv->value = 0ULL;

    resv->flags = 0U;
    resv->state = PMC_STATE(PMC_STATE_STATE_STOP, 0, 0);
    resv->active_last_context_in = 0U;

    /*
     * Since this member is a union, we only need to set either the task
     * or thread to NULL.
     */
    resv->task = TASK_NULL;
}

/*
 * pmc_internal_reservation_set_pmc sets the pmc associated with the reservation
 * object. If there was one set already, it is deallocated (reference is
 * dropped) before the new one is set. This method increases the reference
 * count of the given pmc_t.
 *
 * NOTE: It is okay to pass NULL as the pmc_t - this will have the effect of
 * dropping the reference on any previously set pmc, and setting the
 * reservation to having no pmc set.
 */
static kern_return_t pmc_internal_reservation_set_pmc(pmc_reservation_t resv, pmc_t pmc) {
    assert(resv);

    if(resv->pmc) {
        (void)pmc_close(resv->pmc);
        pmc_deallocate(resv->pmc);
        resv->pmc = NULL;
    }

    resv->pmc = pmc;

    if(resv->pmc) {
        pmc_reference(resv->pmc);
        if(KERN_SUCCESS != pmc_open(resv->pmc)) {
            pmc_deallocate(resv->pmc);
            resv->pmc = NULL;

            return KERN_FAILURE;
        }
    }

    return KERN_SUCCESS;
}

/*
 * Used to place reservation into one of the system, task, and thread queues
 * Assumes the queue's spin lock is already held.
 */
static void pmc_internal_reservation_enqueue(queue_t queue, pmc_reservation_t resv) {
    assert(queue);
    assert(resv);

    queue_enter(queue, resv, pmc_reservation_t, link);
}

static void pmc_internal_reservation_dequeue(queue_t queue, pmc_reservation_t resv) {
    assert(queue);
    assert(resv);

    queue_remove(queue, resv, pmc_reservation_t, link);
}

/* Returns TRUE if the reservation applies to the current execution context */
static boolean_t pmc_internal_reservation_matches_context(pmc_reservation_t resv) {
    boolean_t ret = FALSE;
    assert(resv);

    if(PMC_FLAG_IS_SYSTEM_SCOPE(resv->flags)) {
        ret = TRUE;
    } else if(PMC_FLAG_IS_TASK_SCOPE(resv->flags)) {
        if(current_task() == resv->task) {
            ret = TRUE;
        }
    } else if(PMC_FLAG_IS_THREAD_SCOPE(resv->flags)) {
        if(current_thread() == resv->thread) {
            ret = TRUE;
        }
    }

    return ret;
}

/*
 * pmc_accessible_core_count returns the number of logical cores that can
 * access a given @pmc. 0 means every core in the system.
 */
static uint32_t pmc_accessible_core_count(pmc_t pmc) {
    assert(pmc);

    uint32_t *cores = NULL;
    size_t coreCt = 0UL;

    if(KERN_SUCCESS != pmc->methods.accessible_cores(pmc->object,
        &cores, &coreCt)) {
        coreCt = 0U;
    }

    return (uint32_t)coreCt;
}

/* spin lock for the queue must already be held */
/*
 * pmc_internal_reservation_queue_contains_pmc returns TRUE if the given queue
 * holds a reservation that conflicts with @resv: that is, a reservation for
 * the same pmc whose scope (and, for task or thread scope, whose task or
 * thread) makes the two reservations incompatible.
 */
static boolean_t pmc_internal_reservation_queue_contains_pmc(queue_t queue, pmc_reservation_t resv) {
    assert(queue);
    assert(resv);

    boolean_t ret = FALSE;
    pmc_reservation_t tmp = NULL;

    queue_iterate(queue, tmp, pmc_reservation_t, link) {
        if(tmp->pmc == resv->pmc) {
            /* PMC matches - make sure scope matches first */
            switch(PMC_FLAG_SCOPE(tmp->flags)) {
                case PMC_FLAG_SCOPE_SYSTEM:
                    /*
                     * Found a reservation in system queue with same pmc -
                     * always a conflict.
                     */
                    ret = TRUE;
                    break;
                case PMC_FLAG_SCOPE_THREAD:
                    /*
                     * Found one in thread queue with the same PMC as the
                     * argument. Only a conflict if argument scope isn't
                     * thread or system, or the threads match.
                     */
                    ret = (PMC_FLAG_SCOPE(resv->flags) != PMC_FLAG_SCOPE_THREAD) ||
                        (tmp->thread == resv->thread);

                    if(!ret) {
                        /*
                         * so far, no conflict - check that the pmc that is
                         * being reserved isn't accessible from more than
                         * one core. If it is, we need to say it's already
                         * taken.
                         */
                        if(1 != pmc_accessible_core_count(tmp->pmc)) {
                            ret = TRUE;
                        }
                    }
                    break;
                case PMC_FLAG_SCOPE_TASK:
                    /*
                     * Follow similar semantics for task scope.
                     */
                    ret = (PMC_FLAG_SCOPE(resv->flags) != PMC_FLAG_SCOPE_TASK) ||
                        (tmp->task == resv->task);
                    if(!ret) {
                        /*
                         * so far, no conflict - check that the pmc that is
                         * being reserved isn't accessible from more than
                         * one core. If it is, we need to say it's already
                         * taken.
                         */
                        if(1 != pmc_accessible_core_count(tmp->pmc)) {
                            ret = TRUE;
                        }
                    }

                    break;
            }

            if(ret) break;
        }
    }

    return ret;
}

/*
 * pmc_internal_reservation_validate_for_pmc returns TRUE if the given
 * reservation can be added to its target queue without creating conflicts
 * (the target queue is determined by the reservation's scope flags). Further,
 * this method returns FALSE if any level contains a reservation for a PMC
 * that can be accessed from more than just one core, and the given reservation
 * also wants the same PMC.
 */
static boolean_t pmc_internal_reservation_validate_for_pmc(pmc_reservation_t resv) {
    assert(resv);
    boolean_t ret = TRUE;

    if(pmc_internal_reservation_queue_contains_pmc(system_reservations, resv) ||
        pmc_internal_reservation_queue_contains_pmc(task_reservations, resv) ||
        pmc_internal_reservation_queue_contains_pmc(thread_reservations, resv)) {
        ret = FALSE;
    }

    return ret;
}

static void pmc_internal_update_thread_flag(thread_t thread, boolean_t newFlag) {
    assert(thread);

    /* See if this thread needs its PMC flag set */
    pmc_reservation_t tmp = NULL;

    if(!newFlag) {
        /*
         * If the parent task just dropped its reservation, iterate the thread
         * reservations to see if we need to keep the pmc flag set for the
         * given thread or not.
         */
        lck_spin_lock(&reservations_spin);

        queue_iterate(thread_reservations, tmp, pmc_reservation_t, link) {
            if(tmp->thread == thread) {
                newFlag = TRUE;
                break;
            }
        }

        lck_spin_unlock(&reservations_spin);
    }

    if(newFlag) {
        OSBitOrAtomic(THREAD_PMC_FLAG, &thread->t_chud);
    } else {
        OSBitAndAtomic(~(THREAD_PMC_FLAG), &thread->t_chud);
    }
}

/*
 * This operation is (worst case) O(N*M) where N is number of threads in the
 * given task, and M is the number of thread reservations in our system.
 */
static void pmc_internal_update_task_flag(task_t task, boolean_t newFlag) {
    assert(task);
    thread_t thread = NULL;

    if(newFlag) {
        OSBitOrAtomic(TASK_PMC_FLAG, &task->t_chud);
    } else {
        OSBitAndAtomic(~(TASK_PMC_FLAG), &task->t_chud);
    }

    task_lock(task);

    queue_iterate(&task->threads, thread, thread_t, task_threads) {
        /* propagate the task's mask down to each thread */
        pmc_internal_update_thread_flag(thread, newFlag);
    }

    task_unlock(task);
}

/*
 * pmc_internal_reservation_add adds a reservation to the global tracking queues
 * after ensuring there are no reservation conflicts. To do this, it takes all
 * the spin locks for all the queues (to ensure no other core goes and adds a
 * reservation for the same pmc to a queue that has already been checked).
 */
static boolean_t pmc_internal_reservation_add(pmc_reservation_t resv) {
    assert(resv);

    boolean_t ret = FALSE;

    /* always lock all three in the same order */
    lck_spin_lock(&reservations_spin);

    /* Check if the reservation can be added without conflicts */
    if(pmc_internal_reservation_validate_for_pmc(resv)) {

        /* add reservation to appropriate scope */
        switch(PMC_FLAG_SCOPE(resv->flags)) {
            case PMC_FLAG_SCOPE_SYSTEM:
                /* Simply add it to the system queue */
                pmc_internal_reservation_enqueue(system_reservations, resv);
                system_reservation_count++;

                lck_spin_unlock(&reservations_spin);

                break;

            case PMC_FLAG_SCOPE_TASK:
                assert(resv->task);

                /* Not only do we enqueue it in our local queue for tracking */
                pmc_internal_reservation_enqueue(task_reservations, resv);
                task_reservation_count++;

                lck_spin_unlock(&reservations_spin);

                /* update the task mask, and propagate it to existing threads */
                pmc_internal_update_task_flag(resv->task, TRUE);
                break;

            /* Thread-switched counter */
            case PMC_FLAG_SCOPE_THREAD:
                assert(resv->thread);

                /*
                 * Works the same as a task-switched counter, only at
                 * thread-scope
                 */

                pmc_internal_reservation_enqueue(thread_reservations, resv);
                thread_reservation_count++;

                lck_spin_unlock(&reservations_spin);

                pmc_internal_update_thread_flag(resv->thread, TRUE);
                break;
        }

        ret = TRUE;
    } else {
        lck_spin_unlock(&reservations_spin);
    }

    return ret;
}

static void pmc_internal_reservation_broadcast(pmc_reservation_t reservation, void (*action_func)(void *)) {
    uint32_t *cores;
    size_t core_cnt;

    /* Get the list of accessible cores */
    if (KERN_SUCCESS == pmc_get_accessible_core_list(reservation->pmc, &cores, &core_cnt)) {
        boolean_t intrs_enabled = ml_set_interrupts_enabled(FALSE);

        /* Fast case: the PMC is only accessible from one core and we happen to be on it */
        if (core_cnt == 1 && cores[0] == (uint32_t)cpu_number()) {
            action_func(reservation);
        } else {
            /* Call action_func on every accessible core */
#if defined(__i386__) || defined(__x86_64__)
            size_t ii;
            cpumask_t mask = 0;

            /* Build a mask for the accessible cores */
            if (core_cnt > 0) {
                for (ii = 0; ii < core_cnt; ii++) {
                    mask |= cpu_to_cpumask(cores[ii]);
                }
            } else {
                /* core_cnt = 0 really means all cpus */
                mask = CPUMASK_ALL;
            }
            mp_cpus_call(mask, ASYNC, action_func, reservation);
#else
#error pmc_reservation_interrupt needs an inter-processor method invocation mechanism for this architecture
#endif
        }

        ml_set_interrupts_enabled(intrs_enabled);
    }

}

/*
 * pmc_internal_reservation_remove removes the given reservation from the
 * appropriate reservation queue according to its scope.
 *
 * NOTE: The scope flag must have been set for this method to function.
 */
static void pmc_internal_reservation_remove(pmc_reservation_t resv) {
    assert(resv);

    /*
     * Due to the way the macros are written, we can't just blindly queue-remove
     * the reservation without knowing which queue it's in. We figure this out
     * using the reservation's scope flags.
     */

    /* Lock the global spin lock */
    lck_spin_lock(&reservations_spin);

    switch(PMC_FLAG_SCOPE(resv->flags)) {

        case PMC_FLAG_SCOPE_SYSTEM:
            pmc_internal_reservation_dequeue(system_reservations, resv);
            system_reservation_count--;

            lck_spin_unlock(&reservations_spin);

            break;

        case PMC_FLAG_SCOPE_TASK:
            /* remove from the global queue */
            pmc_internal_reservation_dequeue(task_reservations, resv);
            task_reservation_count--;

            /* unlock the global */
            lck_spin_unlock(&reservations_spin);

            /* Recalculate task's counter mask */
            pmc_internal_update_task_flag(resv->task, FALSE);

            break;

        case PMC_FLAG_SCOPE_THREAD:
            pmc_internal_reservation_dequeue(thread_reservations, resv);
            thread_reservation_count--;

            lck_spin_unlock(&reservations_spin);

            /* recalculate the thread's counter mask */
            pmc_internal_update_thread_flag(resv->thread, FALSE);

            break;
    }
}

/* Reservation State Machine
 *
 * The PMC subsystem uses a 3-tuple of state information packed into a 32-bit
 * quantity and a set of 9 events to provide MP-safe bookkeeping and control
 * flow. The 3-tuple is comprised of a state, a count of active contexts, and
 * a set of modifier flags. A state machine defines the possible transitions at
 * each event point given the current 3-tuple. Atomicity is handled by reading
 * the current 3-tuple, applying the transformations indicated by the state
 * machine and then attempting to OSCompareAndSwap the transformed value. If
 * the OSCompareAndSwap fails, the process is repeated until either the
 * OSCompareAndSwap succeeds or no valid transitions are available.
 *
 * The state machine is described using tuple notation for the current state
 * and a related notation for describing the transformations. For conciseness,
 * the flag and state names are abbreviated as follows:
 *
 * states:
 * S = STOP
 * CR = CAN_RUN
 * L = LOAD
 * R = RUN
 * ST = STORE
 * I = INTERRUPT
 * D = DEALLOC
 *
 * flags:
 *
 * S = STOPPING
 * D = DEALLOCING
 * I = INTERRUPTING
 *
 * The tuple notation is formed from the following pattern:
 *
 * tuple = < state, active-context-count, flags >
 * state = S | CR | L | R | ST | I | D
 * active-context-count = 0 | >0 | 1 | >1
 * flags = flags flag | blank
 * flag = S | D | I
 *
 * The transform notation is similar, but only describes the modifications made
 * to the current state. The notation is formed from the following pattern:
 *
 * transform = < state, active-context-count, flags >
 * state = S | CR | L | R | ST | I | D
 * active-context-count = + | - | blank
 * flags = flags flag | flags !flag | blank
 * flag = S | D | I
 *
 * And now for the state machine:
 * State         Start      Stop       Free       Interrupt  End Interrupt Context In Context Out Load Finished Store Finished
 * <CR, 0, >                <S, , >    <D, , >                             <L, +, >
 * <D, 0, >
 * <D, 1, D>                                                                          < , -, !D>
 * <D, >1, D>                                                                         < , -, >
 * <I, 0, D>                                                 <D, , !D>
 * <I, 0, S>     < , , !S>             < , , !SD>            <S, , !S>
 * <I, 0, >                 < , , S>   < , , D>              <CR, , >
 * <L, 1, D>                                                                                      <ST, -, >
 * <L, 1, ID>                                                                                     <ST, -, >
 * <L, 1, IS>                          < , , !SD>                                                 <ST, -, >
 * <L, 1, S>     < , , !S>             < , , !SD>                                                 <ST, -, >
 * <L, 1, >                 < , , S>   < , , D>   < , , IS>                < , +, >               <R, , >
 * <L, >1, D>                                                                         < , -, >    <R, -, >
 * <L, >1, ID>                                                                        < , -, >    <R, -, >
 * <L, >1, IS>                         < , , !SD>                                     < , -, >    <R, -, >
 * <L, >1, S>    < , , !S>             < , , !SD>                                     < , -, >    <R, -, >
 * <L, >1, >                < , , S>   < , , D>   < , , IS>                < , +, >   < , -, >    <R, , >
 * <R, 1, D>                                                                          <ST, -, >
 * <R, 1, ID>                                                                         <ST, -, >
 * <R, 1, IS>                          < , , !SD>                                     <ST, -, >
 * <R, 1, S>     < , , !S>             < , , !SD>                                     <ST, -, >
 * <R, 1, >                 < , , S>   < , , D>   < , , IS>                < , +, >   <ST, -, >
 * <R, >1, D>                                                                         < , -, >
 * <R, >1, ID>                                                                        < , -, >
 * <R, >1, IS>                         < , , !SD>                                     < , -, >
 * <R, >1, S>    < , , !S>             < , , !SD>                                     < , -, >
 * <R, >1, >                < , , S>   < , , D>   < , , IS>                < , +, >   < , -, >
 * <S, 0, >      <CR, , >              <D, , >
 * <S, 1, ID>                                                                         <I, -, !I>
 * <S, 1, IS>                          < , , !SD>                                     <I, -, !I>
 * <S, 1, S>     < , , !S>             <D, , !SD>                                     < , -, !S>
 * <S, 1, >                 < , , S>   <D, , D>                            <L, +, >   <CR, -, >
 * <S, >1, ID>                                                                        < , -, >
 * <S, >1, IS>                         < , , !SD>                                     < , -, >
 * <S, >1, S>    < , , !S>             <D, , !SD>                                     < , -, >
 * <S, >1, >                < , , S>   <D, , D>                            <L, +, >   < , -, >
 * <ST, 0, D>                                                                                                   <D, , !D>
 * <ST, 0, ID>                                                                                                  <I, , !I>
 * <ST, 0, IS>                         < , , !SD>                                                               <I, , !I>
 * <ST, 0, S>    < , , !S>             < , , !SD>                                                               <S, , !S>
 * <ST, 0, >                < , , S>   < , , D>   < , , IS>                < , +, >                             <CR, , >
 * <ST, >0, D>                                                                        < , -, >                  <D, , >
 * <ST, >0, ID>                                                                       < , -, >                  <S, , >
 * <ST, >0, IS>                        < , , !SD>                                     < , -, >                  <S, , >
 * <ST, >0, S>   < , , !S>             < , , !SD>                                     < , -, >                  <S, , >
 * <ST, >0, >               < , , S>   < , , D>   < , , IS>                < , +, >   < , -, >                  <L, , >
 */
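
/*
 * Worked example (illustrative), reading one row of the table above: a
 * reservation in <S, 0, > (stopped, no active contexts, no flags) that
 * receives a START event moves to <CR, , >. In terms of the function below:
 *
 *	uint32_t s = PMC_STATE(PMC_STATE_STATE_STOP, 0, 0);
 *	uint32_t n = pmc_internal_reservation_next_state(s, PMC_STATE_EVENT_START);
 *	assert(PMC_STATE_STATE(n) == PMC_STATE_STATE_CAN_RUN);
 */
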
static uint32_t pmc_internal_reservation_next_state(uint32_t current_state, pmc_state_event_t event) {
    uint32_t new_state = PMC_STATE(PMC_STATE_STATE_INVALID, 0, 0);

    switch (event) {
        case PMC_STATE_EVENT_START:
            switch (current_state & ~(PMC_STATE_CONTEXT_COUNT_MASK)) {
                case PMC_STATE(PMC_STATE_STATE_INTERRUPT, 0, PMC_STATE_FLAGS_STOPPING):
                case PMC_STATE(PMC_STATE_STATE_LOAD, 0, PMC_STATE_FLAGS_STOPPING):
                case PMC_STATE(PMC_STATE_STATE_RUN, 0, PMC_STATE_FLAGS_STOPPING):
                case PMC_STATE(PMC_STATE_STATE_STOP, 0, PMC_STATE_FLAGS_STOPPING):
                case PMC_STATE(PMC_STATE_STATE_STORE, 0, PMC_STATE_FLAGS_STOPPING):
                    new_state = PMC_STATE_MODIFY(current_state, 0, 0, PMC_STATE_FLAGS_STOPPING);
                    break;
                case PMC_STATE(PMC_STATE_STATE_STOP, 0, 0):
                    if (PMC_STATE_CONTEXT_COUNT(current_state) == 0) {
                        new_state = PMC_STATE_MOVE(current_state, PMC_STATE_STATE_CAN_RUN, 0, 0, 0);
                    }
                    break;
            }
            break;
        case PMC_STATE_EVENT_STOP:
            switch (current_state & ~(PMC_STATE_CONTEXT_COUNT_MASK)) {
                case PMC_STATE(PMC_STATE_STATE_CAN_RUN, 0, 0):
                    new_state = PMC_STATE_MOVE(current_state, PMC_STATE_STATE_STOP, 0, 0, 0);
                    break;
                case PMC_STATE(PMC_STATE_STATE_INTERRUPT, 0, 0):
                case PMC_STATE(PMC_STATE_STATE_LOAD, 0, 0):
                case PMC_STATE(PMC_STATE_STATE_RUN, 0, 0):
                case PMC_STATE(PMC_STATE_STATE_STORE, 0, 0):
                    new_state = PMC_STATE_MODIFY(current_state, 0, PMC_STATE_FLAGS_STOPPING, 0);
                    break;
                case PMC_STATE(PMC_STATE_STATE_STOP, 0, 0):
                    if (PMC_STATE_CONTEXT_COUNT(current_state) > 0) {
                        new_state = PMC_STATE_MODIFY(current_state, 0, PMC_STATE_FLAGS_STOPPING, 0);
                    }
                    break;
            }
            break;
        case PMC_STATE_EVENT_FREE:
            switch (current_state & ~(PMC_STATE_CONTEXT_COUNT_MASK)) {
                case PMC_STATE(PMC_STATE_STATE_CAN_RUN, 0, 0):
                    new_state = PMC_STATE_MOVE(current_state, PMC_STATE_STATE_DEALLOC, 0, 0, 0);
                    break;
                case PMC_STATE(PMC_STATE_STATE_INTERRUPT, 0, PMC_STATE_FLAGS_STOPPING):
                case PMC_STATE(PMC_STATE_STATE_LOAD, 0, PMC_STATE_FLAGS_INTERRUPTING | PMC_STATE_FLAGS_STOPPING):
                case PMC_STATE(PMC_STATE_STATE_LOAD, 0, PMC_STATE_FLAGS_STOPPING):
                case PMC_STATE(PMC_STATE_STATE_RUN, 0, PMC_STATE_FLAGS_INTERRUPTING | PMC_STATE_FLAGS_STOPPING):
                case PMC_STATE(PMC_STATE_STATE_RUN, 0, PMC_STATE_FLAGS_STOPPING):
                case PMC_STATE(PMC_STATE_STATE_STOP, 0, PMC_STATE_FLAGS_INTERRUPTING | PMC_STATE_FLAGS_STOPPING):
                case PMC_STATE(PMC_STATE_STATE_STORE, 0, PMC_STATE_FLAGS_INTERRUPTING | PMC_STATE_FLAGS_STOPPING):
                case PMC_STATE(PMC_STATE_STATE_STORE, 0, PMC_STATE_FLAGS_STOPPING):
                    new_state = PMC_STATE_MODIFY(current_state, 0, PMC_STATE_FLAGS_DEALLOCING, PMC_STATE_FLAGS_STOPPING);
                    break;
                case PMC_STATE(PMC_STATE_STATE_INTERRUPT, 0, 0):
                case PMC_STATE(PMC_STATE_STATE_LOAD, 0, 0):
                case PMC_STATE(PMC_STATE_STATE_RUN, 0, 0):
                case PMC_STATE(PMC_STATE_STATE_STORE, 0, 0):
                    new_state = PMC_STATE_MODIFY(current_state, 0, PMC_STATE_FLAGS_DEALLOCING, 0);
                    break;
                case PMC_STATE(PMC_STATE_STATE_STOP, 0, PMC_STATE_FLAGS_STOPPING):
                    new_state = PMC_STATE_MOVE(current_state, PMC_STATE_STATE_DEALLOC, 0, PMC_STATE_FLAGS_DEALLOCING, PMC_STATE_FLAGS_STOPPING);
                    break;
                case PMC_STATE(PMC_STATE_STATE_STOP, 0, 0):
                    if (PMC_STATE_CONTEXT_COUNT(current_state) > 0) {
                        new_state = PMC_STATE_MOVE(current_state, PMC_STATE_STATE_DEALLOC, 0, PMC_STATE_FLAGS_DEALLOCING, 0);
                    } else {
                        new_state = PMC_STATE_MOVE(current_state, PMC_STATE_STATE_DEALLOC, 0, 0, 0);
                    }
                    break;
            }
            break;
        case PMC_STATE_EVENT_INTERRUPT:
            switch (current_state & ~(PMC_STATE_CONTEXT_COUNT_MASK)) {
                case PMC_STATE(PMC_STATE_STATE_LOAD, 0, 0):
                case PMC_STATE(PMC_STATE_STATE_RUN, 0, 0):
                case PMC_STATE(PMC_STATE_STATE_STORE, 0, 0):
                    new_state = PMC_STATE_MODIFY(current_state, 0, PMC_STATE_FLAGS_INTERRUPTING | PMC_STATE_FLAGS_STOPPING, 0);
                    break;
            }
            break;
        case PMC_STATE_EVENT_END_OF_INTERRUPT:
            switch (current_state & ~(PMC_STATE_CONTEXT_COUNT_MASK)) {
                case PMC_STATE(PMC_STATE_STATE_INTERRUPT, 0, PMC_STATE_FLAGS_DEALLOCING):
                    new_state = PMC_STATE_MOVE(current_state, PMC_STATE_STATE_DEALLOC, 0, 0, PMC_STATE_FLAGS_DEALLOCING);
                    break;
                case PMC_STATE(PMC_STATE_STATE_INTERRUPT, 0, PMC_STATE_FLAGS_STOPPING):
                    new_state = PMC_STATE_MOVE(current_state, PMC_STATE_STATE_STOP, 0, 0, PMC_STATE_FLAGS_STOPPING);
                    break;
                case PMC_STATE(PMC_STATE_STATE_INTERRUPT, 0, 0):
                    new_state = PMC_STATE_MOVE(current_state, PMC_STATE_STATE_CAN_RUN, 0, 0, 0);
                    break;
            }
            break;
        case PMC_STATE_EVENT_CONTEXT_IN:
            switch (current_state & ~(PMC_STATE_CONTEXT_COUNT_MASK)) {
                case PMC_STATE(PMC_STATE_STATE_CAN_RUN, 0, 0):
                    new_state = PMC_STATE_MOVE(current_state, PMC_STATE_STATE_LOAD, 1, 0, 0);
                    break;
                case PMC_STATE(PMC_STATE_STATE_LOAD, 0, 0):
                case PMC_STATE(PMC_STATE_STATE_RUN, 0, 0):
                case PMC_STATE(PMC_STATE_STATE_STORE, 0, 0):
                    new_state = PMC_STATE_MODIFY(current_state, 1, 0, 0);
                    break;
                case PMC_STATE(PMC_STATE_STATE_STOP, 0, 0):
                    if (PMC_STATE_CONTEXT_COUNT(current_state) > 0) {
                        new_state = PMC_STATE_MOVE(current_state, PMC_STATE_STATE_LOAD, 1, 0, 0);
                    }
                    break;
            }
            break;
        case PMC_STATE_EVENT_CONTEXT_OUT:
            switch (current_state & ~(PMC_STATE_CONTEXT_COUNT_MASK)) {
                case PMC_STATE(PMC_STATE_STATE_DEALLOC, 0, PMC_STATE_FLAGS_DEALLOCING):
                    if (PMC_STATE_CONTEXT_COUNT(current_state) > 1) {
                        new_state = PMC_STATE_MODIFY(current_state, -1, 0, PMC_STATE_FLAGS_DEALLOCING);
                    } else {
                        new_state = PMC_STATE_MODIFY(current_state, -1, 0, 0);
                    }
                    break;
                case PMC_STATE(PMC_STATE_STATE_LOAD, 0, PMC_STATE_FLAGS_DEALLOCING):
                case PMC_STATE(PMC_STATE_STATE_LOAD, 0, PMC_STATE_FLAGS_INTERRUPTING | PMC_STATE_FLAGS_DEALLOCING):
                case PMC_STATE(PMC_STATE_STATE_LOAD, 0, PMC_STATE_FLAGS_INTERRUPTING | PMC_STATE_FLAGS_STOPPING):
                case PMC_STATE(PMC_STATE_STATE_LOAD, 0, PMC_STATE_FLAGS_STOPPING):
                case PMC_STATE(PMC_STATE_STATE_LOAD, 0, 0):
                    if (PMC_STATE_CONTEXT_COUNT(current_state) > 1) {
                        new_state = PMC_STATE_MODIFY(current_state, -1, 0, 0);
                    }
                    break;
                case PMC_STATE(PMC_STATE_STATE_RUN, 0, PMC_STATE_FLAGS_DEALLOCING):
                case PMC_STATE(PMC_STATE_STATE_RUN, 0, PMC_STATE_FLAGS_INTERRUPTING | PMC_STATE_FLAGS_DEALLOCING):
                case PMC_STATE(PMC_STATE_STATE_RUN, 0, PMC_STATE_FLAGS_INTERRUPTING | PMC_STATE_FLAGS_STOPPING):
                case PMC_STATE(PMC_STATE_STATE_RUN, 0, PMC_STATE_FLAGS_STOPPING):
                case PMC_STATE(PMC_STATE_STATE_RUN, 0, 0):
                    if (PMC_STATE_CONTEXT_COUNT(current_state) == 1) {
                        new_state = PMC_STATE_MOVE(current_state, PMC_STATE_STATE_STORE, -1, 0, 0);
                    } else {
                        new_state = PMC_STATE_MODIFY(current_state, -1, 0, 0);
                    }
                    break;
                case PMC_STATE(PMC_STATE_STATE_STOP, 0, PMC_STATE_FLAGS_INTERRUPTING | PMC_STATE_FLAGS_DEALLOCING):
                case PMC_STATE(PMC_STATE_STATE_STOP, 0, PMC_STATE_FLAGS_INTERRUPTING | PMC_STATE_FLAGS_STOPPING):
                    if (PMC_STATE_CONTEXT_COUNT(current_state) == 1) {
                        new_state = PMC_STATE_MOVE(current_state, PMC_STATE_STATE_INTERRUPT, -1, 0, PMC_STATE_FLAGS_INTERRUPTING);
                    } else {
                        new_state = PMC_STATE_MODIFY(current_state, -1, 0, 0);
                    }
                    break;
                case PMC_STATE(PMC_STATE_STATE_STOP, 0, PMC_STATE_FLAGS_STOPPING):
                    if (PMC_STATE_CONTEXT_COUNT(current_state) == 1) {
                        new_state = PMC_STATE_MODIFY(current_state, -1, 0, PMC_STATE_FLAGS_STOPPING);
                    } else {
                        new_state = PMC_STATE_MODIFY(current_state, -1, 0, 0);
                    }
                    break;
                case PMC_STATE(PMC_STATE_STATE_STOP, 0, 0):
                    if (PMC_STATE_CONTEXT_COUNT(current_state) > 0) {
                        if (PMC_STATE_CONTEXT_COUNT(current_state) == 1) {
                            new_state = PMC_STATE_MOVE(current_state, PMC_STATE_STATE_CAN_RUN, -1, 0, 0);
                        } else {
                            new_state = PMC_STATE_MODIFY(current_state, -1, 0, 0);
                        }
                    }
                    break;
                case PMC_STATE(PMC_STATE_STATE_STORE, 0, PMC_STATE_FLAGS_DEALLOCING):
                case PMC_STATE(PMC_STATE_STATE_STORE, 0, PMC_STATE_FLAGS_INTERRUPTING | PMC_STATE_FLAGS_DEALLOCING):
                case PMC_STATE(PMC_STATE_STATE_STORE, 0, PMC_STATE_FLAGS_INTERRUPTING | PMC_STATE_FLAGS_STOPPING):
                case PMC_STATE(PMC_STATE_STATE_STORE, 0, PMC_STATE_FLAGS_STOPPING):
                case PMC_STATE(PMC_STATE_STATE_STORE, 0, 0):
                    if (PMC_STATE_CONTEXT_COUNT(current_state) > 0) {
                        new_state = PMC_STATE_MODIFY(current_state, -1, 0, 0);
                    }
                    break;
            }
            break;
        case PMC_STATE_EVENT_LOAD_FINISHED:
            switch (current_state & ~(PMC_STATE_CONTEXT_COUNT_MASK)) {
                case PMC_STATE(PMC_STATE_STATE_LOAD, 0, PMC_STATE_FLAGS_DEALLOCING):
                case PMC_STATE(PMC_STATE_STATE_LOAD, 0, PMC_STATE_FLAGS_INTERRUPTING | PMC_STATE_FLAGS_DEALLOCING):
                case PMC_STATE(PMC_STATE_STATE_LOAD, 0, PMC_STATE_FLAGS_INTERRUPTING | PMC_STATE_FLAGS_STOPPING):
                case PMC_STATE(PMC_STATE_STATE_LOAD, 0, PMC_STATE_FLAGS_STOPPING):
                    if (PMC_STATE_CONTEXT_COUNT(current_state) > 1) {
                        new_state = PMC_STATE_MOVE(current_state, PMC_STATE_STATE_RUN, -1, 0, 0);
                    } else {
                        new_state = PMC_STATE_MOVE(current_state, PMC_STATE_STATE_STORE, -1, 0, 0);
                    }
                    break;
                case PMC_STATE(PMC_STATE_STATE_LOAD, 0, 0):
                    new_state = PMC_STATE_MOVE(current_state, PMC_STATE_STATE_RUN, 0, 0, 0);
                    break;
            }
            break;
        case PMC_STATE_EVENT_STORE_FINISHED:
            switch (current_state & ~(PMC_STATE_CONTEXT_COUNT_MASK)) {
                case PMC_STATE(PMC_STATE_STATE_STORE, 0, PMC_STATE_FLAGS_DEALLOCING):
                    if (PMC_STATE_CONTEXT_COUNT(current_state) == 0) {
                        new_state = PMC_STATE_MOVE(current_state, PMC_STATE_STATE_DEALLOC, 0, 0, PMC_STATE_FLAGS_DEALLOCING);
                    } else {
                        new_state = PMC_STATE_MOVE(current_state, PMC_STATE_STATE_DEALLOC, 0, 0, 0);
                    }
                    break;
                case PMC_STATE(PMC_STATE_STATE_STORE, 0, PMC_STATE_FLAGS_INTERRUPTING | PMC_STATE_FLAGS_DEALLOCING):
                case PMC_STATE(PMC_STATE_STATE_STORE, 0, PMC_STATE_FLAGS_INTERRUPTING | PMC_STATE_FLAGS_STOPPING):
                    if (PMC_STATE_CONTEXT_COUNT(current_state) == 0) {
                        new_state = PMC_STATE_MOVE(current_state, PMC_STATE_STATE_INTERRUPT, 0, 0, PMC_STATE_FLAGS_INTERRUPTING);
                    } else {
                        new_state = PMC_STATE_MOVE(current_state, PMC_STATE_STATE_STOP, 0, 0, 0);
                    }
                    break;
                case PMC_STATE(PMC_STATE_STATE_STORE, 0, PMC_STATE_FLAGS_STOPPING):
                    if (PMC_STATE_CONTEXT_COUNT(current_state) == 0) {
                        new_state = PMC_STATE_MOVE(current_state, PMC_STATE_STATE_STOP, 0, 0, PMC_STATE_FLAGS_STOPPING);
                    } else {
                        new_state = PMC_STATE_MOVE(current_state, PMC_STATE_STATE_STOP, 0, 0, 0);
                    }
                    break;
                case PMC_STATE(PMC_STATE_STATE_STORE, 0, 0):
                    if (PMC_STATE_CONTEXT_COUNT(current_state) == 0) {
                        new_state = PMC_STATE_MOVE(current_state, PMC_STATE_STATE_CAN_RUN, 0, 0, 0);
                    } else {
                        new_state = PMC_STATE_MOVE(current_state, PMC_STATE_STATE_LOAD, 0, 0, 0);
                    }
                    break;
            }
            break;
    }

    return new_state;
}

static uint32_t pmc_internal_reservation_move_for_event(pmc_reservation_t reservation, pmc_state_event_t event, pmc_state_t *old_state_out) {
    pmc_state_t oldState;
    pmc_state_t newState;

    assert(reservation);

    /* Determine what state change, if any, we need to do. Keep trying until we
     * either succeed in doing a transition or there is no valid move.
     */
    do {
        oldState = reservation->state;
        newState = pmc_internal_reservation_next_state(oldState, event);
    } while (newState != PMC_STATE_INVALID && !OSCompareAndSwap(oldState, newState, &(reservation->state)));

    if (newState != PMC_STATE_INVALID) {
        COUNTER_DEBUG("Moved reservation %p from state "PMC_STATE_FORMAT" to state "PMC_STATE_FORMAT" for event %s\n", reservation, PMC_STATE_ARGS(oldState), PMC_STATE_ARGS(newState), pmc_state_event_name(event));
    } else {
        COUNTER_DEBUG("No valid moves for reservation %p in state "PMC_STATE_FORMAT" for event %s\n", reservation, PMC_STATE_ARGS(oldState), pmc_state_event_name(event));
    }

    if (old_state_out != NULL) {
        *old_state_out = oldState;
    }

    return newState;
}
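
/*
 * Caller's-eye sketch (illustrative) of the contract above: the return value
 * is the state that was actually installed, or PMC_STATE_INVALID when the
 * event has no legal transition from the current state, so call sites gate
 * their side effects on it:
 *
 *	pmc_state_t newState;
 *	if (PMC_STATE_INVALID == (newState = pmc_internal_reservation_move_for_event(
 *			resv, PMC_STATE_EVENT_CONTEXT_OUT, NULL))) {
 *		return;	// no valid move; leave the reservation untouched
 *	}
 */
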
1439 */ 1440 do { 1441 oldState = reservation->state; 1442 newState = pmc_internal_reservation_next_state(oldState, event); 1443 } while (newState != PMC_STATE_INVALID && !OSCompareAndSwap(oldState, newState, &(reservation->state))); 1444 1445 if (newState != PMC_STATE_INVALID) { 1446 COUNTER_DEBUG("Moved reservation %p from state "PMC_STATE_FORMAT" to state "PMC_STATE_FORMAT" for event %s\n", reservation, PMC_STATE_ARGS(oldState), PMC_STATE_ARGS(newState), pmc_state_event_name(event)); 1447 } else { 1448 COUNTER_DEBUG("No valid moves for reservation %p in state "PMC_STATE_FORMAT" for event %s\n", reservation, PMC_STATE_ARGS(oldState), pmc_state_event_name(event)); 1449 } 1450 1451 if (old_state_out != NULL) { 1452 *old_state_out = oldState; 1453 } 1454 1455 return newState; 1456} 1457 1458static void pmc_internal_reservation_context_out(pmc_reservation_t reservation) { 1459 assert(reservation); 1460 pmc_state_t newState; 1461 pmc_state_t oldState; 1462 1463 /* Clear that the this reservation was active when this cpu did its last context in */ 1464 OSBitAndAtomic(~(1U << cpu_number()), &(reservation->active_last_context_in)); 1465 1466 /* Move the state machine */ 1467 if (PMC_STATE_INVALID == (newState = pmc_internal_reservation_move_for_event(reservation, PMC_STATE_EVENT_CONTEXT_OUT, &oldState))) { 1468 return; 1469 } 1470 1471 /* Do any actions required based on the state change */ 1472 if (PMC_STATE_STATE(newState) == PMC_STATE_STATE_STORE && PMC_STATE_STATE(oldState) != PMC_STATE_STATE_STORE) { 1473 /* Just moved into STORE, so store the reservation. */ 1474 pmc_internal_reservation_store(reservation); 1475 } else if (PMC_STATE_STATE(newState) == PMC_STATE_STATE_DEALLOC && PMC_STATE_CONTEXT_COUNT(newState) == 0 && PMC_STATE_FLAGS(newState) == 0) { 1476 /* Wakeup any thread blocking for this reservation to hit <DEALLOC, 0, > */ 1477 thread_wakeup((event_t)reservation); 1478 } 1479 1480} 1481 1482static void pmc_internal_reservation_context_in(pmc_reservation_t reservation) { 1483 assert(reservation); 1484 pmc_state_t oldState; 1485 pmc_state_t newState; 1486 1487 /* Move the state machine */ 1488 if (PMC_STATE_INVALID == (newState = pmc_internal_reservation_move_for_event(reservation, PMC_STATE_EVENT_CONTEXT_IN, &oldState))) { 1489 return; 1490 } 1491 1492 /* Mark that the reservation was active when this cpu did its last context in */ 1493 OSBitOrAtomic(1U << cpu_number(), &(reservation->active_last_context_in)); 1494 1495 /* Do any actions required based on the state change */ 1496 if (PMC_STATE_STATE(newState) == PMC_STATE_STATE_LOAD && PMC_STATE_STATE(oldState) != PMC_STATE_STATE_LOAD) { 1497 /* Just moved into LOAD, so load the reservation. */ 1498 pmc_internal_reservation_load(reservation); 1499 } 1500 1501} 1502 1503static void pmc_internal_reservation_store(pmc_reservation_t reservation) { 1504 assert(reservation); 1505 assert(PMC_STATE_STATE(reservation->state) == PMC_STATE_STATE_STORE); 1506 1507 assert(reservation->pmc); 1508 assert(reservation->config); 1509 1510 pmc_state_t newState; 1511 kern_return_t ret = KERN_SUCCESS; 1512 1513 pmc_t store_pmc = reservation->pmc; 1514 pmc_object_t store_pmc_obj = store_pmc->object; 1515 perf_monitor_t store_pm = store_pmc->monitor; 1516 1517 /* 1518 * Instruct the Perf Monitor that contains this counter to turn 1519 * off the global disable for this counter. 
1520 */ 1521 ret = store_pm->methods.disable_counters(store_pm->object, &store_pmc_obj, 1); 1522 if(KERN_SUCCESS != ret) { 1523 COUNTER_DEBUG(" [error] disable_counters: 0x%x\n", ret); 1524 return; 1525 } 1526 1527 /* Instruct the counter to disable itself */ 1528 ret = store_pmc->methods.disable(store_pmc_obj); 1529 if(KERN_SUCCESS != ret) { 1530 COUNTER_DEBUG(" [error] disable: 0x%x\n", ret); 1531 } 1532 1533 /* store the counter value into the reservation's stored count */ 1534 ret = store_pmc->methods.get_count(store_pmc_obj, &reservation->value); 1535 if(KERN_SUCCESS != ret) { 1536 COUNTER_DEBUG(" [error] get_count: 0x%x\n", ret); 1537 return; 1538 } 1539 1540 /* Advance the state machine now that the STORE is finished */ 1541 if (PMC_STATE_INVALID == (newState = pmc_internal_reservation_move_for_event(reservation, PMC_STATE_EVENT_STORE_FINISHED, NULL))) { 1542 return; 1543 } 1544 1545 /* Do any actions required based on the state change */ 1546 if (PMC_STATE_STATE(newState) == PMC_STATE_STATE_LOAD) { 1547 /* Just moved into LOAD, so load the reservation. */ 1548 pmc_internal_reservation_load(reservation); 1549 } else if (PMC_STATE_STATE(newState) == PMC_STATE_STATE_DEALLOC && PMC_STATE_CONTEXT_COUNT(newState) == 0 && PMC_STATE_FLAGS(newState) == 0) { 1550 /* Wakeup any thread blocking for this reservation to hit <DEALLOC, 0, > */ 1551 thread_wakeup((event_t)reservation); 1552 } 1553 1554} 1555 1556static void pmc_internal_reservation_load(pmc_reservation_t reservation) { 1557 assert(reservation); 1558 assert(PMC_STATE_STATE(reservation->state) == PMC_STATE_STATE_LOAD); 1559 1560 pmc_state_t newState; 1561 kern_return_t ret = KERN_SUCCESS; 1562 1563 assert(reservation->pmc); 1564 assert(reservation->config); 1565 1566 pmc_t load_pmc = reservation->pmc; 1567 pmc_object_t load_pmc_obj = load_pmc->object; 1568 perf_monitor_t load_pm = load_pmc->monitor; 1569 1570 /* Set the control register up with the stored configuration */ 1571 ret = load_pmc->methods.set_config(load_pmc_obj, reservation->config->object); 1572 if(KERN_SUCCESS != ret) { 1573 COUNTER_DEBUG(" [error] set_config: 0x%x\n", ret); 1574 return; 1575 } 1576 1577 /* load the counter value */ 1578 ret = load_pmc->methods.set_count(load_pmc_obj, reservation->value); 1579 if(KERN_SUCCESS != ret) { 1580 COUNTER_DEBUG(" [error] set_count: 0x%x\n", ret); 1581 return; 1582 } 1583 1584 /* Locally enable the counter */ 1585 ret = load_pmc->methods.enable(load_pmc_obj); 1586 if(KERN_SUCCESS != ret) { 1587 COUNTER_DEBUG(" [error] enable: 0x%x\n", ret); 1588 return; 1589 } 1590 1591 /* 1592 * Instruct the Perf Monitor containing the pmc to enable the 1593 * counter. 1594 */ 1595 ret = load_pm->methods.enable_counters(load_pm->object, &load_pmc_obj, 1); 1596 if(KERN_SUCCESS != ret) { 1597 COUNTER_DEBUG(" [error] enable_counters: 0x%x\n", ret); 1598 /* not on the hardware. */ 1599 return; 1600 } 1601 1602 /* Advance the state machine now that the STORE is finished */ 1603 if (PMC_STATE_INVALID == (newState = pmc_internal_reservation_move_for_event(reservation, PMC_STATE_EVENT_LOAD_FINISHED, NULL))) { 1604 return; 1605 } 1606 1607 /* Do any actions required based on the state change */ 1608 if (PMC_STATE_STATE(newState) == PMC_STATE_STATE_STORE) { 1609 /* Just moved into STORE, so store the reservation. */ 1610 pmc_internal_reservation_store(reservation); 1611 } 1612 1613} 1614 1615/* 1616 * pmc_accessible_from_core will return TRUE if the given @pmc is directly 1617 * (e.g., hardware) readable from the given logical core. 
1618 * 1619 * NOTE: This method is interrupt safe. 1620 */ 1621static inline boolean_t pmc_accessible_from_core(pmc_t pmc, uint32_t logicalCore) { 1622 boolean_t ret = FALSE; 1623 1624 assert(pmc); 1625 1626 ret = pmc->methods.accessible_from_core(pmc->object, logicalCore); 1627 1628 return ret; 1629} 1630 1631static void pmc_internal_reservation_start_cpu(void * arg) { 1632 pmc_reservation_t reservation = (pmc_reservation_t)arg; 1633 1634 assert(reservation); 1635 1636 1637 if (pmc_internal_reservation_matches_context(reservation)) { 1638 /* We are in context, but the reservation may have already had the context_in method run. Attempt 1639 * to set this cpu's bit in the active_last_context_in mask. If we set it, call context_in. 1640 */ 1641 uint32_t oldMask = OSBitOrAtomic(1U << cpu_number(), &(reservation->active_last_context_in)); 1642 1643 if ((oldMask & (1U << cpu_number())) == 0) { 1644 COUNTER_DEBUG("Starting already in-context reservation %p for cpu %d\n", reservation, cpu_number()); 1645 1646 pmc_internal_reservation_context_in(reservation); 1647 } 1648 } 1649} 1650 1651static void pmc_internal_reservation_stop_cpu(void * arg) { 1652 pmc_reservation_t reservation = (pmc_reservation_t)arg; 1653 1654 assert(reservation); 1655 1656 1657 if (pmc_internal_reservation_matches_context(reservation)) { 1658 COUNTER_DEBUG("Stopping in-context reservation %p for cpu %d\n", reservation, cpu_number()); 1659 1660 pmc_internal_reservation_context_out(reservation); 1661 } 1662} 1663 1664/*!fn 1665 * pmc_reservation_interrupt is called when a PMC reservation which was setup 1666 * with an interrupt threshold counts the requested number of events. When the 1667 * underlying counter hits the threshold, an interrupt is generated, and this 1668 * method is called. This method marks the reservation as stopped, and passes 1669 * control off to the user-registered callback method, along with the 1670 * reservation (so that the user can, for example, write a 0 to the counter, and 1671 * restart the reservation). 1672 * This method assumes the reservation has a valid pmc_config_t within. 1673 * 1674 * @param target The pmc_reservation_t that caused the interrupt. 1675 * @param refCon User specified reference constant. 1676 */ 1677static void pmc_reservation_interrupt(void *target, void *refCon) { 1678 pmc_reservation_t reservation = (pmc_reservation_t)target; 1679 pmc_state_t newState; 1680 uint64_t timeout; 1681 uint32_t spins; 1682 1683 assert(reservation); 1684 1685 /* Move the state machine */ 1686 if (PMC_STATE_INVALID == pmc_internal_reservation_move_for_event(reservation, PMC_STATE_EVENT_INTERRUPT, NULL)) { 1687 return; 1688 } 1689 1690 /* A valid state move has been made, but won't be picked up until a context switch occurs. To cause matching 1691 * contexts that are currently running to update, we do an inter-processor message to run pmc_internal_reservation_stop_cpu 1692 * on every cpu that can access the PMC. 
1693 */ 1694 pmc_internal_reservation_broadcast(reservation, pmc_internal_reservation_stop_cpu); 1695 1696 /* Spin waiting for the state to turn to INTERRUPT */ 1697 nanoseconds_to_absolutetime(PMC_SPIN_TIMEOUT_US * 1000, &timeout); 1698 timeout += mach_absolute_time(); 1699 spins = 0; 1700 while (PMC_STATE_STATE(reservation->state) != PMC_STATE_STATE_INTERRUPT) { 1701 /* Assert if this takes longer than PMC_SPIN_TIMEOUT_US */ 1702 if (++spins > PMC_SPIN_THRESHOLD) { 1703 if (mach_absolute_time() > timeout) { 1704 pmc_spin_timeout_count++; 1705 assert(0); 1706 } 1707 } 1708 1709 cpu_pause(); 1710 } 1711 1712 assert(reservation->config); 1713 assert(reservation->config->method); 1714 1715 /* Call the registered callback handler */ 1716#if DEBUG_COUNTERS 1717 uint64_t start = mach_absolute_time(); 1718#endif /* DEBUG */ 1719 1720 (void)reservation->config->method(reservation, refCon); 1721 1722#if DEBUG_COUNTERS 1723 uint64_t end = mach_absolute_time(); 1724 if((end - start) > 5000ULL) { 1725 kprintf("%s - user method %p took %llu ns\n", __FUNCTION__, 1726 reservation->config->method, (end - start)); 1727 } 1728#endif 1729 1730 /* Move the state machine */ 1731 if (PMC_STATE_INVALID == (newState = pmc_internal_reservation_move_for_event(reservation, PMC_STATE_EVENT_END_OF_INTERRUPT, NULL))) { 1732 return; 1733 } 1734 1735 /* Do any post-move actions necessary */ 1736 if (PMC_STATE_STATE(newState) == PMC_STATE_STATE_CAN_RUN) { 1737 pmc_internal_reservation_broadcast(reservation, pmc_internal_reservation_start_cpu); 1738 } else if (PMC_STATE_STATE(newState) == PMC_STATE_STATE_DEALLOC && PMC_STATE_CONTEXT_COUNT(newState) == 0 && PMC_STATE_FLAGS(newState) == 0) { 1739 /* Wakeup any thread blocking for this reservation to hit <DEALLOC, 0, > */ 1740 thread_wakeup((event_t)reservation); 1741 } 1742} 1743 1744/* 1745 * Apple-private KPI for Apple kext's (IOProfileFamily) only 1746 */ 1747 1748#if 0 1749#pragma mark - 1750#pragma mark IOProfileFamily private KPI 1751#endif 1752 1753/* 1754 * perf_monitor_register registers a new Performance Monitor, and its associated 1755 * callback methods. The given perf_monitor_object_t is the first argument to 1756 * each callback when they are called. 1757 */ 1758kern_return_t perf_monitor_register(perf_monitor_object_t monitor, 1759 perf_monitor_methods_t *methods) { 1760 int cpu = -1; 1761 1762 COUNTER_DEBUG("registering perf monitor %p\n", monitor); 1763 1764 if(!monitor || !methods) { 1765 return KERN_INVALID_ARGUMENT; 1766 } 1767 1768 /* Protect against out-of-date driver kexts */ 1769 if(MACH_PERFMON_METHODS_VERSION != methods->perf_monitor_methods_version) { 1770 return KERN_INVALID_ARGUMENT; 1771 } 1772 1773 /* If the monitor requires idle notifications, ensure that it is 1774 * accessible from a single core only. 1775 */ 1776 if (methods->flags & PERFMON_FLAG_REQUIRES_IDLE_NOTIFICATIONS) { 1777 uint32_t *cores; 1778 size_t core_cnt; 1779 1780 if (KERN_SUCCESS == methods->accessible_cores(monitor, &cores, &core_cnt)) { 1781 /* 1782 * Guard against disabled cores - monitors will always match and 1783 * attempt registration, irrespective of 'cpus=x' boot-arg. 
1784 */ 1785 if ((core_cnt == 1) && (cores[0] < (uint32_t)ml_get_max_cpus())) { 1786 cpu = cores[0]; 1787 } else { 1788 return KERN_INVALID_ARGUMENT; 1789 } 1790 } 1791 } 1792 1793 /* All methods are required */ 1794 if(!methods->accessible_cores | 1795 !methods->enable_counters || !methods->disable_counters || 1796 !methods->on_idle || !methods->on_idle_exit) { 1797 return KERN_INVALID_ARGUMENT; 1798 } 1799 1800 /* prevent dupes. */ 1801 perf_monitor_t dupe = perf_monitor_find(monitor); 1802 if(dupe) { 1803 COUNTER_DEBUG("Duplicate registration for %p\n", monitor); 1804 perf_monitor_deallocate(dupe); 1805 return KERN_FAILURE; 1806 } 1807 1808 perf_monitor_t pm = perf_monitor_alloc(); 1809 if(!pm) { 1810 return KERN_RESOURCE_SHORTAGE; 1811 } 1812 1813 /* initialize the object */ 1814 perf_monitor_init(pm, cpu); 1815 1816 /* copy in the registration info */ 1817 pm->object = monitor; 1818 memcpy(&(pm->methods), methods, sizeof(perf_monitor_methods_t)); 1819 1820 /* place it in the tracking queues */ 1821 perf_monitor_enqueue(pm); 1822 1823 /* debug it */ 1824 PRINT_PERF_MON(pm); 1825 1826 return KERN_SUCCESS; 1827} 1828 1829/* 1830 * perf_monitor_unregister unregisters a previously registered Perf Monitor, 1831 * looking it up by reference pointer (the same that was used in 1832 * perf_monitor_register()). 1833 */ 1834kern_return_t perf_monitor_unregister(perf_monitor_object_t monitor) { 1835 kern_return_t ret = KERN_FAILURE; 1836 1837 COUNTER_DEBUG("unregistering perf monitor %p\n", monitor); 1838 1839 if(!monitor) { 1840 return KERN_INVALID_ARGUMENT; 1841 } 1842 1843 perf_monitor_t pm = perf_monitor_find(monitor); 1844 if(pm) { 1845 /* Remove it from the queues. */ 1846 perf_monitor_dequeue(pm); 1847 1848 /* drop extra retain from find */ 1849 perf_monitor_deallocate(pm); 1850 1851 /* and release the object */ 1852 perf_monitor_deallocate(pm); 1853 1854 ret = KERN_SUCCESS; 1855 } else { 1856 COUNTER_DEBUG("could not find a registered pm that matches!\n"); 1857 } 1858 1859 return ret; 1860} 1861 1862/* 1863 * pmc_register registers a new PMC for use with the pmc subsystem. Each PMC is 1864 * associated with a Perf Monitor. Perf Monitors are looked up by the reference 1865 * pointer that was used to previously register them. 1866 * 1867 * PMCs are registered with a reference pointer (@pmc_object), and a set of 1868 * callback methods. When the given callback methods are called from xnu, the 1869 * first argument will always be the reference pointer used to register the PMC. 1870 * 1871 * NOTE: @monitor must have been successfully registered via 1872 * perf_monitor_register before this method will succeed. 1873 */ 1874kern_return_t pmc_register(perf_monitor_object_t monitor, pmc_object_t pmc_object, 1875 pmc_methods_t *methods, void *object) { 1876 1877 COUNTER_DEBUG("%p %p\n", monitor, pmc_object); 1878 1879 if(!monitor || !pmc_object || !methods || !object) { 1880 return KERN_INVALID_ARGUMENT; 1881 } 1882 1883 /* Prevent version mismatches */ 1884 if(MACH_PMC_METHODS_VERSION != methods->pmc_methods_version) { 1885 COUNTER_DEBUG("version mismatch\n"); 1886 return KERN_INVALID_ARGUMENT; 1887 } 1888 1889 /* All methods are required. 
/*
 * pmc_unregister unregisters a previously registered PMC, looking it up by
 * reference pointer via *both* the Perf Monitor it was registered with, and
 * the PMC's own reference pointer.
 */
kern_return_t pmc_unregister(perf_monitor_object_t monitor, pmc_object_t pmc_object) {
	COUNTER_DEBUG("%p %p\n", monitor, pmc_object);

	if(!monitor || !pmc_object) {
		return KERN_INVALID_ARGUMENT;
	}

	pmc_t pmc = pmc_find(pmc_object);
	if(!pmc) {
		COUNTER_DEBUG("Could not find a matching pmc.\n");
		return KERN_FAILURE;
	}

	/* remove it from the global queue */
	pmc_dequeue(pmc);

	perf_monitor_remove_pmc(pmc->monitor, pmc);

	/* remove extra reference count from pmc_find() */
	pmc_deallocate(pmc);

	/* dealloc the pmc */
	pmc_deallocate(pmc);

	return KERN_SUCCESS;
}

static void perf_monitor_reservation_add(perf_monitor_t monitor) {
	assert(monitor);
	OSIncrementAtomic(&(monitor->reservedCounters));
}

static void perf_monitor_reservation_remove(perf_monitor_t monitor) {
	assert(monitor);
	OSDecrementAtomic(&(monitor->reservedCounters));
}

#if 0
#pragma mark -
#pragma mark KPI
#endif

/*
 * Begin in-kernel and in-kext KPI methods
 */

/*
 * pmc_create_config creates a new configuration area from a given @pmc.
 *
 * NOTE: This method is not interrupt safe.
 */
kern_return_t pmc_create_config(pmc_t pmc, pmc_config_t *config) {
	pmc_config_t tmp = NULL;

	if(!pmc || !config) {
		return KERN_INVALID_ARGUMENT;
	}

	pmc_reference(pmc);

	tmp = pmc_config_alloc(pmc);
	if(tmp) {
		tmp->object = pmc->methods.create_config(pmc->object);

		if(!tmp->object) {
			pmc_config_free(pmc, tmp);
			tmp = NULL;
		} else {
			tmp->interrupt_after_value = 0ULL;
			tmp->method = NULL;
			tmp->refCon = NULL;
		}
	}

	pmc_deallocate(pmc);

	if(!tmp) {
		return KERN_RESOURCE_SHORTAGE;
	}

	*config = tmp;

	return KERN_SUCCESS;
}
2005 */ 2006kern_return_t pmc_create_config(pmc_t pmc, pmc_config_t *config) { 2007 pmc_config_t tmp = NULL; 2008 2009 if(!pmc || !config) { 2010 return KERN_INVALID_ARGUMENT; 2011 } 2012 2013 pmc_reference(pmc); 2014 2015 tmp = pmc_config_alloc(pmc); 2016 if(tmp) { 2017 tmp->object = pmc->methods.create_config(pmc->object); 2018 2019 if(!tmp->object) { 2020 pmc_config_free(pmc, tmp); 2021 tmp = NULL; 2022 } else { 2023 tmp->interrupt_after_value = 0ULL; 2024 tmp->method = NULL; 2025 tmp->refCon = NULL; 2026 } 2027 } 2028 2029 pmc_deallocate(pmc); 2030 2031 if(!tmp) { 2032 return KERN_RESOURCE_SHORTAGE; 2033 } 2034 2035 *config = tmp; 2036 2037 return KERN_SUCCESS; 2038} 2039 2040/* 2041 * pmc_free_config frees a configuration area created from a given @pmc 2042 * 2043 * NOTE: This method is not interrupt safe. 2044 */ 2045void pmc_free_config(pmc_t pmc, pmc_config_t config) { 2046 assert(pmc); 2047 assert(config); 2048 2049 pmc_reference(pmc); 2050 2051 pmc_config_free(pmc, config); 2052 2053 pmc_deallocate(pmc); 2054} 2055 2056/* 2057 * pmc_config_set_value sets up configuration area key-value pairs. These pairs 2058 * are to be either pre-known, or looked up via CoreProfile.framework. 2059 * 2060 * NOTE: This method is not interrupt safe. 2061 */ 2062kern_return_t pmc_config_set_value(pmc_t pmc, pmc_config_t config, 2063 uint8_t id, uint64_t value) { 2064 2065 kern_return_t ret = KERN_INVALID_ARGUMENT; 2066 2067 if(!pmc || !config) { 2068 return ret; 2069 } 2070 2071 pmc_reference(pmc); 2072 2073 ret = pmc->methods.config_set_value(config->object, id, value); 2074 2075 pmc_deallocate(pmc); 2076 2077 return ret; 2078} 2079 2080/* 2081 * pmc_config_set_interrupt_threshold modifies a config object, instructing 2082 * the pmc that it should generate a call to the given pmc_interrupt_method_t 2083 * after the counter counts @threshold events. 2084 * 2085 * PMC Threshold handler methods will have the pmc_reservation_t that generated the interrupt 2086 * as the first argument when the interrupt handler is invoked, and the given 2087 * @refCon (which may be NULL) as the second. 2088 * 2089 * See pmc_interrupt_method_t. 2090 * 2091 * NOTE: This method is not interrupt safe. 2092 */ 2093kern_return_t pmc_config_set_interrupt_threshold(pmc_t pmc, pmc_config_t config, 2094 uint64_t threshold, pmc_interrupt_method_t method, void *refCon) { 2095 kern_return_t ret = KERN_INVALID_ARGUMENT; 2096 2097 if(!config || !pmc) { 2098 return ret; 2099 } 2100 2101 assert(config); 2102 assert(pmc); 2103 2104 pmc_reference(pmc); 2105 2106 do { 2107 /* 2108 * We have a minor annoyance to side-step here. The driver layer expects 2109 * the config to never change once a reservation has been taken out with 2110 * it. However, in order to have the PMI method have the reservation as 2111 * the first argument (in order to allow the user-method to, for 2112 * example, write a 0 to it, and restart it), we need to create the 2113 * pmc_reservation_t before setting it up in the config object. 2114 * We overcome this by caching the method in the pmc_config_t stand-in, 2115 * and mutating the pmc_config_object_t just before returning a 2116 * reservation (in pmc_reserve() and friends, below). 2117 */ 2118 2119 /* might as well stash this away too. 
/*
 * pmc_get_pmc_list returns an allocated list of pmc_t's, as well as the number
 * of pmc_t's returned. Callers should free this list with a call to
 * pmc_free_pmc_list().
 *
 * NOTE: This method is not interrupt safe.
 */
kern_return_t pmc_get_pmc_list(pmc_t **pmcs, size_t *pmcCount) {
	pmc_t *array = NULL;
	pmc_t pmc = NULL;
	size_t count = 0UL;

	do {
		/* Copy down (to the stack) the count of perf counters */
		vm_size_t size = perf_counters_count;

		/* Allocate a chunk of that size */
		array = (pmc_t *)kalloc(sizeof(pmc_t) * size);
		if(!array) {
			return KERN_RESOURCE_SHORTAGE;
		}

		/* Take the spin lock */
		lck_spin_lock(&perf_counters_queue_spin);

		/* verify the size didn't change while we were allocating */
		if(size != perf_counters_count) {
			/*
			 * queue size has changed between alloc and now - go back and
			 * make another pass.
			 */

			/* drop the lock */
			lck_spin_unlock(&perf_counters_queue_spin);

			/* free the block */
			kfree(array, sizeof(pmc_t) * size);
			array = NULL;
		}

		/* if we get here, and array is NULL, we try again. */
	} while(!array);

	/* copy the bits out */
	queue_iterate(perf_counters_queue, pmc, pmc_t, link) {
		/* copy out the pointer */
		array[count++] = pmc;
	}

	lck_spin_unlock(&perf_counters_queue_spin);

	/* return the list and the size */
	*pmcs = array;
	*pmcCount = count;

	return KERN_SUCCESS;
}

/*
 * pmc_free_pmc_list frees an array of pmc_t that has been returned from
 * pmc_get_pmc_list.
 *
 * NOTE: This method is not interrupt safe.
 */
void pmc_free_pmc_list(pmc_t *pmcs, size_t pmcCount) {
	if(pmcs && pmcCount) {
		COUNTER_DEBUG("pmcs: %p pmcCount: %lu\n", pmcs, pmcCount);

		kfree(pmcs, pmcCount * sizeof(pmc_t));
	}
}

/*
 * pmc_find_by_name returns an allocated list of all pmc_t's whose names are
 * prefixed by @name (see pmc_get_name). Callers free the resulting list with
 * pmc_free_pmc_list().
 */
kern_return_t pmc_find_by_name(const char *name, pmc_t **pmcs, size_t *pmcCount) {
	kern_return_t ret = KERN_INVALID_ARGUMENT;

	if(!name || !pmcs || !pmcCount) {
		return ret;
	}

	pmc_t *list = NULL;
	size_t count = 0UL;

	if(KERN_SUCCESS == (ret = pmc_get_pmc_list(&list, &count))) {
		size_t matchCount = 0UL, ii = 0UL, swapPtr = 0UL;
		size_t len = strlen(name);

		for(ii = 0UL; ii < count; ii++) {
			const char *pmcName = pmc_get_name(list[ii]);

			if(strlen(pmcName) < len) {
				/*
				 * If the pmc name is shorter than the requested match, it's no
				 * match, as we're looking for the most specific match(es).
				 */
				continue;
			}

			if(0 == strncmp(name, pmcName, len)) {
				pmc_t temp = list[ii];

				/* move matches to the head of the array. */
				list[ii] = list[swapPtr];
				list[swapPtr] = temp;
				swapPtr++;

				/* keep a count of the matches */
				matchCount++;
			}
		}

		if(matchCount) {
			/*
			 * If we have matches, they are all at the head of the array, so
			 * just allocate enough space for @matchCount pmc_t's, and copy the
			 * head of the array to the new allocation. Then free the old
			 * allocation.
			 */
			pmc_t *result = (pmc_t *)kalloc(sizeof(pmc_t) * matchCount);
			if(result) {
				/* copy the matches */
				memcpy(result, list, sizeof(pmc_t) * matchCount);

				ret = KERN_SUCCESS;
			}

			pmc_free_pmc_list(list, count);

			if(!result) {
				*pmcs = NULL;
				*pmcCount = 0UL;
				return KERN_RESOURCE_SHORTAGE;
			}

			*pmcs = result;
			*pmcCount = matchCount;
		} else {
			/* No matches: free the interim list rather than leaking it. */
			pmc_free_pmc_list(list, count);

			*pmcs = NULL;
			*pmcCount = 0UL;
		}
	}

	return ret;
}
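/*
 * Example (illustrative): prefix lookup via pmc_find_by_name(). The counter
 * name "com.example.pmc" is hypothetical; drivers publish real names through
 * their get_name method.
 *
 *	pmc_t *matches = NULL;
 *	size_t matchCount = 0UL;
 *
 *	if (KERN_SUCCESS == pmc_find_by_name("com.example.pmc", &matches, &matchCount)) {
 *		// matches[0 .. matchCount-1] all share the requested prefix.
 *		for (size_t ii = 0; ii < matchCount; ii++) {
 *			kprintf("matched: %s\n", pmc_get_name(matches[ii]));
 *		}
 *
 *		pmc_free_pmc_list(matches, matchCount);
 *	}
 */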
2249 */ 2250 2251 pmc_t *result = (pmc_t *)kalloc(sizeof(pmc_t) * matchCount); 2252 if(result) { 2253 // copy the matches 2254 memcpy(result, list, sizeof(pmc_t) * matchCount); 2255 2256 ret = KERN_SUCCESS; 2257 } 2258 2259 pmc_free_pmc_list(list, count); 2260 2261 if(!result) { 2262 *pmcs = NULL; 2263 *pmcCount = 0UL; 2264 return KERN_RESOURCE_SHORTAGE; 2265 } 2266 2267 *pmcs = result; 2268 *pmcCount = matchCount; 2269 } else { 2270 *pmcs = NULL; 2271 *pmcCount = 0UL; 2272 } 2273 } 2274 2275 return ret; 2276} 2277 2278/* 2279 * pmc_get_name returns a pointer (not copied) to the human-readable name of the 2280 * given pmc. 2281 * 2282 * NOTE: Driver authors must take care to not allocate during this method, as 2283 * this method *IS* interrupt safe. 2284 */ 2285const char *pmc_get_name(pmc_t pmc) { 2286 assert(pmc); 2287 2288 const char *name = pmc->methods.get_name(pmc->object); 2289 2290 return name; 2291} 2292 2293/* 2294 * pmc_get_accessible_core_list returns a pointer to an array of logical core 2295 * numbers (as well as the size of that array) that represent the local cores 2296 * (hardware threads) from which the given @pmc can be accessed directly. 2297 * 2298 * NOTE: This method is interrupt safe. 2299 */ 2300kern_return_t pmc_get_accessible_core_list(pmc_t pmc, uint32_t **logicalCores, 2301 size_t *logicalCoreCt) { 2302 2303 kern_return_t ret = KERN_INVALID_ARGUMENT; 2304 2305 if(!pmc || !logicalCores || !logicalCoreCt) { 2306 return ret; 2307 } 2308 2309 ret = pmc->methods.accessible_cores(pmc->object, logicalCores, logicalCoreCt); 2310 2311 return ret; 2312} 2313 2314static boolean_t pmc_reservation_setup_pmi(pmc_reservation_t resv, pmc_config_t config) { 2315 assert(resv); 2316 assert(resv->pmc); 2317 assert(config); 2318 assert(config->object); 2319 2320 /* If there's no PMI to setup, return success */ 2321 if(config->interrupt_after_value && config->method) { 2322 2323 /* set the threshold */ 2324 kern_return_t ret = resv->pmc->methods.config_set_threshold(config->object, 2325 config->interrupt_after_value); 2326 2327 if(KERN_SUCCESS != ret) { 2328 /* 2329 * This is the most useful error message here, as this only happens 2330 * as a result of pmc_reserve*() 2331 */ 2332 COUNTER_DEBUG("Failed to set threshold for pmc %p\n", resv->pmc); 2333 return FALSE; 2334 } 2335 2336 if(KERN_SUCCESS != resv->pmc->methods.config_set_handler(config->object, 2337 (void *)resv, &pmc_reservation_interrupt, config->refCon)) { 2338 2339 COUNTER_DEBUG("Failed to set handler for pmc %p\n", resv->pmc); 2340 return FALSE; 2341 } 2342 } 2343 2344 return TRUE; 2345} 2346 2347/* 2348 * pmc_reserve will attempt to reserve the given @pmc, with a given 2349 * configuration object, for counting system-wide. This method will fail with 2350 * KERN_FAILURE if the given pmc is already reserved at any scope. 2351 * 2352 * This method consumes the given configuration object if it returns 2353 * KERN_SUCCESS. Any other return value indicates the caller 2354 * must free the config object via pmc_free_config(). 2355 * 2356 * NOTE: This method is NOT interrupt safe. 
2357 */ 2358kern_return_t pmc_reserve(pmc_t pmc, pmc_config_t config, 2359 pmc_reservation_t *reservation) { 2360 2361 if(!pmc || !config || !reservation) { 2362 return KERN_INVALID_ARGUMENT; 2363 } 2364 2365 pmc_reservation_t resv = reservation_alloc(); 2366 if(!resv) { 2367 return KERN_RESOURCE_SHORTAGE; 2368 } 2369 2370 reservation_init(resv); 2371 2372 resv->flags |= PMC_FLAG_SCOPE_SYSTEM; 2373 resv->config = config; 2374 2375 if(KERN_SUCCESS != pmc_internal_reservation_set_pmc(resv, pmc)) { 2376 resv->config = NULL; 2377 return KERN_FAILURE; 2378 } 2379 2380 /* enqueue reservation in proper place */ 2381 if(!pmc_internal_reservation_add(resv) || !pmc_reservation_setup_pmi(resv, config)) { 2382 /* Prevent free of config object */ 2383 resv->config = NULL; 2384 2385 reservation_free(resv); 2386 return KERN_FAILURE; 2387 } 2388 2389 perf_monitor_reservation_add(pmc->monitor); 2390 2391 *reservation = resv; 2392 2393 return KERN_SUCCESS; 2394} 2395 2396/* 2397 * pmc_reserve_task will attempt to reserve the given @pmc with a given 2398 * configuration object, for counting when the given @task is running on any 2399 * logical core that can directly access the given @pmc. This method will fail 2400 * with KERN_FAILURE if the given pmc is already reserved at either system or 2401 * thread scope. 2402 * 2403 * This method consumes the given configuration object if it returns 2404 * KERN_SUCCESS. Any other return value indicates the caller 2405 * must free the config object via pmc_free_config(). 2406 * 2407 * NOTE: You can reserve the same pmc for N different tasks concurrently. 2408 * NOTE: This method is NOT interrupt safe. 2409 */ 2410kern_return_t pmc_reserve_task(pmc_t pmc, pmc_config_t config, 2411 task_t task, pmc_reservation_t *reservation) { 2412 2413 if(!pmc || !config || !reservation || !task) { 2414 return KERN_INVALID_ARGUMENT; 2415 } 2416 2417 if (!(pmc->monitor->methods.flags & PERFMON_FLAG_SUPPORTS_CONTEXT_SWITCHING)) { 2418 COUNTER_DEBUG("pmc %p cannot be context switched!\n", pmc); 2419 return KERN_INVALID_ARGUMENT; 2420 } 2421 2422 pmc_reservation_t resv = reservation_alloc(); 2423 if(!resv) { 2424 return KERN_RESOURCE_SHORTAGE; 2425 } 2426 2427 reservation_init(resv); 2428 2429 resv->flags |= PMC_FLAG_SCOPE_TASK; 2430 resv->task = task; 2431 2432 resv->config = config; 2433 2434 if(KERN_SUCCESS != pmc_internal_reservation_set_pmc(resv, pmc)) { 2435 resv->config = NULL; 2436 return KERN_FAILURE; 2437 } 2438 2439 /* enqueue reservation in proper place */ 2440 if(!pmc_internal_reservation_add(resv) || !pmc_reservation_setup_pmi(resv, config)) { 2441 /* Prevent free of config object */ 2442 resv->config = NULL; 2443 2444 reservation_free(resv); 2445 return KERN_FAILURE; 2446 } 2447 2448 perf_monitor_reservation_add(pmc->monitor); 2449 2450 *reservation = resv; 2451 2452 return KERN_SUCCESS; 2453} 2454 2455/* 2456 * pmc_reserve_thread will attempt to reserve the given @pmc with a given 2457 * configuration object, for counting when the given @thread is running on any 2458 * logical core that can directly access the given @pmc. This method will fail 2459 * with KERN_FAILURE if the given pmc is already reserved at either system or 2460 * task scope. 2461 * 2462 * This method consumes the given configuration object if it returns 2463 * KERN_SUCCESS. Any other return value indicates the caller 2464 * must free the config object via pmc_free_config(). 2465 * 2466 * NOTE: You can reserve the same pmc for N different threads concurrently. 2467 * NOTE: This method is NOT interrupt safe. 
2468 */ 2469kern_return_t pmc_reserve_thread(pmc_t pmc, pmc_config_t config, 2470 thread_t thread, pmc_reservation_t *reservation) { 2471 if(!pmc || !config || !reservation || !thread) { 2472 return KERN_INVALID_ARGUMENT; 2473 } 2474 2475 if (!(pmc->monitor->methods.flags & PERFMON_FLAG_SUPPORTS_CONTEXT_SWITCHING)) { 2476 COUNTER_DEBUG("pmc %p cannot be context switched!\n", pmc); 2477 return KERN_INVALID_ARGUMENT; 2478 } 2479 2480 pmc_reservation_t resv = reservation_alloc(); 2481 if(!resv) { 2482 return KERN_RESOURCE_SHORTAGE; 2483 } 2484 2485 reservation_init(resv); 2486 2487 resv->flags |= PMC_FLAG_SCOPE_THREAD; 2488 resv->thread = thread; 2489 2490 resv->config = config; 2491 2492 if(KERN_SUCCESS != pmc_internal_reservation_set_pmc(resv, pmc)) { 2493 resv->config = NULL; 2494 return KERN_FAILURE; 2495 } 2496 2497 /* enqueue reservation in proper place */ 2498 if(!pmc_internal_reservation_add(resv) || !pmc_reservation_setup_pmi(resv, config)) { 2499 /* Prevent free of config object */ 2500 resv->config = NULL; 2501 2502 reservation_free(resv); 2503 return KERN_FAILURE; 2504 } 2505 2506 perf_monitor_reservation_add(pmc->monitor); 2507 2508 *reservation = resv; 2509 2510 return KERN_SUCCESS; 2511} 2512 2513/* 2514 * pmc_reservation_start instructs the given reservation to start counting as 2515 * soon as possible. 2516 * 2517 * NOTE: This method is interrupt safe. 2518 */ 2519kern_return_t pmc_reservation_start(pmc_reservation_t reservation) { 2520 pmc_state_t newState; 2521 2522 if(!reservation) { 2523 return KERN_INVALID_ARGUMENT; 2524 } 2525 2526 /* Move the state machine */ 2527 if (PMC_STATE_INVALID == (newState = pmc_internal_reservation_move_for_event(reservation, PMC_STATE_EVENT_START, NULL))) { 2528 return KERN_FAILURE; 2529 } 2530 2531 /* If we are currently in an interrupt, don't bother to broadcast since it won't do anything now and the interrupt will 2532 * broadcast right before it leaves 2533 */ 2534 if (PMC_STATE_STATE(newState) != PMC_STATE_STATE_INTERRUPT) { 2535 /* A valid state move has been made, but won't be picked up until a context switch occurs. To cause matching 2536 * contexts that are currently running to update, we do an inter-processor message to run pmc_internal_reservation_start_cpu 2537 * on every cpu that can access the PMC. 2538 */ 2539 pmc_internal_reservation_broadcast(reservation, pmc_internal_reservation_start_cpu); 2540 } 2541 2542 return KERN_SUCCESS; 2543} 2544 2545/* 2546 * pmc_reservation_stop instructs the given reservation to stop counting as 2547 * soon as possible. When this method returns, the pmc will be marked as stopping 2548 * and subsequent calls to pmc_reservation_start will succeed. This does not mean 2549 * that the pmc hardware has _actually_ stopped running. Assuming no other changes 2550 * to the reservation state, the pmc hardware _will_ stop shortly. 2551 * 2552 */ 2553kern_return_t pmc_reservation_stop(pmc_reservation_t reservation) { 2554 pmc_state_t newState; 2555 2556 if(!reservation) { 2557 return KERN_INVALID_ARGUMENT; 2558 } 2559 2560 /* Move the state machine */ 2561 if (PMC_STATE_INVALID == (newState = pmc_internal_reservation_move_for_event(reservation, PMC_STATE_EVENT_STOP, NULL))) { 2562 return KERN_FAILURE; 2563 } 2564 2565 /* If we are currently in an interrupt, don't bother to broadcast since it won't do anything now and the interrupt will 2566 * broadcast right before it leaves. Similarly, if we just moved directly to STOP, don't bother broadcasting. 
2567 */ 2568 if (PMC_STATE_STATE(newState) != PMC_STATE_STATE_INTERRUPT && PMC_STATE_STATE(newState) != PMC_STATE_STATE_STOP) { 2569 /* A valid state move has been made, but won't be picked up until a context switch occurs. To cause matching 2570 * contexts that are currently running to update, we do an inter-processor message to run pmc_internal_reservation_stop_cpu 2571 * on every cpu that can access the PMC. 2572 */ 2573 2574 pmc_internal_reservation_broadcast(reservation, pmc_internal_reservation_stop_cpu); 2575 } 2576 2577 return KERN_SUCCESS; 2578} 2579 2580/* 2581 * pmc_reservation_read will read the event count associated with a reservation. 2582 * If the caller is current executing in a context that both a) matches the 2583 * reservation's context, and b) can access the reservation's pmc directly, the 2584 * value will be read from hardware. Otherwise, this returns the reservation's 2585 * stored value. 2586 * 2587 * NOTE: This method is interrupt safe. 2588 * NOTE: When not on the interrupt stack, this method may block. 2589 */ 2590kern_return_t pmc_reservation_read(pmc_reservation_t reservation, uint64_t *value) { 2591 kern_return_t ret = KERN_FAILURE; 2592 uint64_t timeout; 2593 uint32_t spins; 2594 2595 if(!reservation || !value) { 2596 return KERN_INVALID_ARGUMENT; 2597 } 2598 2599 nanoseconds_to_absolutetime(PMC_SPIN_TIMEOUT_US * 1000, &timeout); 2600 timeout += mach_absolute_time(); 2601 spins = 0; 2602 do { 2603 uint32_t state = reservation->state; 2604 2605 if((PMC_STATE_STATE(state) == PMC_STATE_STATE_RUN)) { 2606 /* Attempt read from hardware via drivers. */ 2607 2608 assert(reservation->pmc); 2609 2610 ret = reservation->pmc->methods.get_count(reservation->pmc->object, value); 2611 2612 break; 2613 } else if ((PMC_STATE_STATE(state) == PMC_STATE_STATE_STORE) || 2614 (PMC_STATE_STATE(state) == PMC_STATE_STATE_LOAD)) { 2615 /* Spin */ 2616 /* Assert if this takes longer than PMC_SPIN_TIMEOUT_US */ 2617 if (++spins > PMC_SPIN_THRESHOLD) { 2618 if (mach_absolute_time() > timeout) { 2619 pmc_spin_timeout_count++; 2620 assert(0); 2621 } 2622 } 2623 2624 cpu_pause(); 2625 } else { 2626 break; 2627 } 2628 } while (1); 2629 2630 /* If the direct hardware read failed (for whatever reason) */ 2631 if(KERN_SUCCESS != ret) { 2632 /* Read stored value */ 2633 *value = reservation->value; 2634 } 2635 2636 return KERN_SUCCESS; 2637} 2638 2639/* 2640 * pmc_reservation_write will write the event count associated with a reservation. 2641 * If the caller is current executing in a context that both a) matches the 2642 * reservation's context, and b) can access the reservation's pmc directly, the 2643 * value will be written to hardware. Otherwise, this writes the reservation's 2644 * stored value. 2645 * 2646 * NOTE: This method is interrupt safe. 2647 * NOTE: When not on the interrupt stack, this method may block. 2648 */ 2649kern_return_t pmc_reservation_write(pmc_reservation_t reservation, uint64_t value) { 2650 kern_return_t ret = KERN_FAILURE; 2651 uint64_t timeout; 2652 uint32_t spins; 2653 2654 if(!reservation) { 2655 return KERN_INVALID_ARGUMENT; 2656 } 2657 2658 nanoseconds_to_absolutetime(PMC_SPIN_TIMEOUT_US * 1000, &timeout); 2659 timeout += mach_absolute_time(); 2660 spins = 0; 2661 do { 2662 uint32_t state = reservation->state; 2663 2664 if((PMC_STATE_STATE(state) == PMC_STATE_STATE_RUN)) { 2665 /* Write to hardware via drivers. 
/*
 * pmc_reservation_read will read the event count associated with a reservation.
 * If the caller is currently executing in a context that both a) matches the
 * reservation's context, and b) can access the reservation's pmc directly, the
 * value will be read from hardware. Otherwise, this returns the reservation's
 * stored value.
 *
 * NOTE: This method is interrupt safe.
 * NOTE: When not on the interrupt stack, this method may block.
 */
kern_return_t pmc_reservation_read(pmc_reservation_t reservation, uint64_t *value) {
	kern_return_t ret = KERN_FAILURE;
	uint64_t timeout;
	uint32_t spins;

	if(!reservation || !value) {
		return KERN_INVALID_ARGUMENT;
	}

	nanoseconds_to_absolutetime(PMC_SPIN_TIMEOUT_US * 1000, &timeout);
	timeout += mach_absolute_time();
	spins = 0;
	do {
		uint32_t state = reservation->state;

		if((PMC_STATE_STATE(state) == PMC_STATE_STATE_RUN)) {
			/* Attempt read from hardware via drivers. */
			assert(reservation->pmc);

			ret = reservation->pmc->methods.get_count(reservation->pmc->object, value);

			break;
		} else if ((PMC_STATE_STATE(state) == PMC_STATE_STATE_STORE) ||
			(PMC_STATE_STATE(state) == PMC_STATE_STATE_LOAD)) {
			/* Spin */
			/* Assert if this takes longer than PMC_SPIN_TIMEOUT_US */
			if (++spins > PMC_SPIN_THRESHOLD) {
				if (mach_absolute_time() > timeout) {
					pmc_spin_timeout_count++;
					assert(0);
				}
			}

			cpu_pause();
		} else {
			break;
		}
	} while (1);

	/* If the direct hardware read failed (for whatever reason) */
	if(KERN_SUCCESS != ret) {
		/* Read stored value */
		*value = reservation->value;
	}

	return KERN_SUCCESS;
}

/*
 * pmc_reservation_write will write the event count associated with a reservation.
 * If the caller is currently executing in a context that both a) matches the
 * reservation's context, and b) can access the reservation's pmc directly, the
 * value will be written to hardware. Otherwise, this writes the reservation's
 * stored value.
 *
 * NOTE: This method is interrupt safe.
 * NOTE: When not on the interrupt stack, this method may block.
 */
kern_return_t pmc_reservation_write(pmc_reservation_t reservation, uint64_t value) {
	kern_return_t ret = KERN_FAILURE;
	uint64_t timeout;
	uint32_t spins;

	if(!reservation) {
		return KERN_INVALID_ARGUMENT;
	}

	nanoseconds_to_absolutetime(PMC_SPIN_TIMEOUT_US * 1000, &timeout);
	timeout += mach_absolute_time();
	spins = 0;
	do {
		uint32_t state = reservation->state;

		if((PMC_STATE_STATE(state) == PMC_STATE_STATE_RUN)) {
			/* Write to hardware via drivers. */
			assert(reservation->pmc);

			ret = reservation->pmc->methods.set_count(reservation->pmc->object, value);
			break;
		} else if ((PMC_STATE_STATE(state) == PMC_STATE_STATE_STORE) ||
			(PMC_STATE_STATE(state) == PMC_STATE_STATE_LOAD)) {
			/* Spin */
			/* Assert if this takes longer than PMC_SPIN_TIMEOUT_US */
			if (++spins > PMC_SPIN_THRESHOLD) {
				if (mach_absolute_time() > timeout) {
					pmc_spin_timeout_count++;
					assert(0);
				}
			}

			cpu_pause();
		} else {
			break;
		}
	} while (1);

	if(KERN_SUCCESS != ret) {
		/* Write stored value */
		reservation->value = value;
	}

	return KERN_SUCCESS;
}
2765 */ 2766boolean_t pmc_idle_exit(void) { 2767 perf_monitor_t monitor = NULL; 2768 queue_head_t *cpu_queue; 2769 2770 lck_spin_lock(&perf_monitor_queue_spin); 2771 2772 if (cpu_monitor_queues) { 2773 cpu_queue = cpu_monitor_queues[cpu_number()]; 2774 2775 queue_iterate(cpu_queue, monitor, perf_monitor_t, cpu_link) { 2776 perf_monitor_methods_t *methods = &(monitor->methods); 2777 if ((methods->flags & PERFMON_FLAG_ALWAYS_ACTIVE) || (monitor->reservedCounters)) { 2778 methods->on_idle_exit(monitor->object); 2779 } 2780 } 2781 } 2782 2783 lck_spin_unlock(&perf_monitor_queue_spin); 2784 2785 return TRUE; 2786} 2787 2788/* 2789 * pmc_context_switch performs all context switching necessary to save all pmc 2790 * state associated with @oldThread (and the task to which @oldThread belongs), 2791 * as well as to restore all pmc state associated with @newThread (and the task 2792 * to which @newThread belongs). 2793 * 2794 * NOTE: This method IS interrupt safe. 2795 */ 2796boolean_t pmc_context_switch(thread_t oldThread, thread_t newThread) { 2797 pmc_reservation_t resv = NULL; 2798 uint32_t cpuNum = cpu_number(); 2799 2800 lck_spin_lock(&reservations_spin); 2801 2802 /* Save pmc states */ 2803 if (thread_reservation_count) { 2804 queue_iterate(thread_reservations, resv, pmc_reservation_t, link) { 2805 if ((oldThread == resv->thread) && pmc_accessible_from_core(resv->pmc, cpuNum)) { 2806 (void)pmc_internal_reservation_context_out(resv); 2807 } 2808 } 2809 } 2810 2811 if (task_reservation_count) { 2812 queue_iterate(task_reservations, resv, pmc_reservation_t, link) { 2813 if ((resv->task == oldThread->task) && pmc_accessible_from_core(resv->pmc, cpuNum)) { 2814 (void)pmc_internal_reservation_context_out(resv); 2815 } 2816 } 2817 } 2818 2819 /* Restore */ 2820 if (thread_reservation_count) { 2821 queue_iterate(thread_reservations, resv, pmc_reservation_t, link) { 2822 if ((resv->thread == newThread) && pmc_accessible_from_core(resv->pmc, cpuNum)) { 2823 (void)pmc_internal_reservation_context_in(resv); 2824 } 2825 } 2826 } 2827 2828 if (task_reservation_count) { 2829 queue_iterate(task_reservations, resv, pmc_reservation_t, link) { 2830 if ((resv->task == newThread->task) && pmc_accessible_from_core(resv->pmc, cpuNum)) { 2831 (void)pmc_internal_reservation_context_in(resv); 2832 } 2833 } 2834 } 2835 2836 lck_spin_unlock(&reservations_spin); 2837 2838 return TRUE; 2839} 2840 2841#else /* !CONFIG_COUNTERS */ 2842 2843#if 0 2844#pragma mark - 2845#pragma mark Dummy functions 2846#endif 2847 2848/* 2849 * In the case that someone has chosen not to include the PMC KPI in some 2850 * configuration, we still have exports for kexts, so we'll need to define stub 2851 * methods that return failures. 
2852 */ 2853kern_return_t perf_monitor_register(perf_monitor_object_t monitor __unused, 2854 perf_monitor_methods_t *methods __unused) { 2855 return KERN_FAILURE; 2856} 2857 2858kern_return_t perf_monitor_unregister(perf_monitor_object_t monitor __unused) { 2859 return KERN_FAILURE; 2860} 2861 2862kern_return_t pmc_register(perf_monitor_object_t monitor __unused, 2863 pmc_object_t pmc __unused, pmc_methods_t *methods __unused, void *object __unused) { 2864 return KERN_FAILURE; 2865} 2866 2867kern_return_t pmc_unregister(perf_monitor_object_t monitor __unused, 2868 pmc_object_t pmc __unused) { 2869 return KERN_FAILURE; 2870} 2871 2872kern_return_t pmc_create_config(pmc_t pmc __unused, 2873 pmc_config_t *config __unused) { 2874 return KERN_FAILURE; 2875} 2876 2877void pmc_free_config(pmc_t pmc __unused, pmc_config_t config __unused) { 2878} 2879 2880kern_return_t pmc_config_set_value(pmc_t pmc __unused, 2881 pmc_config_t config __unused, uint8_t id __unused, 2882 uint64_t value __unused) { 2883 return KERN_FAILURE; 2884} 2885 2886kern_return_t pmc_config_set_interrupt_threshold(pmc_t pmc __unused, 2887 pmc_config_t config __unused, uint64_t threshold __unused, 2888 pmc_interrupt_method_t method __unused, void *refCon __unused) { 2889 return KERN_FAILURE; 2890} 2891 2892kern_return_t pmc_get_pmc_list(pmc_t **pmcs __unused, size_t *pmcCount __unused) { 2893 return KERN_FAILURE; 2894} 2895 2896void pmc_free_pmc_list(pmc_t *pmcs __unused, size_t pmcCount __unused) { 2897} 2898 2899kern_return_t pmc_find_by_name(const char *name __unused, pmc_t **pmcs __unused, 2900 size_t *pmcCount __unused) { 2901 return KERN_FAILURE; 2902} 2903 2904const char *pmc_get_name(pmc_t pmc __unused) { 2905 return ""; 2906} 2907 2908kern_return_t pmc_get_accessible_core_list(pmc_t pmc __unused, 2909 uint32_t **logicalCores __unused, size_t *logicalCoreCt __unused) { 2910 return KERN_FAILURE; 2911} 2912 2913kern_return_t pmc_reserve(pmc_t pmc __unused, 2914 pmc_config_t config __unused, pmc_reservation_t *reservation __unused) { 2915 return KERN_FAILURE; 2916} 2917 2918kern_return_t pmc_reserve_task(pmc_t pmc __unused, 2919 pmc_config_t config __unused, task_t task __unused, 2920 pmc_reservation_t *reservation __unused) { 2921 return KERN_FAILURE; 2922} 2923 2924kern_return_t pmc_reserve_thread(pmc_t pmc __unused, 2925 pmc_config_t config __unused, thread_t thread __unused, 2926 pmc_reservation_t *reservation __unused) { 2927 return KERN_FAILURE; 2928} 2929 2930kern_return_t pmc_reservation_start(pmc_reservation_t reservation __unused) { 2931 return KERN_FAILURE; 2932} 2933 2934kern_return_t pmc_reservation_stop(pmc_reservation_t reservation __unused) { 2935 return KERN_FAILURE; 2936} 2937 2938kern_return_t pmc_reservation_read(pmc_reservation_t reservation __unused, 2939 uint64_t *value __unused) { 2940 return KERN_FAILURE; 2941} 2942 2943kern_return_t pmc_reservation_write(pmc_reservation_t reservation __unused, 2944 uint64_t value __unused) { 2945 return KERN_FAILURE; 2946} 2947 2948kern_return_t pmc_reservation_free(pmc_reservation_t reservation __unused) { 2949 return KERN_FAILURE; 2950} 2951 2952 2953#endif /* !CONFIG_COUNTERS */ 2954