1/* 2 * Copyright (c) 2009 Apple Inc. All rights reserved. 3 * 4 * @APPLE_LICENSE_HEADER_START@ 5 * 6 * This file contains Original Code and/or Modifications of Original Code 7 * as defined in and that are subject to the Apple Public Source License 8 * Version 2.0 (the 'License'). You may not use this file except in 9 * compliance with the License. Please obtain a copy of the License at 10 * http://www.opensource.apple.com/apsl/ and read it before using this 11 * file. 12 * 13 * The Original Code and all software distributed under the License are 14 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER 15 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, 16 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, 17 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. 18 * Please see the License for the specific language governing rights and 19 * limitations under the License. 20 * 21 * @APPLE_LICENSE_HEADER_END@ 22 */ 23 24#include <kern/kalloc.h> 25#include <kern/kern_types.h> 26#include <kern/locks.h> 27#include <kern/misc_protos.h> 28#include <kern/task.h> 29#include <kern/thread.h> 30#include <kern/zalloc.h> 31#include <machine/machine_cpu.h> 32 33#include <pmc/pmc.h> 34 35#include <libkern/OSAtomic.h> 36 37#if defined(__i386__) || defined(__x86_64__) 38#include <i386/mp.h> 39#endif 40 41#if CONFIG_COUNTERS 42 43/* various debug logging enable */ 44#undef DEBUG_COUNTERS 45 46typedef uint8_t pmc_state_event_t; 47 48#define PMC_STATE_EVENT_START 0 49#define PMC_STATE_EVENT_STOP 1 50#define PMC_STATE_EVENT_FREE 2 51#define PMC_STATE_EVENT_INTERRUPT 3 52#define PMC_STATE_EVENT_END_OF_INTERRUPT 4 53#define PMC_STATE_EVENT_CONTEXT_IN 5 54#define PMC_STATE_EVENT_CONTEXT_OUT 6 55#define PMC_STATE_EVENT_LOAD_FINISHED 7 56#define PMC_STATE_EVENT_STORE_FINISHED 8 57 58/* PMC spin timeouts */ 59#define PMC_SPIN_THRESHOLD 10 /* Number of spins to allow before checking mach_absolute_time() */ 60#define PMC_SPIN_TIMEOUT_US 10 /* Time in microseconds before the spin causes an assert */ 61 62uint64_t pmc_spin_timeout_count = 0; /* Number of times where a PMC spin loop causes a timeout */ 63 64#ifdef DEBUG_COUNTERS 65# include <pexpert/pexpert.h> 66# define COUNTER_DEBUG(...) \ 67 do { \ 68 kprintf("[%s:%s][%u] ", __FILE__, __PRETTY_FUNCTION__, cpu_number()); \ 69 kprintf(__VA_ARGS__); \ 70 } while(0) 71 72# define PRINT_PERF_MON(x) \ 73 do { \ 74 kprintf("perfmon: %p (obj: %p refCt: %u switchable: %u)\n", \ 75 x, x->object, x->useCount, \ 76 (x->methods.flags & PERFMON_FLAG_SUPPORTS_CONTEXT_SWITCHING) ? \ 77 1 : 0); \ 78 } while(0) 79 80static const char const * pmc_state_state_name(pmc_state_t state) { 81 switch (PMC_STATE_STATE(state)) { 82 case PMC_STATE_STATE_INVALID: 83 return "INVALID"; 84 case PMC_STATE_STATE_STOP: 85 return "STOP"; 86 case PMC_STATE_STATE_CAN_RUN: 87 return "CAN_RUN"; 88 case PMC_STATE_STATE_LOAD: 89 return "LOAD"; 90 case PMC_STATE_STATE_RUN: 91 return "RUN"; 92 case PMC_STATE_STATE_STORE: 93 return "STORE"; 94 case PMC_STATE_STATE_INTERRUPT: 95 return "INTERRUPT"; 96 case PMC_STATE_STATE_DEALLOC: 97 return "DEALLOC"; 98 default: 99 return "UNKNOWN"; 100 } 101} 102 103static const char const * pmc_state_event_name(pmc_state_event_t event) { 104 switch (event) { 105 case PMC_STATE_EVENT_START: 106 return "START"; 107 case PMC_STATE_EVENT_STOP: 108 return "STOP"; 109 case PMC_STATE_EVENT_FREE: 110 return "FREE"; 111 case PMC_STATE_EVENT_INTERRUPT: 112 return "INTERRUPT"; 113 case PMC_STATE_EVENT_END_OF_INTERRUPT: 114 return "END OF INTERRUPT"; 115 case PMC_STATE_EVENT_CONTEXT_IN: 116 return "CONTEXT IN"; 117 case PMC_STATE_EVENT_CONTEXT_OUT: 118 return "CONTEXT OUT"; 119 case PMC_STATE_EVENT_LOAD_FINISHED: 120 return "LOAD_FINISHED"; 121 case PMC_STATE_EVENT_STORE_FINISHED: 122 return "STORE_FINISHED"; 123 default: 124 return "UNKNOWN"; 125 } 126} 127 128# define PMC_STATE_FORMAT "<%s, %u, %s%s%s>" 129# define PMC_STATE_ARGS(x) pmc_state_state_name(x), PMC_STATE_CONTEXT_COUNT(x), ((PMC_STATE_FLAGS(x) & PMC_STATE_FLAGS_INTERRUPTING) ? "I" : ""), \ 130 ((PMC_STATE_FLAGS(x) & PMC_STATE_FLAGS_STOPPING) ? "S" : ""), ((PMC_STATE_FLAGS(x) & PMC_STATE_FLAGS_DEALLOCING) ? "D" : "") 131#else 132# define COUNTER_DEBUG(...) 133# define PRINT_PERF_MON(x) 134# define PMC_STATE_FORMAT 135# define PMC_STATE_ARGS(x) 136#endif 137 138/*!struct 139 * pmc_config is the data behind a pmc_config_t. 140 * @member object A pointer to an instance of IOPerformanceCounterConfiguration 141 * @member method A pointer to a method to call to handle PMI. 142 * @member interrupt_after_value Cause a PMI after the counter counts this many 143 * events. 144 * @member refCon Passed to the @method method as the refCon argument. 145 */ 146struct pmc_config { 147 pmc_config_object_t object; 148 volatile pmc_interrupt_method_t method; 149 uint64_t interrupt_after_value; 150 void *refCon; 151}; 152 153/* 154 * Allocation Zones 155 * 156 * Two allocation zones - Perf zone small and Perf zone big. 157 * Each zone has associated maximums, defined below. 158 * The small zone is the max of the smallest allocation objects (all sizes on 159 * K64): 160 * perf_monitor_t - 48 bytes 161 * perf_monitor_methods_t - 28 bytes 162 * pmc_reservation_t - 48 bytes 163 * pmc_config_t - 32 bytes 164 * perf_small_zone unit size is (on K64) 48 bytes 165 * perf_small_zone max count must be max number of perf monitors, plus (max 166 * number of reservations * 2). The "*2" is because each reservation has a 167 * pmc_config_t within. 168 * 169 * Big zone is max of the larger allocation units 170 * pmc_t - 144 bytes 171 * pmc_methods_t - 116 bytes 172 * perf_big_zone unit size is (on K64) 144 bytes 173 * perf_big_zone max count is the max number of PMCs we support. 174 */ 175 176static zone_t perf_small_zone = NULL; 177#define MAX_PERF_SMALLS (256 + 8196 + 8196) 178#define PERF_SMALL_UNIT_SZ (MAX(MAX(sizeof(struct perf_monitor), \ 179 sizeof(struct pmc_reservation)), sizeof(struct pmc_config))) 180 181static zone_t perf_big_zone = NULL; 182#define MAX_PERF_BIGS (1024) 183#define PERF_BIG_UNIT_SZ (sizeof(struct pmc)) 184 185/* 186 * Locks and Lock groups 187 */ 188static lck_grp_t *pmc_lock_grp = LCK_GRP_NULL; 189static lck_grp_attr_t *pmc_lock_grp_attr; 190static lck_attr_t *pmc_lock_attr; 191 192/* PMC tracking queue locks */ 193 194static lck_mtx_t cpu_monitor_queue_mutex; /* protects per-cpu queues at initialisation time */ 195static lck_spin_t perf_monitor_queue_spin; /* protects adding and removing from queue */ 196static lck_spin_t perf_counters_queue_spin; /* protects adding and removing from queue */ 197 198/* Reservation tracking queues lock */ 199static lck_spin_t reservations_spin; 200 201/* 202 * Tracking queues 203 * 204 * Keeps track of registered perf monitors and perf counters 205 */ 206 207static queue_head_t **cpu_monitor_queues = NULL; 208 209static queue_head_t *perf_monitors_queue = NULL; 210static volatile uint32_t perf_monitors_count = 0U; 211 212static queue_head_t *perf_counters_queue = NULL; 213static volatile uint32_t perf_counters_count = 0U; 214 215/* 216 * Reservation queues 217 * 218 * Keeps track of all system, task, and thread-level reservations (both active and 219 * inactive). 220 * 221 * We track them all here (rather than in their respective task or thread only) 222 * so that we can inspect our tracking data directly (rather than peeking at 223 * every task and thread) to determine if/when a new reservation would 224 * constitute a conflict. 225 */ 226 227static queue_head_t *system_reservations = NULL; 228static volatile uint32_t system_reservation_count = 0U; 229 230static queue_head_t *task_reservations = NULL; 231static volatile uint32_t task_reservation_count = 0U; 232 233static queue_head_t *thread_reservations = NULL; 234static volatile uint32_t thread_reservation_count = 0U; 235 236#if XNU_KERNEL_PRIVATE 237 238/* 239 * init_pmc_locks creates and initializes all the locks and lock groups and lock 240 * attributes required for the pmc sub-system. 241 */ 242static void init_pmc_locks(void) { 243 pmc_lock_attr = lck_attr_alloc_init(); 244 assert(pmc_lock_attr); 245 246 pmc_lock_grp_attr = lck_grp_attr_alloc_init(); 247 assert(pmc_lock_grp_attr); 248 249 pmc_lock_grp = lck_grp_alloc_init("pmc", pmc_lock_grp_attr); 250 assert(pmc_lock_grp); 251 252 lck_spin_init(&perf_monitor_queue_spin, pmc_lock_grp, pmc_lock_attr); 253 lck_spin_init(&perf_counters_queue_spin, pmc_lock_grp, pmc_lock_attr); 254 255 lck_spin_init(&reservations_spin, pmc_lock_grp, pmc_lock_attr); 256 257 lck_mtx_init(&cpu_monitor_queue_mutex, pmc_lock_grp, pmc_lock_attr); 258} 259 260/* 261 * init_pmc_zones initializes the allocation zones used by the pmc subsystem 262 */ 263static void init_pmc_zones(void) { 264 perf_small_zone = zinit(PERF_SMALL_UNIT_SZ, 265 MAX_PERF_SMALLS * PERF_SMALL_UNIT_SZ, MAX_PERF_SMALLS, 266 "pmc.small zone"); 267 268 assert(perf_small_zone); 269 270 perf_big_zone = zinit(PERF_BIG_UNIT_SZ, 271 MAX_PERF_BIGS * PERF_BIG_UNIT_SZ, MAX_PERF_BIGS, 272 "pmc.big zone"); 273 274 assert(perf_big_zone); 275} 276 277/* 278 * init_pmc_queues allocates and initializes the tracking queues for 279 * registering and reserving individual pmcs and perf monitors. 280 */ 281static void init_pmc_queues(void) { 282 283 perf_monitors_queue = (queue_head_t*)kalloc(sizeof(queue_head_t)); 284 assert(perf_monitors_queue); 285 286 queue_init(perf_monitors_queue); 287 288 perf_counters_queue = (queue_head_t*)kalloc(sizeof(queue_head_t)); 289 assert(perf_counters_queue); 290 291 queue_init(perf_counters_queue); 292 293 system_reservations = (queue_head_t*)kalloc(sizeof(queue_t)); 294 assert(system_reservations); 295 296 queue_init(system_reservations); 297 298 task_reservations = (queue_head_t*)kalloc(sizeof(queue_head_t)); 299 assert(task_reservations); 300 301 queue_init(task_reservations); 302 303 thread_reservations = (queue_head_t*)kalloc(sizeof(queue_head_t)); 304 assert(thread_reservations); 305 306 queue_init(thread_reservations); 307} 308 309/* 310 * pmc_bootstrap brings up all the necessary infrastructure required to use the 311 * pmc sub-system. 312 */ 313__private_extern__ 314void pmc_bootstrap(void) { 315 /* build our alloc zones */ 316 init_pmc_zones(); 317 318 /* build the locks */ 319 init_pmc_locks(); 320 321 /* build our tracking queues */ 322 init_pmc_queues(); 323} 324 325#endif /* XNU_KERNEL_PRIVATE */ 326 327/* 328 * Perf Monitor Internals 329 */ 330 331static perf_monitor_t perf_monitor_alloc(void) { 332 /* perf monitors come from the perf small zone */ 333 return (perf_monitor_t)zalloc(perf_small_zone); 334} 335 336static void perf_monitor_free(void *pm) { 337 zfree(perf_small_zone, pm); 338} 339 340static void perf_monitor_init(perf_monitor_t pm, int cpu) { 341 assert(pm); 342 343 pm->object = NULL; 344 345 bzero(&(pm->methods), sizeof(perf_monitor_methods_t)); 346 347 pm->useCount = 1; /* initial retain count of 1, for caller */ 348 349 pm->reservedCounters = 0; 350 351 pm->cpu = cpu; 352 353 pm->link.next = pm->link.prev = (queue_entry_t)NULL; 354 pm->cpu_link.next = pm->cpu_link.prev = (queue_entry_t)NULL; 355} 356 357/* 358 * perf_monitor_dequeue removes the given perf_monitor_t from the 359 * perf_monitor_queue, thereby unregistering it with the system. 360 */ 361static void perf_monitor_dequeue(perf_monitor_t pm) { 362 lck_spin_lock(&perf_monitor_queue_spin); 363 364 if (pm->methods.flags & PERFMON_FLAG_REQUIRES_IDLE_NOTIFICATIONS) { 365 /* If this flag is set, the monitor is already validated to be 366 * accessible from a single cpu only. 367 */ 368 queue_remove(cpu_monitor_queues[pm->cpu], pm, perf_monitor_t, cpu_link); 369 } 370 371 /* 372 * remove the @pm object from the @perf_monitor_queue queue (it is of type 373 * <perf_monitor_t> and has a field called @link that is the queue_link_t 374 */ 375 queue_remove(perf_monitors_queue, pm, perf_monitor_t, link); 376 377 perf_monitors_count--; 378 379 lck_spin_unlock(&perf_monitor_queue_spin); 380} 381 382/* 383 * perf_monitor_enqueue adds the given perf_monitor_t to the perf_monitor_queue, 384 * thereby registering it for use with the system. 385 */ 386static void perf_monitor_enqueue(perf_monitor_t pm) { 387 388 lck_mtx_lock(&cpu_monitor_queue_mutex); 389 lck_spin_lock(&perf_monitor_queue_spin); 390 391 if (pm->cpu >= 0) { 392 /* Deferred initialisation; saves memory and permits ml_get_max_cpus() 393 * to block until cpu initialisation is complete. 394 */ 395 if (!cpu_monitor_queues) { 396 uint32_t max_cpus; 397 queue_head_t **queues; 398 uint32_t i; 399 400 lck_spin_unlock(&perf_monitor_queue_spin); 401 402 max_cpus = ml_get_max_cpus(); 403 404 queues = (queue_head_t**)kalloc(sizeof(queue_head_t*) * max_cpus); 405 assert(queues); 406 for (i = 0; i < max_cpus; i++) { 407 queue_head_t *queue = (queue_head_t*)kalloc(sizeof(queue_head_t)); 408 assert(queue); 409 queue_init(queue); 410 queues[i] = queue; 411 } 412 413 lck_spin_lock(&perf_monitor_queue_spin); 414 415 cpu_monitor_queues = queues; 416 } 417 418 queue_enter(cpu_monitor_queues[pm->cpu], pm, perf_monitor_t, cpu_link); 419 } 420 421 queue_enter(perf_monitors_queue, pm, perf_monitor_t, link); 422 perf_monitors_count++; 423 424 lck_spin_unlock(&perf_monitor_queue_spin); 425 lck_mtx_unlock(&cpu_monitor_queue_mutex); 426} 427 428/* 429 * perf_monitor_reference increments the reference count for the given 430 * perf_monitor_t. 431 */ 432static void perf_monitor_reference(perf_monitor_t pm) { 433 assert(pm); 434 435 OSIncrementAtomic(&(pm->useCount)); 436} 437 438/* 439 * perf_monitor_deallocate decrements the reference count for the given 440 * perf_monitor_t. If the reference count hits 0, the object is released back 441 * to the perf_small_zone via a call to perf_monitor_free(). 442 */ 443static void perf_monitor_deallocate(perf_monitor_t pm) { 444 assert(pm); 445 446 /* If we just removed the last reference count */ 447 if(1 == OSDecrementAtomic(&(pm->useCount))) { 448 /* Free the object */ 449 perf_monitor_free(pm); 450 } 451} 452 453/* 454 * perf_monitor_find attempts to find a perf_monitor_t that corresponds to the 455 * given C++ object pointer that was used when registering with the subsystem. 456 * 457 * If found, the method returns the perf_monitor_t with an extra reference 458 * placed on the object (or NULL if not 459 * found). 460 * 461 * NOTE: Caller must use perf_monitor_deallocate to remove the extra reference after 462 * calling perf_monitor_find. 463 */ 464static perf_monitor_t perf_monitor_find(perf_monitor_object_t monitor) { 465 assert(monitor); 466 perf_monitor_t element = NULL; 467 perf_monitor_t found = NULL; 468 469 lck_spin_lock(&perf_monitor_queue_spin); 470 471 queue_iterate(perf_monitors_queue, element, perf_monitor_t, link) { 472 if(element->object == monitor) { 473 perf_monitor_reference(element); 474 found = element; 475 break; 476 } 477 } 478 479 lck_spin_unlock(&perf_monitor_queue_spin); 480 481 return found; 482} 483 484/* 485 * perf_monitor_add_pmc adds a newly registered PMC to the perf monitor it is 486 * associated with. 487 */ 488 489static void perf_monitor_add_pmc(perf_monitor_t pm, pmc_t pmc __unused) { 490 assert(pm); 491 assert(pmc); 492 493 /* Today, we merely add a reference count now that a new pmc is attached */ 494 perf_monitor_reference(pm); 495} 496 497/* 498 * perf_monitor_remove_pmc removes a newly *un*registered PMC from the perf 499 * monitor it is associated with. 500 */ 501static void perf_monitor_remove_pmc(perf_monitor_t pm, pmc_t pmc __unused) { 502 assert(pm); 503 assert(pmc); 504 505 /* Today, we merely remove a reference count now that the pmc is detached */ 506 perf_monitor_deallocate(pm); 507} 508 509/* 510 * Perf Counter internals 511 */ 512 513static pmc_t pmc_alloc(void) { 514 return (pmc_t)zalloc(perf_big_zone); 515} 516 517static void pmc_free(void *pmc) { 518 zfree(perf_big_zone, pmc); 519} 520 521/* 522 * pmc_init initializes a newly allocated pmc_t 523 */ 524static void pmc_init(pmc_t pmc) { 525 assert(pmc); 526 527 pmc->object = NULL; 528 pmc->monitor = NULL; 529 530 bzero(&pmc->methods, sizeof(pmc_methods_t)); 531 532 /* One reference for the caller */ 533 pmc->useCount = 1; 534} 535 536/* 537 * pmc_reference increments the reference count of the given pmc_t 538 */ 539static void pmc_reference(pmc_t pmc) { 540 assert(pmc); 541 542 OSIncrementAtomic(&(pmc->useCount)); 543} 544 545/* 546 * pmc_deallocate decrements the reference count of the given pmc_t. If the 547 * reference count hits zero, the given pmc_t is deallocated and released back 548 * to the allocation zone. 549 */ 550static void pmc_deallocate(pmc_t pmc) { 551 assert(pmc); 552 553 /* If we just removed the last reference count */ 554 if(1 == OSDecrementAtomic(&(pmc->useCount))) { 555 /* Free the pmc */ 556 pmc_free(pmc); 557 } 558} 559 560/* 561 * pmc_dequeue removes the given, newly *un*registered pmc from the 562 * perf_counters_queue. 563 */ 564static void pmc_dequeue(pmc_t pmc) { 565 lck_spin_lock(&perf_counters_queue_spin); 566 567 queue_remove(perf_counters_queue, pmc, pmc_t, link); 568 569 perf_counters_count--; 570 571 lck_spin_unlock(&perf_counters_queue_spin); 572} 573 574/* 575 * pmc_enqueue adds the given, newly registered pmc to the perf_counters_queue 576 */ 577static void pmc_enqueue(pmc_t pmc) { 578 lck_spin_lock(&perf_counters_queue_spin); 579 580 queue_enter(perf_counters_queue, pmc, pmc_t, link); 581 582 perf_counters_count++; 583 584 lck_spin_unlock(&perf_counters_queue_spin); 585} 586 587/* 588 * pmc_find attempts to locate a pmc_t that was registered with the given 589 * pmc_object_t pointer. If found, it returns the pmc_t with an extra reference 590 * which must be dropped by the caller by calling pmc_deallocate(). 591 */ 592static pmc_t pmc_find(pmc_object_t object) { 593 assert(object); 594 595 lck_spin_lock(&perf_counters_queue_spin); 596 597 pmc_t element = NULL; 598 pmc_t found = NULL; 599 600 queue_iterate(perf_counters_queue, element, pmc_t, link) { 601 if(element->object == object) { 602 pmc_reference(element); 603 found = element; 604 break; 605 } 606 } 607 608 lck_spin_unlock(&perf_counters_queue_spin); 609 610 return found; 611} 612 613/* 614 * Config internals 615 */ 616 617/* Allocate a pmc_config_t */ 618static pmc_config_t pmc_config_alloc(pmc_t pmc __unused) { 619 return (pmc_config_t)zalloc(perf_small_zone); 620} 621 622/* Free a pmc_config_t, and underlying pmc_config_object_t (if needed) */ 623static void pmc_config_free(pmc_t pmc, pmc_config_t config) { 624 assert(pmc); 625 assert(config); 626 627 if(config->object) { 628 pmc->methods.free_config(pmc->object, config->object); 629 config->object = NULL; 630 } 631 632 zfree(perf_small_zone, config); 633} 634 635static kern_return_t pmc_open(pmc_t pmc) { 636 assert(pmc); 637 assert(pmc->object); 638 assert(pmc->open_object); 639 640 return pmc->methods.open(pmc->object, pmc->open_object); 641} 642 643static kern_return_t pmc_close(pmc_t pmc) { 644 assert(pmc); 645 assert(pmc->object); 646 assert(pmc->open_object); 647 648 return pmc->methods.close(pmc->object, pmc->open_object); 649} 650 651/* 652 * Reservation Internals 653 */ 654 655static kern_return_t pmc_internal_reservation_set_pmc(pmc_reservation_t resv, pmc_t pmc); 656static void pmc_internal_reservation_store(pmc_reservation_t reservation); 657static void pmc_internal_reservation_load(pmc_reservation_t reservation); 658 659static pmc_reservation_t reservation_alloc(void) { 660 /* pmc reservations come from the perf small zone */ 661 return (pmc_reservation_t)zalloc(perf_small_zone); 662} 663 664/* 665 * reservation_free deallocates and releases all resources associated with the 666 * given pmc_reservation_t. This includes freeing the config used to create the 667 * reservation, decrementing the reference count for the pmc used to create the 668 * reservation, and deallocating the reservation's memory. 669 */ 670static void reservation_free(pmc_reservation_t resv) { 671 /* Free config */ 672 if(resv->config) { 673 assert(resv->pmc); 674 675 pmc_free_config(resv->pmc, resv->config); 676 677 resv->config = NULL; 678 } 679 680 /* release PMC */ 681 (void)pmc_internal_reservation_set_pmc(resv, NULL); 682 683 /* Free reservation */ 684 zfree(perf_small_zone, resv); 685} 686 687/* 688 * reservation_init initializes a newly created reservation. 689 */ 690static void reservation_init(pmc_reservation_t resv) { 691 assert(resv); 692 693 resv->pmc = NULL; 694 resv->config = NULL; 695 resv->value = 0ULL; 696 697 resv->flags = 0U; 698 resv->state = PMC_STATE(PMC_STATE_STATE_STOP, 0, 0); 699 resv->active_last_context_in = 0U; 700 701 /* 702 * Since this member is a union, we only need to set either the task 703 * or thread to NULL. 704 */ 705 resv->task = TASK_NULL; 706} 707 708/* 709 * pmc_internal_reservation_set_pmc sets the pmc associated with the reservation object. If 710 * there was one set already, it is deallocated (reference is dropped) before 711 * the new one is set. This methods increases the reference count of the given 712 * pmc_t. 713 * 714 * NOTE: It is okay to pass NULL as the pmc_t - this will have the effect of 715 * dropping the reference on any previously set pmc, and setting the reservation 716 * to having no pmc set. 717 */ 718static kern_return_t pmc_internal_reservation_set_pmc(pmc_reservation_t resv, pmc_t pmc) { 719 assert(resv); 720 721 if(resv->pmc) { 722 (void)pmc_close(resv->pmc); 723 pmc_deallocate(resv->pmc); 724 resv->pmc = NULL; 725 } 726 727 resv->pmc = pmc; 728 729 if(resv->pmc) { 730 pmc_reference(resv->pmc); 731 if(KERN_SUCCESS != pmc_open(resv->pmc)) { 732 pmc_deallocate(resv->pmc); 733 resv->pmc = NULL; 734 735 return KERN_FAILURE; 736 } 737 } 738 739 return KERN_SUCCESS; 740} 741 742/* 743 * Used to place reservation into one of the system, task, and thread queues 744 * Assumes the queue's spin lock is already held. 745 */ 746static void pmc_internal_reservation_enqueue(queue_t queue, pmc_reservation_t resv) { 747 assert(queue); 748 assert(resv); 749 750 queue_enter(queue, resv, pmc_reservation_t, link); 751} 752 753static void pmc_internal_reservation_dequeue(queue_t queue, pmc_reservation_t resv) { 754 assert(queue); 755 assert(resv); 756 757 queue_remove(queue, resv, pmc_reservation_t, link); 758} 759 760/* Returns TRUE if the reservation applies to the current execution context */ 761static boolean_t pmc_internal_reservation_matches_context(pmc_reservation_t resv) { 762 boolean_t ret = FALSE; 763 assert(resv); 764 765 if(PMC_FLAG_IS_SYSTEM_SCOPE(resv->flags)) { 766 ret = TRUE; 767 } else if(PMC_FLAG_IS_TASK_SCOPE(resv->flags)) { 768 if(current_task() == resv->task) { 769 ret = TRUE; 770 } 771 } else if(PMC_FLAG_IS_THREAD_SCOPE(resv->flags)) { 772 if(current_thread() == resv->thread) { 773 ret = TRUE; 774 } 775 } 776 777 return ret; 778} 779 780/* 781 * pmc_accessible_core_count returns the number of logical cores that can access 782 * a given @pmc. 0 means every core in the system. 783 */ 784static uint32_t pmc_accessible_core_count(pmc_t pmc) { 785 assert(pmc); 786 787 uint32_t *cores = NULL; 788 size_t coreCt = 0UL; 789 790 if(KERN_SUCCESS != pmc->methods.accessible_cores(pmc->object, 791 &cores, &coreCt)) { 792 coreCt = 0U; 793 } 794 795 return (uint32_t)coreCt; 796} 797 798/* spin lock for the queue must already be held */ 799/* 800 * This method will inspect the task/thread of the reservation to see if it 801 * matches the new incoming one (for thread/task reservations only). Will only 802 * return TRUE if the task/thread matches. 803 */ 804static boolean_t pmc_internal_reservation_queue_contains_pmc(queue_t queue, pmc_reservation_t resv) { 805 assert(queue); 806 assert(resv); 807 808 boolean_t ret = FALSE; 809 pmc_reservation_t tmp = NULL; 810 811 queue_iterate(queue, tmp, pmc_reservation_t, link) { 812 if(tmp->pmc == resv->pmc) { 813 /* PMC matches - make sure scope matches first */ 814 switch(PMC_FLAG_SCOPE(tmp->flags)) { 815 case PMC_FLAG_SCOPE_SYSTEM: 816 /* 817 * Found a reservation in system queue with same pmc - always a 818 * conflict. 819 */ 820 ret = TRUE; 821 break; 822 case PMC_FLAG_SCOPE_THREAD: 823 /* 824 * Found one in thread queue with the same PMC as the 825 * argument. Only a conflict if argument scope isn't 826 * thread or system, or the threads match. 827 */ 828 ret = (PMC_FLAG_SCOPE(resv->flags) != PMC_FLAG_SCOPE_THREAD) || 829 (tmp->thread == resv->thread); 830 831 if(!ret) { 832 /* 833 * so far, no conflict - check that the pmc that is 834 * being reserved isn't accessible from more than 835 * one core, if it is, we need to say it's already 836 * taken. 837 */ 838 if(1 != pmc_accessible_core_count(tmp->pmc)) { 839 ret = TRUE; 840 } 841 } 842 break; 843 case PMC_FLAG_SCOPE_TASK: 844 /* 845 * Follow similar semantics for task scope. 846 */ 847 848 ret = (PMC_FLAG_SCOPE(resv->flags) != PMC_FLAG_SCOPE_TASK) || 849 (tmp->task == resv->task); 850 if(!ret) { 851 /* 852 * so far, no conflict - check that the pmc that is 853 * being reserved isn't accessible from more than 854 * one core, if it is, we need to say it's already 855 * taken. 856 */ 857 if(1 != pmc_accessible_core_count(tmp->pmc)) { 858 ret = TRUE; 859 } 860 } 861 862 break; 863 } 864 865 if(ret) break; 866 } 867 } 868 869 return ret; 870} 871 872/* 873 * pmc_internal_reservation_validate_for_pmc returns TRUE if the given reservation can be 874 * added to its target queue without creating conflicts (target queue is 875 * determined by the reservation's scope flags). Further, this method returns 876 * FALSE if any level contains a reservation for a PMC that can be accessed from 877 * more than just 1 core, and the given reservation also wants the same PMC. 878 */ 879static boolean_t pmc_internal_reservation_validate_for_pmc(pmc_reservation_t resv) { 880 assert(resv); 881 boolean_t ret = TRUE; 882 883 if(pmc_internal_reservation_queue_contains_pmc(system_reservations, resv) || 884 pmc_internal_reservation_queue_contains_pmc(task_reservations, resv) || 885 pmc_internal_reservation_queue_contains_pmc(thread_reservations, resv)) { 886 ret = FALSE; 887 } 888 889 return ret; 890} 891 892static void pmc_internal_update_thread_flag(thread_t thread, boolean_t newFlag) { 893 assert(thread); 894 895 /* See if this thread needs it's PMC flag set */ 896 pmc_reservation_t tmp = NULL; 897 898 if(!newFlag) { 899 /* 900 * If the parent task just dropped its reservation, iterate the thread 901 * reservations to see if we need to keep the pmc flag set for the given 902 * thread or not. 903 */ 904 lck_spin_lock(&reservations_spin); 905 906 queue_iterate(thread_reservations, tmp, pmc_reservation_t, link) { 907 if(tmp->thread == thread) { 908 newFlag = TRUE; 909 break; 910 } 911 } 912 913 lck_spin_unlock(&reservations_spin); 914 } 915 916 if(newFlag) { 917 OSBitOrAtomic(THREAD_PMC_FLAG, &thread->t_chud); 918 } else { 919 OSBitAndAtomic(~(THREAD_PMC_FLAG), &thread->t_chud); 920 } 921} 922 923/* 924 * This operation is (worst case) O(N*M) where N is number of threads in the 925 * given task, and M is the number of thread reservations in our system. 926 */ 927static void pmc_internal_update_task_flag(task_t task, boolean_t newFlag) { 928 assert(task); 929 thread_t thread = NULL; 930 931 if(newFlag) { 932 OSBitOrAtomic(TASK_PMC_FLAG, &task->t_chud); 933 } else { 934 OSBitAndAtomic(~(TASK_PMC_FLAG), &task->t_chud); 935 } 936 937 task_lock(task); 938 939 queue_iterate(&task->threads, thread, thread_t, task_threads) { 940 /* propagate the task's mask down to each thread */ 941 pmc_internal_update_thread_flag(thread, newFlag); 942 } 943 944 task_unlock(task); 945} 946 947/* 948 * pmc_internal_reservation_add adds a reservation to the global tracking queues after 949 * ensuring there are no reservation conflicts. To do this, it takes all the 950 * spin locks for all the queue (to ensure no other core goes and adds a 951 * reservation for the same pmc to a queue that has already been checked). 952 */ 953static boolean_t pmc_internal_reservation_add(pmc_reservation_t resv) { 954 assert(resv); 955 956 boolean_t ret = FALSE; 957 958 /* always lock all three in the same order */ 959 lck_spin_lock(&reservations_spin); 960 961 /* Check if the reservation can be added without conflicts */ 962 if(pmc_internal_reservation_validate_for_pmc(resv)) { 963 964 /* add reservation to appropriate scope */ 965 switch(PMC_FLAG_SCOPE(resv->flags)) { 966 case PMC_FLAG_SCOPE_SYSTEM: 967 /* Simply add it to the system queue */ 968 pmc_internal_reservation_enqueue(system_reservations, resv); 969 system_reservation_count++; 970 971 lck_spin_unlock(&reservations_spin); 972 973 break; 974 975 case PMC_FLAG_SCOPE_TASK: 976 assert(resv->task); 977 978 /* Not only do we enqueue it in our local queue for tracking */ 979 pmc_internal_reservation_enqueue(task_reservations, resv); 980 task_reservation_count++; 981 982 lck_spin_unlock(&reservations_spin); 983 984 /* update the task mask, and propagate it to existing threads */ 985 pmc_internal_update_task_flag(resv->task, TRUE); 986 break; 987 988 /* Thread-switched counter */ 989 case PMC_FLAG_SCOPE_THREAD: 990 assert(resv->thread); 991 992 /* 993 * Works the same as a task-switched counter, only at 994 * thread-scope 995 */ 996 997 pmc_internal_reservation_enqueue(thread_reservations, resv); 998 thread_reservation_count++; 999 1000 lck_spin_unlock(&reservations_spin); 1001 1002 pmc_internal_update_thread_flag(resv->thread, TRUE); 1003 break; 1004 } 1005 1006 ret = TRUE; 1007 } else { 1008 lck_spin_unlock(&reservations_spin); 1009 } 1010 1011 return ret; 1012} 1013 1014static void pmc_internal_reservation_broadcast(pmc_reservation_t reservation, void (*action_func)(void *)) { 1015 uint32_t * cores; 1016 size_t core_cnt; 1017 1018 /* Get the list of accessible cores */ 1019 if (KERN_SUCCESS == pmc_get_accessible_core_list(reservation->pmc, &cores, &core_cnt)) { 1020 boolean_t intrs_enabled = ml_set_interrupts_enabled(FALSE); 1021 1022 /* Fast case: the PMC is only accessible from one core and we happen to be on it */ 1023 if (core_cnt == 1 && cores[0] == (uint32_t)cpu_number()) { 1024 action_func(reservation); 1025 } else { 1026 /* Call action_func on every accessible core */ 1027#if defined(__i386__) || defined(__x86_64__) 1028 size_t ii; 1029 cpumask_t mask = 0; 1030 1031 /* Build a mask for the accessible cores */ 1032 if (core_cnt > 0) { 1033 for (ii = 0; ii < core_cnt; ii++) { 1034 mask |= cpu_to_cpumask(cores[ii]); 1035 } 1036 } else { 1037 /* core_cnt = 0 really means all cpus */ 1038 mask = CPUMASK_ALL; 1039 } 1040 mp_cpus_call(mask, ASYNC, action_func, reservation); 1041#elif defined(__arm__) 1042 panic("Please implement me: pmc_internal_reservation_broadcast"); 1043#else 1044#error pmc_reservation_interrupt needs an inter-processor method invocation mechanism for this architecture 1045#endif 1046 } 1047 1048 ml_set_interrupts_enabled(intrs_enabled); 1049 } 1050 1051} 1052 1053/* 1054 * pmc_internal_reservation_remove removes the given reservation from the appropriate 1055 * reservation queue according to its scope. 1056 * 1057 * NOTE: The scope flag must have been set for this method to function. 1058 */ 1059static void pmc_internal_reservation_remove(pmc_reservation_t resv) { 1060 assert(resv); 1061 1062 /* 1063 * Due to the way the macros are written, we can't just blindly queue-remove 1064 * the reservation without knowing which queue it's in. We figure this out 1065 * using the reservation's scope flags. 1066 */ 1067 1068 /* Lock the global spin lock */ 1069 lck_spin_lock(&reservations_spin); 1070 1071 switch(PMC_FLAG_SCOPE(resv->flags)) { 1072 1073 case PMC_FLAG_SCOPE_SYSTEM: 1074 pmc_internal_reservation_dequeue(system_reservations, resv); 1075 system_reservation_count--; 1076 1077 lck_spin_unlock(&reservations_spin); 1078 1079 break; 1080 1081 case PMC_FLAG_SCOPE_TASK: 1082 /* remove from the global queue */ 1083 pmc_internal_reservation_dequeue(task_reservations, resv); 1084 task_reservation_count--; 1085 1086 /* unlock the global */ 1087 lck_spin_unlock(&reservations_spin); 1088 1089 /* Recalculate task's counter mask */ 1090 pmc_internal_update_task_flag(resv->task, FALSE); 1091 1092 break; 1093 1094 case PMC_FLAG_SCOPE_THREAD: 1095 pmc_internal_reservation_dequeue(thread_reservations, resv); 1096 thread_reservation_count--; 1097 1098 lck_spin_unlock(&reservations_spin); 1099 1100 /* recalculate the thread's counter mask */ 1101 pmc_internal_update_thread_flag(resv->thread, FALSE); 1102 1103 break; 1104 } 1105} 1106 1107/* Reservation State Machine 1108 * 1109 * The PMC subsystem uses a 3-tuple of state information packed into a 32-bit quantity and a 1110 * set of 9 events to provide MP-safe bookkeeping and control flow. The 3-tuple is comprised 1111 * of a state, a count of active contexts, and a set of modifier flags. A state machine defines 1112 * the possible transitions at each event point given the current 3-tuple. Atomicity is handled 1113 * by reading the current 3-tuple, applying the transformations indicated by the state machine 1114 * and then attempting to OSCompareAndSwap the transformed value. If the OSCompareAndSwap fails, 1115 * the process is repeated until either the OSCompareAndSwap succeeds or not valid transitions are 1116 * available. 1117 * 1118 * The state machine is described using tuple notation for the current state and a related notation 1119 * for describing the transformations. For concisness, the flag and state names are abbreviated as 1120 * follows: 1121 * 1122 * states: 1123 * S = STOP 1124 * CR = CAN_RUN 1125 * L = LOAD 1126 * R = RUN 1127 * ST = STORE 1128 * I = INTERRUPT 1129 * D = DEALLOC 1130 * 1131 * flags: 1132 * 1133 * S = STOPPING 1134 * D = DEALLOCING 1135 * I = INTERRUPTING 1136 * 1137 * The tuple notation is formed from the following pattern: 1138 * 1139 * tuple = < state, active-context-count, flags > 1140 * state = S | CR | L | R | ST | I | D 1141 * active-context-count = 0 | >0 | 1 | >1 1142 * flags = flags flag | blank 1143 * flag = S | D | I 1144 * 1145 * The transform notation is similar, but only describes the modifications made to the current state. 1146 * The notation is formed from the following pattern: 1147 * 1148 * transform = < state, active-context-count, flags > 1149 * state = S | CR | L | R | ST | I | D 1150 * active-context-count = + | - | blank 1151 * flags = flags flag | flags !flag | blank 1152 * flag = S | D | I 1153 * 1154 * And now for the state machine: 1155 * State Start Stop Free Interrupt End Interrupt Context In Context Out Load Finished Store Finished 1156 * <CR, 0, > <S, , > <D, , > <L, +, > 1157 * <D, 0, > 1158 * <D, 1, D> < , -, !D> 1159 * <D, >1, D> < , -, > 1160 * <I, 0, D> <D, , !D> 1161 * <I, 0, S> < , , !S> < , , !SD> <S, , !S> 1162 * <I, 0, > < , , S> < , , D> <CR, , > 1163 * <L, 1, D> <ST, -, > 1164 * <L, 1, ID> <ST, -, > 1165 * <L, 1, IS> < , , !SD> <ST, -, > 1166 * <L, 1, S> < , , !S> < , , !SD> <ST, -, > 1167 * <L, 1, > < , , S> < , , D> < , , IS> < , +, > <R, , > 1168 * <L, >1, D> < , -, > <R, -, > 1169 * <L, >1, ID> < , -, > <R, -, > 1170 * <L, >1, IS> < , , !SD> < , -, > <R, -, > 1171 * <L, >1, S> < , , !S> < , , !SD> < , -, > <R, -, > 1172 * <L, >1, > < , , S> < , , D> < , , IS> < , +, > < , -, > <R, , > 1173 * <R, 1, D> <ST, -, > 1174 * <R, 1, ID> <ST, -, > 1175 * <R, 1, IS> < , , !SD> <ST, -, > 1176 * <R, 1, S> < , , !S> < , , !SD> <ST, -, > 1177 * <R, 1, > < , , S> < , , D> < , , IS> < , +, > <ST, -, > 1178 * <R, >1, D> < , -, > 1179 * <R, >1, ID> < , -, > 1180 * <R, >1, IS> < , , !SD> < , -, > 1181 * <R, >1, S> < , , !S> < , , !SD> < , -, > 1182 * <R, >1, > < , , S> < , , D> < , , IS> < , +, > < , -, > 1183 * <S, 0, > <CR, , > <D, , > 1184 * <S, 1, ID> <I, -, !I> 1185 * <S, 1, IS> < , , !SD> <I, -, !I> 1186 * <S, 1, S> < , , !S> <D, , !SD> < , -, !S> 1187 * <S, 1, > < , , S> <D, , D> <L, +, > <CR, -, > 1188 * <S, >1, ID> < , -, > 1189 * <S, >1, IS> < , , !SD> < , -, > 1190 * <S, >1, S> < , , !S> <D, , !SD> < , -, > 1191 * <S, >1, > < , , S> <D, , D> <L, +, > < , -, > 1192 * <ST, 0, D> <D, , !D> 1193 * <ST, 0, ID> <I, , !I> 1194 * <ST, 0, IS> < , , !SD> <I, , !I> 1195 * <ST, 0, S> < , , !S> < , , !SD> <S, , !S> 1196 * <ST, 0, > < , , S> < , , D> < , , IS> < , +, > <CR, , > 1197 * <ST, >0, D> < , -, > <D, , > 1198 * <ST, >0, ID> < , -, > <S, , > 1199 * <ST, >0, IS> < , , !SD> < , -, > <S, , > 1200 * <ST, >0, S> < , , !S> < , , !SD> < , -, > <S, , > 1201 * <ST, >0, > < , , S> < , , D> < , , IS> < , +, > < , -, > <L, , > 1202 */ 1203 1204static uint32_t pmc_internal_reservation_next_state(uint32_t current_state, pmc_state_event_t event) { 1205 uint32_t new_state = PMC_STATE(PMC_STATE_STATE_INVALID, 0, 0); 1206 1207 switch (event) { 1208 case PMC_STATE_EVENT_START: 1209 switch (current_state & ~(PMC_STATE_CONTEXT_COUNT_MASK)) { 1210 case PMC_STATE(PMC_STATE_STATE_INTERRUPT, 0, PMC_STATE_FLAGS_STOPPING): 1211 case PMC_STATE(PMC_STATE_STATE_LOAD, 0, PMC_STATE_FLAGS_STOPPING): 1212 case PMC_STATE(PMC_STATE_STATE_RUN, 0, PMC_STATE_FLAGS_STOPPING): 1213 case PMC_STATE(PMC_STATE_STATE_STOP, 0, PMC_STATE_FLAGS_STOPPING): 1214 case PMC_STATE(PMC_STATE_STATE_STORE, 0, PMC_STATE_FLAGS_STOPPING): 1215 new_state = PMC_STATE_MODIFY(current_state, 0, 0, PMC_STATE_FLAGS_STOPPING); 1216 break; 1217 case PMC_STATE(PMC_STATE_STATE_STOP, 0, 0): 1218 if (PMC_STATE_CONTEXT_COUNT(current_state) == 0) { 1219 new_state = PMC_STATE_MOVE(current_state, PMC_STATE_STATE_CAN_RUN, 0, 0, 0); 1220 } 1221 break; 1222 } 1223 break; 1224 case PMC_STATE_EVENT_STOP: 1225 switch (current_state & ~(PMC_STATE_CONTEXT_COUNT_MASK)) { 1226 case PMC_STATE(PMC_STATE_STATE_CAN_RUN, 0, 0): 1227 new_state = PMC_STATE_MOVE(current_state, PMC_STATE_STATE_STOP, 0, 0, 0); 1228 break; 1229 case PMC_STATE(PMC_STATE_STATE_INTERRUPT, 0, 0): 1230 case PMC_STATE(PMC_STATE_STATE_LOAD, 0, 0): 1231 case PMC_STATE(PMC_STATE_STATE_RUN, 0, 0): 1232 case PMC_STATE(PMC_STATE_STATE_STORE, 0, 0): 1233 new_state = PMC_STATE_MODIFY(current_state, 0, PMC_STATE_FLAGS_STOPPING, 0); 1234 break; 1235 case PMC_STATE(PMC_STATE_STATE_STOP, 0, 0): 1236 if (PMC_STATE_CONTEXT_COUNT(current_state) > 0) { 1237 new_state = PMC_STATE_MODIFY(current_state, 0, PMC_STATE_FLAGS_STOPPING, 0); 1238 } 1239 break; 1240 } 1241 break; 1242 case PMC_STATE_EVENT_FREE: 1243 switch (current_state & ~(PMC_STATE_CONTEXT_COUNT_MASK)) { 1244 case PMC_STATE(PMC_STATE_STATE_CAN_RUN, 0, 0): 1245 new_state = PMC_STATE_MOVE(current_state, PMC_STATE_STATE_DEALLOC, 0, 0, 0); 1246 break; 1247 case PMC_STATE(PMC_STATE_STATE_INTERRUPT, 0, PMC_STATE_FLAGS_STOPPING): 1248 case PMC_STATE(PMC_STATE_STATE_LOAD, 0, PMC_STATE_FLAGS_INTERRUPTING | PMC_STATE_FLAGS_STOPPING): 1249 case PMC_STATE(PMC_STATE_STATE_LOAD, 0, PMC_STATE_FLAGS_STOPPING): 1250 case PMC_STATE(PMC_STATE_STATE_RUN, 0, PMC_STATE_FLAGS_INTERRUPTING | PMC_STATE_FLAGS_STOPPING): 1251 case PMC_STATE(PMC_STATE_STATE_RUN, 0, PMC_STATE_FLAGS_STOPPING): 1252 case PMC_STATE(PMC_STATE_STATE_STOP, 0, PMC_STATE_FLAGS_INTERRUPTING | PMC_STATE_FLAGS_STOPPING): 1253 case PMC_STATE(PMC_STATE_STATE_STORE, 0, PMC_STATE_FLAGS_INTERRUPTING | PMC_STATE_FLAGS_STOPPING): 1254 case PMC_STATE(PMC_STATE_STATE_STORE, 0, PMC_STATE_FLAGS_STOPPING): 1255 new_state = PMC_STATE_MODIFY(current_state, 0, PMC_STATE_FLAGS_DEALLOCING, PMC_STATE_FLAGS_STOPPING); 1256 break; 1257 case PMC_STATE(PMC_STATE_STATE_INTERRUPT, 0, 0): 1258 case PMC_STATE(PMC_STATE_STATE_LOAD, 0, 0): 1259 case PMC_STATE(PMC_STATE_STATE_RUN, 0, 0): 1260 case PMC_STATE(PMC_STATE_STATE_STORE, 0, 0): 1261 new_state = PMC_STATE_MODIFY(current_state, 0, PMC_STATE_FLAGS_DEALLOCING, 0); 1262 break; 1263 case PMC_STATE(PMC_STATE_STATE_STOP, 0, PMC_STATE_FLAGS_STOPPING): 1264 new_state = PMC_STATE_MOVE(current_state, PMC_STATE_STATE_DEALLOC, 0, PMC_STATE_FLAGS_DEALLOCING, PMC_STATE_FLAGS_STOPPING); 1265 break; 1266 case PMC_STATE(PMC_STATE_STATE_STOP, 0, 0): 1267 if (PMC_STATE_CONTEXT_COUNT(current_state) > 0) { 1268 new_state = PMC_STATE_MOVE(current_state, PMC_STATE_STATE_DEALLOC, 0, PMC_STATE_FLAGS_DEALLOCING, 0); 1269 } else { 1270 new_state = PMC_STATE_MOVE(current_state, PMC_STATE_STATE_DEALLOC, 0, 0, 0); 1271 } 1272 break; 1273 } 1274 break; 1275 case PMC_STATE_EVENT_INTERRUPT: 1276 switch (current_state & ~(PMC_STATE_CONTEXT_COUNT_MASK)) { 1277 case PMC_STATE(PMC_STATE_STATE_LOAD, 0, 0): 1278 case PMC_STATE(PMC_STATE_STATE_RUN, 0, 0): 1279 case PMC_STATE(PMC_STATE_STATE_STORE, 0, 0): 1280 new_state = PMC_STATE_MODIFY(current_state, 0, PMC_STATE_FLAGS_INTERRUPTING | PMC_STATE_FLAGS_STOPPING, 0); 1281 break; 1282 } 1283 break; 1284 case PMC_STATE_EVENT_END_OF_INTERRUPT: 1285 switch (current_state & ~(PMC_STATE_CONTEXT_COUNT_MASK)) { 1286 case PMC_STATE(PMC_STATE_STATE_INTERRUPT, 0, PMC_STATE_FLAGS_DEALLOCING): 1287 new_state = PMC_STATE_MOVE(current_state, PMC_STATE_STATE_DEALLOC, 0, 0, PMC_STATE_FLAGS_DEALLOCING); 1288 break; 1289 case PMC_STATE(PMC_STATE_STATE_INTERRUPT, 0, PMC_STATE_FLAGS_STOPPING): 1290 new_state = PMC_STATE_MOVE(current_state, PMC_STATE_STATE_STOP, 0, 0, PMC_STATE_FLAGS_STOPPING); 1291 break; 1292 case PMC_STATE(PMC_STATE_STATE_INTERRUPT, 0, 0): 1293 new_state = PMC_STATE_MOVE(current_state, PMC_STATE_STATE_CAN_RUN, 0, 0, 0); 1294 break; 1295 } 1296 break; 1297 case PMC_STATE_EVENT_CONTEXT_IN: 1298 switch (current_state & ~(PMC_STATE_CONTEXT_COUNT_MASK)) { 1299 case PMC_STATE(PMC_STATE_STATE_CAN_RUN, 0, 0): 1300 new_state = PMC_STATE_MOVE(current_state, PMC_STATE_STATE_LOAD, 1, 0, 0); 1301 break; 1302 case PMC_STATE(PMC_STATE_STATE_LOAD, 0, 0): 1303 case PMC_STATE(PMC_STATE_STATE_RUN, 0, 0): 1304 case PMC_STATE(PMC_STATE_STATE_STORE, 0, 0): 1305 new_state = PMC_STATE_MODIFY(current_state, 1, 0, 0); 1306 break; 1307 case PMC_STATE(PMC_STATE_STATE_STOP, 0, 0): 1308 if (PMC_STATE_CONTEXT_COUNT(current_state) > 0) { 1309 new_state = PMC_STATE_MOVE(current_state, PMC_STATE_STATE_LOAD, 1, 0, 0); 1310 } 1311 break; 1312 } 1313 break; 1314 case PMC_STATE_EVENT_CONTEXT_OUT: 1315 switch (current_state & ~(PMC_STATE_CONTEXT_COUNT_MASK)) { 1316 case PMC_STATE(PMC_STATE_STATE_DEALLOC, 0, PMC_STATE_FLAGS_DEALLOCING): 1317 if (PMC_STATE_CONTEXT_COUNT(current_state) > 1) { 1318 new_state = PMC_STATE_MODIFY(current_state, -1, 0, PMC_STATE_FLAGS_DEALLOCING); 1319 } else { 1320 new_state = PMC_STATE_MODIFY(current_state, -1, 0, 0); 1321 } 1322 break; 1323 case PMC_STATE(PMC_STATE_STATE_LOAD, 0, PMC_STATE_FLAGS_DEALLOCING): 1324 case PMC_STATE(PMC_STATE_STATE_LOAD, 0, PMC_STATE_FLAGS_INTERRUPTING | PMC_STATE_FLAGS_DEALLOCING): 1325 case PMC_STATE(PMC_STATE_STATE_LOAD, 0, PMC_STATE_FLAGS_INTERRUPTING | PMC_STATE_FLAGS_STOPPING): 1326 case PMC_STATE(PMC_STATE_STATE_LOAD, 0, PMC_STATE_FLAGS_STOPPING): 1327 case PMC_STATE(PMC_STATE_STATE_LOAD, 0, 0): 1328 if (PMC_STATE_CONTEXT_COUNT(current_state) > 1) { 1329 new_state = PMC_STATE_MODIFY(current_state, -1, 0, 0); 1330 } 1331 break; 1332 case PMC_STATE(PMC_STATE_STATE_RUN, 0, PMC_STATE_FLAGS_DEALLOCING): 1333 case PMC_STATE(PMC_STATE_STATE_RUN, 0, PMC_STATE_FLAGS_INTERRUPTING | PMC_STATE_FLAGS_DEALLOCING): 1334 case PMC_STATE(PMC_STATE_STATE_RUN, 0, PMC_STATE_FLAGS_INTERRUPTING | PMC_STATE_FLAGS_STOPPING): 1335 case PMC_STATE(PMC_STATE_STATE_RUN, 0, PMC_STATE_FLAGS_STOPPING): 1336 case PMC_STATE(PMC_STATE_STATE_RUN, 0, 0): 1337 if (PMC_STATE_CONTEXT_COUNT(current_state) == 1) { 1338 new_state = PMC_STATE_MOVE(current_state, PMC_STATE_STATE_STORE, -1, 0, 0); 1339 } else { 1340 new_state = PMC_STATE_MODIFY(current_state, -1, 0, 0); 1341 } 1342 break; 1343 case PMC_STATE(PMC_STATE_STATE_STOP, 0, PMC_STATE_FLAGS_INTERRUPTING | PMC_STATE_FLAGS_DEALLOCING): 1344 case PMC_STATE(PMC_STATE_STATE_STOP, 0, PMC_STATE_FLAGS_INTERRUPTING | PMC_STATE_FLAGS_STOPPING): 1345 if (PMC_STATE_CONTEXT_COUNT(current_state) == 1) { 1346 new_state = PMC_STATE_MOVE(current_state, PMC_STATE_STATE_INTERRUPT, -1, 0, PMC_STATE_FLAGS_INTERRUPTING); 1347 } else { 1348 new_state = PMC_STATE_MODIFY(current_state, -1, 0, 0); 1349 } 1350 break; 1351 case PMC_STATE(PMC_STATE_STATE_STOP, 0, PMC_STATE_FLAGS_STOPPING): 1352 if (PMC_STATE_CONTEXT_COUNT(current_state) == 1) { 1353 new_state = PMC_STATE_MODIFY(current_state, -1, 0, PMC_STATE_FLAGS_STOPPING); 1354 } else { 1355 new_state = PMC_STATE_MODIFY(current_state, -1, 0, 0); 1356 } 1357 break; 1358 case PMC_STATE(PMC_STATE_STATE_STOP, 0, 0): 1359 if (PMC_STATE_CONTEXT_COUNT(current_state) > 0) { 1360 if (PMC_STATE_CONTEXT_COUNT(current_state) == 1) { 1361 new_state = PMC_STATE_MOVE(current_state, PMC_STATE_STATE_CAN_RUN, -1, 0, 0); 1362 } else { 1363 new_state = PMC_STATE_MODIFY(current_state, -1, 0, 0); 1364 } 1365 } 1366 break; 1367 case PMC_STATE(PMC_STATE_STATE_STORE, 0, PMC_STATE_FLAGS_DEALLOCING): 1368 case PMC_STATE(PMC_STATE_STATE_STORE, 0, PMC_STATE_FLAGS_INTERRUPTING | PMC_STATE_FLAGS_DEALLOCING): 1369 case PMC_STATE(PMC_STATE_STATE_STORE, 0, PMC_STATE_FLAGS_INTERRUPTING | PMC_STATE_FLAGS_STOPPING): 1370 case PMC_STATE(PMC_STATE_STATE_STORE, 0, PMC_STATE_FLAGS_STOPPING): 1371 case PMC_STATE(PMC_STATE_STATE_STORE, 0, 0): 1372 if (PMC_STATE_CONTEXT_COUNT(current_state) > 0) { 1373 new_state = PMC_STATE_MODIFY(current_state, -1, 0, 0); 1374 } 1375 break; 1376 } 1377 break; 1378 case PMC_STATE_EVENT_LOAD_FINISHED: 1379 switch (current_state & ~(PMC_STATE_CONTEXT_COUNT_MASK)) { 1380 case PMC_STATE(PMC_STATE_STATE_LOAD, 0, PMC_STATE_FLAGS_DEALLOCING): 1381 case PMC_STATE(PMC_STATE_STATE_LOAD, 0, PMC_STATE_FLAGS_INTERRUPTING | PMC_STATE_FLAGS_DEALLOCING): 1382 case PMC_STATE(PMC_STATE_STATE_LOAD, 0, PMC_STATE_FLAGS_INTERRUPTING | PMC_STATE_FLAGS_STOPPING): 1383 case PMC_STATE(PMC_STATE_STATE_LOAD, 0, PMC_STATE_FLAGS_STOPPING): 1384 if (PMC_STATE_CONTEXT_COUNT(current_state) > 1) { 1385 new_state = PMC_STATE_MOVE(current_state, PMC_STATE_STATE_RUN, -1, 0, 0); 1386 } else { 1387 new_state = PMC_STATE_MOVE(current_state, PMC_STATE_STATE_STORE, -1, 0, 0); 1388 } 1389 break; 1390 case PMC_STATE(PMC_STATE_STATE_LOAD, 0, 0): 1391 new_state = PMC_STATE_MOVE(current_state, PMC_STATE_STATE_RUN, 0, 0, 0); 1392 break; 1393 } 1394 break; 1395 case PMC_STATE_EVENT_STORE_FINISHED: 1396 switch (current_state & ~(PMC_STATE_CONTEXT_COUNT_MASK)) { 1397 case PMC_STATE(PMC_STATE_STATE_STORE, 0, PMC_STATE_FLAGS_DEALLOCING): 1398 if (PMC_STATE_CONTEXT_COUNT(current_state) == 0) { 1399 new_state = PMC_STATE_MOVE(current_state, PMC_STATE_STATE_DEALLOC, 0, 0, PMC_STATE_FLAGS_DEALLOCING); 1400 } else { 1401 new_state = PMC_STATE_MOVE(current_state, PMC_STATE_STATE_DEALLOC, 0, 0, 0); 1402 } 1403 break; 1404 case PMC_STATE(PMC_STATE_STATE_STORE, 0, PMC_STATE_FLAGS_INTERRUPTING | PMC_STATE_FLAGS_DEALLOCING): 1405 case PMC_STATE(PMC_STATE_STATE_STORE, 0, PMC_STATE_FLAGS_INTERRUPTING | PMC_STATE_FLAGS_STOPPING): 1406 if (PMC_STATE_CONTEXT_COUNT(current_state) == 0) { 1407 new_state = PMC_STATE_MOVE(current_state, PMC_STATE_STATE_INTERRUPT, 0, 0, PMC_STATE_FLAGS_INTERRUPTING); 1408 } else { 1409 new_state = PMC_STATE_MOVE(current_state, PMC_STATE_STATE_STOP, 0, 0, 0); 1410 } 1411 break; 1412 case PMC_STATE(PMC_STATE_STATE_STORE, 0, PMC_STATE_FLAGS_STOPPING): 1413 if (PMC_STATE_CONTEXT_COUNT(current_state) == 0) { 1414 new_state = PMC_STATE_MOVE(current_state, PMC_STATE_STATE_STOP, 0, 0, PMC_STATE_FLAGS_STOPPING); 1415 } else { 1416 new_state = PMC_STATE_MOVE(current_state, PMC_STATE_STATE_STOP, 0, 0, 0); 1417 } 1418 break; 1419 case PMC_STATE(PMC_STATE_STATE_STORE, 0, 0): 1420 if (PMC_STATE_CONTEXT_COUNT(current_state) == 0) { 1421 new_state = PMC_STATE_MOVE(current_state, PMC_STATE_STATE_CAN_RUN, 0, 0, 0); 1422 } else { 1423 new_state = PMC_STATE_MOVE(current_state, PMC_STATE_STATE_LOAD, 0, 0, 0); 1424 } 1425 break; 1426 } 1427 break; 1428 } 1429 1430 return new_state; 1431} 1432 1433static uint32_t pmc_internal_reservation_move_for_event(pmc_reservation_t reservation, pmc_state_event_t event, pmc_state_t *old_state_out) { 1434 pmc_state_t oldState; 1435 pmc_state_t newState; 1436 1437 assert(reservation); 1438 1439 /* Determine what state change, if any, we need to do. Keep trying until either we succeed doing a transition 1440 * or the there is no valid move. 1441 */ 1442 do { 1443 oldState = reservation->state; 1444 newState = pmc_internal_reservation_next_state(oldState, event); 1445 } while (newState != PMC_STATE_INVALID && !OSCompareAndSwap(oldState, newState, &(reservation->state))); 1446 1447 if (newState != PMC_STATE_INVALID) { 1448 COUNTER_DEBUG("Moved reservation %p from state "PMC_STATE_FORMAT" to state "PMC_STATE_FORMAT" for event %s\n", reservation, PMC_STATE_ARGS(oldState), PMC_STATE_ARGS(newState), pmc_state_event_name(event)); 1449 } else { 1450 COUNTER_DEBUG("No valid moves for reservation %p in state "PMC_STATE_FORMAT" for event %s\n", reservation, PMC_STATE_ARGS(oldState), pmc_state_event_name(event)); 1451 } 1452 1453 if (old_state_out != NULL) { 1454 *old_state_out = oldState; 1455 } 1456 1457 return newState; 1458} 1459 1460static void pmc_internal_reservation_context_out(pmc_reservation_t reservation) { 1461 assert(reservation); 1462 pmc_state_t newState; 1463 pmc_state_t oldState; 1464 1465 /* Clear that the this reservation was active when this cpu did its last context in */ 1466 OSBitAndAtomic(~(1U << cpu_number()), &(reservation->active_last_context_in)); 1467 1468 /* Move the state machine */ 1469 if (PMC_STATE_INVALID == (newState = pmc_internal_reservation_move_for_event(reservation, PMC_STATE_EVENT_CONTEXT_OUT, &oldState))) { 1470 return; 1471 } 1472 1473 /* Do any actions required based on the state change */ 1474 if (PMC_STATE_STATE(newState) == PMC_STATE_STATE_STORE && PMC_STATE_STATE(oldState) != PMC_STATE_STATE_STORE) { 1475 /* Just moved into STORE, so store the reservation. */ 1476 pmc_internal_reservation_store(reservation); 1477 } else if (PMC_STATE_STATE(newState) == PMC_STATE_STATE_DEALLOC && PMC_STATE_CONTEXT_COUNT(newState) == 0 && PMC_STATE_FLAGS(newState) == 0) { 1478 /* Wakeup any thread blocking for this reservation to hit <DEALLOC, 0, > */ 1479 thread_wakeup((event_t)reservation); 1480 } 1481 1482} 1483 1484static void pmc_internal_reservation_context_in(pmc_reservation_t reservation) { 1485 assert(reservation); 1486 pmc_state_t oldState; 1487 pmc_state_t newState; 1488 1489 /* Move the state machine */ 1490 if (PMC_STATE_INVALID == (newState = pmc_internal_reservation_move_for_event(reservation, PMC_STATE_EVENT_CONTEXT_IN, &oldState))) { 1491 return; 1492 } 1493 1494 /* Mark that the reservation was active when this cpu did its last context in */ 1495 OSBitOrAtomic(1U << cpu_number(), &(reservation->active_last_context_in)); 1496 1497 /* Do any actions required based on the state change */ 1498 if (PMC_STATE_STATE(newState) == PMC_STATE_STATE_LOAD && PMC_STATE_STATE(oldState) != PMC_STATE_STATE_LOAD) { 1499 /* Just moved into LOAD, so load the reservation. */ 1500 pmc_internal_reservation_load(reservation); 1501 } 1502 1503} 1504 1505static void pmc_internal_reservation_store(pmc_reservation_t reservation) { 1506 assert(reservation); 1507 assert(PMC_STATE_STATE(reservation->state) == PMC_STATE_STATE_STORE); 1508 1509 assert(reservation->pmc); 1510 assert(reservation->config); 1511 1512 pmc_state_t newState; 1513 kern_return_t ret = KERN_SUCCESS; 1514 1515 pmc_t store_pmc = reservation->pmc; 1516 pmc_object_t store_pmc_obj = store_pmc->object; 1517 perf_monitor_t store_pm = store_pmc->monitor; 1518 1519 /* 1520 * Instruct the Perf Monitor that contains this counter to turn 1521 * off the global disable for this counter. 1522 */ 1523 ret = store_pm->methods.disable_counters(store_pm->object, &store_pmc_obj, 1); 1524 if(KERN_SUCCESS != ret) { 1525 COUNTER_DEBUG(" [error] disable_counters: 0x%x\n", ret); 1526 return; 1527 } 1528 1529 /* Instruct the counter to disable itself */ 1530 ret = store_pmc->methods.disable(store_pmc_obj); 1531 if(KERN_SUCCESS != ret) { 1532 COUNTER_DEBUG(" [error] disable: 0x%x\n", ret); 1533 } 1534 1535 /* store the counter value into the reservation's stored count */ 1536 ret = store_pmc->methods.get_count(store_pmc_obj, &reservation->value); 1537 if(KERN_SUCCESS != ret) { 1538 COUNTER_DEBUG(" [error] get_count: 0x%x\n", ret); 1539 return; 1540 } 1541 1542 /* Advance the state machine now that the STORE is finished */ 1543 if (PMC_STATE_INVALID == (newState = pmc_internal_reservation_move_for_event(reservation, PMC_STATE_EVENT_STORE_FINISHED, NULL))) { 1544 return; 1545 } 1546 1547 /* Do any actions required based on the state change */ 1548 if (PMC_STATE_STATE(newState) == PMC_STATE_STATE_LOAD) { 1549 /* Just moved into LOAD, so load the reservation. */ 1550 pmc_internal_reservation_load(reservation); 1551 } else if (PMC_STATE_STATE(newState) == PMC_STATE_STATE_DEALLOC && PMC_STATE_CONTEXT_COUNT(newState) == 0 && PMC_STATE_FLAGS(newState) == 0) { 1552 /* Wakeup any thread blocking for this reservation to hit <DEALLOC, 0, > */ 1553 thread_wakeup((event_t)reservation); 1554 } 1555 1556} 1557 1558static void pmc_internal_reservation_load(pmc_reservation_t reservation) { 1559 assert(reservation); 1560 assert(PMC_STATE_STATE(reservation->state) == PMC_STATE_STATE_LOAD); 1561 1562 pmc_state_t newState; 1563 kern_return_t ret = KERN_SUCCESS; 1564 1565 assert(reservation->pmc); 1566 assert(reservation->config); 1567 1568 pmc_t load_pmc = reservation->pmc; 1569 pmc_object_t load_pmc_obj = load_pmc->object; 1570 perf_monitor_t load_pm = load_pmc->monitor; 1571 1572 /* Set the control register up with the stored configuration */ 1573 ret = load_pmc->methods.set_config(load_pmc_obj, reservation->config->object); 1574 if(KERN_SUCCESS != ret) { 1575 COUNTER_DEBUG(" [error] set_config: 0x%x\n", ret); 1576 return; 1577 } 1578 1579 /* load the counter value */ 1580 ret = load_pmc->methods.set_count(load_pmc_obj, reservation->value); 1581 if(KERN_SUCCESS != ret) { 1582 COUNTER_DEBUG(" [error] set_count: 0x%x\n", ret); 1583 return; 1584 } 1585 1586 /* Locally enable the counter */ 1587 ret = load_pmc->methods.enable(load_pmc_obj); 1588 if(KERN_SUCCESS != ret) { 1589 COUNTER_DEBUG(" [error] enable: 0x%x\n", ret); 1590 return; 1591 } 1592 1593 /* 1594 * Instruct the Perf Monitor containing the pmc to enable the 1595 * counter. 1596 */ 1597 ret = load_pm->methods.enable_counters(load_pm->object, &load_pmc_obj, 1); 1598 if(KERN_SUCCESS != ret) { 1599 COUNTER_DEBUG(" [error] enable_counters: 0x%x\n", ret); 1600 /* not on the hardware. */ 1601 return; 1602 } 1603 1604 /* Advance the state machine now that the STORE is finished */ 1605 if (PMC_STATE_INVALID == (newState = pmc_internal_reservation_move_for_event(reservation, PMC_STATE_EVENT_LOAD_FINISHED, NULL))) { 1606 return; 1607 } 1608 1609 /* Do any actions required based on the state change */ 1610 if (PMC_STATE_STATE(newState) == PMC_STATE_STATE_STORE) { 1611 /* Just moved into STORE, so store the reservation. */ 1612 pmc_internal_reservation_store(reservation); 1613 } 1614 1615} 1616 1617/* 1618 * pmc_accessible_from_core will return TRUE if the given @pmc is directly 1619 * (e.g., hardware) readable from the given logical core. 1620 * 1621 * NOTE: This method is interrupt safe. 1622 */ 1623static inline boolean_t pmc_accessible_from_core(pmc_t pmc, uint32_t logicalCore) { 1624 boolean_t ret = FALSE; 1625 1626 assert(pmc); 1627 1628 ret = pmc->methods.accessible_from_core(pmc->object, logicalCore); 1629 1630 return ret; 1631} 1632 1633static void pmc_internal_reservation_start_cpu(void * arg) { 1634 pmc_reservation_t reservation = (pmc_reservation_t)arg; 1635 1636 assert(reservation); 1637 1638 1639 if (pmc_internal_reservation_matches_context(reservation)) { 1640 /* We are in context, but the reservation may have already had the context_in method run. Attempt 1641 * to set this cpu's bit in the active_last_context_in mask. If we set it, call context_in. 1642 */ 1643 uint32_t oldMask = OSBitOrAtomic(1U << cpu_number(), &(reservation->active_last_context_in)); 1644 1645 if ((oldMask & (1U << cpu_number())) == 0) { 1646 COUNTER_DEBUG("Starting already in-context reservation %p for cpu %d\n", reservation, cpu_number()); 1647 1648 pmc_internal_reservation_context_in(reservation); 1649 } 1650 } 1651} 1652 1653static void pmc_internal_reservation_stop_cpu(void * arg) { 1654 pmc_reservation_t reservation = (pmc_reservation_t)arg; 1655 1656 assert(reservation); 1657 1658 1659 if (pmc_internal_reservation_matches_context(reservation)) { 1660 COUNTER_DEBUG("Stopping in-context reservation %p for cpu %d\n", reservation, cpu_number()); 1661 1662 pmc_internal_reservation_context_out(reservation); 1663 } 1664} 1665 1666/*!fn 1667 * pmc_reservation_interrupt is called when a PMC reservation which was setup 1668 * with an interrupt threshold counts the requested number of events. When the 1669 * underlying counter hits the threshold, an interrupt is generated, and this 1670 * method is called. This method marks the reservation as stopped, and passes 1671 * control off to the user-registered callback method, along with the 1672 * reservation (so that the user can, for example, write a 0 to the counter, and 1673 * restart the reservation). 1674 * This method assumes the reservation has a valid pmc_config_t within. 1675 * 1676 * @param target The pmc_reservation_t that caused the interrupt. 1677 * @param refCon User specified reference constant. 1678 */ 1679static void pmc_reservation_interrupt(void *target, void *refCon) { 1680 pmc_reservation_t reservation = (pmc_reservation_t)target; 1681 pmc_state_t newState; 1682 uint64_t timeout; 1683 uint32_t spins; 1684 1685 assert(reservation); 1686 1687 /* Move the state machine */ 1688 if (PMC_STATE_INVALID == pmc_internal_reservation_move_for_event(reservation, PMC_STATE_EVENT_INTERRUPT, NULL)) { 1689 return; 1690 } 1691 1692 /* A valid state move has been made, but won't be picked up until a context switch occurs. To cause matching 1693 * contexts that are currently running to update, we do an inter-processor message to run pmc_internal_reservation_stop_cpu 1694 * on every cpu that can access the PMC. 1695 */ 1696 pmc_internal_reservation_broadcast(reservation, pmc_internal_reservation_stop_cpu); 1697 1698 /* Spin waiting for the state to turn to INTERRUPT */ 1699 nanoseconds_to_absolutetime(PMC_SPIN_TIMEOUT_US * 1000, &timeout); 1700 timeout += mach_absolute_time(); 1701 spins = 0; 1702 while (PMC_STATE_STATE(reservation->state) != PMC_STATE_STATE_INTERRUPT) { 1703 /* Assert if this takes longer than PMC_SPIN_TIMEOUT_US */ 1704 if (++spins > PMC_SPIN_THRESHOLD) { 1705 if (mach_absolute_time() > timeout) { 1706 pmc_spin_timeout_count++; 1707 assert(0); 1708 } 1709 } 1710 1711 cpu_pause(); 1712 } 1713 1714 assert(reservation->config); 1715 assert(reservation->config->method); 1716 1717 /* Call the registered callback handler */ 1718#if DEBUG_COUNTERS 1719 uint64_t start = mach_absolute_time(); 1720#endif /* DEBUG */ 1721 1722 (void)reservation->config->method(reservation, refCon); 1723 1724#if DEBUG_COUNTERS 1725 uint64_t end = mach_absolute_time(); 1726 if((end - start) > 5000ULL) { 1727 kprintf("%s - user method %p took %llu ns\n", __FUNCTION__, 1728 reservation->config->method, (end - start)); 1729 } 1730#endif 1731 1732 /* Move the state machine */ 1733 if (PMC_STATE_INVALID == (newState = pmc_internal_reservation_move_for_event(reservation, PMC_STATE_EVENT_END_OF_INTERRUPT, NULL))) { 1734 return; 1735 } 1736 1737 /* Do any post-move actions necessary */ 1738 if (PMC_STATE_STATE(newState) == PMC_STATE_STATE_CAN_RUN) { 1739 pmc_internal_reservation_broadcast(reservation, pmc_internal_reservation_start_cpu); 1740 } else if (PMC_STATE_STATE(newState) == PMC_STATE_STATE_DEALLOC && PMC_STATE_CONTEXT_COUNT(newState) == 0 && PMC_STATE_FLAGS(newState) == 0) { 1741 /* Wakeup any thread blocking for this reservation to hit <DEALLOC, 0, > */ 1742 thread_wakeup((event_t)reservation); 1743 } 1744} 1745 1746/* 1747 * Apple-private KPI for Apple kext's (IOProfileFamily) only 1748 */ 1749 1750#if 0 1751#pragma mark - 1752#pragma mark IOProfileFamily private KPI 1753#endif 1754 1755/* 1756 * perf_monitor_register registers a new Performance Monitor, and its associated 1757 * callback methods. The given perf_monitor_object_t is the first argument to 1758 * each callback when they are called. 1759 */ 1760kern_return_t perf_monitor_register(perf_monitor_object_t monitor, 1761 perf_monitor_methods_t *methods) { 1762 int cpu = -1; 1763 1764 COUNTER_DEBUG("registering perf monitor %p\n", monitor); 1765 1766 if(!monitor || !methods) { 1767 return KERN_INVALID_ARGUMENT; 1768 } 1769 1770 /* Protect against out-of-date driver kexts */ 1771 if(MACH_PERFMON_METHODS_VERSION != methods->perf_monitor_methods_version) { 1772 return KERN_INVALID_ARGUMENT; 1773 } 1774 1775 /* If the monitor requires idle notifications, ensure that it is 1776 * accessible from a single core only. 1777 */ 1778 if (methods->flags & PERFMON_FLAG_REQUIRES_IDLE_NOTIFICATIONS) { 1779 uint32_t *cores; 1780 size_t core_cnt; 1781 1782 if (KERN_SUCCESS == methods->accessible_cores(monitor, &cores, &core_cnt)) { 1783 /* 1784 * Guard against disabled cores - monitors will always match and 1785 * attempt registration, irrespective of 'cpus=x' boot-arg. 1786 */ 1787 if ((core_cnt == 1) && (cores[0] < (uint32_t)ml_get_max_cpus())) { 1788 cpu = cores[0]; 1789 } else { 1790 return KERN_INVALID_ARGUMENT; 1791 } 1792 } 1793 } 1794 1795 /* All methods are required */ 1796 if(!methods->accessible_cores | 1797 !methods->enable_counters || !methods->disable_counters || 1798 !methods->on_idle || !methods->on_idle_exit) { 1799 return KERN_INVALID_ARGUMENT; 1800 } 1801 1802 /* prevent dupes. */ 1803 perf_monitor_t dupe = perf_monitor_find(monitor); 1804 if(dupe) { 1805 COUNTER_DEBUG("Duplicate registration for %p\n", monitor); 1806 perf_monitor_deallocate(dupe); 1807 return KERN_FAILURE; 1808 } 1809 1810 perf_monitor_t pm = perf_monitor_alloc(); 1811 if(!pm) { 1812 return KERN_RESOURCE_SHORTAGE; 1813 } 1814 1815 /* initialize the object */ 1816 perf_monitor_init(pm, cpu); 1817 1818 /* copy in the registration info */ 1819 pm->object = monitor; 1820 memcpy(&(pm->methods), methods, sizeof(perf_monitor_methods_t)); 1821 1822 /* place it in the tracking queues */ 1823 perf_monitor_enqueue(pm); 1824 1825 /* debug it */ 1826 PRINT_PERF_MON(pm); 1827 1828 return KERN_SUCCESS; 1829} 1830 1831/* 1832 * perf_monitor_unregister unregisters a previously registered Perf Monitor, 1833 * looking it up by reference pointer (the same that was used in 1834 * perf_monitor_register()). 1835 */ 1836kern_return_t perf_monitor_unregister(perf_monitor_object_t monitor) { 1837 kern_return_t ret = KERN_FAILURE; 1838 1839 COUNTER_DEBUG("unregistering perf monitor %p\n", monitor); 1840 1841 if(!monitor) { 1842 return KERN_INVALID_ARGUMENT; 1843 } 1844 1845 perf_monitor_t pm = perf_monitor_find(monitor); 1846 if(pm) { 1847 /* Remove it from the queues. */ 1848 perf_monitor_dequeue(pm); 1849 1850 /* drop extra retain from find */ 1851 perf_monitor_deallocate(pm); 1852 1853 /* and release the object */ 1854 perf_monitor_deallocate(pm); 1855 1856 ret = KERN_SUCCESS; 1857 } else { 1858 COUNTER_DEBUG("could not find a registered pm that matches!\n"); 1859 } 1860 1861 return ret; 1862} 1863 1864/* 1865 * pmc_register registers a new PMC for use with the pmc subsystem. Each PMC is 1866 * associated with a Perf Monitor. Perf Monitors are looked up by the reference 1867 * pointer that was used to previously register them. 1868 * 1869 * PMCs are registered with a reference pointer (@pmc_object), and a set of 1870 * callback methods. When the given callback methods are called from xnu, the 1871 * first argument will always be the reference pointer used to register the PMC. 1872 * 1873 * NOTE: @monitor must have been successfully registered via 1874 * perf_monitor_register before this method will succeed. 1875 */ 1876kern_return_t pmc_register(perf_monitor_object_t monitor, pmc_object_t pmc_object, 1877 pmc_methods_t *methods, void *object) { 1878 1879 COUNTER_DEBUG("%p %p\n", monitor, pmc_object); 1880 1881 if(!monitor || !pmc_object || !methods || !object) { 1882 return KERN_INVALID_ARGUMENT; 1883 } 1884 1885 /* Prevent version mismatches */ 1886 if(MACH_PMC_METHODS_VERSION != methods->pmc_methods_version) { 1887 COUNTER_DEBUG("version mismatch\n"); 1888 return KERN_INVALID_ARGUMENT; 1889 } 1890 1891 /* All methods are required. */ 1892 if(!methods->create_config || 1893 !methods->free_config || 1894 !methods->config_set_value || 1895 !methods->config_set_threshold || 1896 !methods->config_set_handler || 1897 !methods->set_config || 1898 !methods->get_monitor || 1899 !methods->get_name || 1900 !methods->accessible_from_core || 1901 !methods->accessible_cores || 1902 !methods->get_count || 1903 !methods->set_count || 1904 !methods->disable || 1905 !methods->enable || 1906 !methods->open || 1907 !methods->close) { 1908 return KERN_INVALID_ARGUMENT; 1909 } 1910 1911 /* make sure this perf monitor object is already registered */ 1912 /* 1913 * NOTE: this adds a reference to the parent, so we'll have to drop it in 1914 * any failure code paths from here on out. 1915 */ 1916 perf_monitor_t pm = perf_monitor_find(monitor); 1917 if(!pm) { 1918 COUNTER_DEBUG("Could not find perf monitor for %p\n", monitor); 1919 return KERN_INVALID_ARGUMENT; 1920 } 1921 1922 /* make a new pmc */ 1923 pmc_t pmc = pmc_alloc(); 1924 if(!pmc) { 1925 /* drop the extra reference from perf_monitor_find() */ 1926 perf_monitor_deallocate(pm); 1927 return KERN_RESOURCE_SHORTAGE; 1928 } 1929 1930 /* init it */ 1931 pmc_init(pmc); 1932 1933 pmc->object = pmc_object; 1934 pmc->open_object = object; 1935 1936 /* copy the callbacks in */ 1937 memcpy(&(pmc->methods), methods, sizeof(pmc_methods_t)); 1938 1939 pmc->monitor = pm; 1940 1941 perf_monitor_add_pmc(pmc->monitor, pmc); 1942 1943 /* enqueue it in our tracking queue */ 1944 pmc_enqueue(pmc); 1945 1946 /* drop extra reference from perf_monitor_find() */ 1947 perf_monitor_deallocate(pm); 1948 1949 return KERN_SUCCESS; 1950} 1951 1952/* 1953 * pmc_unregister unregisters a previously registered PMC, looking it up by 1954 * reference point to *both* the Perf Monitor it was created with, and the PMC's 1955 * reference pointer itself. 1956 */ 1957kern_return_t pmc_unregister(perf_monitor_object_t monitor, pmc_object_t pmc_object) { 1958 COUNTER_DEBUG("%p %p\n", monitor, pmc_object); 1959 1960 if(!monitor || !pmc_object) { 1961 return KERN_INVALID_ARGUMENT; 1962 } 1963 1964 pmc_t pmc = pmc_find(pmc_object); 1965 if(!pmc) { 1966 COUNTER_DEBUG("Could not find a matching pmc.\n"); 1967 return KERN_FAILURE; 1968 } 1969 1970 /* remove it from the global queue */ 1971 pmc_dequeue(pmc); 1972 1973 perf_monitor_remove_pmc(pmc->monitor, pmc); 1974 1975 /* remove extra reference count from pmc_find() */ 1976 pmc_deallocate(pmc); 1977 1978 /* dealloc the pmc */ 1979 pmc_deallocate(pmc); 1980 1981 return KERN_SUCCESS; 1982} 1983 1984static void perf_monitor_reservation_add(perf_monitor_t monitor) { 1985 assert(monitor); 1986 OSIncrementAtomic(&(monitor->reservedCounters)); 1987} 1988 1989static void perf_monitor_reservation_remove(perf_monitor_t monitor) { 1990 assert(monitor); 1991 OSDecrementAtomic(&(monitor->reservedCounters)); 1992} 1993 1994#if 0 1995#pragma mark - 1996#pragma mark KPI 1997#endif 1998 1999/* 2000 * Begin in-kernel and in-kext KPI methods 2001 */ 2002 2003/* 2004 * pmc_create_config creates a new configuration area from a given @pmc. 2005 * 2006 * NOTE: This method is not interrupt safe. 2007 */ 2008kern_return_t pmc_create_config(pmc_t pmc, pmc_config_t *config) { 2009 pmc_config_t tmp = NULL; 2010 2011 if(!pmc || !config) { 2012 return KERN_INVALID_ARGUMENT; 2013 } 2014 2015 pmc_reference(pmc); 2016 2017 tmp = pmc_config_alloc(pmc); 2018 if(tmp) { 2019 tmp->object = pmc->methods.create_config(pmc->object); 2020 2021 if(!tmp->object) { 2022 pmc_config_free(pmc, tmp); 2023 tmp = NULL; 2024 } else { 2025 tmp->interrupt_after_value = 0ULL; 2026 tmp->method = NULL; 2027 tmp->refCon = NULL; 2028 } 2029 } 2030 2031 pmc_deallocate(pmc); 2032 2033 if(!tmp) { 2034 return KERN_RESOURCE_SHORTAGE; 2035 } 2036 2037 *config = tmp; 2038 2039 return KERN_SUCCESS; 2040} 2041 2042/* 2043 * pmc_free_config frees a configuration area created from a given @pmc 2044 * 2045 * NOTE: This method is not interrupt safe. 2046 */ 2047void pmc_free_config(pmc_t pmc, pmc_config_t config) { 2048 assert(pmc); 2049 assert(config); 2050 2051 pmc_reference(pmc); 2052 2053 pmc_config_free(pmc, config); 2054 2055 pmc_deallocate(pmc); 2056} 2057 2058/* 2059 * pmc_config_set_value sets up configuration area key-value pairs. These pairs 2060 * are to be either pre-known, or looked up via CoreProfile.framework. 2061 * 2062 * NOTE: This method is not interrupt safe. 2063 */ 2064kern_return_t pmc_config_set_value(pmc_t pmc, pmc_config_t config, 2065 uint8_t id, uint64_t value) { 2066 2067 kern_return_t ret = KERN_INVALID_ARGUMENT; 2068 2069 if(!pmc || !config) { 2070 return ret; 2071 } 2072 2073 pmc_reference(pmc); 2074 2075 ret = pmc->methods.config_set_value(config->object, id, value); 2076 2077 pmc_deallocate(pmc); 2078 2079 return ret; 2080} 2081 2082/* 2083 * pmc_config_set_interrupt_threshold modifies a config object, instructing 2084 * the pmc that it should generate a call to the given pmc_interrupt_method_t 2085 * after the counter counts @threshold events. 2086 * 2087 * PMC Threshold handler methods will have the pmc_reservation_t that generated the interrupt 2088 * as the first argument when the interrupt handler is invoked, and the given 2089 * @refCon (which may be NULL) as the second. 2090 * 2091 * See pmc_interrupt_method_t. 2092 * 2093 * NOTE: This method is not interrupt safe. 2094 */ 2095kern_return_t pmc_config_set_interrupt_threshold(pmc_t pmc, pmc_config_t config, 2096 uint64_t threshold, pmc_interrupt_method_t method, void *refCon) { 2097 kern_return_t ret = KERN_INVALID_ARGUMENT; 2098 2099 if(!config || !pmc) { 2100 return ret; 2101 } 2102 2103 assert(config); 2104 assert(pmc); 2105 2106 pmc_reference(pmc); 2107 2108 do { 2109 /* 2110 * We have a minor annoyance to side-step here. The driver layer expects 2111 * the config to never change once a reservation has been taken out with 2112 * it. However, in order to have the PMI method have the reservation as 2113 * the first argument (in order to allow the user-method to, for 2114 * example, write a 0 to it, and restart it), we need to create the 2115 * pmc_reservation_t before setting it up in the config object. 2116 * We overcome this by caching the method in the pmc_config_t stand-in, 2117 * and mutating the pmc_config_object_t just before returning a 2118 * reservation (in pmc_reserve() and friends, below). 2119 */ 2120 2121 /* might as well stash this away too. */ 2122 config->interrupt_after_value = threshold; 2123 config->method = method; 2124 config->refCon = refCon; 2125 2126 ret = KERN_SUCCESS; 2127 2128 }while(0); 2129 2130 pmc_deallocate(pmc); 2131 2132 return ret; 2133} 2134 2135/* 2136 * pmc_get_pmc_list returns an allocated list of pmc_t's, as well as the number 2137 * of pmc_t's returned. Callers should free this list with a call to 2138 * pmc_free_pmc_list(). 2139 * 2140 * NOTE: This method is not interrupt safe. 2141 */ 2142kern_return_t pmc_get_pmc_list(pmc_t **pmcs, size_t *pmcCount) { 2143 pmc_t *array = NULL; 2144 pmc_t pmc = NULL; 2145 size_t count = 0UL; 2146 2147 do { 2148 /* Copy down (to the stack) the count of perf counters */ 2149 vm_size_t size = perf_counters_count; 2150 2151 /* Allocate that sized chunk */ 2152 array = (pmc_t *)kalloc(sizeof(pmc_t) * size); 2153 if(!array) { 2154 return KERN_RESOURCE_SHORTAGE; 2155 } 2156 2157 /* Take the spin lock */ 2158 lck_spin_lock(&perf_counters_queue_spin); 2159 2160 /* verify the size didn't change while we were allocating */ 2161 if(size != perf_counters_count) { 2162 /* 2163 * queue size has changed between alloc and now - go back and 2164 * make another pass. 2165 */ 2166 2167 /* drop the lock */ 2168 lck_spin_unlock(&perf_counters_queue_spin); 2169 2170 /* free the block */ 2171 kfree(array, sizeof(pmc_t) * size); 2172 array = NULL; 2173 } 2174 2175 /* if we get here, and array is NULL, we try again. */ 2176 }while(!array); 2177 2178 /* copy the bits out */ 2179 queue_iterate(perf_counters_queue, pmc, pmc_t, link) { 2180 /* copy out the pointer */ 2181 array[count++] = pmc; 2182 } 2183 2184 lck_spin_unlock(&perf_counters_queue_spin); 2185 2186 /* return the list and the size */ 2187 *pmcs = array; 2188 *pmcCount = count; 2189 2190 return KERN_SUCCESS; 2191} 2192 2193/* 2194 * pmc_free_pmc_list frees an array of pmc_t that has been returned from 2195 * pmc_get_pmc_list. 2196 * 2197 * NOTE: This method is not interrupt safe. 2198 */ 2199void pmc_free_pmc_list(pmc_t *pmcs, size_t pmcCount) { 2200 if(pmcs && pmcCount) { 2201 COUNTER_DEBUG("pmcs: %p pmcCount: %lu\n", pmcs, pmcCount); 2202 2203 kfree(pmcs, pmcCount * sizeof(pmc_t)); 2204 } 2205} 2206 2207kern_return_t pmc_find_by_name(const char *name, pmc_t **pmcs, size_t *pmcCount) { 2208 kern_return_t ret = KERN_INVALID_ARGUMENT; 2209 2210 if(!name || !pmcs || !pmcCount) { 2211 return ret; 2212 } 2213 2214 pmc_t *list = NULL; 2215 size_t count = 0UL; 2216 2217 if(KERN_SUCCESS == (ret = pmc_get_pmc_list(&list, &count))) { 2218 size_t matchCount = 0UL, ii = 0UL, swapPtr = 0UL; 2219 size_t len = strlen(name); 2220 2221 for(ii = 0UL; ii < count; ii++) { 2222 const char *pmcName = pmc_get_name(list[ii]); 2223 2224 if(strlen(pmcName) < len) { 2225 /* 2226 * If the pmc name is shorter than the requested match, it's no 2227 * match, as we're looking for the most specific match(es). 2228 */ 2229 continue; 2230 } 2231 2232 if(0 == strncmp(name, pmcName, len)) { 2233 pmc_t temp = list[ii]; 2234 2235 // move matches to the head of the array. 2236 list[ii] = list[swapPtr]; 2237 list[swapPtr] = temp; 2238 swapPtr++; 2239 2240 // keep a count of the matches 2241 matchCount++; 2242 } 2243 } 2244 2245 if(matchCount) { 2246 /* 2247 * If we have matches, they are all at the head of the array, so 2248 * just allocate enough space for @matchCount pmc_t's, and copy the 2249 * head of the array to the new allocation. Then free the old 2250 * allocation. 2251 */ 2252 2253 pmc_t *result = (pmc_t *)kalloc(sizeof(pmc_t) * matchCount); 2254 if(result) { 2255 // copy the matches 2256 memcpy(result, list, sizeof(pmc_t) * matchCount); 2257 2258 ret = KERN_SUCCESS; 2259 } 2260 2261 pmc_free_pmc_list(list, count); 2262 2263 if(!result) { 2264 *pmcs = NULL; 2265 *pmcCount = 0UL; 2266 return KERN_RESOURCE_SHORTAGE; 2267 } 2268 2269 *pmcs = result; 2270 *pmcCount = matchCount; 2271 } else { 2272 *pmcs = NULL; 2273 *pmcCount = 0UL; 2274 } 2275 } 2276 2277 return ret; 2278} 2279 2280/* 2281 * pmc_get_name returns a pointer (not copied) to the human-readable name of the 2282 * given pmc. 2283 * 2284 * NOTE: Driver authors must take care to not allocate during this method, as 2285 * this method *IS* interrupt safe. 2286 */ 2287const char *pmc_get_name(pmc_t pmc) { 2288 assert(pmc); 2289 2290 const char *name = pmc->methods.get_name(pmc->object); 2291 2292 return name; 2293} 2294 2295/* 2296 * pmc_get_accessible_core_list returns a pointer to an array of logical core 2297 * numbers (as well as the size of that array) that represent the local cores 2298 * (hardware threads) from which the given @pmc can be accessed directly. 2299 * 2300 * NOTE: This method is interrupt safe. 2301 */ 2302kern_return_t pmc_get_accessible_core_list(pmc_t pmc, uint32_t **logicalCores, 2303 size_t *logicalCoreCt) { 2304 2305 kern_return_t ret = KERN_INVALID_ARGUMENT; 2306 2307 if(!pmc || !logicalCores || !logicalCoreCt) { 2308 return ret; 2309 } 2310 2311 ret = pmc->methods.accessible_cores(pmc->object, logicalCores, logicalCoreCt); 2312 2313 return ret; 2314} 2315 2316static boolean_t pmc_reservation_setup_pmi(pmc_reservation_t resv, pmc_config_t config) { 2317 assert(resv); 2318 assert(resv->pmc); 2319 assert(config); 2320 assert(config->object); 2321 2322 /* If there's no PMI to setup, return success */ 2323 if(config->interrupt_after_value && config->method) { 2324 2325 /* set the threshold */ 2326 kern_return_t ret = resv->pmc->methods.config_set_threshold(config->object, 2327 config->interrupt_after_value); 2328 2329 if(KERN_SUCCESS != ret) { 2330 /* 2331 * This is the most useful error message here, as this only happens 2332 * as a result of pmc_reserve*() 2333 */ 2334 COUNTER_DEBUG("Failed to set threshold for pmc %p\n", resv->pmc); 2335 return FALSE; 2336 } 2337 2338 if(KERN_SUCCESS != resv->pmc->methods.config_set_handler(config->object, 2339 (void *)resv, &pmc_reservation_interrupt, config->refCon)) { 2340 2341 COUNTER_DEBUG("Failed to set handler for pmc %p\n", resv->pmc); 2342 return FALSE; 2343 } 2344 } 2345 2346 return TRUE; 2347} 2348 2349/* 2350 * pmc_reserve will attempt to reserve the given @pmc, with a given 2351 * configuration object, for counting system-wide. This method will fail with 2352 * KERN_FAILURE if the given pmc is already reserved at any scope. 2353 * 2354 * This method consumes the given configuration object if it returns 2355 * KERN_SUCCESS. Any other return value indicates the caller 2356 * must free the config object via pmc_free_config(). 2357 * 2358 * NOTE: This method is NOT interrupt safe. 2359 */ 2360kern_return_t pmc_reserve(pmc_t pmc, pmc_config_t config, 2361 pmc_reservation_t *reservation) { 2362 2363 if(!pmc || !config || !reservation) { 2364 return KERN_INVALID_ARGUMENT; 2365 } 2366 2367 pmc_reservation_t resv = reservation_alloc(); 2368 if(!resv) { 2369 return KERN_RESOURCE_SHORTAGE; 2370 } 2371 2372 reservation_init(resv); 2373 2374 resv->flags |= PMC_FLAG_SCOPE_SYSTEM; 2375 resv->config = config; 2376 2377 if(KERN_SUCCESS != pmc_internal_reservation_set_pmc(resv, pmc)) { 2378 resv->config = NULL; 2379 return KERN_FAILURE; 2380 } 2381 2382 /* enqueue reservation in proper place */ 2383 if(!pmc_internal_reservation_add(resv) || !pmc_reservation_setup_pmi(resv, config)) { 2384 /* Prevent free of config object */ 2385 resv->config = NULL; 2386 2387 reservation_free(resv); 2388 return KERN_FAILURE; 2389 } 2390 2391 perf_monitor_reservation_add(pmc->monitor); 2392 2393 *reservation = resv; 2394 2395 return KERN_SUCCESS; 2396} 2397 2398/* 2399 * pmc_reserve_task will attempt to reserve the given @pmc with a given 2400 * configuration object, for counting when the given @task is running on any 2401 * logical core that can directly access the given @pmc. This method will fail 2402 * with KERN_FAILURE if the given pmc is already reserved at either system or 2403 * thread scope. 2404 * 2405 * This method consumes the given configuration object if it returns 2406 * KERN_SUCCESS. Any other return value indicates the caller 2407 * must free the config object via pmc_free_config(). 2408 * 2409 * NOTE: You can reserve the same pmc for N different tasks concurrently. 2410 * NOTE: This method is NOT interrupt safe. 2411 */ 2412kern_return_t pmc_reserve_task(pmc_t pmc, pmc_config_t config, 2413 task_t task, pmc_reservation_t *reservation) { 2414 2415 if(!pmc || !config || !reservation || !task) { 2416 return KERN_INVALID_ARGUMENT; 2417 } 2418 2419 if (!(pmc->monitor->methods.flags & PERFMON_FLAG_SUPPORTS_CONTEXT_SWITCHING)) { 2420 COUNTER_DEBUG("pmc %p cannot be context switched!\n", pmc); 2421 return KERN_INVALID_ARGUMENT; 2422 } 2423 2424 pmc_reservation_t resv = reservation_alloc(); 2425 if(!resv) { 2426 return KERN_RESOURCE_SHORTAGE; 2427 } 2428 2429 reservation_init(resv); 2430 2431 resv->flags |= PMC_FLAG_SCOPE_TASK; 2432 resv->task = task; 2433 2434 resv->config = config; 2435 2436 if(KERN_SUCCESS != pmc_internal_reservation_set_pmc(resv, pmc)) { 2437 resv->config = NULL; 2438 return KERN_FAILURE; 2439 } 2440 2441 /* enqueue reservation in proper place */ 2442 if(!pmc_internal_reservation_add(resv) || !pmc_reservation_setup_pmi(resv, config)) { 2443 /* Prevent free of config object */ 2444 resv->config = NULL; 2445 2446 reservation_free(resv); 2447 return KERN_FAILURE; 2448 } 2449 2450 perf_monitor_reservation_add(pmc->monitor); 2451 2452 *reservation = resv; 2453 2454 return KERN_SUCCESS; 2455} 2456 2457/* 2458 * pmc_reserve_thread will attempt to reserve the given @pmc with a given 2459 * configuration object, for counting when the given @thread is running on any 2460 * logical core that can directly access the given @pmc. This method will fail 2461 * with KERN_FAILURE if the given pmc is already reserved at either system or 2462 * task scope. 2463 * 2464 * This method consumes the given configuration object if it returns 2465 * KERN_SUCCESS. Any other return value indicates the caller 2466 * must free the config object via pmc_free_config(). 2467 * 2468 * NOTE: You can reserve the same pmc for N different threads concurrently. 2469 * NOTE: This method is NOT interrupt safe. 2470 */ 2471kern_return_t pmc_reserve_thread(pmc_t pmc, pmc_config_t config, 2472 thread_t thread, pmc_reservation_t *reservation) { 2473 if(!pmc || !config || !reservation || !thread) { 2474 return KERN_INVALID_ARGUMENT; 2475 } 2476 2477 if (!(pmc->monitor->methods.flags & PERFMON_FLAG_SUPPORTS_CONTEXT_SWITCHING)) { 2478 COUNTER_DEBUG("pmc %p cannot be context switched!\n", pmc); 2479 return KERN_INVALID_ARGUMENT; 2480 } 2481 2482 pmc_reservation_t resv = reservation_alloc(); 2483 if(!resv) { 2484 return KERN_RESOURCE_SHORTAGE; 2485 } 2486 2487 reservation_init(resv); 2488 2489 resv->flags |= PMC_FLAG_SCOPE_THREAD; 2490 resv->thread = thread; 2491 2492 resv->config = config; 2493 2494 if(KERN_SUCCESS != pmc_internal_reservation_set_pmc(resv, pmc)) { 2495 resv->config = NULL; 2496 return KERN_FAILURE; 2497 } 2498 2499 /* enqueue reservation in proper place */ 2500 if(!pmc_internal_reservation_add(resv) || !pmc_reservation_setup_pmi(resv, config)) { 2501 /* Prevent free of config object */ 2502 resv->config = NULL; 2503 2504 reservation_free(resv); 2505 return KERN_FAILURE; 2506 } 2507 2508 perf_monitor_reservation_add(pmc->monitor); 2509 2510 *reservation = resv; 2511 2512 return KERN_SUCCESS; 2513} 2514 2515/* 2516 * pmc_reservation_start instructs the given reservation to start counting as 2517 * soon as possible. 2518 * 2519 * NOTE: This method is interrupt safe. 2520 */ 2521kern_return_t pmc_reservation_start(pmc_reservation_t reservation) { 2522 pmc_state_t newState; 2523 2524 if(!reservation) { 2525 return KERN_INVALID_ARGUMENT; 2526 } 2527 2528 /* Move the state machine */ 2529 if (PMC_STATE_INVALID == (newState = pmc_internal_reservation_move_for_event(reservation, PMC_STATE_EVENT_START, NULL))) { 2530 return KERN_FAILURE; 2531 } 2532 2533 /* If we are currently in an interrupt, don't bother to broadcast since it won't do anything now and the interrupt will 2534 * broadcast right before it leaves 2535 */ 2536 if (PMC_STATE_STATE(newState) != PMC_STATE_STATE_INTERRUPT) { 2537 /* A valid state move has been made, but won't be picked up until a context switch occurs. To cause matching 2538 * contexts that are currently running to update, we do an inter-processor message to run pmc_internal_reservation_start_cpu 2539 * on every cpu that can access the PMC. 2540 */ 2541 pmc_internal_reservation_broadcast(reservation, pmc_internal_reservation_start_cpu); 2542 } 2543 2544 return KERN_SUCCESS; 2545} 2546 2547/* 2548 * pmc_reservation_stop instructs the given reservation to stop counting as 2549 * soon as possible. When this method returns, the pmc will be marked as stopping 2550 * and subsequent calls to pmc_reservation_start will succeed. This does not mean 2551 * that the pmc hardware has _actually_ stopped running. Assuming no other changes 2552 * to the reservation state, the pmc hardware _will_ stop shortly. 2553 * 2554 */ 2555kern_return_t pmc_reservation_stop(pmc_reservation_t reservation) { 2556 pmc_state_t newState; 2557 2558 if(!reservation) { 2559 return KERN_INVALID_ARGUMENT; 2560 } 2561 2562 /* Move the state machine */ 2563 if (PMC_STATE_INVALID == (newState = pmc_internal_reservation_move_for_event(reservation, PMC_STATE_EVENT_STOP, NULL))) { 2564 return KERN_FAILURE; 2565 } 2566 2567 /* If we are currently in an interrupt, don't bother to broadcast since it won't do anything now and the interrupt will 2568 * broadcast right before it leaves. Similarly, if we just moved directly to STOP, don't bother broadcasting. 2569 */ 2570 if (PMC_STATE_STATE(newState) != PMC_STATE_STATE_INTERRUPT && PMC_STATE_STATE(newState) != PMC_STATE_STATE_STOP) { 2571 /* A valid state move has been made, but won't be picked up until a context switch occurs. To cause matching 2572 * contexts that are currently running to update, we do an inter-processor message to run pmc_internal_reservation_stop_cpu 2573 * on every cpu that can access the PMC. 2574 */ 2575 2576 pmc_internal_reservation_broadcast(reservation, pmc_internal_reservation_stop_cpu); 2577 } 2578 2579 return KERN_SUCCESS; 2580} 2581 2582/* 2583 * pmc_reservation_read will read the event count associated with a reservation. 2584 * If the caller is current executing in a context that both a) matches the 2585 * reservation's context, and b) can access the reservation's pmc directly, the 2586 * value will be read from hardware. Otherwise, this returns the reservation's 2587 * stored value. 2588 * 2589 * NOTE: This method is interrupt safe. 2590 * NOTE: When not on the interrupt stack, this method may block. 2591 */ 2592kern_return_t pmc_reservation_read(pmc_reservation_t reservation, uint64_t *value) { 2593 kern_return_t ret = KERN_FAILURE; 2594 uint64_t timeout; 2595 uint32_t spins; 2596 2597 if(!reservation || !value) { 2598 return KERN_INVALID_ARGUMENT; 2599 } 2600 2601 nanoseconds_to_absolutetime(PMC_SPIN_TIMEOUT_US * 1000, &timeout); 2602 timeout += mach_absolute_time(); 2603 spins = 0; 2604 do { 2605 uint32_t state = reservation->state; 2606 2607 if((PMC_STATE_STATE(state) == PMC_STATE_STATE_RUN)) { 2608 /* Attempt read from hardware via drivers. */ 2609 2610 assert(reservation->pmc); 2611 2612 ret = reservation->pmc->methods.get_count(reservation->pmc->object, value); 2613 2614 break; 2615 } else if ((PMC_STATE_STATE(state) == PMC_STATE_STATE_STORE) || 2616 (PMC_STATE_STATE(state) == PMC_STATE_STATE_LOAD)) { 2617 /* Spin */ 2618 /* Assert if this takes longer than PMC_SPIN_TIMEOUT_US */ 2619 if (++spins > PMC_SPIN_THRESHOLD) { 2620 if (mach_absolute_time() > timeout) { 2621 pmc_spin_timeout_count++; 2622 assert(0); 2623 } 2624 } 2625 2626 cpu_pause(); 2627 } else { 2628 break; 2629 } 2630 } while (1); 2631 2632 /* If the direct hardware read failed (for whatever reason) */ 2633 if(KERN_SUCCESS != ret) { 2634 /* Read stored value */ 2635 *value = reservation->value; 2636 } 2637 2638 return KERN_SUCCESS; 2639} 2640 2641/* 2642 * pmc_reservation_write will write the event count associated with a reservation. 2643 * If the caller is current executing in a context that both a) matches the 2644 * reservation's context, and b) can access the reservation's pmc directly, the 2645 * value will be written to hardware. Otherwise, this writes the reservation's 2646 * stored value. 2647 * 2648 * NOTE: This method is interrupt safe. 2649 * NOTE: When not on the interrupt stack, this method may block. 2650 */ 2651kern_return_t pmc_reservation_write(pmc_reservation_t reservation, uint64_t value) { 2652 kern_return_t ret = KERN_FAILURE; 2653 uint64_t timeout; 2654 uint32_t spins; 2655 2656 if(!reservation) { 2657 return KERN_INVALID_ARGUMENT; 2658 } 2659 2660 nanoseconds_to_absolutetime(PMC_SPIN_TIMEOUT_US * 1000, &timeout); 2661 timeout += mach_absolute_time(); 2662 spins = 0; 2663 do { 2664 uint32_t state = reservation->state; 2665 2666 if((PMC_STATE_STATE(state) == PMC_STATE_STATE_RUN)) { 2667 /* Write to hardware via drivers. */ 2668 assert(reservation->pmc); 2669 2670 ret = reservation->pmc->methods.set_count(reservation->pmc->object, value); 2671 break; 2672 } else if ((PMC_STATE_STATE(state) == PMC_STATE_STATE_STORE) || 2673 (PMC_STATE_STATE(state) == PMC_STATE_STATE_LOAD)) { 2674 /* Spin */ 2675 /* Assert if this takes longer than PMC_SPIN_TIMEOUT_US */ 2676 if (++spins > PMC_SPIN_THRESHOLD) { 2677 if (mach_absolute_time() > timeout) { 2678 pmc_spin_timeout_count++; 2679 assert(0); 2680 } 2681 } 2682 2683 cpu_pause(); 2684 } else { 2685 break; 2686 } 2687 } while (1); 2688 2689 if(KERN_SUCCESS != ret) { 2690 /* Write stored value */ 2691 reservation->value = value; 2692 } 2693 2694 return KERN_SUCCESS; 2695} 2696 2697/* 2698 * pmc_reservation_free releases a reservation and all associated resources. 2699 * 2700 * NOTE: This method is NOT interrupt safe. 2701 */ 2702kern_return_t pmc_reservation_free(pmc_reservation_t reservation) { 2703 pmc_state_t newState; 2704 2705 if(!reservation) { 2706 return KERN_INVALID_ARGUMENT; 2707 } 2708 2709 perf_monitor_reservation_remove(reservation->pmc->monitor); 2710 2711 /* Move the state machine */ 2712 if (PMC_STATE_INVALID == (newState = pmc_internal_reservation_move_for_event(reservation, PMC_STATE_EVENT_FREE, NULL))) { 2713 return KERN_FAILURE; 2714 } 2715 2716 /* If we didn't move directly to DEALLOC, help things along */ 2717 if (PMC_STATE_STATE(newState) != PMC_STATE_STATE_DEALLOC) { 2718 /* A valid state move has been made, but won't be picked up until a context switch occurs. To cause matching 2719 * contexts that are currently running to update, we do an inter-processor message to run pmc_internal_reservation_stop_cpu 2720 * on every cpu that can access the PMC. 2721 */ 2722 pmc_internal_reservation_broadcast(reservation, pmc_internal_reservation_stop_cpu); 2723 } 2724 2725 /* Block until the reservation hits the <DEALLOC, 0, > state */ 2726 while (!(PMC_STATE_STATE(reservation->state) == PMC_STATE_STATE_DEALLOC && PMC_STATE_CONTEXT_COUNT(reservation->state) == 0 && PMC_STATE_FLAGS(reservation->state) == 0)) { 2727 assert_wait((event_t)reservation, THREAD_UNINT); 2728 thread_block(THREAD_CONTINUE_NULL); 2729 } 2730 2731 /* remove from queues */ 2732 pmc_internal_reservation_remove(reservation); 2733 2734 /* free reservation */ 2735 reservation_free(reservation); 2736 2737 return KERN_SUCCESS; 2738} 2739 2740/* 2741 * pmc_idle notifies eligible monitors of impending per-CPU idle, and can be used to save state. 2742 */ 2743boolean_t pmc_idle(void) { 2744 perf_monitor_t monitor = NULL; 2745 queue_head_t *cpu_queue; 2746 2747 lck_spin_lock(&perf_monitor_queue_spin); 2748 2749 if (cpu_monitor_queues) { 2750 cpu_queue = cpu_monitor_queues[cpu_number()]; 2751 2752 queue_iterate(cpu_queue, monitor, perf_monitor_t, cpu_link) { 2753 perf_monitor_methods_t *methods = &(monitor->methods); 2754 if ((methods->flags & PERFMON_FLAG_ALWAYS_ACTIVE) || (monitor->reservedCounters)) { 2755 methods->on_idle(monitor->object); 2756 } 2757 } 2758 } 2759 2760 lck_spin_unlock(&perf_monitor_queue_spin); 2761 2762 return TRUE; 2763} 2764 2765/* 2766 * pmc_idle_exit notifies eligible monitors of wake from idle; it can be used to restore state. 2767 */ 2768boolean_t pmc_idle_exit(void) { 2769 perf_monitor_t monitor = NULL; 2770 queue_head_t *cpu_queue; 2771 2772 lck_spin_lock(&perf_monitor_queue_spin); 2773 2774 if (cpu_monitor_queues) { 2775 cpu_queue = cpu_monitor_queues[cpu_number()]; 2776 2777 queue_iterate(cpu_queue, monitor, perf_monitor_t, cpu_link) { 2778 perf_monitor_methods_t *methods = &(monitor->methods); 2779 if ((methods->flags & PERFMON_FLAG_ALWAYS_ACTIVE) || (monitor->reservedCounters)) { 2780 methods->on_idle_exit(monitor->object); 2781 } 2782 } 2783 } 2784 2785 lck_spin_unlock(&perf_monitor_queue_spin); 2786 2787 return TRUE; 2788} 2789 2790/* 2791 * pmc_context_switch performs all context switching necessary to save all pmc 2792 * state associated with @oldThread (and the task to which @oldThread belongs), 2793 * as well as to restore all pmc state associated with @newThread (and the task 2794 * to which @newThread belongs). 2795 * 2796 * NOTE: This method IS interrupt safe. 2797 */ 2798boolean_t pmc_context_switch(thread_t oldThread, thread_t newThread) { 2799 pmc_reservation_t resv = NULL; 2800 uint32_t cpuNum = cpu_number(); 2801 2802 lck_spin_lock(&reservations_spin); 2803 2804 /* Save pmc states */ 2805 if (thread_reservation_count) { 2806 queue_iterate(thread_reservations, resv, pmc_reservation_t, link) { 2807 if ((oldThread == resv->thread) && pmc_accessible_from_core(resv->pmc, cpuNum)) { 2808 (void)pmc_internal_reservation_context_out(resv); 2809 } 2810 } 2811 } 2812 2813 if (task_reservation_count) { 2814 queue_iterate(task_reservations, resv, pmc_reservation_t, link) { 2815 if ((resv->task == oldThread->task) && pmc_accessible_from_core(resv->pmc, cpuNum)) { 2816 (void)pmc_internal_reservation_context_out(resv); 2817 } 2818 } 2819 } 2820 2821 /* Restore */ 2822 if (thread_reservation_count) { 2823 queue_iterate(thread_reservations, resv, pmc_reservation_t, link) { 2824 if ((resv->thread == newThread) && pmc_accessible_from_core(resv->pmc, cpuNum)) { 2825 (void)pmc_internal_reservation_context_in(resv); 2826 } 2827 } 2828 } 2829 2830 if (task_reservation_count) { 2831 queue_iterate(task_reservations, resv, pmc_reservation_t, link) { 2832 if ((resv->task == newThread->task) && pmc_accessible_from_core(resv->pmc, cpuNum)) { 2833 (void)pmc_internal_reservation_context_in(resv); 2834 } 2835 } 2836 } 2837 2838 lck_spin_unlock(&reservations_spin); 2839 2840 return TRUE; 2841} 2842 2843#else /* !CONFIG_COUNTERS */ 2844 2845#if 0 2846#pragma mark - 2847#pragma mark Dummy functions 2848#endif 2849 2850/* 2851 * In the case that someone has chosen not to include the PMC KPI in some 2852 * configuration, we still have exports for kexts, so we'll need to define stub 2853 * methods that return failures. 2854 */ 2855kern_return_t perf_monitor_register(perf_monitor_object_t monitor __unused, 2856 perf_monitor_methods_t *methods __unused) { 2857 return KERN_FAILURE; 2858} 2859 2860kern_return_t perf_monitor_unregister(perf_monitor_object_t monitor __unused) { 2861 return KERN_FAILURE; 2862} 2863 2864kern_return_t pmc_register(perf_monitor_object_t monitor __unused, 2865 pmc_object_t pmc __unused, pmc_methods_t *methods __unused, void *object __unused) { 2866 return KERN_FAILURE; 2867} 2868 2869kern_return_t pmc_unregister(perf_monitor_object_t monitor __unused, 2870 pmc_object_t pmc __unused) { 2871 return KERN_FAILURE; 2872} 2873 2874kern_return_t pmc_create_config(pmc_t pmc __unused, 2875 pmc_config_t *config __unused) { 2876 return KERN_FAILURE; 2877} 2878 2879void pmc_free_config(pmc_t pmc __unused, pmc_config_t config __unused) { 2880} 2881 2882kern_return_t pmc_config_set_value(pmc_t pmc __unused, 2883 pmc_config_t config __unused, uint8_t id __unused, 2884 uint64_t value __unused) { 2885 return KERN_FAILURE; 2886} 2887 2888kern_return_t pmc_config_set_interrupt_threshold(pmc_t pmc __unused, 2889 pmc_config_t config __unused, uint64_t threshold __unused, 2890 pmc_interrupt_method_t method __unused, void *refCon __unused) { 2891 return KERN_FAILURE; 2892} 2893 2894kern_return_t pmc_get_pmc_list(pmc_t **pmcs __unused, size_t *pmcCount __unused) { 2895 return KERN_FAILURE; 2896} 2897 2898void pmc_free_pmc_list(pmc_t *pmcs __unused, size_t pmcCount __unused) { 2899} 2900 2901kern_return_t pmc_find_by_name(const char *name __unused, pmc_t **pmcs __unused, 2902 size_t *pmcCount __unused) { 2903 return KERN_FAILURE; 2904} 2905 2906const char *pmc_get_name(pmc_t pmc __unused) { 2907 return ""; 2908} 2909 2910kern_return_t pmc_get_accessible_core_list(pmc_t pmc __unused, 2911 uint32_t **logicalCores __unused, size_t *logicalCoreCt __unused) { 2912 return KERN_FAILURE; 2913} 2914 2915kern_return_t pmc_reserve(pmc_t pmc __unused, 2916 pmc_config_t config __unused, pmc_reservation_t *reservation __unused) { 2917 return KERN_FAILURE; 2918} 2919 2920kern_return_t pmc_reserve_task(pmc_t pmc __unused, 2921 pmc_config_t config __unused, task_t task __unused, 2922 pmc_reservation_t *reservation __unused) { 2923 return KERN_FAILURE; 2924} 2925 2926kern_return_t pmc_reserve_thread(pmc_t pmc __unused, 2927 pmc_config_t config __unused, thread_t thread __unused, 2928 pmc_reservation_t *reservation __unused) { 2929 return KERN_FAILURE; 2930} 2931 2932kern_return_t pmc_reservation_start(pmc_reservation_t reservation __unused) { 2933 return KERN_FAILURE; 2934} 2935 2936kern_return_t pmc_reservation_stop(pmc_reservation_t reservation __unused) { 2937 return KERN_FAILURE; 2938} 2939 2940kern_return_t pmc_reservation_read(pmc_reservation_t reservation __unused, 2941 uint64_t *value __unused) { 2942 return KERN_FAILURE; 2943} 2944 2945kern_return_t pmc_reservation_write(pmc_reservation_t reservation __unused, 2946 uint64_t value __unused) { 2947 return KERN_FAILURE; 2948} 2949 2950kern_return_t pmc_reservation_free(pmc_reservation_t reservation __unused) { 2951 return KERN_FAILURE; 2952} 2953 2954 2955#endif /* !CONFIG_COUNTERS */ 2956