/*
 * Copyright (c) 2009 Apple Inc. All rights reserved.
 *
 * @APPLE_LICENSE_HEADER_START@
 *
 * This file contains Original Code and/or Modifications of Original Code
 * as defined in and that are subject to the Apple Public Source License
 * Version 2.0 (the 'License'). You may not use this file except in
 * compliance with the License. Please obtain a copy of the License at
 * http://www.opensource.apple.com/apsl/ and read it before using this
 * file.
 *
 * The Original Code and all software distributed under the License are
 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
 * Please see the License for the specific language governing rights and
 * limitations under the License.
 *
 * @APPLE_LICENSE_HEADER_END@
 */

/* Header set assumed from the primitives used below (kalloc, lck_*, queue_*,
 * thread/task, sched, ml_*, OSAtomic). */
#include <kern/kalloc.h>
#include <kern/locks.h>
#include <kern/pmc.h>
#include <kern/processor.h>
#include <kern/queue.h>
#include <kern/sched_prim.h>
#include <kern/task.h>
#include <kern/thread.h>
#include <machine/machine_routines.h>
#include <libkern/OSAtomic.h>

#if defined(__i386__) || defined(__x86_64__)
#include <i386/mp.h>
#endif

#if CONFIG_COUNTERS

/* various debug logging enable */
#undef DEBUG_COUNTERS

typedef uint8_t pmc_state_event_t;

#define PMC_STATE_EVENT_START            0
#define PMC_STATE_EVENT_STOP             1
#define PMC_STATE_EVENT_FREE             2
#define PMC_STATE_EVENT_INTERRUPT        3
#define PMC_STATE_EVENT_END_OF_INTERRUPT 4
#define PMC_STATE_EVENT_CONTEXT_IN       5
#define PMC_STATE_EVENT_CONTEXT_OUT      6
#define PMC_STATE_EVENT_LOAD_FINISHED    7
#define PMC_STATE_EVENT_STORE_FINISHED   8

/* PMC spin timeouts */
#define PMC_SPIN_THRESHOLD  10	/* Number of spins to allow before checking mach_absolute_time() */
#define PMC_SPIN_TIMEOUT_US 10	/* Time in microseconds before the spin causes an assert */

uint64_t pmc_spin_timeout_count = 0;	/* Number of times a PMC spin loop has timed out */

#ifdef DEBUG_COUNTERS
# include <pexpert/pexpert.h>	/* assumed here for kprintf() */
# define COUNTER_DEBUG(...) \
	do { \
		kprintf("[%s:%s][%u] ", __FILE__, __PRETTY_FUNCTION__, cpu_number()); \
		kprintf(__VA_ARGS__); \
	} while(0)

# define PRINT_PERF_MON(x) \
	do { \
		kprintf("perfmon: %p (obj: %p refCt: %u switchable: %u)\n", \
			x, x->object, x->useCount, \
			(x->methods.flags & PERFMON_FLAG_SUPPORTS_CONTEXT_SWITCHING) ? \
			1 : 0); \
	} while(0)

static const char *pmc_state_state_name(pmc_state_t state) {
	switch (PMC_STATE_STATE(state)) {
	case PMC_STATE_STATE_INVALID:
		return "INVALID";
	case PMC_STATE_STATE_STOP:
		return "STOP";
	case PMC_STATE_STATE_CAN_RUN:
		return "CAN_RUN";
	case PMC_STATE_STATE_LOAD:
		return "LOAD";
	case PMC_STATE_STATE_RUN:
		return "RUN";
	case PMC_STATE_STATE_STORE:
		return "STORE";
	case PMC_STATE_STATE_INTERRUPT:
		return "INTERRUPT";
	case PMC_STATE_STATE_DEALLOC:
		return "DEALLOC";
	default:
		return "UNKNOWN";
	}
}

static const char *pmc_state_event_name(pmc_state_event_t event) {
	switch (event) {
	case PMC_STATE_EVENT_START:
		return "START";
	case PMC_STATE_EVENT_STOP:
		return "STOP";
	case PMC_STATE_EVENT_FREE:
		return "FREE";
	case PMC_STATE_EVENT_INTERRUPT:
		return "INTERRUPT";
	case PMC_STATE_EVENT_END_OF_INTERRUPT:
		return "END OF INTERRUPT";
	case PMC_STATE_EVENT_CONTEXT_IN:
		return "CONTEXT IN";
	case PMC_STATE_EVENT_CONTEXT_OUT:
		return "CONTEXT OUT";
	case PMC_STATE_EVENT_LOAD_FINISHED:
		return "LOAD_FINISHED";
	case PMC_STATE_EVENT_STORE_FINISHED:
		return "STORE_FINISHED";
	default:
		return "UNKNOWN";
	}
}

# define PMC_STATE_FORMAT "<%s, %u, %s%s%s>"
# define PMC_STATE_ARGS(x) pmc_state_state_name(x), PMC_STATE_CONTEXT_COUNT(x), ((PMC_STATE_FLAGS(x) & PMC_STATE_FLAGS_INTERRUPTING) ? \
"I" : ""), \ ((PMC_STATE_FLAGS(x) & PMC_STATE_FLAGS_STOPPING) ? "S" : ""), ((PMC_STATE_FLAGS(x) & PMC_STATE_FLAGS_DEALLOCING) ? "D" : "") #else # define COUNTER_DEBUG(...) # define PRINT_PERF_MON(x) # define PMC_STATE_FORMAT # define PMC_STATE_ARGS(x) #endif /*!struct * pmc_config is the data behind a pmc_config_t. * @member object A pointer to an instance of IOPerformanceCounterConfiguration * @member method A pointer to a method to call to handle PMI. * @member interrupt_after_value Cause a PMI after the counter counts this many * events. * @member refCon Passed to the @method method as the refCon argument. */ struct pmc_config { pmc_config_object_t object; volatile pmc_interrupt_method_t method; uint64_t interrupt_after_value; void *refCon; }; /* * Allocation Zones * * Two allocation zones - Perf zone small and Perf zone big. * Each zone has associated maximums, defined below. * The small zone is the max of the smallest allocation objects (all sizes on * K64): * perf_monitor_t - 48 bytes * perf_monitor_methods_t - 28 bytes * pmc_reservation_t - 48 bytes * pmc_config_t - 32 bytes * perf_small_zone unit size is (on K64) 48 bytes * perf_small_zone max count must be max number of perf monitors, plus (max * number of reservations * 2). The "*2" is because each reservation has a * pmc_config_t within. * * Big zone is max of the larger allocation units * pmc_t - 144 bytes * pmc_methods_t - 116 bytes * perf_big_zone unit size is (on K64) 144 bytes * perf_big_zone max count is the max number of PMCs we support. */ static zone_t perf_small_zone = NULL; #define MAX_PERF_SMALLS (256 + 8196 + 8196) #define PERF_SMALL_UNIT_SZ (MAX(MAX(sizeof(struct perf_monitor), \ sizeof(struct pmc_reservation)), sizeof(struct pmc_config))) static zone_t perf_big_zone = NULL; #define MAX_PERF_BIGS (1024) #define PERF_BIG_UNIT_SZ (sizeof(struct pmc)) /* * Locks and Lock groups */ static lck_grp_t *pmc_lock_grp = LCK_GRP_NULL; static lck_grp_attr_t *pmc_lock_grp_attr; static lck_attr_t *pmc_lock_attr; /* PMC tracking queue locks */ static lck_mtx_t cpu_monitor_queue_mutex; /* protects per-cpu queues at initialisation time */ static lck_spin_t perf_monitor_queue_spin; /* protects adding and removing from queue */ static lck_spin_t perf_counters_queue_spin; /* protects adding and removing from queue */ /* Reservation tracking queues lock */ static lck_spin_t reservations_spin; /* * Tracking queues * * Keeps track of registered perf monitors and perf counters */ static queue_head_t **cpu_monitor_queues = NULL; static queue_head_t *perf_monitors_queue = NULL; static volatile uint32_t perf_monitors_count = 0U; static queue_head_t *perf_counters_queue = NULL; static volatile uint32_t perf_counters_count = 0U; /* * Reservation queues * * Keeps track of all system, task, and thread-level reservations (both active and * inactive). * * We track them all here (rather than in their respective task or thread only) * so that we can inspect our tracking data directly (rather than peeking at * every task and thread) to determine if/when a new reservation would * constitute a conflict. 
 */
static queue_head_t *system_reservations = NULL;
static volatile uint32_t system_reservation_count = 0U;

static queue_head_t *task_reservations = NULL;
static volatile uint32_t task_reservation_count = 0U;

static queue_head_t *thread_reservations = NULL;
static volatile uint32_t thread_reservation_count = 0U;

#if XNU_KERNEL_PRIVATE

/*
 * init_pmc_locks creates and initializes all the locks, lock groups, and lock
 * attributes required for the pmc sub-system.
 */
static void init_pmc_locks(void) {
	pmc_lock_attr = lck_attr_alloc_init();
	assert(pmc_lock_attr);

	pmc_lock_grp_attr = lck_grp_attr_alloc_init();
	assert(pmc_lock_grp_attr);

	pmc_lock_grp = lck_grp_alloc_init("pmc", pmc_lock_grp_attr);
	assert(pmc_lock_grp);

	lck_spin_init(&perf_monitor_queue_spin, pmc_lock_grp, pmc_lock_attr);
	lck_spin_init(&perf_counters_queue_spin, pmc_lock_grp, pmc_lock_attr);
	lck_spin_init(&reservations_spin, pmc_lock_grp, pmc_lock_attr);

	lck_mtx_init(&cpu_monitor_queue_mutex, pmc_lock_grp, pmc_lock_attr);
}

/*
 * init_pmc_zones initializes the allocation zones used by the pmc subsystem.
 */
static void init_pmc_zones(void) {
	perf_small_zone = zinit(PERF_SMALL_UNIT_SZ,
		MAX_PERF_SMALLS * PERF_SMALL_UNIT_SZ, MAX_PERF_SMALLS,
		"pmc.small zone");
	assert(perf_small_zone);

	perf_big_zone = zinit(PERF_BIG_UNIT_SZ,
		MAX_PERF_BIGS * PERF_BIG_UNIT_SZ, MAX_PERF_BIGS,
		"pmc.big zone");
	assert(perf_big_zone);
}

/*
 * init_pmc_queues allocates and initializes the tracking queues for
 * registering and reserving individual pmcs and perf monitors.
 */
static void init_pmc_queues(void) {
	perf_monitors_queue = (queue_head_t*)kalloc(sizeof(queue_head_t));
	assert(perf_monitors_queue);
	queue_init(perf_monitors_queue);

	perf_counters_queue = (queue_head_t*)kalloc(sizeof(queue_head_t));
	assert(perf_counters_queue);
	queue_init(perf_counters_queue);

	system_reservations = (queue_head_t*)kalloc(sizeof(queue_head_t));
	assert(system_reservations);
	queue_init(system_reservations);

	task_reservations = (queue_head_t*)kalloc(sizeof(queue_head_t));
	assert(task_reservations);
	queue_init(task_reservations);

	thread_reservations = (queue_head_t*)kalloc(sizeof(queue_head_t));
	assert(thread_reservations);
	queue_init(thread_reservations);
}

/*
 * pmc_bootstrap brings up all the necessary infrastructure required to use the
 * pmc sub-system.
 */
__private_extern__
void pmc_bootstrap(void) {
	/* build our alloc zones */
	init_pmc_zones();

	/* build the locks */
	init_pmc_locks();

	/* build our tracking queues */
	init_pmc_queues();
}

#endif /* XNU_KERNEL_PRIVATE */

/*
 * Perf Monitor Internals
 */

static perf_monitor_t perf_monitor_alloc(void) {
	/* perf monitors come from the perf small zone */
	return (perf_monitor_t)zalloc(perf_small_zone);
}

static void perf_monitor_free(void *pm) {
	zfree(perf_small_zone, pm);
}

static void perf_monitor_init(perf_monitor_t pm, int cpu) {
	assert(pm);

	pm->object = NULL;

	bzero(&(pm->methods), sizeof(perf_monitor_methods_t));

	pm->useCount = 1;	/* initial retain count of 1, for caller */

	pm->reservedCounters = 0;

	pm->cpu = cpu;

	pm->link.next = pm->link.prev = (queue_entry_t)NULL;
	pm->cpu_link.next = pm->cpu_link.prev = (queue_entry_t)NULL;
}

/*
 * perf_monitor_dequeue removes the given perf_monitor_t from the
 * perf_monitor_queue, thereby unregistering it with the system.
 */
static void perf_monitor_dequeue(perf_monitor_t pm) {
	lck_spin_lock(&perf_monitor_queue_spin);

	if (pm->methods.flags & PERFMON_FLAG_REQUIRES_IDLE_NOTIFICATIONS) {
		/* If this flag is set, the monitor is already validated to be
		 * accessible from a single cpu only.
*/ queue_remove(cpu_monitor_queues[pm->cpu], pm, perf_monitor_t, cpu_link); } /* * remove the @pm object from the @perf_monitor_queue queue (it is of type * and has a field called @link that is the queue_link_t */ queue_remove(perf_monitors_queue, pm, perf_monitor_t, link); perf_monitors_count--; lck_spin_unlock(&perf_monitor_queue_spin); } /* * perf_monitor_enqueue adds the given perf_monitor_t to the perf_monitor_queue, * thereby registering it for use with the system. */ static void perf_monitor_enqueue(perf_monitor_t pm) { lck_mtx_lock(&cpu_monitor_queue_mutex); lck_spin_lock(&perf_monitor_queue_spin); if (pm->cpu >= 0) { /* Deferred initialisation; saves memory and permits ml_get_max_cpus() * to block until cpu initialisation is complete. */ if (!cpu_monitor_queues) { uint32_t max_cpus; queue_head_t **queues; uint32_t i; lck_spin_unlock(&perf_monitor_queue_spin); max_cpus = ml_get_max_cpus(); queues = (queue_head_t**)kalloc(sizeof(queue_head_t*) * max_cpus); assert(queues); for (i = 0; i < max_cpus; i++) { queue_head_t *queue = (queue_head_t*)kalloc(sizeof(queue_head_t)); assert(queue); queue_init(queue); queues[i] = queue; } lck_spin_lock(&perf_monitor_queue_spin); cpu_monitor_queues = queues; } queue_enter(cpu_monitor_queues[pm->cpu], pm, perf_monitor_t, cpu_link); } queue_enter(perf_monitors_queue, pm, perf_monitor_t, link); perf_monitors_count++; lck_spin_unlock(&perf_monitor_queue_spin); lck_mtx_unlock(&cpu_monitor_queue_mutex); } /* * perf_monitor_reference increments the reference count for the given * perf_monitor_t. */ static void perf_monitor_reference(perf_monitor_t pm) { assert(pm); OSIncrementAtomic(&(pm->useCount)); } /* * perf_monitor_deallocate decrements the reference count for the given * perf_monitor_t. If the reference count hits 0, the object is released back * to the perf_small_zone via a call to perf_monitor_free(). */ static void perf_monitor_deallocate(perf_monitor_t pm) { assert(pm); /* If we just removed the last reference count */ if(1 == OSDecrementAtomic(&(pm->useCount))) { /* Free the object */ perf_monitor_free(pm); } } /* * perf_monitor_find attempts to find a perf_monitor_t that corresponds to the * given C++ object pointer that was used when registering with the subsystem. * * If found, the method returns the perf_monitor_t with an extra reference * placed on the object (or NULL if not * found). * * NOTE: Caller must use perf_monitor_deallocate to remove the extra reference after * calling perf_monitor_find. */ static perf_monitor_t perf_monitor_find(perf_monitor_object_t monitor) { assert(monitor); perf_monitor_t element = NULL; perf_monitor_t found = NULL; lck_spin_lock(&perf_monitor_queue_spin); queue_iterate(perf_monitors_queue, element, perf_monitor_t, link) { if(element->object == monitor) { perf_monitor_reference(element); found = element; break; } } lck_spin_unlock(&perf_monitor_queue_spin); return found; } /* * perf_monitor_add_pmc adds a newly registered PMC to the perf monitor it is * associated with. */ static void perf_monitor_add_pmc(perf_monitor_t pm, pmc_t pmc __unused) { assert(pm); assert(pmc); /* Today, we merely add a reference count now that a new pmc is attached */ perf_monitor_reference(pm); } /* * perf_monitor_remove_pmc removes a newly *un*registered PMC from the perf * monitor it is associated with. 
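 * This drops the reference that perf_monitor_add_pmc() took on the monitor
 * when the PMC was attached, so the two calls must always be paired.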
*/ static void perf_monitor_remove_pmc(perf_monitor_t pm, pmc_t pmc __unused) { assert(pm); assert(pmc); /* Today, we merely remove a reference count now that the pmc is detached */ perf_monitor_deallocate(pm); } /* * Perf Counter internals */ static pmc_t pmc_alloc(void) { return (pmc_t)zalloc(perf_big_zone); } static void pmc_free(void *pmc) { zfree(perf_big_zone, pmc); } /* * pmc_init initializes a newly allocated pmc_t */ static void pmc_init(pmc_t pmc) { assert(pmc); pmc->object = NULL; pmc->monitor = NULL; bzero(&pmc->methods, sizeof(pmc_methods_t)); /* One reference for the caller */ pmc->useCount = 1; } /* * pmc_reference increments the reference count of the given pmc_t */ static void pmc_reference(pmc_t pmc) { assert(pmc); OSIncrementAtomic(&(pmc->useCount)); } /* * pmc_deallocate decrements the reference count of the given pmc_t. If the * reference count hits zero, the given pmc_t is deallocated and released back * to the allocation zone. */ static void pmc_deallocate(pmc_t pmc) { assert(pmc); /* If we just removed the last reference count */ if(1 == OSDecrementAtomic(&(pmc->useCount))) { /* Free the pmc */ pmc_free(pmc); } } /* * pmc_dequeue removes the given, newly *un*registered pmc from the * perf_counters_queue. */ static void pmc_dequeue(pmc_t pmc) { lck_spin_lock(&perf_counters_queue_spin); queue_remove(perf_counters_queue, pmc, pmc_t, link); perf_counters_count--; lck_spin_unlock(&perf_counters_queue_spin); } /* * pmc_enqueue adds the given, newly registered pmc to the perf_counters_queue */ static void pmc_enqueue(pmc_t pmc) { lck_spin_lock(&perf_counters_queue_spin); queue_enter(perf_counters_queue, pmc, pmc_t, link); perf_counters_count++; lck_spin_unlock(&perf_counters_queue_spin); } /* * pmc_find attempts to locate a pmc_t that was registered with the given * pmc_object_t pointer. If found, it returns the pmc_t with an extra reference * which must be dropped by the caller by calling pmc_deallocate(). 
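 *
 * Typical usage (illustrative sketch):
 *
 *     pmc_t found = pmc_find(pmc_object);
 *     if (found) {
 *         // ... use found ...
 *         pmc_deallocate(found);  // drop the reference taken by pmc_find()
 *     }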
*/ static pmc_t pmc_find(pmc_object_t object) { assert(object); lck_spin_lock(&perf_counters_queue_spin); pmc_t element = NULL; pmc_t found = NULL; queue_iterate(perf_counters_queue, element, pmc_t, link) { if(element->object == object) { pmc_reference(element); found = element; break; } } lck_spin_unlock(&perf_counters_queue_spin); return found; } /* * Config internals */ /* Allocate a pmc_config_t */ static pmc_config_t pmc_config_alloc(pmc_t pmc __unused) { return (pmc_config_t)zalloc(perf_small_zone); } /* Free a pmc_config_t, and underlying pmc_config_object_t (if needed) */ static void pmc_config_free(pmc_t pmc, pmc_config_t config) { assert(pmc); assert(config); if(config->object) { pmc->methods.free_config(pmc->object, config->object); config->object = NULL; } zfree(perf_small_zone, config); } static kern_return_t pmc_open(pmc_t pmc) { assert(pmc); assert(pmc->object); assert(pmc->open_object); return pmc->methods.open(pmc->object, pmc->open_object); } static kern_return_t pmc_close(pmc_t pmc) { assert(pmc); assert(pmc->object); assert(pmc->open_object); return pmc->methods.close(pmc->object, pmc->open_object); } /* * Reservation Internals */ static kern_return_t pmc_internal_reservation_set_pmc(pmc_reservation_t resv, pmc_t pmc); static void pmc_internal_reservation_store(pmc_reservation_t reservation); static void pmc_internal_reservation_load(pmc_reservation_t reservation); static pmc_reservation_t reservation_alloc(void) { /* pmc reservations come from the perf small zone */ return (pmc_reservation_t)zalloc(perf_small_zone); } /* * reservation_free deallocates and releases all resources associated with the * given pmc_reservation_t. This includes freeing the config used to create the * reservation, decrementing the reference count for the pmc used to create the * reservation, and deallocating the reservation's memory. */ static void reservation_free(pmc_reservation_t resv) { /* Free config */ if(resv->config) { assert(resv->pmc); pmc_free_config(resv->pmc, resv->config); resv->config = NULL; } /* release PMC */ (void)pmc_internal_reservation_set_pmc(resv, NULL); /* Free reservation */ zfree(perf_small_zone, resv); } /* * reservation_init initializes a newly created reservation. */ static void reservation_init(pmc_reservation_t resv) { assert(resv); resv->pmc = NULL; resv->config = NULL; resv->value = 0ULL; resv->flags = 0U; resv->state = PMC_STATE(PMC_STATE_STATE_STOP, 0, 0); resv->active_last_context_in = 0U; /* * Since this member is a union, we only need to set either the task * or thread to NULL. */ resv->task = TASK_NULL; } /* * pmc_internal_reservation_set_pmc sets the pmc associated with the reservation object. If * there was one set already, it is deallocated (reference is dropped) before * the new one is set. This methods increases the reference count of the given * pmc_t. * * NOTE: It is okay to pass NULL as the pmc_t - this will have the effect of * dropping the reference on any previously set pmc, and setting the reservation * to having no pmc set. */ static kern_return_t pmc_internal_reservation_set_pmc(pmc_reservation_t resv, pmc_t pmc) { assert(resv); if(resv->pmc) { (void)pmc_close(resv->pmc); pmc_deallocate(resv->pmc); resv->pmc = NULL; } resv->pmc = pmc; if(resv->pmc) { pmc_reference(resv->pmc); if(KERN_SUCCESS != pmc_open(resv->pmc)) { pmc_deallocate(resv->pmc); resv->pmc = NULL; return KERN_FAILURE; } } return KERN_SUCCESS; } /* * Used to place reservation into one of the system, task, and thread queues * Assumes the queue's spin lock is already held. 
*/ static void pmc_internal_reservation_enqueue(queue_t queue, pmc_reservation_t resv) { assert(queue); assert(resv); queue_enter(queue, resv, pmc_reservation_t, link); } static void pmc_internal_reservation_dequeue(queue_t queue, pmc_reservation_t resv) { assert(queue); assert(resv); queue_remove(queue, resv, pmc_reservation_t, link); } /* Returns TRUE if the reservation applies to the current execution context */ static boolean_t pmc_internal_reservation_matches_context(pmc_reservation_t resv) { boolean_t ret = FALSE; assert(resv); if(PMC_FLAG_IS_SYSTEM_SCOPE(resv->flags)) { ret = TRUE; } else if(PMC_FLAG_IS_TASK_SCOPE(resv->flags)) { if(current_task() == resv->task) { ret = TRUE; } } else if(PMC_FLAG_IS_THREAD_SCOPE(resv->flags)) { if(current_thread() == resv->thread) { ret = TRUE; } } return ret; } /* * pmc_accessible_core_count returns the number of logical cores that can access * a given @pmc. 0 means every core in the system. */ static uint32_t pmc_accessible_core_count(pmc_t pmc) { assert(pmc); uint32_t *cores = NULL; size_t coreCt = 0UL; if(KERN_SUCCESS != pmc->methods.accessible_cores(pmc->object, &cores, &coreCt)) { coreCt = 0U; } return (uint32_t)coreCt; } /* spin lock for the queue must already be held */ /* * This method will inspect the task/thread of the reservation to see if it * matches the new incoming one (for thread/task reservations only). Will only * return TRUE if the task/thread matches. */ static boolean_t pmc_internal_reservation_queue_contains_pmc(queue_t queue, pmc_reservation_t resv) { assert(queue); assert(resv); boolean_t ret = FALSE; pmc_reservation_t tmp = NULL; queue_iterate(queue, tmp, pmc_reservation_t, link) { if(tmp->pmc == resv->pmc) { /* PMC matches - make sure scope matches first */ switch(PMC_FLAG_SCOPE(tmp->flags)) { case PMC_FLAG_SCOPE_SYSTEM: /* * Found a reservation in system queue with same pmc - always a * conflict. */ ret = TRUE; break; case PMC_FLAG_SCOPE_THREAD: /* * Found one in thread queue with the same PMC as the * argument. Only a conflict if argument scope isn't * thread or system, or the threads match. */ ret = (PMC_FLAG_SCOPE(resv->flags) != PMC_FLAG_SCOPE_THREAD) || (tmp->thread == resv->thread); if(!ret) { /* * so far, no conflict - check that the pmc that is * being reserved isn't accessible from more than * one core, if it is, we need to say it's already * taken. */ if(1 != pmc_accessible_core_count(tmp->pmc)) { ret = TRUE; } } break; case PMC_FLAG_SCOPE_TASK: /* * Follow similar semantics for task scope. */ ret = (PMC_FLAG_SCOPE(resv->flags) != PMC_FLAG_SCOPE_TASK) || (tmp->task == resv->task); if(!ret) { /* * so far, no conflict - check that the pmc that is * being reserved isn't accessible from more than * one core, if it is, we need to say it's already * taken. */ if(1 != pmc_accessible_core_count(tmp->pmc)) { ret = TRUE; } } break; } if(ret) break; } } return ret; } /* * pmc_internal_reservation_validate_for_pmc returns TRUE if the given reservation can be * added to its target queue without creating conflicts (target queue is * determined by the reservation's scope flags). Further, this method returns * FALSE if any level contains a reservation for a PMC that can be accessed from * more than just 1 core, and the given reservation also wants the same PMC. 
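 *
 * For example (illustrative): a thread-scope reservation of a core-private PMC
 * for thread A does not conflict with a thread-scope reservation of the same
 * PMC for thread B, since a single-core PMC can only ever be counting the one
 * thread currently running on that core. If the same PMC were accessible from
 * more than one core, the second reservation would be rejected, because both
 * threads could then be running (and counting) on different cores at once.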
*/ static boolean_t pmc_internal_reservation_validate_for_pmc(pmc_reservation_t resv) { assert(resv); boolean_t ret = TRUE; if(pmc_internal_reservation_queue_contains_pmc(system_reservations, resv) || pmc_internal_reservation_queue_contains_pmc(task_reservations, resv) || pmc_internal_reservation_queue_contains_pmc(thread_reservations, resv)) { ret = FALSE; } return ret; } static void pmc_internal_update_thread_flag(thread_t thread, boolean_t newFlag) { assert(thread); /* See if this thread needs it's PMC flag set */ pmc_reservation_t tmp = NULL; if(!newFlag) { /* * If the parent task just dropped its reservation, iterate the thread * reservations to see if we need to keep the pmc flag set for the given * thread or not. */ lck_spin_lock(&reservations_spin); queue_iterate(thread_reservations, tmp, pmc_reservation_t, link) { if(tmp->thread == thread) { newFlag = TRUE; break; } } lck_spin_unlock(&reservations_spin); } if(newFlag) { OSBitOrAtomic(THREAD_PMC_FLAG, &thread->t_chud); } else { OSBitAndAtomic(~(THREAD_PMC_FLAG), &thread->t_chud); } } /* * This operation is (worst case) O(N*M) where N is number of threads in the * given task, and M is the number of thread reservations in our system. */ static void pmc_internal_update_task_flag(task_t task, boolean_t newFlag) { assert(task); thread_t thread = NULL; if(newFlag) { OSBitOrAtomic(TASK_PMC_FLAG, &task->t_chud); } else { OSBitAndAtomic(~(TASK_PMC_FLAG), &task->t_chud); } task_lock(task); queue_iterate(&task->threads, thread, thread_t, task_threads) { /* propagate the task's mask down to each thread */ pmc_internal_update_thread_flag(thread, newFlag); } task_unlock(task); } /* * pmc_internal_reservation_add adds a reservation to the global tracking queues after * ensuring there are no reservation conflicts. To do this, it takes all the * spin locks for all the queue (to ensure no other core goes and adds a * reservation for the same pmc to a queue that has already been checked). 
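 *
 * (In the current implementation all three reservation queues are guarded by
 * the single reservations_spin lock, so taking that one lock covers the
 * system, task, and thread queues at once.)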
*/ static boolean_t pmc_internal_reservation_add(pmc_reservation_t resv) { assert(resv); boolean_t ret = FALSE; /* always lock all three in the same order */ lck_spin_lock(&reservations_spin); /* Check if the reservation can be added without conflicts */ if(pmc_internal_reservation_validate_for_pmc(resv)) { /* add reservation to appropriate scope */ switch(PMC_FLAG_SCOPE(resv->flags)) { case PMC_FLAG_SCOPE_SYSTEM: /* Simply add it to the system queue */ pmc_internal_reservation_enqueue(system_reservations, resv); system_reservation_count++; lck_spin_unlock(&reservations_spin); break; case PMC_FLAG_SCOPE_TASK: assert(resv->task); /* Not only do we enqueue it in our local queue for tracking */ pmc_internal_reservation_enqueue(task_reservations, resv); task_reservation_count++; lck_spin_unlock(&reservations_spin); /* update the task mask, and propagate it to existing threads */ pmc_internal_update_task_flag(resv->task, TRUE); break; /* Thread-switched counter */ case PMC_FLAG_SCOPE_THREAD: assert(resv->thread); /* * Works the same as a task-switched counter, only at * thread-scope */ pmc_internal_reservation_enqueue(thread_reservations, resv); thread_reservation_count++; lck_spin_unlock(&reservations_spin); pmc_internal_update_thread_flag(resv->thread, TRUE); break; } ret = TRUE; } else { lck_spin_unlock(&reservations_spin); } return ret; } static void pmc_internal_reservation_broadcast(pmc_reservation_t reservation, void (*action_func)(void *)) { uint32_t * cores; size_t core_cnt; /* Get the list of accessible cores */ if (KERN_SUCCESS == pmc_get_accessible_core_list(reservation->pmc, &cores, &core_cnt)) { boolean_t intrs_enabled = ml_set_interrupts_enabled(FALSE); /* Fast case: the PMC is only accessible from one core and we happen to be on it */ if (core_cnt == 1 && cores[0] == (uint32_t)cpu_number()) { action_func(reservation); } else { /* Call action_func on every accessible core */ #if defined(__i386__) || defined(__x86_64__) size_t ii; cpumask_t mask = 0; /* Build a mask for the accessible cores */ if (core_cnt > 0) { for (ii = 0; ii < core_cnt; ii++) { mask |= cpu_to_cpumask(cores[ii]); } } else { /* core_cnt = 0 really means all cpus */ mask = CPUMASK_ALL; } mp_cpus_call(mask, ASYNC, action_func, reservation); #else #error pmc_reservation_interrupt needs an inter-processor method invocation mechanism for this architecture #endif } ml_set_interrupts_enabled(intrs_enabled); } } /* * pmc_internal_reservation_remove removes the given reservation from the appropriate * reservation queue according to its scope. * * NOTE: The scope flag must have been set for this method to function. */ static void pmc_internal_reservation_remove(pmc_reservation_t resv) { assert(resv); /* * Due to the way the macros are written, we can't just blindly queue-remove * the reservation without knowing which queue it's in. We figure this out * using the reservation's scope flags. 
 */

	/* Lock the global spin lock */
	lck_spin_lock(&reservations_spin);

	switch(PMC_FLAG_SCOPE(resv->flags)) {

	case PMC_FLAG_SCOPE_SYSTEM:
		pmc_internal_reservation_dequeue(system_reservations, resv);
		system_reservation_count--;

		lck_spin_unlock(&reservations_spin);

		break;

	case PMC_FLAG_SCOPE_TASK:
		/* remove from the global queue */
		pmc_internal_reservation_dequeue(task_reservations, resv);
		task_reservation_count--;

		/* unlock the global */
		lck_spin_unlock(&reservations_spin);

		/* Recalculate task's counter mask */
		pmc_internal_update_task_flag(resv->task, FALSE);

		break;

	case PMC_FLAG_SCOPE_THREAD:
		pmc_internal_reservation_dequeue(thread_reservations, resv);
		thread_reservation_count--;

		lck_spin_unlock(&reservations_spin);

		/* recalculate the thread's counter mask */
		pmc_internal_update_thread_flag(resv->thread, FALSE);

		break;
	}
}

/* Reservation State Machine
 *
 * The PMC subsystem uses a 3-tuple of state information packed into a 32-bit quantity and a
 * set of 9 events to provide MP-safe bookkeeping and control flow. The 3-tuple is composed
 * of a state, a count of active contexts, and a set of modifier flags. A state machine defines
 * the possible transitions at each event point given the current 3-tuple. Atomicity is handled
 * by reading the current 3-tuple, applying the transformations indicated by the state machine,
 * and then attempting to OSCompareAndSwap the transformed value. If the OSCompareAndSwap fails,
 * the process is repeated until either the OSCompareAndSwap succeeds or no valid transitions are
 * available.
 *
 * The state machine is described using tuple notation for the current state and a related notation
 * for describing the transformations. For conciseness, the flag and state names are abbreviated as
 * follows:
 *
 * states:
 * S = STOP
 * CR = CAN_RUN
 * L = LOAD
 * R = RUN
 * ST = STORE
 * I = INTERRUPT
 * D = DEALLOC
 *
 * flags:
 *
 * S = STOPPING
 * D = DEALLOCING
 * I = INTERRUPTING
 *
 * The tuple notation is formed from the following pattern:
 *
 * tuple = < state, active-context-count, flags >
 * state = S | CR | L | R | ST | I | D
 * active-context-count = 0 | >0 | 1 | >1
 * flags = flags flag | blank
 * flag = S | D | I
 *
 * The transform notation is similar, but only describes the modifications made to the current state.
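 *
 * For example, the tuple <R, >1, S> describes a reservation that is in the RUN
 * state, has more than one active context, and has the STOPPING flag set, while
 * <S, 0, > describes a stopped reservation with no active contexts and no flags.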
* The notation is formed from the following pattern: * * transform = < state, active-context-count, flags > * state = S | CR | L | R | ST | I | D * active-context-count = + | - | blank * flags = flags flag | flags !flag | blank * flag = S | D | I * * And now for the state machine: * State Start Stop Free Interrupt End Interrupt Context In Context Out Load Finished Store Finished * * * < , -, !D> * 1, D> < , -, > * * < , , !S> < , , !SD> * < , , S> < , , D> * * * < , , !SD> * < , , !S> < , , !SD> * < , , S> < , , D> < , , IS> < , +, > * 1, D> < , -, > * 1, ID> < , -, > * 1, IS> < , , !SD> < , -, > * 1, S> < , , !S> < , , !SD> < , -, > * 1, > < , , S> < , , D> < , , IS> < , +, > < , -, > * * * < , , !SD> * < , , !S> < , , !SD> * < , , S> < , , D> < , , IS> < , +, > * 1, D> < , -, > * 1, ID> < , -, > * 1, IS> < , , !SD> < , -, > * 1, S> < , , !S> < , , !SD> < , -, > * 1, > < , , S> < , , D> < , , IS> < , +, > < , -, > * * * < , , !SD> * < , , !S> < , -, !S> * < , , S> * 1, ID> < , -, > * 1, IS> < , , !SD> < , -, > * 1, S> < , , !S> < , -, > * 1, > < , , S> < , -, > * * * < , , !SD> * < , , !S> < , , !SD> * < , , S> < , , D> < , , IS> < , +, > * 0, D> < , -, > * 0, ID> < , -, > * 0, IS> < , , !SD> < , -, > * 0, S> < , , !S> < , , !SD> < , -, > * 0, > < , , S> < , , D> < , , IS> < , +, > < , -, > */ static uint32_t pmc_internal_reservation_next_state(uint32_t current_state, pmc_state_event_t event) { uint32_t new_state = PMC_STATE(PMC_STATE_STATE_INVALID, 0, 0); switch (event) { case PMC_STATE_EVENT_START: switch (current_state & ~(PMC_STATE_CONTEXT_COUNT_MASK)) { case PMC_STATE(PMC_STATE_STATE_INTERRUPT, 0, PMC_STATE_FLAGS_STOPPING): case PMC_STATE(PMC_STATE_STATE_LOAD, 0, PMC_STATE_FLAGS_STOPPING): case PMC_STATE(PMC_STATE_STATE_RUN, 0, PMC_STATE_FLAGS_STOPPING): case PMC_STATE(PMC_STATE_STATE_STOP, 0, PMC_STATE_FLAGS_STOPPING): case PMC_STATE(PMC_STATE_STATE_STORE, 0, PMC_STATE_FLAGS_STOPPING): new_state = PMC_STATE_MODIFY(current_state, 0, 0, PMC_STATE_FLAGS_STOPPING); break; case PMC_STATE(PMC_STATE_STATE_STOP, 0, 0): if (PMC_STATE_CONTEXT_COUNT(current_state) == 0) { new_state = PMC_STATE_MOVE(current_state, PMC_STATE_STATE_CAN_RUN, 0, 0, 0); } break; } break; case PMC_STATE_EVENT_STOP: switch (current_state & ~(PMC_STATE_CONTEXT_COUNT_MASK)) { case PMC_STATE(PMC_STATE_STATE_CAN_RUN, 0, 0): new_state = PMC_STATE_MOVE(current_state, PMC_STATE_STATE_STOP, 0, 0, 0); break; case PMC_STATE(PMC_STATE_STATE_INTERRUPT, 0, 0): case PMC_STATE(PMC_STATE_STATE_LOAD, 0, 0): case PMC_STATE(PMC_STATE_STATE_RUN, 0, 0): case PMC_STATE(PMC_STATE_STATE_STORE, 0, 0): new_state = PMC_STATE_MODIFY(current_state, 0, PMC_STATE_FLAGS_STOPPING, 0); break; case PMC_STATE(PMC_STATE_STATE_STOP, 0, 0): if (PMC_STATE_CONTEXT_COUNT(current_state) > 0) { new_state = PMC_STATE_MODIFY(current_state, 0, PMC_STATE_FLAGS_STOPPING, 0); } break; } break; case PMC_STATE_EVENT_FREE: switch (current_state & ~(PMC_STATE_CONTEXT_COUNT_MASK)) { case PMC_STATE(PMC_STATE_STATE_CAN_RUN, 0, 0): new_state = PMC_STATE_MOVE(current_state, PMC_STATE_STATE_DEALLOC, 0, 0, 0); break; case PMC_STATE(PMC_STATE_STATE_INTERRUPT, 0, PMC_STATE_FLAGS_STOPPING): case PMC_STATE(PMC_STATE_STATE_LOAD, 0, PMC_STATE_FLAGS_INTERRUPTING | PMC_STATE_FLAGS_STOPPING): case PMC_STATE(PMC_STATE_STATE_LOAD, 0, PMC_STATE_FLAGS_STOPPING): case PMC_STATE(PMC_STATE_STATE_RUN, 0, PMC_STATE_FLAGS_INTERRUPTING | PMC_STATE_FLAGS_STOPPING): case PMC_STATE(PMC_STATE_STATE_RUN, 0, PMC_STATE_FLAGS_STOPPING): case PMC_STATE(PMC_STATE_STATE_STOP, 0, PMC_STATE_FLAGS_INTERRUPTING 
| PMC_STATE_FLAGS_STOPPING): case PMC_STATE(PMC_STATE_STATE_STORE, 0, PMC_STATE_FLAGS_INTERRUPTING | PMC_STATE_FLAGS_STOPPING): case PMC_STATE(PMC_STATE_STATE_STORE, 0, PMC_STATE_FLAGS_STOPPING): new_state = PMC_STATE_MODIFY(current_state, 0, PMC_STATE_FLAGS_DEALLOCING, PMC_STATE_FLAGS_STOPPING); break; case PMC_STATE(PMC_STATE_STATE_INTERRUPT, 0, 0): case PMC_STATE(PMC_STATE_STATE_LOAD, 0, 0): case PMC_STATE(PMC_STATE_STATE_RUN, 0, 0): case PMC_STATE(PMC_STATE_STATE_STORE, 0, 0): new_state = PMC_STATE_MODIFY(current_state, 0, PMC_STATE_FLAGS_DEALLOCING, 0); break; case PMC_STATE(PMC_STATE_STATE_STOP, 0, PMC_STATE_FLAGS_STOPPING): new_state = PMC_STATE_MOVE(current_state, PMC_STATE_STATE_DEALLOC, 0, PMC_STATE_FLAGS_DEALLOCING, PMC_STATE_FLAGS_STOPPING); break; case PMC_STATE(PMC_STATE_STATE_STOP, 0, 0): if (PMC_STATE_CONTEXT_COUNT(current_state) > 0) { new_state = PMC_STATE_MOVE(current_state, PMC_STATE_STATE_DEALLOC, 0, PMC_STATE_FLAGS_DEALLOCING, 0); } else { new_state = PMC_STATE_MOVE(current_state, PMC_STATE_STATE_DEALLOC, 0, 0, 0); } break; } break; case PMC_STATE_EVENT_INTERRUPT: switch (current_state & ~(PMC_STATE_CONTEXT_COUNT_MASK)) { case PMC_STATE(PMC_STATE_STATE_LOAD, 0, 0): case PMC_STATE(PMC_STATE_STATE_RUN, 0, 0): case PMC_STATE(PMC_STATE_STATE_STORE, 0, 0): new_state = PMC_STATE_MODIFY(current_state, 0, PMC_STATE_FLAGS_INTERRUPTING | PMC_STATE_FLAGS_STOPPING, 0); break; } break; case PMC_STATE_EVENT_END_OF_INTERRUPT: switch (current_state & ~(PMC_STATE_CONTEXT_COUNT_MASK)) { case PMC_STATE(PMC_STATE_STATE_INTERRUPT, 0, PMC_STATE_FLAGS_DEALLOCING): new_state = PMC_STATE_MOVE(current_state, PMC_STATE_STATE_DEALLOC, 0, 0, PMC_STATE_FLAGS_DEALLOCING); break; case PMC_STATE(PMC_STATE_STATE_INTERRUPT, 0, PMC_STATE_FLAGS_STOPPING): new_state = PMC_STATE_MOVE(current_state, PMC_STATE_STATE_STOP, 0, 0, PMC_STATE_FLAGS_STOPPING); break; case PMC_STATE(PMC_STATE_STATE_INTERRUPT, 0, 0): new_state = PMC_STATE_MOVE(current_state, PMC_STATE_STATE_CAN_RUN, 0, 0, 0); break; } break; case PMC_STATE_EVENT_CONTEXT_IN: switch (current_state & ~(PMC_STATE_CONTEXT_COUNT_MASK)) { case PMC_STATE(PMC_STATE_STATE_CAN_RUN, 0, 0): new_state = PMC_STATE_MOVE(current_state, PMC_STATE_STATE_LOAD, 1, 0, 0); break; case PMC_STATE(PMC_STATE_STATE_LOAD, 0, 0): case PMC_STATE(PMC_STATE_STATE_RUN, 0, 0): case PMC_STATE(PMC_STATE_STATE_STORE, 0, 0): new_state = PMC_STATE_MODIFY(current_state, 1, 0, 0); break; case PMC_STATE(PMC_STATE_STATE_STOP, 0, 0): if (PMC_STATE_CONTEXT_COUNT(current_state) > 0) { new_state = PMC_STATE_MOVE(current_state, PMC_STATE_STATE_LOAD, 1, 0, 0); } break; } break; case PMC_STATE_EVENT_CONTEXT_OUT: switch (current_state & ~(PMC_STATE_CONTEXT_COUNT_MASK)) { case PMC_STATE(PMC_STATE_STATE_DEALLOC, 0, PMC_STATE_FLAGS_DEALLOCING): if (PMC_STATE_CONTEXT_COUNT(current_state) > 1) { new_state = PMC_STATE_MODIFY(current_state, -1, 0, PMC_STATE_FLAGS_DEALLOCING); } else { new_state = PMC_STATE_MODIFY(current_state, -1, 0, 0); } break; case PMC_STATE(PMC_STATE_STATE_LOAD, 0, PMC_STATE_FLAGS_DEALLOCING): case PMC_STATE(PMC_STATE_STATE_LOAD, 0, PMC_STATE_FLAGS_INTERRUPTING | PMC_STATE_FLAGS_DEALLOCING): case PMC_STATE(PMC_STATE_STATE_LOAD, 0, PMC_STATE_FLAGS_INTERRUPTING | PMC_STATE_FLAGS_STOPPING): case PMC_STATE(PMC_STATE_STATE_LOAD, 0, PMC_STATE_FLAGS_STOPPING): case PMC_STATE(PMC_STATE_STATE_LOAD, 0, 0): if (PMC_STATE_CONTEXT_COUNT(current_state) > 1) { new_state = PMC_STATE_MODIFY(current_state, -1, 0, 0); } break; case PMC_STATE(PMC_STATE_STATE_RUN, 0, PMC_STATE_FLAGS_DEALLOCING): case 
PMC_STATE(PMC_STATE_STATE_RUN, 0, PMC_STATE_FLAGS_INTERRUPTING | PMC_STATE_FLAGS_DEALLOCING): case PMC_STATE(PMC_STATE_STATE_RUN, 0, PMC_STATE_FLAGS_INTERRUPTING | PMC_STATE_FLAGS_STOPPING): case PMC_STATE(PMC_STATE_STATE_RUN, 0, PMC_STATE_FLAGS_STOPPING): case PMC_STATE(PMC_STATE_STATE_RUN, 0, 0): if (PMC_STATE_CONTEXT_COUNT(current_state) == 1) { new_state = PMC_STATE_MOVE(current_state, PMC_STATE_STATE_STORE, -1, 0, 0); } else { new_state = PMC_STATE_MODIFY(current_state, -1, 0, 0); } break; case PMC_STATE(PMC_STATE_STATE_STOP, 0, PMC_STATE_FLAGS_INTERRUPTING | PMC_STATE_FLAGS_DEALLOCING): case PMC_STATE(PMC_STATE_STATE_STOP, 0, PMC_STATE_FLAGS_INTERRUPTING | PMC_STATE_FLAGS_STOPPING): if (PMC_STATE_CONTEXT_COUNT(current_state) == 1) { new_state = PMC_STATE_MOVE(current_state, PMC_STATE_STATE_INTERRUPT, -1, 0, PMC_STATE_FLAGS_INTERRUPTING); } else { new_state = PMC_STATE_MODIFY(current_state, -1, 0, 0); } break; case PMC_STATE(PMC_STATE_STATE_STOP, 0, PMC_STATE_FLAGS_STOPPING): if (PMC_STATE_CONTEXT_COUNT(current_state) == 1) { new_state = PMC_STATE_MODIFY(current_state, -1, 0, PMC_STATE_FLAGS_STOPPING); } else { new_state = PMC_STATE_MODIFY(current_state, -1, 0, 0); } break; case PMC_STATE(PMC_STATE_STATE_STOP, 0, 0): if (PMC_STATE_CONTEXT_COUNT(current_state) > 0) { if (PMC_STATE_CONTEXT_COUNT(current_state) == 1) { new_state = PMC_STATE_MOVE(current_state, PMC_STATE_STATE_CAN_RUN, -1, 0, 0); } else { new_state = PMC_STATE_MODIFY(current_state, -1, 0, 0); } } break; case PMC_STATE(PMC_STATE_STATE_STORE, 0, PMC_STATE_FLAGS_DEALLOCING): case PMC_STATE(PMC_STATE_STATE_STORE, 0, PMC_STATE_FLAGS_INTERRUPTING | PMC_STATE_FLAGS_DEALLOCING): case PMC_STATE(PMC_STATE_STATE_STORE, 0, PMC_STATE_FLAGS_INTERRUPTING | PMC_STATE_FLAGS_STOPPING): case PMC_STATE(PMC_STATE_STATE_STORE, 0, PMC_STATE_FLAGS_STOPPING): case PMC_STATE(PMC_STATE_STATE_STORE, 0, 0): if (PMC_STATE_CONTEXT_COUNT(current_state) > 0) { new_state = PMC_STATE_MODIFY(current_state, -1, 0, 0); } break; } break; case PMC_STATE_EVENT_LOAD_FINISHED: switch (current_state & ~(PMC_STATE_CONTEXT_COUNT_MASK)) { case PMC_STATE(PMC_STATE_STATE_LOAD, 0, PMC_STATE_FLAGS_DEALLOCING): case PMC_STATE(PMC_STATE_STATE_LOAD, 0, PMC_STATE_FLAGS_INTERRUPTING | PMC_STATE_FLAGS_DEALLOCING): case PMC_STATE(PMC_STATE_STATE_LOAD, 0, PMC_STATE_FLAGS_INTERRUPTING | PMC_STATE_FLAGS_STOPPING): case PMC_STATE(PMC_STATE_STATE_LOAD, 0, PMC_STATE_FLAGS_STOPPING): if (PMC_STATE_CONTEXT_COUNT(current_state) > 1) { new_state = PMC_STATE_MOVE(current_state, PMC_STATE_STATE_RUN, -1, 0, 0); } else { new_state = PMC_STATE_MOVE(current_state, PMC_STATE_STATE_STORE, -1, 0, 0); } break; case PMC_STATE(PMC_STATE_STATE_LOAD, 0, 0): new_state = PMC_STATE_MOVE(current_state, PMC_STATE_STATE_RUN, 0, 0, 0); break; } break; case PMC_STATE_EVENT_STORE_FINISHED: switch (current_state & ~(PMC_STATE_CONTEXT_COUNT_MASK)) { case PMC_STATE(PMC_STATE_STATE_STORE, 0, PMC_STATE_FLAGS_DEALLOCING): if (PMC_STATE_CONTEXT_COUNT(current_state) == 0) { new_state = PMC_STATE_MOVE(current_state, PMC_STATE_STATE_DEALLOC, 0, 0, PMC_STATE_FLAGS_DEALLOCING); } else { new_state = PMC_STATE_MOVE(current_state, PMC_STATE_STATE_DEALLOC, 0, 0, 0); } break; case PMC_STATE(PMC_STATE_STATE_STORE, 0, PMC_STATE_FLAGS_INTERRUPTING | PMC_STATE_FLAGS_DEALLOCING): case PMC_STATE(PMC_STATE_STATE_STORE, 0, PMC_STATE_FLAGS_INTERRUPTING | PMC_STATE_FLAGS_STOPPING): if (PMC_STATE_CONTEXT_COUNT(current_state) == 0) { new_state = PMC_STATE_MOVE(current_state, PMC_STATE_STATE_INTERRUPT, 0, 0, PMC_STATE_FLAGS_INTERRUPTING); } 
else { new_state = PMC_STATE_MOVE(current_state, PMC_STATE_STATE_STOP, 0, 0, 0); } break; case PMC_STATE(PMC_STATE_STATE_STORE, 0, PMC_STATE_FLAGS_STOPPING): if (PMC_STATE_CONTEXT_COUNT(current_state) == 0) { new_state = PMC_STATE_MOVE(current_state, PMC_STATE_STATE_STOP, 0, 0, PMC_STATE_FLAGS_STOPPING); } else { new_state = PMC_STATE_MOVE(current_state, PMC_STATE_STATE_STOP, 0, 0, 0); } break; case PMC_STATE(PMC_STATE_STATE_STORE, 0, 0): if (PMC_STATE_CONTEXT_COUNT(current_state) == 0) { new_state = PMC_STATE_MOVE(current_state, PMC_STATE_STATE_CAN_RUN, 0, 0, 0); } else { new_state = PMC_STATE_MOVE(current_state, PMC_STATE_STATE_LOAD, 0, 0, 0); } break; } break; } return new_state; } static uint32_t pmc_internal_reservation_move_for_event(pmc_reservation_t reservation, pmc_state_event_t event, pmc_state_t *old_state_out) { pmc_state_t oldState; pmc_state_t newState; assert(reservation); /* Determine what state change, if any, we need to do. Keep trying until either we succeed doing a transition * or the there is no valid move. */ do { oldState = reservation->state; newState = pmc_internal_reservation_next_state(oldState, event); } while (newState != PMC_STATE_INVALID && !OSCompareAndSwap(oldState, newState, &(reservation->state))); if (newState != PMC_STATE_INVALID) { COUNTER_DEBUG("Moved reservation %p from state "PMC_STATE_FORMAT" to state "PMC_STATE_FORMAT" for event %s\n", reservation, PMC_STATE_ARGS(oldState), PMC_STATE_ARGS(newState), pmc_state_event_name(event)); } else { COUNTER_DEBUG("No valid moves for reservation %p in state "PMC_STATE_FORMAT" for event %s\n", reservation, PMC_STATE_ARGS(oldState), pmc_state_event_name(event)); } if (old_state_out != NULL) { *old_state_out = oldState; } return newState; } static void pmc_internal_reservation_context_out(pmc_reservation_t reservation) { assert(reservation); pmc_state_t newState; pmc_state_t oldState; /* Clear that the this reservation was active when this cpu did its last context in */ OSBitAndAtomic(~(1U << cpu_number()), &(reservation->active_last_context_in)); /* Move the state machine */ if (PMC_STATE_INVALID == (newState = pmc_internal_reservation_move_for_event(reservation, PMC_STATE_EVENT_CONTEXT_OUT, &oldState))) { return; } /* Do any actions required based on the state change */ if (PMC_STATE_STATE(newState) == PMC_STATE_STATE_STORE && PMC_STATE_STATE(oldState) != PMC_STATE_STATE_STORE) { /* Just moved into STORE, so store the reservation. */ pmc_internal_reservation_store(reservation); } else if (PMC_STATE_STATE(newState) == PMC_STATE_STATE_DEALLOC && PMC_STATE_CONTEXT_COUNT(newState) == 0 && PMC_STATE_FLAGS(newState) == 0) { /* Wakeup any thread blocking for this reservation to hit */ thread_wakeup((event_t)reservation); } } static void pmc_internal_reservation_context_in(pmc_reservation_t reservation) { assert(reservation); pmc_state_t oldState; pmc_state_t newState; /* Move the state machine */ if (PMC_STATE_INVALID == (newState = pmc_internal_reservation_move_for_event(reservation, PMC_STATE_EVENT_CONTEXT_IN, &oldState))) { return; } /* Mark that the reservation was active when this cpu did its last context in */ OSBitOrAtomic(1U << cpu_number(), &(reservation->active_last_context_in)); /* Do any actions required based on the state change */ if (PMC_STATE_STATE(newState) == PMC_STATE_STATE_LOAD && PMC_STATE_STATE(oldState) != PMC_STATE_STATE_LOAD) { /* Just moved into LOAD, so load the reservation. 
*/ pmc_internal_reservation_load(reservation); } } static void pmc_internal_reservation_store(pmc_reservation_t reservation) { assert(reservation); assert(PMC_STATE_STATE(reservation->state) == PMC_STATE_STATE_STORE); assert(reservation->pmc); assert(reservation->config); pmc_state_t newState; kern_return_t ret = KERN_SUCCESS; pmc_t store_pmc = reservation->pmc; pmc_object_t store_pmc_obj = store_pmc->object; perf_monitor_t store_pm = store_pmc->monitor; /* * Instruct the Perf Monitor that contains this counter to turn * off the global disable for this counter. */ ret = store_pm->methods.disable_counters(store_pm->object, &store_pmc_obj, 1); if(KERN_SUCCESS != ret) { COUNTER_DEBUG(" [error] disable_counters: 0x%x\n", ret); return; } /* Instruct the counter to disable itself */ ret = store_pmc->methods.disable(store_pmc_obj); if(KERN_SUCCESS != ret) { COUNTER_DEBUG(" [error] disable: 0x%x\n", ret); } /* store the counter value into the reservation's stored count */ ret = store_pmc->methods.get_count(store_pmc_obj, &reservation->value); if(KERN_SUCCESS != ret) { COUNTER_DEBUG(" [error] get_count: 0x%x\n", ret); return; } /* Advance the state machine now that the STORE is finished */ if (PMC_STATE_INVALID == (newState = pmc_internal_reservation_move_for_event(reservation, PMC_STATE_EVENT_STORE_FINISHED, NULL))) { return; } /* Do any actions required based on the state change */ if (PMC_STATE_STATE(newState) == PMC_STATE_STATE_LOAD) { /* Just moved into LOAD, so load the reservation. */ pmc_internal_reservation_load(reservation); } else if (PMC_STATE_STATE(newState) == PMC_STATE_STATE_DEALLOC && PMC_STATE_CONTEXT_COUNT(newState) == 0 && PMC_STATE_FLAGS(newState) == 0) { /* Wakeup any thread blocking for this reservation to hit */ thread_wakeup((event_t)reservation); } } static void pmc_internal_reservation_load(pmc_reservation_t reservation) { assert(reservation); assert(PMC_STATE_STATE(reservation->state) == PMC_STATE_STATE_LOAD); pmc_state_t newState; kern_return_t ret = KERN_SUCCESS; assert(reservation->pmc); assert(reservation->config); pmc_t load_pmc = reservation->pmc; pmc_object_t load_pmc_obj = load_pmc->object; perf_monitor_t load_pm = load_pmc->monitor; /* Set the control register up with the stored configuration */ ret = load_pmc->methods.set_config(load_pmc_obj, reservation->config->object); if(KERN_SUCCESS != ret) { COUNTER_DEBUG(" [error] set_config: 0x%x\n", ret); return; } /* load the counter value */ ret = load_pmc->methods.set_count(load_pmc_obj, reservation->value); if(KERN_SUCCESS != ret) { COUNTER_DEBUG(" [error] set_count: 0x%x\n", ret); return; } /* Locally enable the counter */ ret = load_pmc->methods.enable(load_pmc_obj); if(KERN_SUCCESS != ret) { COUNTER_DEBUG(" [error] enable: 0x%x\n", ret); return; } /* * Instruct the Perf Monitor containing the pmc to enable the * counter. */ ret = load_pm->methods.enable_counters(load_pm->object, &load_pmc_obj, 1); if(KERN_SUCCESS != ret) { COUNTER_DEBUG(" [error] enable_counters: 0x%x\n", ret); /* not on the hardware. */ return; } /* Advance the state machine now that the STORE is finished */ if (PMC_STATE_INVALID == (newState = pmc_internal_reservation_move_for_event(reservation, PMC_STATE_EVENT_LOAD_FINISHED, NULL))) { return; } /* Do any actions required based on the state change */ if (PMC_STATE_STATE(newState) == PMC_STATE_STATE_STORE) { /* Just moved into STORE, so store the reservation. 
*/ pmc_internal_reservation_store(reservation); } } /* * pmc_accessible_from_core will return TRUE if the given @pmc is directly * (e.g., hardware) readable from the given logical core. * * NOTE: This method is interrupt safe. */ static inline boolean_t pmc_accessible_from_core(pmc_t pmc, uint32_t logicalCore) { boolean_t ret = FALSE; assert(pmc); ret = pmc->methods.accessible_from_core(pmc->object, logicalCore); return ret; } static void pmc_internal_reservation_start_cpu(void * arg) { pmc_reservation_t reservation = (pmc_reservation_t)arg; assert(reservation); if (pmc_internal_reservation_matches_context(reservation)) { /* We are in context, but the reservation may have already had the context_in method run. Attempt * to set this cpu's bit in the active_last_context_in mask. If we set it, call context_in. */ uint32_t oldMask = OSBitOrAtomic(1U << cpu_number(), &(reservation->active_last_context_in)); if ((oldMask & (1U << cpu_number())) == 0) { COUNTER_DEBUG("Starting already in-context reservation %p for cpu %d\n", reservation, cpu_number()); pmc_internal_reservation_context_in(reservation); } } } static void pmc_internal_reservation_stop_cpu(void * arg) { pmc_reservation_t reservation = (pmc_reservation_t)arg; assert(reservation); if (pmc_internal_reservation_matches_context(reservation)) { COUNTER_DEBUG("Stopping in-context reservation %p for cpu %d\n", reservation, cpu_number()); pmc_internal_reservation_context_out(reservation); } } /*!fn * pmc_reservation_interrupt is called when a PMC reservation which was setup * with an interrupt threshold counts the requested number of events. When the * underlying counter hits the threshold, an interrupt is generated, and this * method is called. This method marks the reservation as stopped, and passes * control off to the user-registered callback method, along with the * reservation (so that the user can, for example, write a 0 to the counter, and * restart the reservation). * This method assumes the reservation has a valid pmc_config_t within. * * @param target The pmc_reservation_t that caused the interrupt. * @param refCon User specified reference constant. */ static void pmc_reservation_interrupt(void *target, void *refCon) { pmc_reservation_t reservation = (pmc_reservation_t)target; pmc_state_t newState; uint64_t timeout; uint32_t spins; assert(reservation); /* Move the state machine */ if (PMC_STATE_INVALID == pmc_internal_reservation_move_for_event(reservation, PMC_STATE_EVENT_INTERRUPT, NULL)) { return; } /* A valid state move has been made, but won't be picked up until a context switch occurs. To cause matching * contexts that are currently running to update, we do an inter-processor message to run pmc_internal_reservation_stop_cpu * on every cpu that can access the PMC. 
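	 * On x86 the broadcast below is carried out with mp_cpus_call() over the mask
	 * of accessible cores (see pmc_internal_reservation_broadcast() above); other
	 * architectures need an equivalent inter-processor call mechanism.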
	 */
	pmc_internal_reservation_broadcast(reservation, pmc_internal_reservation_stop_cpu);

	/* Spin waiting for the state to turn to INTERRUPT */
	nanoseconds_to_absolutetime(PMC_SPIN_TIMEOUT_US * 1000, &timeout);
	timeout += mach_absolute_time();
	spins = 0;
	while (PMC_STATE_STATE(reservation->state) != PMC_STATE_STATE_INTERRUPT) {
		/* Assert if this takes longer than PMC_SPIN_TIMEOUT_US */
		if (++spins > PMC_SPIN_THRESHOLD) {
			if (mach_absolute_time() > timeout) {
				pmc_spin_timeout_count++;
				assert(0);
			}
		}

		cpu_pause();
	}

	assert(reservation->config);
	assert(reservation->config->method);

	/* Call the registered callback handler */
#if DEBUG_COUNTERS
	uint64_t start = mach_absolute_time();
#endif /* DEBUG */

	(void)reservation->config->method(reservation, refCon);

#if DEBUG_COUNTERS
	uint64_t end = mach_absolute_time();
	if((end - start) > 5000ULL) {
		kprintf("%s - user method %p took %llu ns\n", __FUNCTION__,
			reservation->config->method, (end - start));
	}
#endif

	/* Move the state machine */
	if (PMC_STATE_INVALID == (newState = pmc_internal_reservation_move_for_event(reservation, PMC_STATE_EVENT_END_OF_INTERRUPT, NULL))) {
		return;
	}

	/* Do any post-move actions necessary */
	if (PMC_STATE_STATE(newState) == PMC_STATE_STATE_CAN_RUN) {
		pmc_internal_reservation_broadcast(reservation, pmc_internal_reservation_start_cpu);
	} else if (PMC_STATE_STATE(newState) == PMC_STATE_STATE_DEALLOC && PMC_STATE_CONTEXT_COUNT(newState) == 0 && PMC_STATE_FLAGS(newState) == 0) {
		/* Wakeup any thread blocking for this reservation to hit */
		thread_wakeup((event_t)reservation);
	}
}

/*
 * Apple-private KPI for Apple kexts (IOProfileFamily) only
 */

#if 0
#pragma mark -
#pragma mark IOProfileFamily private KPI
#endif

/*
 * perf_monitor_register registers a new Performance Monitor, and its associated
 * callback methods. The given perf_monitor_object_t is the first argument to
 * each callback when they are called.
 */
kern_return_t perf_monitor_register(perf_monitor_object_t monitor,
	perf_monitor_methods_t *methods) {
	int cpu = -1;

	COUNTER_DEBUG("registering perf monitor %p\n", monitor);

	if(!monitor || !methods) {
		return KERN_INVALID_ARGUMENT;
	}

	/* Protect against out-of-date driver kexts */
	if(MACH_PERFMON_METHODS_VERSION != methods->perf_monitor_methods_version) {
		return KERN_INVALID_ARGUMENT;
	}

	/* If the monitor requires idle notifications, ensure that it is
	 * accessible from a single core only.
	 */
	if (methods->flags & PERFMON_FLAG_REQUIRES_IDLE_NOTIFICATIONS) {
		uint32_t *cores;
		size_t core_cnt;

		if (KERN_SUCCESS == methods->accessible_cores(monitor, &cores, &core_cnt)) {
			/*
			 * Guard against disabled cores - monitors will always match and
			 * attempt registration, irrespective of 'cpus=x' boot-arg.
			 */
			if ((core_cnt == 1) && (cores[0] < (uint32_t)ml_get_max_cpus())) {
				cpu = cores[0];
			} else {
				return KERN_INVALID_ARGUMENT;
			}
		}
	}

	/* All methods are required */
	if(!methods->accessible_cores || !methods->enable_counters ||
		!methods->disable_counters || !methods->on_idle || !methods->on_idle_exit) {
		return KERN_INVALID_ARGUMENT;
	}

	/* prevent dupes.
*/ perf_monitor_t dupe = perf_monitor_find(monitor); if(dupe) { COUNTER_DEBUG("Duplicate registration for %p\n", monitor); perf_monitor_deallocate(dupe); return KERN_FAILURE; } perf_monitor_t pm = perf_monitor_alloc(); if(!pm) { return KERN_RESOURCE_SHORTAGE; } /* initialize the object */ perf_monitor_init(pm, cpu); /* copy in the registration info */ pm->object = monitor; memcpy(&(pm->methods), methods, sizeof(perf_monitor_methods_t)); /* place it in the tracking queues */ perf_monitor_enqueue(pm); /* debug it */ PRINT_PERF_MON(pm); return KERN_SUCCESS; } /* * perf_monitor_unregister unregisters a previously registered Perf Monitor, * looking it up by reference pointer (the same that was used in * perf_monitor_register()). */ kern_return_t perf_monitor_unregister(perf_monitor_object_t monitor) { kern_return_t ret = KERN_FAILURE; COUNTER_DEBUG("unregistering perf monitor %p\n", monitor); if(!monitor) { return KERN_INVALID_ARGUMENT; } perf_monitor_t pm = perf_monitor_find(monitor); if(pm) { /* Remove it from the queues. */ perf_monitor_dequeue(pm); /* drop extra retain from find */ perf_monitor_deallocate(pm); /* and release the object */ perf_monitor_deallocate(pm); ret = KERN_SUCCESS; } else { COUNTER_DEBUG("could not find a registered pm that matches!\n"); } return ret; } /* * pmc_register registers a new PMC for use with the pmc subsystem. Each PMC is * associated with a Perf Monitor. Perf Monitors are looked up by the reference * pointer that was used to previously register them. * * PMCs are registered with a reference pointer (@pmc_object), and a set of * callback methods. When the given callback methods are called from xnu, the * first argument will always be the reference pointer used to register the PMC. * * NOTE: @monitor must have been successfully registered via * perf_monitor_register before this method will succeed. */ kern_return_t pmc_register(perf_monitor_object_t monitor, pmc_object_t pmc_object, pmc_methods_t *methods, void *object) { COUNTER_DEBUG("%p %p\n", monitor, pmc_object); if(!monitor || !pmc_object || !methods || !object) { return KERN_INVALID_ARGUMENT; } /* Prevent version mismatches */ if(MACH_PMC_METHODS_VERSION != methods->pmc_methods_version) { COUNTER_DEBUG("version mismatch\n"); return KERN_INVALID_ARGUMENT; } /* All methods are required. */ if(!methods->create_config || !methods->free_config || !methods->config_set_value || !methods->config_set_threshold || !methods->config_set_handler || !methods->set_config || !methods->get_monitor || !methods->get_name || !methods->accessible_from_core || !methods->accessible_cores || !methods->get_count || !methods->set_count || !methods->disable || !methods->enable || !methods->open || !methods->close) { return KERN_INVALID_ARGUMENT; } /* make sure this perf monitor object is already registered */ /* * NOTE: this adds a reference to the parent, so we'll have to drop it in * any failure code paths from here on out. 
*/ perf_monitor_t pm = perf_monitor_find(monitor); if(!pm) { COUNTER_DEBUG("Could not find perf monitor for %p\n", monitor); return KERN_INVALID_ARGUMENT; } /* make a new pmc */ pmc_t pmc = pmc_alloc(); if(!pmc) { /* drop the extra reference from perf_monitor_find() */ perf_monitor_deallocate(pm); return KERN_RESOURCE_SHORTAGE; } /* init it */ pmc_init(pmc); pmc->object = pmc_object; pmc->open_object = object; /* copy the callbacks in */ memcpy(&(pmc->methods), methods, sizeof(pmc_methods_t)); pmc->monitor = pm; perf_monitor_add_pmc(pmc->monitor, pmc); /* enqueue it in our tracking queue */ pmc_enqueue(pmc); /* drop extra reference from perf_monitor_find() */ perf_monitor_deallocate(pm); return KERN_SUCCESS; } /* * pmc_unregister unregisters a previously registered PMC, looking it up by * reference point to *both* the Perf Monitor it was created with, and the PMC's * reference pointer itself. */ kern_return_t pmc_unregister(perf_monitor_object_t monitor, pmc_object_t pmc_object) { COUNTER_DEBUG("%p %p\n", monitor, pmc_object); if(!monitor || !pmc_object) { return KERN_INVALID_ARGUMENT; } pmc_t pmc = pmc_find(pmc_object); if(!pmc) { COUNTER_DEBUG("Could not find a matching pmc.\n"); return KERN_FAILURE; } /* remove it from the global queue */ pmc_dequeue(pmc); perf_monitor_remove_pmc(pmc->monitor, pmc); /* remove extra reference count from pmc_find() */ pmc_deallocate(pmc); /* dealloc the pmc */ pmc_deallocate(pmc); return KERN_SUCCESS; } static void perf_monitor_reservation_add(perf_monitor_t monitor) { assert(monitor); OSIncrementAtomic(&(monitor->reservedCounters)); } static void perf_monitor_reservation_remove(perf_monitor_t monitor) { assert(monitor); OSDecrementAtomic(&(monitor->reservedCounters)); } #if 0 #pragma mark - #pragma mark KPI #endif /* * Begin in-kernel and in-kext KPI methods */ /* * pmc_create_config creates a new configuration area from a given @pmc. * * NOTE: This method is not interrupt safe. */ kern_return_t pmc_create_config(pmc_t pmc, pmc_config_t *config) { pmc_config_t tmp = NULL; if(!pmc || !config) { return KERN_INVALID_ARGUMENT; } pmc_reference(pmc); tmp = pmc_config_alloc(pmc); if(tmp) { tmp->object = pmc->methods.create_config(pmc->object); if(!tmp->object) { pmc_config_free(pmc, tmp); tmp = NULL; } else { tmp->interrupt_after_value = 0ULL; tmp->method = NULL; tmp->refCon = NULL; } } pmc_deallocate(pmc); if(!tmp) { return KERN_RESOURCE_SHORTAGE; } *config = tmp; return KERN_SUCCESS; } /* * pmc_free_config frees a configuration area created from a given @pmc * * NOTE: This method is not interrupt safe. */ void pmc_free_config(pmc_t pmc, pmc_config_t config) { assert(pmc); assert(config); pmc_reference(pmc); pmc_config_free(pmc, config); pmc_deallocate(pmc); } /* * pmc_config_set_value sets up configuration area key-value pairs. These pairs * are to be either pre-known, or looked up via CoreProfile.framework. * * NOTE: This method is not interrupt safe. */ kern_return_t pmc_config_set_value(pmc_t pmc, pmc_config_t config, uint8_t id, uint64_t value) { kern_return_t ret = KERN_INVALID_ARGUMENT; if(!pmc || !config) { return ret; } pmc_reference(pmc); ret = pmc->methods.config_set_value(config->object, id, value); pmc_deallocate(pmc); return ret; } /* * pmc_config_set_interrupt_threshold modifies a config object, instructing * the pmc that it should generate a call to the given pmc_interrupt_method_t * after the counter counts @threshold events. 
* * PMC Threshold handler methods will have the pmc_reservation_t that generated the interrupt * as the first argument when the interrupt handler is invoked, and the given * @refCon (which may be NULL) as the second. * * See pmc_interrupt_method_t. * * NOTE: This method is not interrupt safe. */ kern_return_t pmc_config_set_interrupt_threshold(pmc_t pmc, pmc_config_t config, uint64_t threshold, pmc_interrupt_method_t method, void *refCon) { kern_return_t ret = KERN_INVALID_ARGUMENT; if(!config || !pmc) { return ret; } assert(config); assert(pmc); pmc_reference(pmc); do { /* * We have a minor annoyance to side-step here. The driver layer expects * the config to never change once a reservation has been taken out with * it. However, in order to have the PMI method have the reservation as * the first argument (in order to allow the user-method to, for * example, write a 0 to it, and restart it), we need to create the * pmc_reservation_t before setting it up in the config object. * We overcome this by caching the method in the pmc_config_t stand-in, * and mutating the pmc_config_object_t just before returning a * reservation (in pmc_reserve() and friends, below). */ /* might as well stash this away too. */ config->interrupt_after_value = threshold; config->method = method; config->refCon = refCon; ret = KERN_SUCCESS; }while(0); pmc_deallocate(pmc); return ret; } /* * pmc_get_pmc_list returns an allocated list of pmc_t's, as well as the number * of pmc_t's returned. Callers should free this list with a call to * pmc_free_pmc_list(). * * NOTE: This method is not interrupt safe. */ kern_return_t pmc_get_pmc_list(pmc_t **pmcs, size_t *pmcCount) { pmc_t *array = NULL; pmc_t pmc = NULL; size_t count = 0UL; do { /* Copy down (to the stack) the count of perf counters */ vm_size_t size = perf_counters_count; /* Allocate that sized chunk */ array = (pmc_t *)kalloc(sizeof(pmc_t) * size); if(!array) { return KERN_RESOURCE_SHORTAGE; } /* Take the spin lock */ lck_spin_lock(&perf_counters_queue_spin); /* verify the size didn't change while we were allocating */ if(size != perf_counters_count) { /* * queue size has changed between alloc and now - go back and * make another pass. */ /* drop the lock */ lck_spin_unlock(&perf_counters_queue_spin); /* free the block */ kfree(array, sizeof(pmc_t) * size); array = NULL; } /* if we get here, and array is NULL, we try again. */ }while(!array); /* copy the bits out */ queue_iterate(perf_counters_queue, pmc, pmc_t, link) { /* copy out the pointer */ array[count++] = pmc; } lck_spin_unlock(&perf_counters_queue_spin); /* return the list and the size */ *pmcs = array; *pmcCount = count; return KERN_SUCCESS; } /* * pmc_free_pmc_list frees an array of pmc_t that has been returned from * pmc_get_pmc_list. * * NOTE: This method is not interrupt safe. 
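 *
 * A typical enumerate-and-release pattern pairing this call with
 * pmc_get_pmc_list() (purely illustrative):
 *
 *	pmc_t *pmcs = NULL;
 *	size_t count = 0;
 *	if (KERN_SUCCESS == pmc_get_pmc_list(&pmcs, &count)) {
 *		size_t i;
 *		for (i = 0; i < count; i++) {
 *			kprintf("pmc[%lu]: %s\n", i, pmc_get_name(pmcs[i]));
 *		}
 *		pmc_free_pmc_list(pmcs, count);
 *	}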
*/ void pmc_free_pmc_list(pmc_t *pmcs, size_t pmcCount) { if(pmcs && pmcCount) { COUNTER_DEBUG("pmcs: %p pmcCount: %lu\n", pmcs, pmcCount); kfree(pmcs, pmcCount * sizeof(pmc_t)); } } kern_return_t pmc_find_by_name(const char *name, pmc_t **pmcs, size_t *pmcCount) { kern_return_t ret = KERN_INVALID_ARGUMENT; if(!name || !pmcs || !pmcCount) { return ret; } pmc_t *list = NULL; size_t count = 0UL; if(KERN_SUCCESS == (ret = pmc_get_pmc_list(&list, &count))) { size_t matchCount = 0UL, ii = 0UL, swapPtr = 0UL; size_t len = strlen(name); for(ii = 0UL; ii < count; ii++) { const char *pmcName = pmc_get_name(list[ii]); if(strlen(pmcName) < len) { /* * If the pmc name is shorter than the requested match, it's no * match, as we're looking for the most specific match(es). */ continue; } if(0 == strncmp(name, pmcName, len)) { pmc_t temp = list[ii]; // move matches to the head of the array. list[ii] = list[swapPtr]; list[swapPtr] = temp; swapPtr++; // keep a count of the matches matchCount++; } } if(matchCount) { /* * If we have matches, they are all at the head of the array, so * just allocate enough space for @matchCount pmc_t's, and copy the * head of the array to the new allocation. Then free the old * allocation. */ pmc_t *result = (pmc_t *)kalloc(sizeof(pmc_t) * matchCount); if(result) { // copy the matches memcpy(result, list, sizeof(pmc_t) * matchCount); ret = KERN_SUCCESS; } pmc_free_pmc_list(list, count); if(!result) { *pmcs = NULL; *pmcCount = 0UL; return KERN_RESOURCE_SHORTAGE; } *pmcs = result; *pmcCount = matchCount; } else { /* no matches: release the list returned by pmc_get_pmc_list */ pmc_free_pmc_list(list, count); *pmcs = NULL; *pmcCount = 0UL; } } return ret; } /* * pmc_get_name returns a pointer (not copied) to the human-readable name of the * given pmc. * * NOTE: Driver authors must take care to not allocate during this method, as * this method *IS* interrupt safe. */ const char *pmc_get_name(pmc_t pmc) { assert(pmc); const char *name = pmc->methods.get_name(pmc->object); return name; } /* * pmc_get_accessible_core_list returns a pointer to an array of logical core * numbers (as well as the size of that array) that represent the local cores * (hardware threads) from which the given @pmc can be accessed directly. * * NOTE: This method is interrupt safe. */ kern_return_t pmc_get_accessible_core_list(pmc_t pmc, uint32_t **logicalCores, size_t *logicalCoreCt) { kern_return_t ret = KERN_INVALID_ARGUMENT; if(!pmc || !logicalCores || !logicalCoreCt) { return ret; } ret = pmc->methods.accessible_cores(pmc->object, logicalCores, logicalCoreCt); return ret; } static boolean_t pmc_reservation_setup_pmi(pmc_reservation_t resv, pmc_config_t config) { assert(resv); assert(resv->pmc); assert(config); assert(config->object); /* If there's no PMI to setup, return success */ if(config->interrupt_after_value && config->method) { /* set the threshold */ kern_return_t ret = resv->pmc->methods.config_set_threshold(config->object, config->interrupt_after_value); if(KERN_SUCCESS != ret) { /* * This is the most useful error message here, as this only happens * as a result of pmc_reserve*() */ COUNTER_DEBUG("Failed to set threshold for pmc %p\n", resv->pmc); return FALSE; } if(KERN_SUCCESS != resv->pmc->methods.config_set_handler(config->object, (void *)resv, &pmc_reservation_interrupt, config->refCon)) { COUNTER_DEBUG("Failed to set handler for pmc %p\n", resv->pmc); return FALSE; } } return TRUE; } /* * pmc_reserve will attempt to reserve the given @pmc, with a given * configuration object, for counting system-wide.
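 * (A @pmc suitable for reservation is typically obtained with
 * pmc_find_by_name(); an illustrative lookup, using a made-up counter name:
 *
 *	pmc_t *matches = NULL;
 *	size_t nmatches = 0;
 *	if (KERN_SUCCESS == pmc_find_by_name("Hypothetical.Counter", &matches, &nmatches)
 *	    && nmatches > 0) {
 *		pmc_t pmc = matches[0];
 *		// ... configure and reserve 'pmc' as described below ...
 *		pmc_free_pmc_list(matches, nmatches);
 *	}
 * )
 *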
This method will fail with * KERN_FAILURE if the given pmc is already reserved at any scope. * * This method consumes the given configuration object if it returns * KERN_SUCCESS. Any other return value indicates the caller * must free the config object via pmc_free_config(). * * NOTE: This method is NOT interrupt safe. */ kern_return_t pmc_reserve(pmc_t pmc, pmc_config_t config, pmc_reservation_t *reservation) { if(!pmc || !config || !reservation) { return KERN_INVALID_ARGUMENT; } pmc_reservation_t resv = reservation_alloc(); if(!resv) { return KERN_RESOURCE_SHORTAGE; } reservation_init(resv); resv->flags |= PMC_FLAG_SCOPE_SYSTEM; resv->config = config; if(KERN_SUCCESS != pmc_internal_reservation_set_pmc(resv, pmc)) { resv->config = NULL; return KERN_FAILURE; } /* enqueue reservation in proper place */ if(!pmc_internal_reservation_add(resv) || !pmc_reservation_setup_pmi(resv, config)) { /* Prevent free of config object */ resv->config = NULL; reservation_free(resv); return KERN_FAILURE; } perf_monitor_reservation_add(pmc->monitor); *reservation = resv; return KERN_SUCCESS; } /* * pmc_reserve_task will attempt to reserve the given @pmc with a given * configuration object, for counting when the given @task is running on any * logical core that can directly access the given @pmc. This method will fail * with KERN_FAILURE if the given pmc is already reserved at either system or * thread scope. * * This method consumes the given configuration object if it returns * KERN_SUCCESS. Any other return value indicates the caller * must free the config object via pmc_free_config(). * * NOTE: You can reserve the same pmc for N different tasks concurrently. * NOTE: This method is NOT interrupt safe. */ kern_return_t pmc_reserve_task(pmc_t pmc, pmc_config_t config, task_t task, pmc_reservation_t *reservation) { if(!pmc || !config || !reservation || !task) { return KERN_INVALID_ARGUMENT; } if (!(pmc->monitor->methods.flags & PERFMON_FLAG_SUPPORTS_CONTEXT_SWITCHING)) { COUNTER_DEBUG("pmc %p cannot be context switched!\n", pmc); return KERN_INVALID_ARGUMENT; } pmc_reservation_t resv = reservation_alloc(); if(!resv) { return KERN_RESOURCE_SHORTAGE; } reservation_init(resv); resv->flags |= PMC_FLAG_SCOPE_TASK; resv->task = task; resv->config = config; if(KERN_SUCCESS != pmc_internal_reservation_set_pmc(resv, pmc)) { resv->config = NULL; return KERN_FAILURE; } /* enqueue reservation in proper place */ if(!pmc_internal_reservation_add(resv) || !pmc_reservation_setup_pmi(resv, config)) { /* Prevent free of config object */ resv->config = NULL; reservation_free(resv); return KERN_FAILURE; } perf_monitor_reservation_add(pmc->monitor); *reservation = resv; return KERN_SUCCESS; } /* * pmc_reserve_thread will attempt to reserve the given @pmc with a given * configuration object, for counting when the given @thread is running on any * logical core that can directly access the given @pmc. This method will fail * with KERN_FAILURE if the given pmc is already reserved at either system or * task scope. * * This method consumes the given configuration object if it returns * KERN_SUCCESS. Any other return value indicates the caller * must free the config object via pmc_free_config(). * * NOTE: You can reserve the same pmc for N different threads concurrently. * NOTE: This method is NOT interrupt safe. 
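 *
 * An end-to-end sketch tying the calls above together (system-wide variant;
 * the event id, value, threshold, and handler name are all hypothetical, and
 * real callers should check every return value):
 *
 *	pmc_config_t config = NULL;
 *	pmc_reservation_t resv = NULL;
 *	if (KERN_SUCCESS == pmc_create_config(pmc, &config)) {
 *		(void)pmc_config_set_value(pmc, config, 0, 0x1ULL);
 *		(void)pmc_config_set_interrupt_threshold(pmc, config, 100000ULL,
 *			my_pmi_handler, NULL);	// hypothetical handler; see pmc_interrupt_method_t
 *		if (KERN_SUCCESS == pmc_reserve(pmc, config, &resv)) {
 *			(void)pmc_reservation_start(resv);	// config is now owned by the reservation
 *		} else {
 *			pmc_free_config(pmc, config);	// reserve failed: caller still owns config
 *		}
 *	}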
*/ kern_return_t pmc_reserve_thread(pmc_t pmc, pmc_config_t config, thread_t thread, pmc_reservation_t *reservation) { if(!pmc || !config || !reservation || !thread) { return KERN_INVALID_ARGUMENT; } if (!(pmc->monitor->methods.flags & PERFMON_FLAG_SUPPORTS_CONTEXT_SWITCHING)) { COUNTER_DEBUG("pmc %p cannot be context switched!\n", pmc); return KERN_INVALID_ARGUMENT; } pmc_reservation_t resv = reservation_alloc(); if(!resv) { return KERN_RESOURCE_SHORTAGE; } reservation_init(resv); resv->flags |= PMC_FLAG_SCOPE_THREAD; resv->thread = thread; resv->config = config; if(KERN_SUCCESS != pmc_internal_reservation_set_pmc(resv, pmc)) { resv->config = NULL; return KERN_FAILURE; } /* enqueue reservation in proper place */ if(!pmc_internal_reservation_add(resv) || !pmc_reservation_setup_pmi(resv, config)) { /* Prevent free of config object */ resv->config = NULL; reservation_free(resv); return KERN_FAILURE; } perf_monitor_reservation_add(pmc->monitor); *reservation = resv; return KERN_SUCCESS; } /* * pmc_reservation_start instructs the given reservation to start counting as * soon as possible. * * NOTE: This method is interrupt safe. */ kern_return_t pmc_reservation_start(pmc_reservation_t reservation) { pmc_state_t newState; if(!reservation) { return KERN_INVALID_ARGUMENT; } /* Move the state machine */ if (PMC_STATE_INVALID == (newState = pmc_internal_reservation_move_for_event(reservation, PMC_STATE_EVENT_START, NULL))) { return KERN_FAILURE; } /* If we are currently in an interrupt, don't bother to broadcast since it won't do anything now and the interrupt will * broadcast right before it leaves */ if (PMC_STATE_STATE(newState) != PMC_STATE_STATE_INTERRUPT) { /* A valid state move has been made, but won't be picked up until a context switch occurs. To cause matching * contexts that are currently running to update, we do an inter-processor message to run pmc_internal_reservation_start_cpu * on every cpu that can access the PMC. */ pmc_internal_reservation_broadcast(reservation, pmc_internal_reservation_start_cpu); } return KERN_SUCCESS; } /* * pmc_reservation_stop instructs the given reservation to stop counting as * soon as possible. When this method returns, the pmc will be marked as stopping * and subsequent calls to pmc_reservation_start will succeed. This does not mean * that the pmc hardware has _actually_ stopped running. Assuming no other changes * to the reservation state, the pmc hardware _will_ stop shortly. * */ kern_return_t pmc_reservation_stop(pmc_reservation_t reservation) { pmc_state_t newState; if(!reservation) { return KERN_INVALID_ARGUMENT; } /* Move the state machine */ if (PMC_STATE_INVALID == (newState = pmc_internal_reservation_move_for_event(reservation, PMC_STATE_EVENT_STOP, NULL))) { return KERN_FAILURE; } /* If we are currently in an interrupt, don't bother to broadcast since it won't do anything now and the interrupt will * broadcast right before it leaves. Similarly, if we just moved directly to STOP, don't bother broadcasting. */ if (PMC_STATE_STATE(newState) != PMC_STATE_STATE_INTERRUPT && PMC_STATE_STATE(newState) != PMC_STATE_STATE_STOP) { /* A valid state move has been made, but won't be picked up until a context switch occurs. To cause matching * contexts that are currently running to update, we do an inter-processor message to run pmc_internal_reservation_stop_cpu * on every cpu that can access the PMC. 
*/ pmc_internal_reservation_broadcast(reservation, pmc_internal_reservation_stop_cpu); } return KERN_SUCCESS; } /* * pmc_reservation_read will read the event count associated with a reservation. * If the caller is current executing in a context that both a) matches the * reservation's context, and b) can access the reservation's pmc directly, the * value will be read from hardware. Otherwise, this returns the reservation's * stored value. * * NOTE: This method is interrupt safe. * NOTE: When not on the interrupt stack, this method may block. */ kern_return_t pmc_reservation_read(pmc_reservation_t reservation, uint64_t *value) { kern_return_t ret = KERN_FAILURE; uint64_t timeout; uint32_t spins; if(!reservation || !value) { return KERN_INVALID_ARGUMENT; } nanoseconds_to_absolutetime(PMC_SPIN_TIMEOUT_US * 1000, &timeout); timeout += mach_absolute_time(); spins = 0; do { uint32_t state = reservation->state; if((PMC_STATE_STATE(state) == PMC_STATE_STATE_RUN)) { /* Attempt read from hardware via drivers. */ assert(reservation->pmc); ret = reservation->pmc->methods.get_count(reservation->pmc->object, value); break; } else if ((PMC_STATE_STATE(state) == PMC_STATE_STATE_STORE) || (PMC_STATE_STATE(state) == PMC_STATE_STATE_LOAD)) { /* Spin */ /* Assert if this takes longer than PMC_SPIN_TIMEOUT_US */ if (++spins > PMC_SPIN_THRESHOLD) { if (mach_absolute_time() > timeout) { pmc_spin_timeout_count++; assert(0); } } cpu_pause(); } else { break; } } while (1); /* If the direct hardware read failed (for whatever reason) */ if(KERN_SUCCESS != ret) { /* Read stored value */ *value = reservation->value; } return KERN_SUCCESS; } /* * pmc_reservation_write will write the event count associated with a reservation. * If the caller is current executing in a context that both a) matches the * reservation's context, and b) can access the reservation's pmc directly, the * value will be written to hardware. Otherwise, this writes the reservation's * stored value. * * NOTE: This method is interrupt safe. * NOTE: When not on the interrupt stack, this method may block. */ kern_return_t pmc_reservation_write(pmc_reservation_t reservation, uint64_t value) { kern_return_t ret = KERN_FAILURE; uint64_t timeout; uint32_t spins; if(!reservation) { return KERN_INVALID_ARGUMENT; } nanoseconds_to_absolutetime(PMC_SPIN_TIMEOUT_US * 1000, &timeout); timeout += mach_absolute_time(); spins = 0; do { uint32_t state = reservation->state; if((PMC_STATE_STATE(state) == PMC_STATE_STATE_RUN)) { /* Write to hardware via drivers. */ assert(reservation->pmc); ret = reservation->pmc->methods.set_count(reservation->pmc->object, value); break; } else if ((PMC_STATE_STATE(state) == PMC_STATE_STATE_STORE) || (PMC_STATE_STATE(state) == PMC_STATE_STATE_LOAD)) { /* Spin */ /* Assert if this takes longer than PMC_SPIN_TIMEOUT_US */ if (++spins > PMC_SPIN_THRESHOLD) { if (mach_absolute_time() > timeout) { pmc_spin_timeout_count++; assert(0); } } cpu_pause(); } else { break; } } while (1); if(KERN_SUCCESS != ret) { /* Write stored value */ reservation->value = value; } return KERN_SUCCESS; } /* * pmc_reservation_free releases a reservation and all associated resources. * * NOTE: This method is NOT interrupt safe. 
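 *
 * Callers often take a final sample before releasing a reservation; an
 * illustrative read/reset sketch using the calls above:
 *
 *	uint64_t count = 0;
 *	if (KERN_SUCCESS == pmc_reservation_read(resv, &count)) {
 *		// consume 'count'; optionally restart the count from zero
 *		(void)pmc_reservation_write(resv, 0ULL);
 *	}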
*/ kern_return_t pmc_reservation_free(pmc_reservation_t reservation) { pmc_state_t newState; if(!reservation) { return KERN_INVALID_ARGUMENT; } perf_monitor_reservation_remove(reservation->pmc->monitor); /* Move the state machine */ if (PMC_STATE_INVALID == (newState = pmc_internal_reservation_move_for_event(reservation, PMC_STATE_EVENT_FREE, NULL))) { return KERN_FAILURE; } /* If we didn't move directly to DEALLOC, help things along */ if (PMC_STATE_STATE(newState) != PMC_STATE_STATE_DEALLOC) { /* A valid state move has been made, but won't be picked up until a context switch occurs. To cause matching * contexts that are currently running to update, we do an inter-processor message to run pmc_internal_reservation_stop_cpu * on every cpu that can access the PMC. */ pmc_internal_reservation_broadcast(reservation, pmc_internal_reservation_stop_cpu); } /* Block until the reservation hits the state */ while (!(PMC_STATE_STATE(reservation->state) == PMC_STATE_STATE_DEALLOC && PMC_STATE_CONTEXT_COUNT(reservation->state) == 0 && PMC_STATE_FLAGS(reservation->state) == 0)) { assert_wait((event_t)reservation, THREAD_UNINT); thread_block(THREAD_CONTINUE_NULL); } /* remove from queues */ pmc_internal_reservation_remove(reservation); /* free reservation */ reservation_free(reservation); return KERN_SUCCESS; } /* * pmc_idle notifies eligible monitors of impending per-CPU idle, and can be used to save state. */ boolean_t pmc_idle(void) { perf_monitor_t monitor = NULL; queue_head_t *cpu_queue; lck_spin_lock(&perf_monitor_queue_spin); if (cpu_monitor_queues) { cpu_queue = cpu_monitor_queues[cpu_number()]; queue_iterate(cpu_queue, monitor, perf_monitor_t, cpu_link) { perf_monitor_methods_t *methods = &(monitor->methods); if ((methods->flags & PERFMON_FLAG_ALWAYS_ACTIVE) || (monitor->reservedCounters)) { methods->on_idle(monitor->object); } } } lck_spin_unlock(&perf_monitor_queue_spin); return TRUE; } /* * pmc_idle_exit notifies eligible monitors of wake from idle; it can be used to restore state. */ boolean_t pmc_idle_exit(void) { perf_monitor_t monitor = NULL; queue_head_t *cpu_queue; lck_spin_lock(&perf_monitor_queue_spin); if (cpu_monitor_queues) { cpu_queue = cpu_monitor_queues[cpu_number()]; queue_iterate(cpu_queue, monitor, perf_monitor_t, cpu_link) { perf_monitor_methods_t *methods = &(monitor->methods); if ((methods->flags & PERFMON_FLAG_ALWAYS_ACTIVE) || (monitor->reservedCounters)) { methods->on_idle_exit(monitor->object); } } } lck_spin_unlock(&perf_monitor_queue_spin); return TRUE; } /* * pmc_context_switch performs all context switching necessary to save all pmc * state associated with @oldThread (and the task to which @oldThread belongs), * as well as to restore all pmc state associated with @newThread (and the task * to which @newThread belongs). * * NOTE: This method IS interrupt safe. 
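 *
 * (For completeness, the teardown counterpart to the reservation sketches in
 * the comments above; illustrative only. pmc_reservation_free() blocks until
 * the counter reaches the DEALLOC state, so it must not be called from
 * interrupt context:
 *
 *	(void)pmc_reservation_stop(resv);	// optional; free also stops the counter
 *	(void)pmc_reservation_free(resv);	// releases resv and its config
 * )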
*/ boolean_t pmc_context_switch(thread_t oldThread, thread_t newThread) { pmc_reservation_t resv = NULL; uint32_t cpuNum = cpu_number(); lck_spin_lock(&reservations_spin); /* Save pmc states */ if (thread_reservation_count) { queue_iterate(thread_reservations, resv, pmc_reservation_t, link) { if ((oldThread == resv->thread) && pmc_accessible_from_core(resv->pmc, cpuNum)) { (void)pmc_internal_reservation_context_out(resv); } } } if (task_reservation_count) { queue_iterate(task_reservations, resv, pmc_reservation_t, link) { if ((resv->task == oldThread->task) && pmc_accessible_from_core(resv->pmc, cpuNum)) { (void)pmc_internal_reservation_context_out(resv); } } } /* Restore */ if (thread_reservation_count) { queue_iterate(thread_reservations, resv, pmc_reservation_t, link) { if ((resv->thread == newThread) && pmc_accessible_from_core(resv->pmc, cpuNum)) { (void)pmc_internal_reservation_context_in(resv); } } } if (task_reservation_count) { queue_iterate(task_reservations, resv, pmc_reservation_t, link) { if ((resv->task == newThread->task) && pmc_accessible_from_core(resv->pmc, cpuNum)) { (void)pmc_internal_reservation_context_in(resv); } } } lck_spin_unlock(&reservations_spin); return TRUE; } #else /* !CONFIG_COUNTERS */ #if 0 #pragma mark - #pragma mark Dummy functions #endif /* * In the case that someone has chosen not to include the PMC KPI in some * configuration, we still have exports for kexts, so we'll need to define stub * methods that return failures. */ kern_return_t perf_monitor_register(perf_monitor_object_t monitor __unused, perf_monitor_methods_t *methods __unused) { return KERN_FAILURE; } kern_return_t perf_monitor_unregister(perf_monitor_object_t monitor __unused) { return KERN_FAILURE; } kern_return_t pmc_register(perf_monitor_object_t monitor __unused, pmc_object_t pmc __unused, pmc_methods_t *methods __unused, void *object __unused) { return KERN_FAILURE; } kern_return_t pmc_unregister(perf_monitor_object_t monitor __unused, pmc_object_t pmc __unused) { return KERN_FAILURE; } kern_return_t pmc_create_config(pmc_t pmc __unused, pmc_config_t *config __unused) { return KERN_FAILURE; } void pmc_free_config(pmc_t pmc __unused, pmc_config_t config __unused) { } kern_return_t pmc_config_set_value(pmc_t pmc __unused, pmc_config_t config __unused, uint8_t id __unused, uint64_t value __unused) { return KERN_FAILURE; } kern_return_t pmc_config_set_interrupt_threshold(pmc_t pmc __unused, pmc_config_t config __unused, uint64_t threshold __unused, pmc_interrupt_method_t method __unused, void *refCon __unused) { return KERN_FAILURE; } kern_return_t pmc_get_pmc_list(pmc_t **pmcs __unused, size_t *pmcCount __unused) { return KERN_FAILURE; } void pmc_free_pmc_list(pmc_t *pmcs __unused, size_t pmcCount __unused) { } kern_return_t pmc_find_by_name(const char *name __unused, pmc_t **pmcs __unused, size_t *pmcCount __unused) { return KERN_FAILURE; } const char *pmc_get_name(pmc_t pmc __unused) { return ""; } kern_return_t pmc_get_accessible_core_list(pmc_t pmc __unused, uint32_t **logicalCores __unused, size_t *logicalCoreCt __unused) { return KERN_FAILURE; } kern_return_t pmc_reserve(pmc_t pmc __unused, pmc_config_t config __unused, pmc_reservation_t *reservation __unused) { return KERN_FAILURE; } kern_return_t pmc_reserve_task(pmc_t pmc __unused, pmc_config_t config __unused, task_t task __unused, pmc_reservation_t *reservation __unused) { return KERN_FAILURE; } kern_return_t pmc_reserve_thread(pmc_t pmc __unused, pmc_config_t config __unused, thread_t thread __unused, pmc_reservation_t 
*reservation __unused) { return KERN_FAILURE; } kern_return_t pmc_reservation_start(pmc_reservation_t reservation __unused) { return KERN_FAILURE; } kern_return_t pmc_reservation_stop(pmc_reservation_t reservation __unused) { return KERN_FAILURE; } kern_return_t pmc_reservation_read(pmc_reservation_t reservation __unused, uint64_t *value __unused) { return KERN_FAILURE; } kern_return_t pmc_reservation_write(pmc_reservation_t reservation __unused, uint64_t value __unused) { return KERN_FAILURE; } kern_return_t pmc_reservation_free(pmc_reservation_t reservation __unused) { return KERN_FAILURE; } #endif /* !CONFIG_COUNTERS */