/*-
 * Copyright (c) 2003-2008 Joseph Koshy
 * Copyright (c) 2007 The FreeBSD Foundation
 * All rights reserved.
 *
 * Portions of this software were developed by A. Joseph Koshy under
 * sponsorship from the FreeBSD Foundation and Google, Inc.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHORS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */

#include <sys/cdefs.h>
__FBSDID("$FreeBSD: stable/11/sys/kern/kern_pmc.c 315285 2017-03-14 20:43:04Z mjg $");

#include "opt_hwpmc_hooks.h"

#include <sys/types.h>
#include <sys/ctype.h>
#include <sys/param.h>
#include <sys/malloc.h>
#include <sys/kernel.h>
#include <sys/lock.h>
#include <sys/mutex.h>
#include <sys/pmc.h>
#include <sys/pmckern.h>
#include <sys/smp.h>
#include <sys/sysctl.h>
#include <sys/systm.h>

/*
 * When the kernel is built with HWPMC_HOOKS, advertise the feature and
 * report the real PMC interface version; otherwise report version 0 so
 * that userland (and hwpmc.ko) can detect the lack of kernel support.
 */
#ifdef	HWPMC_HOOKS
FEATURE(hwpmc_hooks, "Kernel support for HW PMC");
#define	PMC_KERNEL_VERSION	PMC_VERSION
#else
#define	PMC_KERNEL_VERSION	0
#endif

MALLOC_DECLARE(M_PMCHOOKS);
MALLOC_DEFINE(M_PMCHOOKS, "pmchooks", "Memory space for PMC hooks");

/* Compile-time PMC interface version exported to hwpmc(4)/userland. */
const int pmc_kernel_version = PMC_KERNEL_VERSION;

/* Hook variable.  Non-NULL iff the hwpmc(4) module is loaded (see below). */
int __read_mostly (*pmc_hook)(struct thread *td, int function, void *arg) = NULL;

/* Interrupt handler */
int __read_mostly (*pmc_intr)(int cpu, struct trapframe *tf) = NULL;

/* Bitmask of CPUs requiring servicing at hardclock time */
volatile cpuset_t pmc_cpumask;

/*
 * A global count of SS mode PMCs.  When non-zero, this means that
 * we have processes that are sampling the system as a whole.
 */
volatile int pmc_ss_count;

/*
 * Since PMC(4) may not be loaded in the current kernel, the
 * convention followed is that a non-NULL value of 'pmc_hook' implies
 * the presence of this kernel module.
 *
 * This requires us to protect 'pmc_hook' with a
 * shared (sx) lock -- thus making the process of calling into PMC(4)
 * somewhat more expensive than a simple 'if' check and indirect call.
 */
struct sx pmc_sx;

/*
 * PMC Soft per cpu trapframe.
 */
struct trapframe pmc_tf[MAXCPU];

/*
 * PMC Soft use a global table to store registered events.
 */

SYSCTL_NODE(_kern, OID_AUTO, hwpmc, CTLFLAG_RW, 0, "HWPMC parameters");

/* Size of the soft-event table; settable only as a boot-time tunable. */
static int pmc_softevents = 16;
SYSCTL_INT(_kern_hwpmc, OID_AUTO, softevents, CTLFLAG_RDTUN,
    &pmc_softevents, 0, "maximum number of soft events");

/* pmc_softs_mtx (a spin mutex) protects the two fields below. */
struct mtx pmc_softs_mtx;
int pmc_softs_count;		/* high-water mark of used table slots */
struct pmc_soft **pmc_softs;	/* table of registered soft events */

MTX_SYSINIT(pmc_soft_mtx, &pmc_softs_mtx, "pmc-softs", MTX_SPIN);

/*
 * Initialize the hook sx lock.  SX_NOWITNESS because the lock is taken
 * from many unrelated code paths via the PMC hook macros.
 */
static void
pmc_init_sx(void)
{
	sx_init_flags(&pmc_sx, "pmc-sx", SX_NOWITNESS);
}

SYSINIT(pmcsx, SI_SUB_LOCK, SI_ORDER_MIDDLE, pmc_init_sx, NULL);

/*
 * Helper functions.
 */

/*
 * A note on the CPU numbering scheme used by the hwpmc(4) driver.
 *
 * CPUs are denoted using numbers in the range 0..[pmc_cpu_max()-1].
 * CPUs could be numbered "sparsely" in this range; the predicate
 * `pmc_cpu_is_present()' is used to test whether a given CPU is
 * physically present.
 *
 * Further, a CPU that is physically present may be administratively
 * disabled or otherwise unavailable for use by hwpmc(4).  The
 * `pmc_cpu_is_active()' predicate tests for CPU usability.  An
 * "active" CPU participates in thread scheduling and can field
 * interrupts raised by PMC hardware.
 *
 * On systems with hyperthreaded CPUs, multiple logical CPUs may share
 * PMC hardware resources.  For such processors one logical CPU is
 * denoted as the primary owner of the in-CPU PMC resources.  The
 * pmc_cpu_is_primary() predicate is used to distinguish this primary
 * CPU from the others.
 */

/*
 * Return non-zero if CPU 'cpu' is present and not halted (i.e. it can
 * schedule threads and field PMC interrupts).
 */
int
pmc_cpu_is_active(int cpu)
{
#ifdef	SMP
	return (pmc_cpu_is_present(cpu) &&
	    !CPU_ISSET(cpu, &hlt_cpus_mask));
#else
	return (1);
#endif
}

/* Deprecated.  Inverse of pmc_cpu_is_active(); kept for old callers. */
int
pmc_cpu_is_disabled(int cpu)
{
	return (!pmc_cpu_is_active(cpu));
}

/* Return non-zero if CPU 'cpu' is physically present in the system. */
int
pmc_cpu_is_present(int cpu)
{
#ifdef	SMP
	return (!CPU_ABSENT(cpu));
#else
	return (1);
#endif
}

/*
 * Return non-zero if CPU 'cpu' is the primary owner of its package's
 * PMC hardware (i.e. it is not a secondary hyperthread sibling).
 */
int
pmc_cpu_is_primary(int cpu)
{
#ifdef	SMP
	return (!CPU_ISSET(cpu, &logical_cpus_mask));
#else
	return (1);
#endif
}


/*
 * Return the maximum CPU number supported by the system.  The return
 * value is used for scaling internal data structures and for runtime
 * checks.
 */
unsigned int
pmc_cpu_max(void)
{
#ifdef	SMP
	return (mp_maxid+1);
#else
	return (1);
#endif
}

#ifdef	INVARIANTS

/*
 * Return the count of CPUs in the `active' state in the system.
 */
int
pmc_cpu_max_active(void)
{
#ifdef	SMP
	/*
	 * When support for CPU hot-plugging is added to the kernel,
	 * this function would change to return the current number
	 * of "active" CPUs.
	 */
	return (mp_ncpus);
#else
	return (1);
#endif
}

#endif

/*
 * Cleanup event name:
 * - remove duplicate '_'
 * - all uppercase
 *
 * Leading underscores are stripped entirely; interior runs of '_' are
 * collapsed to one and a trailing '_' is dropped.  Rewrites 'name' in
 * place (the result is never longer than the input).
 */
static void
pmc_soft_namecleanup(char *name)
{
	char *p, *q;

	p = q = name;

	/* Skip any leading underscores. */
	for ( ; *p == '_' ; p++)
		;
	for ( ; *p ; p++) {
		/* Drop a '_' that is followed by another '_' or ends the name. */
		if (*p == '_' && (*(p + 1) == '_' || *(p + 1) == '\0'))
			continue;
		else
			*q++ = toupper(*p);
	}
	*q = '\0';
}

/*
 * Register a soft event: normalize its name, assign it a PMC_EV_SOFT_*
 * event code and enter it into the global pmc_softs[] table.  On table
 * overflow the event is left with code 0 (invalid) and a rate-limited
 * warning is printed.
 */
void
pmc_soft_ev_register(struct pmc_soft *ps)
{
	static int warned = 0;
	int n;

	ps->ps_running  = 0;
	ps->ps_ev.pm_ev_code = 0; /* invalid */
	pmc_soft_namecleanup(ps->ps_ev.pm_ev_name);

	mtx_lock_spin(&pmc_softs_mtx);

	if (pmc_softs_count >= pmc_softevents) {
		/*
		 * XXX Reusing events can enter a race condition where
		 * new allocated event will be used as an old one.
		 */
		/* Table is full: scan for a slot freed by deregistration. */
		for (n = 0; n < pmc_softevents; n++)
			if (pmc_softs[n] == NULL)
				break;
		if (n == pmc_softevents) {
			mtx_unlock_spin(&pmc_softs_mtx);
			if (!warned) {
				printf("hwpmc: too many soft events, "
				    "increase kern.hwpmc.softevents tunable\n");
				warned = 1;
			}
			return;
		}

		ps->ps_ev.pm_ev_code = PMC_EV_SOFT_FIRST + n;
		pmc_softs[n] = ps;
	} else {
		/* Fresh slot at the current high-water mark. */
		ps->ps_ev.pm_ev_code = PMC_EV_SOFT_FIRST + pmc_softs_count;
		pmc_softs[pmc_softs_count++] = ps;
	}

	mtx_unlock_spin(&pmc_softs_mtx);
}

/*
 * Remove a previously registered soft event from the global table.
 * Safe to call for an event whose registration failed (pm_ev_code 0).
 * The freed slot may later be reused by pmc_soft_ev_register().
 */
void
pmc_soft_ev_deregister(struct pmc_soft *ps)
{

	KASSERT(ps != NULL, ("pmc_soft_deregister: called with NULL"));

	mtx_lock_spin(&pmc_softs_mtx);

	if (ps->ps_ev.pm_ev_code != 0 &&
	    (ps->ps_ev.pm_ev_code - PMC_EV_SOFT_FIRST) < pmc_softevents) {
		KASSERT(ps->ps_ev.pm_ev_code >= PMC_EV_SOFT_FIRST &&
		    ps->ps_ev.pm_ev_code <= PMC_EV_SOFT_LAST,
		    ("pmc_soft_deregister: invalid event value"));
		pmc_softs[ps->ps_ev.pm_ev_code - PMC_EV_SOFT_FIRST] = NULL;
	}

	mtx_unlock_spin(&pmc_softs_mtx);
}

/*
 * Look up the soft event registered for event code 'ev'.  On success
 * the pmc_softs spin mutex is returned HELD; the caller must release
 * it with pmc_soft_ev_release().  Returns NULL (with the mutex
 * released) if no such event is registered.
 *
 * NOTE(review): the range check is a signed comparison, so a non-zero
 * 'ev' below PMC_EV_SOFT_FIRST yields a negative index that only the
 * KASSERT (INVARIANTS kernels) catches -- confirm callers never pass
 * such values.
 */
struct pmc_soft *
pmc_soft_ev_acquire(enum pmc_event ev)
{
	struct pmc_soft *ps;

	if (ev == 0 || (ev - PMC_EV_SOFT_FIRST) >= pmc_softevents)
		return NULL;

	KASSERT(ev >= PMC_EV_SOFT_FIRST &&
	    ev <= PMC_EV_SOFT_LAST,
	    ("event out of range"));

	mtx_lock_spin(&pmc_softs_mtx);

	ps = pmc_softs[ev - PMC_EV_SOFT_FIRST];
	if (ps == NULL)
		mtx_unlock_spin(&pmc_softs_mtx);

	return ps;
}

/*
 * Release the spin mutex acquired by a successful pmc_soft_ev_acquire().
 * Must only be called after acquire returned a non-NULL event.
 */
void
pmc_soft_ev_release(struct pmc_soft *ps)
{

	mtx_unlock_spin(&pmc_softs_mtx);
}

/*
 * Initialise hwpmc.
 *
 * Clamps the "softevents" tunable into (0, PMC_EV_DYN_COUNT] and
 * allocates the zeroed soft-event table.
 *
 * NOTE(review): the M_NOWAIT allocation is checked only by KASSERT, so
 * a non-INVARIANTS kernel would dereference NULL later on allocation
 * failure -- presumably acceptable this early in boot; verify.
 */
static void
init_hwpmc(void *dummy __unused)
{
	if (pmc_softevents <= 0 ||
	    pmc_softevents > PMC_EV_DYN_COUNT) {
		(void) printf("hwpmc: tunable \"softevents\"=%d out of "
		    "range.\n", pmc_softevents);
		pmc_softevents = PMC_EV_DYN_COUNT;
	}
	pmc_softs = malloc(pmc_softevents * sizeof(struct pmc_soft *), M_PMCHOOKS, M_NOWAIT|M_ZERO);
	KASSERT(pmc_softs != NULL, ("cannot allocate soft events table"));
}

SYSINIT(hwpmc, SI_SUB_KDTRACE, SI_ORDER_FIRST, init_hwpmc, NULL);