1179237Sjb/* 2179237Sjb * CDDL HEADER START 3179237Sjb * 4179237Sjb * The contents of this file are subject to the terms of the 5179237Sjb * Common Development and Distribution License (the "License"). 6179237Sjb * You may not use this file except in compliance with the License. 7179237Sjb * 8179237Sjb * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9179237Sjb * or http://www.opensolaris.org/os/licensing. 10179237Sjb * See the License for the specific language governing permissions 11179237Sjb * and limitations under the License. 12179237Sjb * 13179237Sjb * When distributing Covered Code, include this CDDL HEADER in each 14179237Sjb * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15179237Sjb * If applicable, add the following below this CDDL HEADER, with the 16179237Sjb * fields enclosed by brackets "[]" replaced with your own identifying 17179237Sjb * information: Portions Copyright [yyyy] [name of copyright owner] 18179237Sjb * 19179237Sjb * CDDL HEADER END 20179237Sjb * 21179237Sjb * Portions Copyright 2006-2008 John Birrell jb@freebsd.org 22179237Sjb * 23179237Sjb * $FreeBSD: stable/11/sys/cddl/dev/profile/profile.c 324282 2017-10-04 15:47:16Z markj $ 24179237Sjb * 25179237Sjb */ 26179237Sjb 27179237Sjb/* 28179237Sjb * Copyright 2006 Sun Microsystems, Inc. All rights reserved. 29179237Sjb * Use is subject to license terms. 30179237Sjb */ 31179237Sjb 32179237Sjb#include <sys/cdefs.h> 33179237Sjb#include <sys/param.h> 34179237Sjb#include <sys/systm.h> 35179237Sjb#include <sys/conf.h> 36179237Sjb#include <sys/cpuvar.h> 37179237Sjb#include <sys/fcntl.h> 38179237Sjb#include <sys/filio.h> 39179237Sjb#include <sys/kdb.h> 40179237Sjb#include <sys/kernel.h> 41179237Sjb#include <sys/kmem.h> 42179237Sjb#include <sys/kthread.h> 43179237Sjb#include <sys/limits.h> 44179237Sjb#include <sys/linker.h> 45179237Sjb#include <sys/lock.h> 46179237Sjb#include <sys/malloc.h> 47179237Sjb#include <sys/module.h> 48179237Sjb#include <sys/mutex.h> 49179237Sjb#include <sys/poll.h> 50179237Sjb#include <sys/proc.h> 51179237Sjb#include <sys/selinfo.h> 52179237Sjb#include <sys/smp.h> 53291855Sandrew#include <sys/sysctl.h> 54179237Sjb#include <sys/uio.h> 55179237Sjb#include <sys/unistd.h> 56275576Savg#include <machine/cpu.h> 57179237Sjb#include <machine/stdarg.h> 58179237Sjb 59179237Sjb#include <sys/dtrace.h> 60179237Sjb#include <sys/dtrace_bsd.h> 61179237Sjb 62179237Sjb#define PROF_NAMELEN 15 63179237Sjb 64179237Sjb#define PROF_PROFILE 0 65179237Sjb#define PROF_TICK 1 66179237Sjb#define PROF_PREFIX_PROFILE "profile-" 67179237Sjb#define PROF_PREFIX_TICK "tick-" 68179237Sjb 69179237Sjb/* 70179237Sjb * Regardless of platform, there are five artificial frames in the case of the 71179237Sjb * profile provider: 72179237Sjb * 73179237Sjb * profile_fire 74179237Sjb * cyclic_expire 75179237Sjb * cyclic_fire 76179237Sjb * [ cbe ] 77179237Sjb * [ locore ] 78179237Sjb * 79179237Sjb * On amd64, there are two frames associated with locore: one in locore, and 80179237Sjb * another in common interrupt dispatch code. (i386 has not been modified to 81179237Sjb * use this common layer.) Further, on i386, the interrupted instruction 82179237Sjb * appears as its own stack frame. All of this means that we need to add one 83179237Sjb * frame for amd64, and then take one away for both amd64 and i386. 84179237Sjb * 85179237Sjb * On SPARC, the picture is further complicated because the compiler 86179237Sjb * optimizes away tail-calls -- so the following frames are optimized away: 87179237Sjb * 88179237Sjb * profile_fire 89179237Sjb * cyclic_expire 90179237Sjb * 91179237Sjb * This gives three frames. However, on DEBUG kernels, the cyclic_expire 92179237Sjb * frame cannot be tail-call eliminated, yielding four frames in this case. 93179237Sjb * 94179237Sjb * All of the above constraints lead to the mess below. Yes, the profile 95179237Sjb * provider should ideally figure this out on-the-fly by hiting one of its own 96179237Sjb * probes and then walking its own stack trace. This is complicated, however, 97179237Sjb * and the static definition doesn't seem to be overly brittle. Still, we 98179237Sjb * allow for a manual override in case we get it completely wrong. 99179237Sjb */ 100179237Sjb#ifdef __amd64 101275576Savg#define PROF_ARTIFICIAL_FRAMES 10 102179237Sjb#else 103179237Sjb#ifdef __i386 104179237Sjb#define PROF_ARTIFICIAL_FRAMES 6 105179237Sjb#else 106179237Sjb#ifdef __sparc 107179237Sjb#ifdef DEBUG 108179237Sjb#define PROF_ARTIFICIAL_FRAMES 4 109179237Sjb#else 110179237Sjb#define PROF_ARTIFICIAL_FRAMES 3 111179237Sjb#endif 112179237Sjb#endif 113179237Sjb#endif 114179237Sjb#endif 115179237Sjb 116233409Sgonzo#ifdef __mips 117233409Sgonzo/* 118233409Sgonzo * This value is bogus just to make module compilable on mips 119233409Sgonzo */ 120233409Sgonzo#define PROF_ARTIFICIAL_FRAMES 3 121233409Sgonzo#endif 122233409Sgonzo 123242723Sjhibbits#ifdef __powerpc__ 124242723Sjhibbits/* 125242723Sjhibbits * This value is bogus just to make module compilable on powerpc 126242723Sjhibbits */ 127242723Sjhibbits#define PROF_ARTIFICIAL_FRAMES 3 128242723Sjhibbits#endif 129242723Sjhibbits 130275576Savgstruct profile_probe_percpu; 131275576Savg 132278529Sgnn#ifdef __mips 133278529Sgnn/* bogus */ 134278529Sgnn#define PROF_ARTIFICIAL_FRAMES 3 135278529Sgnn#endif 136278529Sgnn 137278529Sgnn#ifdef __arm__ 138291855Sandrew#define PROF_ARTIFICIAL_FRAMES 3 139278529Sgnn#endif 140278529Sgnn 141285009Sbr#ifdef __aarch64__ 142285009Sbr/* TODO: verify */ 143285009Sbr#define PROF_ARTIFICIAL_FRAMES 10 144285009Sbr#endif 145285009Sbr 146300618Sbr#ifdef __riscv__ 147300618Sbr/* TODO: verify */ 148300618Sbr#define PROF_ARTIFICIAL_FRAMES 10 149300618Sbr#endif 150300618Sbr 151179237Sjbtypedef struct profile_probe { 152179237Sjb char prof_name[PROF_NAMELEN]; 153179237Sjb dtrace_id_t prof_id; 154179237Sjb int prof_kind; 155275576Savg#ifdef illumos 156179237Sjb hrtime_t prof_interval; 157179237Sjb cyclic_id_t prof_cyclic; 158275576Savg#else 159275576Savg sbintime_t prof_interval; 160275576Savg struct callout prof_cyclic; 161275576Savg sbintime_t prof_expected; 162275576Savg struct profile_probe_percpu **prof_pcpus; 163275576Savg#endif 164179237Sjb} profile_probe_t; 165179237Sjb 166179237Sjbtypedef struct profile_probe_percpu { 167179237Sjb hrtime_t profc_expected; 168179237Sjb hrtime_t profc_interval; 169179237Sjb profile_probe_t *profc_probe; 170275576Savg#ifdef __FreeBSD__ 171275576Savg struct callout profc_cyclic; 172275576Savg#endif 173179237Sjb} profile_probe_percpu_t; 174179237Sjb 175179237Sjbstatic d_open_t profile_open; 176179237Sjbstatic int profile_unload(void); 177179237Sjbstatic void profile_create(hrtime_t, char *, int); 178179237Sjbstatic void profile_destroy(void *, dtrace_id_t, void *); 179179237Sjbstatic void profile_enable(void *, dtrace_id_t, void *); 180179237Sjbstatic void profile_disable(void *, dtrace_id_t, void *); 181179237Sjbstatic void profile_load(void *); 182179237Sjbstatic void profile_provide(void *, dtrace_probedesc_t *); 183179237Sjb 184179237Sjbstatic int profile_rates[] = { 185179237Sjb 97, 199, 499, 997, 1999, 186179237Sjb 4001, 4999, 0, 0, 0, 187179237Sjb 0, 0, 0, 0, 0, 188179237Sjb 0, 0, 0, 0, 0 189179237Sjb}; 190179237Sjb 191179237Sjbstatic int profile_ticks[] = { 192179237Sjb 1, 10, 100, 500, 1000, 193179237Sjb 5000, 0, 0, 0, 0, 194179237Sjb 0, 0, 0, 0, 0 195179237Sjb}; 196179237Sjb 197179237Sjb/* 198179237Sjb * profile_max defines the upper bound on the number of profile probes that 199179237Sjb * can exist (this is to prevent malicious or clumsy users from exhausing 200179237Sjb * system resources by creating a slew of profile probes). At mod load time, 201179237Sjb * this gets its value from PROFILE_MAX_DEFAULT or profile-max-probes if it's 202179237Sjb * present in the profile.conf file. 203179237Sjb */ 204179237Sjb#define PROFILE_MAX_DEFAULT 1000 /* default max. number of probes */ 205179237Sjbstatic uint32_t profile_max = PROFILE_MAX_DEFAULT; 206179237Sjb /* maximum number of profile probes */ 207179237Sjbstatic uint32_t profile_total; /* current number of profile probes */ 208179237Sjb 209179237Sjbstatic struct cdevsw profile_cdevsw = { 210179237Sjb .d_version = D_VERSION, 211179237Sjb .d_open = profile_open, 212179237Sjb .d_name = "profile", 213179237Sjb}; 214179237Sjb 215179237Sjbstatic dtrace_pattr_t profile_attr = { 216179237Sjb{ DTRACE_STABILITY_EVOLVING, DTRACE_STABILITY_EVOLVING, DTRACE_CLASS_COMMON }, 217179237Sjb{ DTRACE_STABILITY_PRIVATE, DTRACE_STABILITY_PRIVATE, DTRACE_CLASS_UNKNOWN }, 218179237Sjb{ DTRACE_STABILITY_PRIVATE, DTRACE_STABILITY_PRIVATE, DTRACE_CLASS_ISA }, 219179237Sjb{ DTRACE_STABILITY_EVOLVING, DTRACE_STABILITY_EVOLVING, DTRACE_CLASS_COMMON }, 220179237Sjb{ DTRACE_STABILITY_PRIVATE, DTRACE_STABILITY_PRIVATE, DTRACE_CLASS_ISA }, 221179237Sjb}; 222179237Sjb 223179237Sjbstatic dtrace_pops_t profile_pops = { 224324282Smarkj .dtps_provide = profile_provide, 225324282Smarkj .dtps_provide_module = NULL, 226324282Smarkj .dtps_enable = profile_enable, 227324282Smarkj .dtps_disable = profile_disable, 228324282Smarkj .dtps_suspend = NULL, 229324282Smarkj .dtps_resume = NULL, 230324282Smarkj .dtps_getargdesc = NULL, 231324282Smarkj .dtps_getargval = NULL, 232324282Smarkj .dtps_usermode = NULL, 233324282Smarkj .dtps_destroy = profile_destroy 234179237Sjb}; 235179237Sjb 236179237Sjbstatic struct cdev *profile_cdev; 237179237Sjbstatic dtrace_provider_id_t profile_id; 238179237Sjbstatic hrtime_t profile_interval_min = NANOSEC / 5000; /* 5000 hz */ 239291855Sandrewstatic int profile_aframes = PROF_ARTIFICIAL_FRAMES; 240179237Sjb 241291855SandrewSYSCTL_DECL(_kern_dtrace); 242291855SandrewSYSCTL_NODE(_kern_dtrace, OID_AUTO, profile, CTLFLAG_RD, 0, "DTrace profile parameters"); 243291855SandrewSYSCTL_INT(_kern_dtrace_profile, OID_AUTO, aframes, CTLFLAG_RW, &profile_aframes, 244291855Sandrew 0, "Skipped frames for profile provider"); 245291855Sandrew 246275576Savgstatic sbintime_t 247275576Savgnsec_to_sbt(hrtime_t nsec) 248275576Savg{ 249275576Savg time_t sec; 250275576Savg 251275576Savg /* 252275576Savg * We need to calculate nsec * 2^32 / 10^9 253275576Savg * Seconds and nanoseconds are split to avoid overflow. 254275576Savg */ 255275576Savg sec = nsec / NANOSEC; 256275576Savg nsec = nsec % NANOSEC; 257275576Savg return (((sbintime_t)sec << 32) | ((sbintime_t)nsec << 32) / NANOSEC); 258275576Savg} 259275576Savg 260275576Savgstatic hrtime_t 261275576Savgsbt_to_nsec(sbintime_t sbt) 262275576Savg{ 263275576Savg 264275576Savg return ((sbt >> 32) * NANOSEC + 265275576Savg (((uint32_t)sbt * (hrtime_t)NANOSEC) >> 32)); 266275576Savg} 267275576Savg 268179237Sjbstatic void 269179237Sjbprofile_fire(void *arg) 270179237Sjb{ 271179237Sjb profile_probe_percpu_t *pcpu = arg; 272179237Sjb profile_probe_t *prof = pcpu->profc_probe; 273179237Sjb hrtime_t late; 274275576Savg struct trapframe *frame; 275275576Savg uintfptr_t pc, upc; 276179237Sjb 277275576Savg#ifdef illumos 278179237Sjb late = gethrtime() - pcpu->profc_expected; 279275576Savg#else 280275576Savg late = sbt_to_nsec(sbinuptime() - pcpu->profc_expected); 281275576Savg#endif 282275576Savg 283275576Savg pc = 0; 284275576Savg upc = 0; 285275576Savg 286275576Savg /* 287275576Savg * td_intr_frame can be unset if this is a catch up event 288275576Savg * after waking up from idle sleep. 289275576Savg * This can only happen on a CPU idle thread. 290275576Savg */ 291275576Savg frame = curthread->td_intr_frame; 292275576Savg if (frame != NULL) { 293275576Savg if (TRAPF_USERMODE(frame)) 294275576Savg upc = TRAPF_PC(frame); 295275576Savg else 296275576Savg pc = TRAPF_PC(frame); 297275576Savg } 298275576Savg dtrace_probe(prof->prof_id, pc, upc, late, 0, 0); 299275576Savg 300179237Sjb pcpu->profc_expected += pcpu->profc_interval; 301275576Savg callout_schedule_sbt_curcpu(&pcpu->profc_cyclic, 302275576Savg pcpu->profc_expected, 0, C_DIRECT_EXEC | C_ABSOLUTE); 303179237Sjb} 304179237Sjb 305179237Sjbstatic void 306179237Sjbprofile_tick(void *arg) 307179237Sjb{ 308179237Sjb profile_probe_t *prof = arg; 309275576Savg struct trapframe *frame; 310275576Savg uintfptr_t pc, upc; 311179237Sjb 312275576Savg pc = 0; 313275576Savg upc = 0; 314275576Savg 315275576Savg /* 316275576Savg * td_intr_frame can be unset if this is a catch up event 317275576Savg * after waking up from idle sleep. 318275576Savg * This can only happen on a CPU idle thread. 319275576Savg */ 320275576Savg frame = curthread->td_intr_frame; 321275576Savg if (frame != NULL) { 322275576Savg if (TRAPF_USERMODE(frame)) 323275576Savg upc = TRAPF_PC(frame); 324275576Savg else 325275576Savg pc = TRAPF_PC(frame); 326275576Savg } 327275576Savg dtrace_probe(prof->prof_id, pc, upc, 0, 0, 0); 328275576Savg 329275576Savg prof->prof_expected += prof->prof_interval; 330275576Savg callout_schedule_sbt(&prof->prof_cyclic, 331275576Savg prof->prof_expected, 0, C_DIRECT_EXEC | C_ABSOLUTE); 332179237Sjb} 333179237Sjb 334179237Sjbstatic void 335179237Sjbprofile_create(hrtime_t interval, char *name, int kind) 336179237Sjb{ 337179237Sjb profile_probe_t *prof; 338179237Sjb 339179237Sjb if (interval < profile_interval_min) 340179237Sjb return; 341179237Sjb 342179237Sjb if (dtrace_probe_lookup(profile_id, NULL, NULL, name) != 0) 343179237Sjb return; 344179237Sjb 345179237Sjb atomic_add_32(&profile_total, 1); 346179237Sjb if (profile_total > profile_max) { 347179237Sjb atomic_add_32(&profile_total, -1); 348179237Sjb return; 349179237Sjb } 350179237Sjb 351179237Sjb prof = kmem_zalloc(sizeof (profile_probe_t), KM_SLEEP); 352179237Sjb (void) strcpy(prof->prof_name, name); 353275576Savg#ifdef illumos 354179237Sjb prof->prof_interval = interval; 355179237Sjb prof->prof_cyclic = CYCLIC_NONE; 356275576Savg#else 357275576Savg prof->prof_interval = nsec_to_sbt(interval); 358283291Sjkim callout_init(&prof->prof_cyclic, 1); 359275576Savg#endif 360179237Sjb prof->prof_kind = kind; 361179237Sjb prof->prof_id = dtrace_probe_create(profile_id, 362179237Sjb NULL, NULL, name, 363291855Sandrew profile_aframes, prof); 364179237Sjb} 365179237Sjb 366179237Sjb/*ARGSUSED*/ 367179237Sjbstatic void 368179237Sjbprofile_provide(void *arg, dtrace_probedesc_t *desc) 369179237Sjb{ 370179237Sjb int i, j, rate, kind; 371179237Sjb hrtime_t val = 0, mult = 1, len = 0; 372179237Sjb char *name, *suffix = NULL; 373179237Sjb 374179237Sjb const struct { 375179237Sjb char *prefix; 376179237Sjb int kind; 377179237Sjb } types[] = { 378179237Sjb { PROF_PREFIX_PROFILE, PROF_PROFILE }, 379179237Sjb { PROF_PREFIX_TICK, PROF_TICK }, 380179237Sjb { 0, 0 } 381179237Sjb }; 382179237Sjb 383179237Sjb const struct { 384179237Sjb char *name; 385179237Sjb hrtime_t mult; 386179237Sjb } suffixes[] = { 387179237Sjb { "ns", NANOSEC / NANOSEC }, 388179237Sjb { "nsec", NANOSEC / NANOSEC }, 389179237Sjb { "us", NANOSEC / MICROSEC }, 390179237Sjb { "usec", NANOSEC / MICROSEC }, 391179237Sjb { "ms", NANOSEC / MILLISEC }, 392179237Sjb { "msec", NANOSEC / MILLISEC }, 393179237Sjb { "s", NANOSEC / SEC }, 394179237Sjb { "sec", NANOSEC / SEC }, 395179237Sjb { "m", NANOSEC * (hrtime_t)60 }, 396179237Sjb { "min", NANOSEC * (hrtime_t)60 }, 397179237Sjb { "h", NANOSEC * (hrtime_t)(60 * 60) }, 398179237Sjb { "hour", NANOSEC * (hrtime_t)(60 * 60) }, 399179237Sjb { "d", NANOSEC * (hrtime_t)(24 * 60 * 60) }, 400179237Sjb { "day", NANOSEC * (hrtime_t)(24 * 60 * 60) }, 401179237Sjb { "hz", 0 }, 402179237Sjb { NULL } 403179237Sjb }; 404179237Sjb 405179237Sjb if (desc == NULL) { 406179237Sjb char n[PROF_NAMELEN]; 407179237Sjb 408179237Sjb /* 409179237Sjb * If no description was provided, provide all of our probes. 410179237Sjb */ 411179237Sjb for (i = 0; i < sizeof (profile_rates) / sizeof (int); i++) { 412179237Sjb if ((rate = profile_rates[i]) == 0) 413179237Sjb continue; 414179237Sjb 415179237Sjb (void) snprintf(n, PROF_NAMELEN, "%s%d", 416179237Sjb PROF_PREFIX_PROFILE, rate); 417179237Sjb profile_create(NANOSEC / rate, n, PROF_PROFILE); 418179237Sjb } 419179237Sjb 420179237Sjb for (i = 0; i < sizeof (profile_ticks) / sizeof (int); i++) { 421179237Sjb if ((rate = profile_ticks[i]) == 0) 422179237Sjb continue; 423179237Sjb 424179237Sjb (void) snprintf(n, PROF_NAMELEN, "%s%d", 425179237Sjb PROF_PREFIX_TICK, rate); 426179237Sjb profile_create(NANOSEC / rate, n, PROF_TICK); 427179237Sjb } 428179237Sjb 429179237Sjb return; 430179237Sjb } 431179237Sjb 432179237Sjb name = desc->dtpd_name; 433179237Sjb 434179237Sjb for (i = 0; types[i].prefix != NULL; i++) { 435179237Sjb len = strlen(types[i].prefix); 436179237Sjb 437179237Sjb if (strncmp(name, types[i].prefix, len) != 0) 438179237Sjb continue; 439179237Sjb break; 440179237Sjb } 441179237Sjb 442179237Sjb if (types[i].prefix == NULL) 443179237Sjb return; 444179237Sjb 445179237Sjb kind = types[i].kind; 446179237Sjb j = strlen(name) - len; 447179237Sjb 448179237Sjb /* 449179237Sjb * We need to start before any time suffix. 450179237Sjb */ 451179237Sjb for (j = strlen(name); j >= len; j--) { 452179237Sjb if (name[j] >= '0' && name[j] <= '9') 453179237Sjb break; 454179237Sjb suffix = &name[j]; 455179237Sjb } 456179237Sjb 457179237Sjb ASSERT(suffix != NULL); 458179237Sjb 459179237Sjb /* 460179237Sjb * Now determine the numerical value present in the probe name. 461179237Sjb */ 462179237Sjb for (; j >= len; j--) { 463179237Sjb if (name[j] < '0' || name[j] > '9') 464179237Sjb return; 465179237Sjb 466179237Sjb val += (name[j] - '0') * mult; 467179237Sjb mult *= (hrtime_t)10; 468179237Sjb } 469179237Sjb 470179237Sjb if (val == 0) 471179237Sjb return; 472179237Sjb 473179237Sjb /* 474179237Sjb * Look-up the suffix to determine the multiplier. 475179237Sjb */ 476179237Sjb for (i = 0, mult = 0; suffixes[i].name != NULL; i++) { 477179237Sjb if (strcasecmp(suffixes[i].name, suffix) == 0) { 478179237Sjb mult = suffixes[i].mult; 479179237Sjb break; 480179237Sjb } 481179237Sjb } 482179237Sjb 483179237Sjb if (suffixes[i].name == NULL && *suffix != '\0') 484179237Sjb return; 485179237Sjb 486179237Sjb if (mult == 0) { 487179237Sjb /* 488179237Sjb * The default is frequency-per-second. 489179237Sjb */ 490179237Sjb val = NANOSEC / val; 491179237Sjb } else { 492179237Sjb val *= mult; 493179237Sjb } 494179237Sjb 495179237Sjb profile_create(val, name, kind); 496179237Sjb} 497179237Sjb 498179237Sjb/* ARGSUSED */ 499179237Sjbstatic void 500179237Sjbprofile_destroy(void *arg, dtrace_id_t id, void *parg) 501179237Sjb{ 502179237Sjb profile_probe_t *prof = parg; 503179237Sjb 504275576Savg#ifdef illumos 505179237Sjb ASSERT(prof->prof_cyclic == CYCLIC_NONE); 506275576Savg#else 507275576Savg ASSERT(!callout_active(&prof->prof_cyclic) && prof->prof_pcpus == NULL); 508275576Savg#endif 509179237Sjb kmem_free(prof, sizeof (profile_probe_t)); 510179237Sjb 511179237Sjb ASSERT(profile_total >= 1); 512179237Sjb atomic_add_32(&profile_total, -1); 513179237Sjb} 514179237Sjb 515275576Savg#ifdef illumos 516179237Sjb/*ARGSUSED*/ 517179237Sjbstatic void 518179237Sjbprofile_online(void *arg, cpu_t *cpu, cyc_handler_t *hdlr, cyc_time_t *when) 519179237Sjb{ 520179237Sjb profile_probe_t *prof = arg; 521179237Sjb profile_probe_percpu_t *pcpu; 522179237Sjb 523179237Sjb pcpu = kmem_zalloc(sizeof (profile_probe_percpu_t), KM_SLEEP); 524179237Sjb pcpu->profc_probe = prof; 525179237Sjb 526179237Sjb hdlr->cyh_func = profile_fire; 527179237Sjb hdlr->cyh_arg = pcpu; 528179237Sjb 529179237Sjb when->cyt_interval = prof->prof_interval; 530179237Sjb when->cyt_when = gethrtime() + when->cyt_interval; 531179237Sjb 532179237Sjb pcpu->profc_expected = when->cyt_when; 533179237Sjb pcpu->profc_interval = when->cyt_interval; 534179237Sjb} 535179237Sjb 536179237Sjb/*ARGSUSED*/ 537179237Sjbstatic void 538179237Sjbprofile_offline(void *arg, cpu_t *cpu, void *oarg) 539179237Sjb{ 540179237Sjb profile_probe_percpu_t *pcpu = oarg; 541179237Sjb 542179237Sjb ASSERT(pcpu->profc_probe == arg); 543179237Sjb kmem_free(pcpu, sizeof (profile_probe_percpu_t)); 544179237Sjb} 545179237Sjb 546179237Sjb/* ARGSUSED */ 547179237Sjbstatic void 548179237Sjbprofile_enable(void *arg, dtrace_id_t id, void *parg) 549179237Sjb{ 550179237Sjb profile_probe_t *prof = parg; 551179237Sjb cyc_omni_handler_t omni; 552179237Sjb cyc_handler_t hdlr; 553179237Sjb cyc_time_t when; 554179237Sjb 555179237Sjb ASSERT(prof->prof_interval != 0); 556179237Sjb ASSERT(MUTEX_HELD(&cpu_lock)); 557179237Sjb 558179237Sjb if (prof->prof_kind == PROF_TICK) { 559179237Sjb hdlr.cyh_func = profile_tick; 560179237Sjb hdlr.cyh_arg = prof; 561179237Sjb 562179237Sjb when.cyt_interval = prof->prof_interval; 563179237Sjb when.cyt_when = gethrtime() + when.cyt_interval; 564179237Sjb } else { 565179237Sjb ASSERT(prof->prof_kind == PROF_PROFILE); 566179237Sjb omni.cyo_online = profile_online; 567179237Sjb omni.cyo_offline = profile_offline; 568179237Sjb omni.cyo_arg = prof; 569179237Sjb } 570179237Sjb 571179237Sjb if (prof->prof_kind == PROF_TICK) { 572179237Sjb prof->prof_cyclic = cyclic_add(&hdlr, &when); 573179237Sjb } else { 574179237Sjb prof->prof_cyclic = cyclic_add_omni(&omni); 575179237Sjb } 576179237Sjb} 577179237Sjb 578179237Sjb/* ARGSUSED */ 579179237Sjbstatic void 580179237Sjbprofile_disable(void *arg, dtrace_id_t id, void *parg) 581179237Sjb{ 582179237Sjb profile_probe_t *prof = parg; 583179237Sjb 584179237Sjb ASSERT(prof->prof_cyclic != CYCLIC_NONE); 585179237Sjb ASSERT(MUTEX_HELD(&cpu_lock)); 586179237Sjb 587179237Sjb cyclic_remove(prof->prof_cyclic); 588179237Sjb prof->prof_cyclic = CYCLIC_NONE; 589179237Sjb} 590179237Sjb 591275576Savg#else 592275576Savg 593179237Sjbstatic void 594275576Savgprofile_enable_omni(profile_probe_t *prof) 595275576Savg{ 596275576Savg profile_probe_percpu_t *pcpu; 597275576Savg int cpu; 598275576Savg 599275576Savg prof->prof_pcpus = kmem_zalloc((mp_maxid + 1) * sizeof(pcpu), KM_SLEEP); 600275576Savg CPU_FOREACH(cpu) { 601275576Savg pcpu = kmem_zalloc(sizeof(profile_probe_percpu_t), KM_SLEEP); 602275576Savg prof->prof_pcpus[cpu] = pcpu; 603275576Savg pcpu->profc_probe = prof; 604275576Savg pcpu->profc_expected = sbinuptime() + prof->prof_interval; 605275576Savg pcpu->profc_interval = prof->prof_interval; 606283291Sjkim callout_init(&pcpu->profc_cyclic, 1); 607275576Savg callout_reset_sbt_on(&pcpu->profc_cyclic, 608275576Savg pcpu->profc_expected, 0, profile_fire, pcpu, 609275576Savg cpu, C_DIRECT_EXEC | C_ABSOLUTE); 610275576Savg } 611275576Savg} 612275576Savg 613275576Savgstatic void 614275576Savgprofile_disable_omni(profile_probe_t *prof) 615275576Savg{ 616275576Savg profile_probe_percpu_t *pcpu; 617275576Savg int cpu; 618275576Savg 619275576Savg ASSERT(prof->prof_pcpus != NULL); 620275576Savg CPU_FOREACH(cpu) { 621275576Savg pcpu = prof->prof_pcpus[cpu]; 622275576Savg ASSERT(pcpu->profc_probe == prof); 623275576Savg ASSERT(callout_active(&pcpu->profc_cyclic)); 624275576Savg callout_stop(&pcpu->profc_cyclic); 625275576Savg callout_drain(&pcpu->profc_cyclic); 626275576Savg kmem_free(pcpu, sizeof(profile_probe_percpu_t)); 627275576Savg } 628275576Savg kmem_free(prof->prof_pcpus, (mp_maxid + 1) * sizeof(pcpu)); 629275576Savg prof->prof_pcpus = NULL; 630275576Savg} 631275576Savg 632275576Savg/* ARGSUSED */ 633275576Savgstatic void 634275576Savgprofile_enable(void *arg, dtrace_id_t id, void *parg) 635275576Savg{ 636275576Savg profile_probe_t *prof = parg; 637275576Savg 638275576Savg if (prof->prof_kind == PROF_TICK) { 639275576Savg prof->prof_expected = sbinuptime() + prof->prof_interval; 640275576Savg callout_reset_sbt(&prof->prof_cyclic, 641275576Savg prof->prof_expected, 0, profile_tick, prof, 642275576Savg C_DIRECT_EXEC | C_ABSOLUTE); 643275576Savg } else { 644275576Savg ASSERT(prof->prof_kind == PROF_PROFILE); 645275576Savg profile_enable_omni(prof); 646275576Savg } 647275576Savg} 648275576Savg 649275576Savg/* ARGSUSED */ 650275576Savgstatic void 651275576Savgprofile_disable(void *arg, dtrace_id_t id, void *parg) 652275576Savg{ 653275576Savg profile_probe_t *prof = parg; 654275576Savg 655275576Savg if (prof->prof_kind == PROF_TICK) { 656275576Savg ASSERT(callout_active(&prof->prof_cyclic)); 657275576Savg callout_stop(&prof->prof_cyclic); 658275576Savg callout_drain(&prof->prof_cyclic); 659275576Savg } else { 660275576Savg ASSERT(prof->prof_kind == PROF_PROFILE); 661275576Savg profile_disable_omni(prof); 662275576Savg } 663275576Savg} 664275576Savg#endif 665275576Savg 666275576Savgstatic void 667179237Sjbprofile_load(void *dummy) 668179237Sjb{ 669179237Sjb /* Create the /dev/dtrace/profile entry. */ 670179237Sjb profile_cdev = make_dev(&profile_cdevsw, 0, UID_ROOT, GID_WHEEL, 0600, 671179237Sjb "dtrace/profile"); 672179237Sjb 673179237Sjb if (dtrace_register("profile", &profile_attr, DTRACE_PRIV_USER, 674179237Sjb NULL, &profile_pops, NULL, &profile_id) != 0) 675179237Sjb return; 676179237Sjb} 677179237Sjb 678179237Sjb 679179237Sjbstatic int 680179237Sjbprofile_unload() 681179237Sjb{ 682179237Sjb int error = 0; 683179237Sjb 684179237Sjb if ((error = dtrace_unregister(profile_id)) != 0) 685179237Sjb return (error); 686179237Sjb 687179237Sjb destroy_dev(profile_cdev); 688179237Sjb 689179237Sjb return (error); 690179237Sjb} 691179237Sjb 692179237Sjb/* ARGSUSED */ 693179237Sjbstatic int 694179237Sjbprofile_modevent(module_t mod __unused, int type, void *data __unused) 695179237Sjb{ 696179237Sjb int error = 0; 697179237Sjb 698179237Sjb switch (type) { 699179237Sjb case MOD_LOAD: 700179237Sjb break; 701179237Sjb 702179237Sjb case MOD_UNLOAD: 703179237Sjb break; 704179237Sjb 705179237Sjb case MOD_SHUTDOWN: 706179237Sjb break; 707179237Sjb 708179237Sjb default: 709179237Sjb error = EOPNOTSUPP; 710179237Sjb break; 711179237Sjb 712179237Sjb } 713179237Sjb return (error); 714179237Sjb} 715179237Sjb 716179237Sjb/* ARGSUSED */ 717179237Sjbstatic int 718179237Sjbprofile_open(struct cdev *dev __unused, int oflags __unused, int devtype __unused, struct thread *td __unused) 719179237Sjb{ 720179237Sjb return (0); 721179237Sjb} 722179237Sjb 723179237SjbSYSINIT(profile_load, SI_SUB_DTRACE_PROVIDER, SI_ORDER_ANY, profile_load, NULL); 724179237SjbSYSUNINIT(profile_unload, SI_SUB_DTRACE_PROVIDER, SI_ORDER_ANY, profile_unload, NULL); 725179237Sjb 726179237SjbDEV_MODULE(profile, profile_modevent, NULL); 727179237SjbMODULE_VERSION(profile, 1); 728179237SjbMODULE_DEPEND(profile, dtrace, 1, 1, 1); 729179237SjbMODULE_DEPEND(profile, opensolaris, 1, 1, 1); 730