1/* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21/* 22 * Copyright 2009 Sun Microsystems, Inc. All rights reserved. 23 * Use is subject to license terms. 24 */ 25 26 27#include <sys/errno.h> 28#include <sys/stat.h> 29#include <sys/modctl.h> 30#include <sys/conf.h> 31#include <sys/systm.h> 32#include <sys/ddi.h> 33#include <sys/sunddi.h> 34#include <sys/cpuvar.h> 35#include <sys/kmem.h> 36#include <sys/strsubr.h> 37#include <sys/dtrace.h> 38#include <sys/cyclic.h> 39#include <sys/atomic.h> 40 41static dev_info_t *profile_devi; 42static dtrace_provider_id_t profile_id; 43 44/* 45 * Regardless of platform, the stack frames look like this in the case of the 46 * profile provider: 47 * 48 * profile_fire 49 * cyclic_expire 50 * cyclic_fire 51 * [ cbe ] 52 * [ interrupt code ] 53 * 54 * On x86, there are five frames from the generic interrupt code; further, the 55 * interrupted instruction appears as its own stack frame, giving us a total of 56 * 10. 57 * 58 * On SPARC, the picture is further complicated because the compiler 59 * optimizes away tail-calls -- so the following frames are optimized away: 60 * 61 * profile_fire 62 * cyclic_expire 63 * 64 * This gives three frames. However, on DEBUG kernels, the cyclic_expire 65 * frame cannot be tail-call eliminated, yielding four frames in this case. 66 * 67 * All of the above constraints lead to the mess below. Yes, the profile 68 * provider should ideally figure this out on-the-fly by hitting one of its own 69 * probes and then walking its own stack trace. This is complicated, however, 70 * and the static definition doesn't seem to be overly brittle. Still, we 71 * allow for a manual override in case we get it completely wrong. 72 */ 73#ifdef __x86 74#define PROF_ARTIFICIAL_FRAMES 10 75#else 76#ifdef __sparc 77#ifdef DEBUG 78#define PROF_ARTIFICIAL_FRAMES 4 79#else 80#define PROF_ARTIFICIAL_FRAMES 3 81#endif 82#endif 83#endif 84 85#define PROF_NAMELEN 15 86 87#define PROF_PROFILE 0 88#define PROF_TICK 1 89#define PROF_PREFIX_PROFILE "profile-" 90#define PROF_PREFIX_TICK "tick-" 91 92typedef struct profile_probe { 93 char prof_name[PROF_NAMELEN]; 94 dtrace_id_t prof_id; 95 int prof_kind; 96 hrtime_t prof_interval; 97 cyclic_id_t prof_cyclic; 98} profile_probe_t; 99 100typedef struct profile_probe_percpu { 101 hrtime_t profc_expected; 102 hrtime_t profc_interval; 103 profile_probe_t *profc_probe; 104} profile_probe_percpu_t; 105 106hrtime_t profile_interval_min = NANOSEC / 5000; /* 5000 hz */ 107int profile_aframes = 0; /* override */ 108 109static int profile_rates[] = { 110 97, 199, 499, 997, 1999, 111 4001, 4999, 0, 0, 0, 112 0, 0, 0, 0, 0, 113 0, 0, 0, 0, 0 114}; 115 116static int profile_ticks[] = { 117 1, 10, 100, 500, 1000, 118 5000, 0, 0, 0, 0, 119 0, 0, 0, 0, 0 120}; 121 122/* 123 * profile_max defines the upper bound on the number of profile probes that 124 * can exist (this is to prevent malicious or clumsy users from exhausing 125 * system resources by creating a slew of profile probes). At mod load time, 126 * this gets its value from PROFILE_MAX_DEFAULT or profile-max-probes if it's 127 * present in the profile.conf file. 128 */ 129#define PROFILE_MAX_DEFAULT 1000 /* default max. number of probes */ 130static uint32_t profile_max; /* maximum number of profile probes */ 131static uint32_t profile_total; /* current number of profile probes */ 132 133static void 134profile_fire(void *arg) 135{ 136 profile_probe_percpu_t *pcpu = arg; 137 profile_probe_t *prof = pcpu->profc_probe; 138 hrtime_t late; 139 140 late = dtrace_gethrtime() - pcpu->profc_expected; 141 pcpu->profc_expected += pcpu->profc_interval; 142 143 dtrace_probe(prof->prof_id, CPU->cpu_profile_pc, 144 CPU->cpu_profile_upc, late, 0, 0); 145} 146 147static void 148profile_tick(void *arg) 149{ 150 profile_probe_t *prof = arg; 151 152 dtrace_probe(prof->prof_id, CPU->cpu_profile_pc, 153 CPU->cpu_profile_upc, 0, 0, 0); 154} 155 156static void 157profile_create(hrtime_t interval, const char *name, int kind) 158{ 159 profile_probe_t *prof; 160 int nr_frames = PROF_ARTIFICIAL_FRAMES + dtrace_mach_aframes(); 161 162 if (profile_aframes) 163 nr_frames = profile_aframes; 164 165 if (interval < profile_interval_min) 166 return; 167 168 if (dtrace_probe_lookup(profile_id, NULL, NULL, name) != 0) 169 return; 170 171 atomic_add_32(&profile_total, 1); 172 if (profile_total > profile_max) { 173 atomic_add_32(&profile_total, -1); 174 return; 175 } 176 177 prof = kmem_zalloc(sizeof (profile_probe_t), KM_SLEEP); 178 (void) strcpy(prof->prof_name, name); 179 prof->prof_interval = interval; 180 prof->prof_cyclic = CYCLIC_NONE; 181 prof->prof_kind = kind; 182 prof->prof_id = dtrace_probe_create(profile_id, 183 NULL, NULL, name, nr_frames, prof); 184} 185 186/*ARGSUSED*/ 187static void 188profile_provide(void *arg, const dtrace_probedesc_t *desc) 189{ 190 int i, j, rate, kind; 191 hrtime_t val = 0, mult = 1, len; 192 const char *name, *suffix = NULL; 193 194 const struct { 195 char *prefix; 196 int kind; 197 } types[] = { 198 { PROF_PREFIX_PROFILE, PROF_PROFILE }, 199 { PROF_PREFIX_TICK, PROF_TICK }, 200 { NULL, NULL } 201 }; 202 203 const struct { 204 char *name; 205 hrtime_t mult; 206 } suffixes[] = { 207 { "ns", NANOSEC / NANOSEC }, 208 { "nsec", NANOSEC / NANOSEC }, 209 { "us", NANOSEC / MICROSEC }, 210 { "usec", NANOSEC / MICROSEC }, 211 { "ms", NANOSEC / MILLISEC }, 212 { "msec", NANOSEC / MILLISEC }, 213 { "s", NANOSEC / SEC }, 214 { "sec", NANOSEC / SEC }, 215 { "m", NANOSEC * (hrtime_t)60 }, 216 { "min", NANOSEC * (hrtime_t)60 }, 217 { "h", NANOSEC * (hrtime_t)(60 * 60) }, 218 { "hour", NANOSEC * (hrtime_t)(60 * 60) }, 219 { "d", NANOSEC * (hrtime_t)(24 * 60 * 60) }, 220 { "day", NANOSEC * (hrtime_t)(24 * 60 * 60) }, 221 { "hz", 0 }, 222 { NULL } 223 }; 224 225 if (desc == NULL) { 226 char n[PROF_NAMELEN]; 227 228 /* 229 * If no description was provided, provide all of our probes. 230 */ 231 for (i = 0; i < sizeof (profile_rates) / sizeof (int); i++) { 232 if ((rate = profile_rates[i]) == 0) 233 continue; 234 235 (void) snprintf(n, PROF_NAMELEN, "%s%d", 236 PROF_PREFIX_PROFILE, rate); 237 profile_create(NANOSEC / rate, n, PROF_PROFILE); 238 } 239 240 for (i = 0; i < sizeof (profile_ticks) / sizeof (int); i++) { 241 if ((rate = profile_ticks[i]) == 0) 242 continue; 243 244 (void) snprintf(n, PROF_NAMELEN, "%s%d", 245 PROF_PREFIX_TICK, rate); 246 profile_create(NANOSEC / rate, n, PROF_TICK); 247 } 248 249 return; 250 } 251 252 name = desc->dtpd_name; 253 254 for (i = 0; types[i].prefix != NULL; i++) { 255 len = strlen(types[i].prefix); 256 257 if (strncmp(name, types[i].prefix, len) != 0) 258 continue; 259 break; 260 } 261 262 if (types[i].prefix == NULL) 263 return; 264 265 kind = types[i].kind; 266 j = strlen(name) - len; 267 268 /* 269 * We need to start before any time suffix. 270 */ 271 for (j = strlen(name); j >= len; j--) { 272 if (name[j] >= '0' && name[j] <= '9') 273 break; 274 suffix = &name[j]; 275 } 276 277 ASSERT(suffix != NULL); 278 279 /* 280 * Now determine the numerical value present in the probe name. 281 */ 282 for (; j >= len; j--) { 283 if (name[j] < '0' || name[j] > '9') 284 return; 285 286 val += (name[j] - '0') * mult; 287 mult *= (hrtime_t)10; 288 } 289 290 if (val == 0) 291 return; 292 293 /* 294 * Look-up the suffix to determine the multiplier. 295 */ 296 for (i = 0, mult = 0; suffixes[i].name != NULL; i++) { 297 if (strcasecmp(suffixes[i].name, suffix) == 0) { 298 mult = suffixes[i].mult; 299 break; 300 } 301 } 302 303 if (suffixes[i].name == NULL && *suffix != '\0') 304 return; 305 306 if (mult == 0) { 307 /* 308 * The default is frequency-per-second. 309 */ 310 val = NANOSEC / val; 311 } else { 312 val *= mult; 313 } 314 315 profile_create(val, name, kind); 316} 317 318/*ARGSUSED*/ 319static void 320profile_destroy(void *arg, dtrace_id_t id, void *parg) 321{ 322 profile_probe_t *prof = parg; 323 324 ASSERT(prof->prof_cyclic == CYCLIC_NONE); 325 kmem_free(prof, sizeof (profile_probe_t)); 326 327 ASSERT(profile_total >= 1); 328 atomic_add_32(&profile_total, -1); 329} 330 331/*ARGSUSED*/ 332static void 333profile_online(void *arg, cpu_t *cpu, cyc_handler_t *hdlr, cyc_time_t *when) 334{ 335 profile_probe_t *prof = arg; 336 profile_probe_percpu_t *pcpu; 337 338 pcpu = kmem_zalloc(sizeof (profile_probe_percpu_t), KM_SLEEP); 339 pcpu->profc_probe = prof; 340 341 hdlr->cyh_func = profile_fire; 342 hdlr->cyh_arg = pcpu; 343 hdlr->cyh_level = CY_HIGH_LEVEL; 344 345 when->cyt_interval = prof->prof_interval; 346 when->cyt_when = dtrace_gethrtime() + when->cyt_interval; 347 348 pcpu->profc_expected = when->cyt_when; 349 pcpu->profc_interval = when->cyt_interval; 350} 351 352/*ARGSUSED*/ 353static void 354profile_offline(void *arg, cpu_t *cpu, void *oarg) 355{ 356 profile_probe_percpu_t *pcpu = oarg; 357 358 ASSERT(pcpu->profc_probe == arg); 359 kmem_free(pcpu, sizeof (profile_probe_percpu_t)); 360} 361 362/*ARGSUSED*/ 363static int 364profile_enable(void *arg, dtrace_id_t id, void *parg) 365{ 366 profile_probe_t *prof = parg; 367 cyc_omni_handler_t omni; 368 cyc_handler_t hdlr; 369 cyc_time_t when; 370 371 ASSERT(prof->prof_interval != 0); 372 ASSERT(MUTEX_HELD(&cpu_lock)); 373 374 if (prof->prof_kind == PROF_TICK) { 375 hdlr.cyh_func = profile_tick; 376 hdlr.cyh_arg = prof; 377 hdlr.cyh_level = CY_HIGH_LEVEL; 378 379 when.cyt_interval = prof->prof_interval; 380 when.cyt_when = dtrace_gethrtime() + when.cyt_interval; 381 } else { 382 ASSERT(prof->prof_kind == PROF_PROFILE); 383 omni.cyo_online = profile_online; 384 omni.cyo_offline = profile_offline; 385 omni.cyo_arg = prof; 386 } 387 388 if (prof->prof_kind == PROF_TICK) { 389 prof->prof_cyclic = cyclic_add(&hdlr, &when); 390 } else { 391 prof->prof_cyclic = cyclic_add_omni(&omni); 392 } 393 return (0); 394} 395 396/*ARGSUSED*/ 397static void 398profile_disable(void *arg, dtrace_id_t id, void *parg) 399{ 400 profile_probe_t *prof = parg; 401 402 ASSERT(prof->prof_cyclic != CYCLIC_NONE); 403 ASSERT(MUTEX_HELD(&cpu_lock)); 404 405 cyclic_remove(prof->prof_cyclic); 406 prof->prof_cyclic = CYCLIC_NONE; 407} 408 409/*ARGSUSED*/ 410static int 411profile_usermode(void *arg, dtrace_id_t id, void *parg) 412{ 413 return (CPU->cpu_profile_pc == 0); 414} 415 416static dtrace_pattr_t profile_attr = { 417{ DTRACE_STABILITY_EVOLVING, DTRACE_STABILITY_EVOLVING, DTRACE_CLASS_COMMON }, 418{ DTRACE_STABILITY_UNSTABLE, DTRACE_STABILITY_UNSTABLE, DTRACE_CLASS_UNKNOWN }, 419{ DTRACE_STABILITY_PRIVATE, DTRACE_STABILITY_PRIVATE, DTRACE_CLASS_UNKNOWN }, 420{ DTRACE_STABILITY_EVOLVING, DTRACE_STABILITY_EVOLVING, DTRACE_CLASS_COMMON }, 421{ DTRACE_STABILITY_EVOLVING, DTRACE_STABILITY_EVOLVING, DTRACE_CLASS_COMMON }, 422}; 423 424static dtrace_pops_t profile_pops = { 425 profile_provide, 426 NULL, 427 profile_enable, 428 profile_disable, 429 NULL, 430 NULL, 431 NULL, 432 NULL, 433 profile_usermode, 434 profile_destroy 435}; 436 437static int 438profile_attach(dev_info_t *devi, ddi_attach_cmd_t cmd) 439{ 440 switch (cmd) { 441 case DDI_ATTACH: 442 break; 443 case DDI_RESUME: 444 return (DDI_SUCCESS); 445 default: 446 return (DDI_FAILURE); 447 } 448 449 if (ddi_create_minor_node(devi, "profile", S_IFCHR, 0, 450 DDI_PSEUDO, NULL) == DDI_FAILURE || 451 dtrace_register("profile", &profile_attr, 452 DTRACE_PRIV_KERNEL | DTRACE_PRIV_USER, NULL, 453 &profile_pops, NULL, &profile_id) != 0) { 454 ddi_remove_minor_node(devi, NULL); 455 return (DDI_FAILURE); 456 } 457 458 profile_max = ddi_getprop(DDI_DEV_T_ANY, devi, DDI_PROP_DONTPASS, 459 "profile-max-probes", PROFILE_MAX_DEFAULT); 460 461 ddi_report_dev(devi); 462 profile_devi = devi; 463 return (DDI_SUCCESS); 464} 465 466static int 467profile_detach(dev_info_t *devi, ddi_detach_cmd_t cmd) 468{ 469 switch (cmd) { 470 case DDI_DETACH: 471 break; 472 case DDI_SUSPEND: 473 return (DDI_SUCCESS); 474 default: 475 return (DDI_FAILURE); 476 } 477 478 if (dtrace_unregister(profile_id) != 0) 479 return (DDI_FAILURE); 480 481 ddi_remove_minor_node(devi, NULL); 482 return (DDI_SUCCESS); 483} 484 485/*ARGSUSED*/ 486static int 487profile_info(dev_info_t *dip, ddi_info_cmd_t infocmd, void *arg, void **result) 488{ 489 int error; 490 491 switch (infocmd) { 492 case DDI_INFO_DEVT2DEVINFO: 493 *result = (void *)profile_devi; 494 error = DDI_SUCCESS; 495 break; 496 case DDI_INFO_DEVT2INSTANCE: 497 *result = (void *)0; 498 error = DDI_SUCCESS; 499 break; 500 default: 501 error = DDI_FAILURE; 502 } 503 return (error); 504} 505 506/*ARGSUSED*/ 507static int 508profile_open(dev_t *devp, int flag, int otyp, cred_t *cred_p) 509{ 510 return (0); 511} 512 513static struct cb_ops profile_cb_ops = { 514 profile_open, /* open */ 515 nodev, /* close */ 516 nulldev, /* strategy */ 517 nulldev, /* print */ 518 nodev, /* dump */ 519 nodev, /* read */ 520 nodev, /* write */ 521 nodev, /* ioctl */ 522 nodev, /* devmap */ 523 nodev, /* mmap */ 524 nodev, /* segmap */ 525 nochpoll, /* poll */ 526 ddi_prop_op, /* cb_prop_op */ 527 0, /* streamtab */ 528 D_NEW | D_MP /* Driver compatibility flag */ 529}; 530 531static struct dev_ops profile_ops = { 532 DEVO_REV, /* devo_rev, */ 533 0, /* refcnt */ 534 profile_info, /* get_dev_info */ 535 nulldev, /* identify */ 536 nulldev, /* probe */ 537 profile_attach, /* attach */ 538 profile_detach, /* detach */ 539 nodev, /* reset */ 540 &profile_cb_ops, /* driver operations */ 541 NULL, /* bus operations */ 542 nodev, /* dev power */ 543 ddi_quiesce_not_needed, /* quiesce */ 544}; 545 546/* 547 * Module linkage information for the kernel. 548 */ 549static struct modldrv modldrv = { 550 &mod_driverops, /* module type (this is a pseudo driver) */ 551 "Profile Interrupt Tracing", /* name of module */ 552 &profile_ops, /* driver ops */ 553}; 554 555static struct modlinkage modlinkage = { 556 MODREV_1, 557 (void *)&modldrv, 558 NULL 559}; 560 561int 562_init(void) 563{ 564 return (mod_install(&modlinkage)); 565} 566 567int 568_info(struct modinfo *modinfop) 569{ 570 return (mod_info(&modlinkage, modinfop)); 571} 572 573int 574_fini(void) 575{ 576 return (mod_remove(&modlinkage)); 577} 578