1/* $NetBSD: tprof.c,v 1.23 2023/04/11 10:07:12 msaitoh Exp $ */ 2 3/*- 4 * Copyright (c)2008,2009,2010 YAMAMOTO Takashi, 5 * All rights reserved. 6 * 7 * Redistribution and use in source and binary forms, with or without 8 * modification, are permitted provided that the following conditions 9 * are met: 10 * 1. Redistributions of source code must retain the above copyright 11 * notice, this list of conditions and the following disclaimer. 12 * 2. Redistributions in binary form must reproduce the above copyright 13 * notice, this list of conditions and the following disclaimer in the 14 * documentation and/or other materials provided with the distribution. 15 * 16 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 17 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 18 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 19 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 20 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 21 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 22 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 23 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 24 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 25 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 26 * SUCH DAMAGE. 27 */ 28 29#include <sys/cdefs.h> 30__KERNEL_RCSID(0, "$NetBSD: tprof.c,v 1.23 2023/04/11 10:07:12 msaitoh Exp $"); 31 32#include <sys/param.h> 33#include <sys/systm.h> 34#include <sys/kernel.h> 35 36#include <sys/callout.h> 37#include <sys/conf.h> 38#include <sys/cpu.h> 39#include <sys/kmem.h> 40#include <sys/module.h> 41#include <sys/percpu.h> 42#include <sys/poll.h> 43#include <sys/proc.h> 44#include <sys/queue.h> 45#include <sys/select.h> 46#include <sys/workqueue.h> 47#include <sys/xcall.h> 48 49#include <dev/tprof/tprof.h> 50#include <dev/tprof/tprof_ioctl.h> 51 52#include "ioconf.h" 53 54#ifndef TPROF_HZ 55#define TPROF_HZ 10000 56#endif 57 58/* 59 * locking order: 60 * tprof_reader_lock -> tprof_lock 61 * tprof_startstop_lock -> tprof_lock 62 */ 63 64/* 65 * protected by: 66 * L: tprof_lock 67 * R: tprof_reader_lock 68 * S: tprof_startstop_lock 69 * s: writer should hold tprof_startstop_lock and tprof_lock 70 * reader should hold tprof_startstop_lock or tprof_lock 71 */ 72 73typedef struct tprof_buf { 74 u_int b_used; 75 u_int b_size; 76 u_int b_overflow; 77 u_int b_unused; 78 STAILQ_ENTRY(tprof_buf) b_list; 79 tprof_sample_t b_data[]; 80} tprof_buf_t; 81#define TPROF_BUF_BYTESIZE(sz) \ 82 (sizeof(tprof_buf_t) + (sz) * sizeof(tprof_sample_t)) 83#define TPROF_MAX_SAMPLES_PER_BUF TPROF_HZ 84 85typedef struct { 86 tprof_buf_t *c_buf; 87 uint32_t c_cpuid; 88 struct work c_work; 89 callout_t c_callout; 90} __aligned(CACHE_LINE_SIZE) tprof_cpu_t; 91 92typedef struct tprof_backend { 93 /* 94 * tprof_backend_softc_t must be passed as an argument to the interrupt 95 * handler, but since this is difficult to implement in armv7/v8. Then, 96 * tprof_backend is exposed. Additionally, softc must be placed at the 97 * beginning of struct tprof_backend. 98 */ 99 tprof_backend_softc_t tb_softc; 100 101 const char *tb_name; 102 const tprof_backend_ops_t *tb_ops; 103 LIST_ENTRY(tprof_backend) tb_list; 104} tprof_backend_t; 105 106static kmutex_t tprof_lock; 107static u_int tprof_nworker; /* L: # of running worker LWPs */ 108static lwp_t *tprof_owner; 109static STAILQ_HEAD(, tprof_buf) tprof_list; /* L: global buffer list */ 110static u_int tprof_nbuf_on_list; /* L: # of buffers on tprof_list */ 111static struct workqueue *tprof_wq; 112static struct percpu *tprof_cpus __read_mostly; /* tprof_cpu_t * */ 113static u_int tprof_samples_per_buf; 114static u_int tprof_max_buf; 115 116tprof_backend_t *tprof_backend; /* S: */ 117static LIST_HEAD(, tprof_backend) tprof_backends = 118 LIST_HEAD_INITIALIZER(tprof_backend); /* S: */ 119 120static kmutex_t tprof_reader_lock; 121static kcondvar_t tprof_reader_cv; /* L: */ 122static off_t tprof_reader_offset; /* R: */ 123 124static kmutex_t tprof_startstop_lock; 125static kcondvar_t tprof_cv; /* L: */ 126static struct selinfo tprof_selp; /* L: */ 127 128static struct tprof_stat tprof_stat; /* L: */ 129 130static tprof_cpu_t * 131tprof_cpu_direct(struct cpu_info *ci) 132{ 133 tprof_cpu_t **cp; 134 135 cp = percpu_getptr_remote(tprof_cpus, ci); 136 return *cp; 137} 138 139static tprof_cpu_t * 140tprof_cpu(struct cpu_info *ci) 141{ 142 tprof_cpu_t *c; 143 144 /* 145 * As long as xcalls are blocked -- e.g., by kpreempt_disable 146 * -- the percpu object will not be swapped and destroyed. We 147 * can't write to it, because the data may have already been 148 * moved to a new buffer, but we can safely read from it. 149 */ 150 kpreempt_disable(); 151 c = tprof_cpu_direct(ci); 152 kpreempt_enable(); 153 154 return c; 155} 156 157static tprof_cpu_t * 158tprof_curcpu(void) 159{ 160 161 return tprof_cpu(curcpu()); 162} 163 164static tprof_buf_t * 165tprof_buf_alloc(void) 166{ 167 tprof_buf_t *new; 168 u_int size = tprof_samples_per_buf; 169 170 new = kmem_alloc(TPROF_BUF_BYTESIZE(size), KM_SLEEP); 171 new->b_used = 0; 172 new->b_size = size; 173 new->b_overflow = 0; 174 return new; 175} 176 177static void 178tprof_buf_free(tprof_buf_t *buf) 179{ 180 181 kmem_free(buf, TPROF_BUF_BYTESIZE(buf->b_size)); 182} 183 184static tprof_buf_t * 185tprof_buf_switch(tprof_cpu_t *c, tprof_buf_t *new) 186{ 187 tprof_buf_t *old; 188 189 old = c->c_buf; 190 c->c_buf = new; 191 return old; 192} 193 194static tprof_buf_t * 195tprof_buf_refresh(void) 196{ 197 tprof_cpu_t * const c = tprof_curcpu(); 198 tprof_buf_t *new; 199 200 new = tprof_buf_alloc(); 201 return tprof_buf_switch(c, new); 202} 203 204static void 205tprof_worker(struct work *wk, void *dummy) 206{ 207 tprof_cpu_t * const c = tprof_curcpu(); 208 tprof_buf_t *buf; 209 tprof_backend_t *tb; 210 bool shouldstop; 211 212 KASSERT(wk == &c->c_work); 213 KASSERT(dummy == NULL); 214 215 /* 216 * Get a per cpu buffer. 217 */ 218 buf = tprof_buf_refresh(); 219 220 /* 221 * and put it on the global list for read(2). 222 */ 223 mutex_enter(&tprof_lock); 224 tb = tprof_backend; 225 shouldstop = (tb == NULL || tb->tb_softc.sc_ctr_running_mask == 0); 226 if (shouldstop) { 227 KASSERT(tprof_nworker > 0); 228 tprof_nworker--; 229 cv_broadcast(&tprof_cv); 230 cv_broadcast(&tprof_reader_cv); 231 } 232 if (buf->b_used == 0) { 233 tprof_stat.ts_emptybuf++; 234 } else if (tprof_nbuf_on_list < tprof_max_buf) { 235 tprof_stat.ts_sample += buf->b_used; 236 tprof_stat.ts_overflow += buf->b_overflow; 237 tprof_stat.ts_buf++; 238 STAILQ_INSERT_TAIL(&tprof_list, buf, b_list); 239 tprof_nbuf_on_list++; 240 buf = NULL; 241 selnotify(&tprof_selp, 0, NOTE_SUBMIT); 242 cv_broadcast(&tprof_reader_cv); 243 } else { 244 tprof_stat.ts_dropbuf_sample += buf->b_used; 245 tprof_stat.ts_dropbuf++; 246 } 247 mutex_exit(&tprof_lock); 248 if (buf) 249 tprof_buf_free(buf); 250 251 if (!shouldstop) 252 callout_schedule(&c->c_callout, hz / 8); 253} 254 255static void 256tprof_kick(void *vp) 257{ 258 struct cpu_info * const ci = vp; 259 tprof_cpu_t * const c = tprof_cpu(ci); 260 261 workqueue_enqueue(tprof_wq, &c->c_work, ci); 262} 263 264static void 265tprof_stop1(void) 266{ 267 CPU_INFO_ITERATOR cii; 268 struct cpu_info *ci; 269 270 KASSERT(mutex_owned(&tprof_startstop_lock)); 271 KASSERT(tprof_nworker == 0); 272 273 for (CPU_INFO_FOREACH(cii, ci)) { 274 tprof_cpu_t * const c = tprof_cpu(ci); 275 tprof_buf_t *old; 276 277 old = tprof_buf_switch(c, NULL); 278 if (old != NULL) 279 tprof_buf_free(old); 280 281 callout_destroy(&c->c_callout); 282 } 283 workqueue_destroy(tprof_wq); 284} 285 286static void 287tprof_getinfo(struct tprof_info *info) 288{ 289 tprof_backend_t *tb; 290 291 KASSERT(mutex_owned(&tprof_startstop_lock)); 292 293 memset(info, 0, sizeof(*info)); 294 info->ti_version = TPROF_VERSION; 295 if ((tb = tprof_backend) != NULL) 296 info->ti_ident = tb->tb_ops->tbo_ident(); 297} 298 299static int 300tprof_getncounters(u_int *ncounters) 301{ 302 tprof_backend_t *tb; 303 304 tb = tprof_backend; 305 if (tb == NULL) 306 return ENOENT; 307 308 *ncounters = tb->tb_ops->tbo_ncounters(); 309 return 0; 310} 311 312static void 313tprof_start_cpu(void *arg1, void *arg2) 314{ 315 tprof_backend_t *tb = arg1; 316 tprof_countermask_t runmask = (uintptr_t)arg2; 317 318 tb->tb_ops->tbo_start(runmask); 319} 320 321static void 322tprof_stop_cpu(void *arg1, void *arg2) 323{ 324 tprof_backend_t *tb = arg1; 325 tprof_countermask_t stopmask = (uintptr_t)arg2; 326 327 tb->tb_ops->tbo_stop(stopmask); 328} 329 330static int 331tprof_start(tprof_countermask_t runmask) 332{ 333 CPU_INFO_ITERATOR cii; 334 struct cpu_info *ci; 335 tprof_backend_t *tb; 336 uint64_t xc; 337 int error; 338 bool firstrun; 339 340 KASSERT(mutex_owned(&tprof_startstop_lock)); 341 342 tb = tprof_backend; 343 if (tb == NULL) { 344 error = ENOENT; 345 goto done; 346 } 347 348 runmask &= ~tb->tb_softc.sc_ctr_running_mask; 349 runmask &= tb->tb_softc.sc_ctr_configured_mask; 350 if (runmask == 0) { 351 /* 352 * Targets are already running. 353 * Unconfigured counters are ignored. 354 */ 355 error = 0; 356 goto done; 357 } 358 359 firstrun = (tb->tb_softc.sc_ctr_running_mask == 0); 360 if (firstrun) { 361 if (tb->tb_ops->tbo_establish != NULL) { 362 error = tb->tb_ops->tbo_establish(&tb->tb_softc); 363 if (error != 0) 364 goto done; 365 } 366 367 tprof_samples_per_buf = TPROF_MAX_SAMPLES_PER_BUF; 368 tprof_max_buf = ncpu * 3; 369 error = workqueue_create(&tprof_wq, "tprofmv", tprof_worker, 370 NULL, PRI_NONE, IPL_SOFTCLOCK, WQ_MPSAFE | WQ_PERCPU); 371 if (error != 0) { 372 if (tb->tb_ops->tbo_disestablish != NULL) 373 tb->tb_ops->tbo_disestablish(&tb->tb_softc); 374 goto done; 375 } 376 377 for (CPU_INFO_FOREACH(cii, ci)) { 378 tprof_cpu_t * const c = tprof_cpu(ci); 379 tprof_buf_t *new; 380 tprof_buf_t *old; 381 382 new = tprof_buf_alloc(); 383 old = tprof_buf_switch(c, new); 384 if (old != NULL) { 385 tprof_buf_free(old); 386 } 387 callout_init(&c->c_callout, CALLOUT_MPSAFE); 388 callout_setfunc(&c->c_callout, tprof_kick, ci); 389 } 390 } 391 392 runmask &= tb->tb_softc.sc_ctr_configured_mask; 393 xc = xc_broadcast(0, tprof_start_cpu, tb, (void *)(uintptr_t)runmask); 394 xc_wait(xc); 395 mutex_enter(&tprof_lock); 396 tb->tb_softc.sc_ctr_running_mask |= runmask; 397 mutex_exit(&tprof_lock); 398 399 if (firstrun) { 400 for (CPU_INFO_FOREACH(cii, ci)) { 401 tprof_cpu_t * const c = tprof_cpu(ci); 402 403 mutex_enter(&tprof_lock); 404 tprof_nworker++; 405 mutex_exit(&tprof_lock); 406 workqueue_enqueue(tprof_wq, &c->c_work, ci); 407 } 408 } 409 error = 0; 410 411done: 412 return error; 413} 414 415static void 416tprof_stop(tprof_countermask_t stopmask) 417{ 418 tprof_backend_t *tb; 419 uint64_t xc; 420 421 tb = tprof_backend; 422 if (tb == NULL) 423 return; 424 425 KASSERT(mutex_owned(&tprof_startstop_lock)); 426 stopmask &= tb->tb_softc.sc_ctr_running_mask; 427 if (stopmask == 0) { 428 /* Targets are not running */ 429 goto done; 430 } 431 432 xc = xc_broadcast(0, tprof_stop_cpu, tb, (void *)(uintptr_t)stopmask); 433 xc_wait(xc); 434 mutex_enter(&tprof_lock); 435 tb->tb_softc.sc_ctr_running_mask &= ~stopmask; 436 mutex_exit(&tprof_lock); 437 438 /* All counters have stopped? */ 439 if (tb->tb_softc.sc_ctr_running_mask == 0) { 440 mutex_enter(&tprof_lock); 441 cv_broadcast(&tprof_reader_cv); 442 while (tprof_nworker > 0) 443 cv_wait(&tprof_cv, &tprof_lock); 444 445 mutex_exit(&tprof_lock); 446 447 tprof_stop1(); 448 if (tb->tb_ops->tbo_disestablish != NULL) 449 tb->tb_ops->tbo_disestablish(&tb->tb_softc); 450 } 451done: 452 ; 453} 454 455static void 456tprof_init_percpu_counters_offset(void *vp, void *vp2, struct cpu_info *ci) 457{ 458 uint64_t *counters_offset = vp; 459 u_int counter = (uintptr_t)vp2; 460 461 tprof_backend_t *tb = tprof_backend; 462 tprof_param_t *param = &tb->tb_softc.sc_count[counter].ctr_param; 463 counters_offset[counter] = param->p_value; 464} 465 466static void 467tprof_configure_event_cpu(void *arg1, void *arg2) 468{ 469 tprof_backend_t *tb = arg1; 470 u_int counter = (uintptr_t)arg2; 471 tprof_param_t *param = &tb->tb_softc.sc_count[counter].ctr_param; 472 473 tb->tb_ops->tbo_configure_event(counter, param); 474} 475 476static int 477tprof_configure_event(const tprof_param_t *param) 478{ 479 tprof_backend_t *tb; 480 tprof_backend_softc_t *sc; 481 tprof_param_t *sc_param; 482 uint64_t xc; 483 int c, error; 484 485 if ((param->p_flags & (TPROF_PARAM_USER | TPROF_PARAM_KERN)) == 0) { 486 error = EINVAL; 487 goto done; 488 } 489 490 tb = tprof_backend; 491 if (tb == NULL) { 492 error = ENOENT; 493 goto done; 494 } 495 sc = &tb->tb_softc; 496 497 c = param->p_counter; 498 if (c >= tb->tb_softc.sc_ncounters) { 499 error = EINVAL; 500 goto done; 501 } 502 503 if (tb->tb_ops->tbo_valid_event != NULL) { 504 error = tb->tb_ops->tbo_valid_event(param->p_counter, param); 505 if (error != 0) 506 goto done; 507 } 508 509 /* if already running, stop the counter */ 510 if (ISSET(c, tb->tb_softc.sc_ctr_running_mask)) 511 tprof_stop(__BIT(c)); 512 513 sc->sc_count[c].ctr_bitwidth = 514 tb->tb_ops->tbo_counter_bitwidth(param->p_counter); 515 516 sc_param = &sc->sc_count[c].ctr_param; 517 memcpy(sc_param, param, sizeof(*sc_param)); /* save copy of param */ 518 519 if (ISSET(param->p_flags, TPROF_PARAM_PROFILE)) { 520 uint64_t freq, inum, dnum; 521 522 freq = tb->tb_ops->tbo_counter_estimate_freq(c); 523 sc->sc_count[c].ctr_counter_val = freq / TPROF_HZ; 524 if (sc->sc_count[c].ctr_counter_val == 0) { 525 printf("%s: counter#%d frequency (%"PRIu64") is" 526 " very low relative to TPROF_HZ (%u)\n", __func__, 527 c, freq, TPROF_HZ); 528 sc->sc_count[c].ctr_counter_val = 529 4000000000ULL / TPROF_HZ; 530 } 531 532 switch (param->p_flags & TPROF_PARAM_VALUE2_MASK) { 533 case TPROF_PARAM_VALUE2_SCALE: 534 if (sc_param->p_value2 == 0) 535 break; 536 /* 537 * p_value2 is 64-bit fixed-point 538 * upper 32 bits are the integer part 539 * lower 32 bits are the decimal part 540 */ 541 inum = sc_param->p_value2 >> 32; 542 dnum = sc_param->p_value2 & __BITS(31, 0); 543 sc->sc_count[c].ctr_counter_val = 544 sc->sc_count[c].ctr_counter_val * inum + 545 (sc->sc_count[c].ctr_counter_val * dnum >> 32); 546 if (sc->sc_count[c].ctr_counter_val == 0) 547 sc->sc_count[c].ctr_counter_val = 1; 548 break; 549 case TPROF_PARAM_VALUE2_TRIGGERCOUNT: 550 if (sc_param->p_value2 == 0) 551 sc_param->p_value2 = 1; 552 if (sc_param->p_value2 > 553 __BITS(sc->sc_count[c].ctr_bitwidth - 1, 0)) { 554 sc_param->p_value2 = 555 __BITS(sc->sc_count[c].ctr_bitwidth - 1, 0); 556 } 557 sc->sc_count[c].ctr_counter_val = sc_param->p_value2; 558 break; 559 default: 560 break; 561 } 562 sc->sc_count[c].ctr_counter_reset_val = 563 -sc->sc_count[c].ctr_counter_val; 564 sc->sc_count[c].ctr_counter_reset_val &= 565 __BITS(sc->sc_count[c].ctr_bitwidth - 1, 0); 566 } else { 567 sc->sc_count[c].ctr_counter_val = 0; 568 sc->sc_count[c].ctr_counter_reset_val = 0; 569 } 570 571 /* At this point, p_value is used as an initial value */ 572 percpu_foreach(tb->tb_softc.sc_ctr_offset_percpu, 573 tprof_init_percpu_counters_offset, (void *)(uintptr_t)c); 574 /* On the backend side, p_value is used as the reset value */ 575 sc_param->p_value = tb->tb_softc.sc_count[c].ctr_counter_reset_val; 576 577 xc = xc_broadcast(0, tprof_configure_event_cpu, 578 tb, (void *)(uintptr_t)c); 579 xc_wait(xc); 580 581 mutex_enter(&tprof_lock); 582 /* update counters bitmasks */ 583 SET(tb->tb_softc.sc_ctr_configured_mask, __BIT(c)); 584 CLR(tb->tb_softc.sc_ctr_prof_mask, __BIT(c)); 585 CLR(tb->tb_softc.sc_ctr_ovf_mask, __BIT(c)); 586 /* profiled counter requires overflow handling */ 587 if (ISSET(param->p_flags, TPROF_PARAM_PROFILE)) { 588 SET(tb->tb_softc.sc_ctr_prof_mask, __BIT(c)); 589 SET(tb->tb_softc.sc_ctr_ovf_mask, __BIT(c)); 590 } 591 /* counters with less than 64bits also require overflow handling */ 592 if (sc->sc_count[c].ctr_bitwidth != 64) 593 SET(tb->tb_softc.sc_ctr_ovf_mask, __BIT(c)); 594 mutex_exit(&tprof_lock); 595 596 error = 0; 597 598 done: 599 return error; 600} 601 602static void 603tprof_getcounts_cpu(void *arg1, void *arg2) 604{ 605 tprof_backend_t *tb = arg1; 606 tprof_backend_softc_t *sc = &tb->tb_softc; 607 uint64_t *counters = arg2; 608 uint64_t *counters_offset; 609 unsigned int c; 610 611 tprof_countermask_t configmask = sc->sc_ctr_configured_mask; 612 counters_offset = percpu_getref(sc->sc_ctr_offset_percpu); 613 for (c = 0; c < sc->sc_ncounters; c++) { 614 if (ISSET(configmask, __BIT(c))) { 615 uint64_t ctr = tb->tb_ops->tbo_counter_read(c); 616 counters[c] = counters_offset[c] + 617 ((ctr - sc->sc_count[c].ctr_counter_reset_val) & 618 __BITS(sc->sc_count[c].ctr_bitwidth - 1, 0)); 619 } else 620 counters[c] = 0; 621 } 622 percpu_putref(sc->sc_ctr_offset_percpu); 623} 624 625static int 626tprof_getcounts(tprof_counts_t *counts) 627{ 628 struct cpu_info *ci; 629 tprof_backend_t *tb; 630 uint64_t xc; 631 632 tb = tprof_backend; 633 if (tb == NULL) 634 return ENOENT; 635 636 if (counts->c_cpu >= ncpu) 637 return ESRCH; 638 ci = cpu_lookup(counts->c_cpu); 639 if (ci == NULL) 640 return ESRCH; 641 642 xc = xc_unicast(0, tprof_getcounts_cpu, tb, counts->c_count, ci); 643 xc_wait(xc); 644 645 counts->c_ncounters = tb->tb_softc.sc_ncounters; 646 counts->c_runningmask = tb->tb_softc.sc_ctr_running_mask; 647 return 0; 648} 649 650/* 651 * tprof_clear: drain unread samples. 652 */ 653 654static void 655tprof_clear(void) 656{ 657 tprof_buf_t *buf; 658 659 mutex_enter(&tprof_reader_lock); 660 mutex_enter(&tprof_lock); 661 while ((buf = STAILQ_FIRST(&tprof_list)) != NULL) { 662 if (buf != NULL) { 663 STAILQ_REMOVE_HEAD(&tprof_list, b_list); 664 KASSERT(tprof_nbuf_on_list > 0); 665 tprof_nbuf_on_list--; 666 mutex_exit(&tprof_lock); 667 tprof_buf_free(buf); 668 mutex_enter(&tprof_lock); 669 } 670 } 671 KASSERT(tprof_nbuf_on_list == 0); 672 mutex_exit(&tprof_lock); 673 tprof_reader_offset = 0; 674 mutex_exit(&tprof_reader_lock); 675 676 memset(&tprof_stat, 0, sizeof(tprof_stat)); 677} 678 679static tprof_backend_t * 680tprof_backend_lookup(const char *name) 681{ 682 tprof_backend_t *tb; 683 684 KASSERT(mutex_owned(&tprof_startstop_lock)); 685 686 LIST_FOREACH(tb, &tprof_backends, tb_list) { 687 if (!strcmp(tb->tb_name, name)) { 688 return tb; 689 } 690 } 691 return NULL; 692} 693 694/* -------------------- backend interfaces */ 695 696/* 697 * tprof_sample: record a sample on the per-cpu buffer. 698 * 699 * be careful; can be called in NMI context. 700 * we are bluntly assuming the followings are safe. 701 * curcpu() 702 * curlwp->l_lid 703 * curlwp->l_proc->p_pid 704 */ 705 706void 707tprof_sample(void *unused, const tprof_frame_info_t *tfi) 708{ 709 tprof_cpu_t * const c = tprof_cpu_direct(curcpu()); 710 tprof_buf_t * const buf = c->c_buf; 711 tprof_sample_t *sp; 712 const uintptr_t pc = tfi->tfi_pc; 713 const lwp_t * const l = curlwp; 714 u_int idx; 715 716 idx = buf->b_used; 717 if (__predict_false(idx >= buf->b_size)) { 718 buf->b_overflow++; 719 return; 720 } 721 sp = &buf->b_data[idx]; 722 sp->s_pid = l->l_proc->p_pid; 723 sp->s_lwpid = l->l_lid; 724 sp->s_cpuid = c->c_cpuid; 725 sp->s_flags = ((tfi->tfi_inkernel) ? TPROF_SAMPLE_INKERNEL : 0) | 726 __SHIFTIN(tfi->tfi_counter, TPROF_SAMPLE_COUNTER_MASK); 727 sp->s_pc = pc; 728 buf->b_used = idx + 1; 729} 730 731/* 732 * tprof_backend_register: 733 */ 734 735int 736tprof_backend_register(const char *name, const tprof_backend_ops_t *ops, 737 int vers) 738{ 739 tprof_backend_t *tb; 740 741 if (vers != TPROF_BACKEND_VERSION) 742 return EINVAL; 743 744 mutex_enter(&tprof_startstop_lock); 745 tb = tprof_backend_lookup(name); 746 if (tb != NULL) { 747 mutex_exit(&tprof_startstop_lock); 748 return EEXIST; 749 } 750#if 1 /* XXX for now */ 751 if (!LIST_EMPTY(&tprof_backends)) { 752 mutex_exit(&tprof_startstop_lock); 753 return ENOTSUP; 754 } 755#endif 756 tb = kmem_zalloc(sizeof(*tb), KM_SLEEP); 757 tb->tb_name = name; 758 tb->tb_ops = ops; 759 LIST_INSERT_HEAD(&tprof_backends, tb, tb_list); 760#if 1 /* XXX for now */ 761 if (tprof_backend == NULL) { 762 tprof_backend = tb; 763 } 764#endif 765 mutex_exit(&tprof_startstop_lock); 766 767 /* Init backend softc */ 768 tb->tb_softc.sc_ncounters = tb->tb_ops->tbo_ncounters(); 769 tb->tb_softc.sc_ctr_offset_percpu_size = 770 sizeof(uint64_t) * tb->tb_softc.sc_ncounters; 771 tb->tb_softc.sc_ctr_offset_percpu = 772 percpu_alloc(tb->tb_softc.sc_ctr_offset_percpu_size); 773 774 return 0; 775} 776 777/* 778 * tprof_backend_unregister: 779 */ 780 781int 782tprof_backend_unregister(const char *name) 783{ 784 tprof_backend_t *tb; 785 786 mutex_enter(&tprof_startstop_lock); 787 tb = tprof_backend_lookup(name); 788#if defined(DIAGNOSTIC) 789 if (tb == NULL) { 790 mutex_exit(&tprof_startstop_lock); 791 panic("%s: not found '%s'", __func__, name); 792 } 793#endif /* defined(DIAGNOSTIC) */ 794 if (tb->tb_softc.sc_ctr_running_mask != 0) { 795 mutex_exit(&tprof_startstop_lock); 796 return EBUSY; 797 } 798#if 1 /* XXX for now */ 799 if (tprof_backend == tb) 800 tprof_backend = NULL; 801#endif 802 LIST_REMOVE(tb, tb_list); 803 mutex_exit(&tprof_startstop_lock); 804 805 /* fini backend softc */ 806 percpu_free(tb->tb_softc.sc_ctr_offset_percpu, 807 tb->tb_softc.sc_ctr_offset_percpu_size); 808 809 /* Free backend */ 810 kmem_free(tb, sizeof(*tb)); 811 812 return 0; 813} 814 815/* -------------------- cdevsw interfaces */ 816 817static int 818tprof_open(dev_t dev, int flags, int type, struct lwp *l) 819{ 820 821 if (minor(dev) != 0) 822 return EXDEV; 823 824 mutex_enter(&tprof_lock); 825 if (tprof_owner != NULL) { 826 mutex_exit(&tprof_lock); 827 return EBUSY; 828 } 829 tprof_owner = curlwp; 830 mutex_exit(&tprof_lock); 831 832 return 0; 833} 834 835static int 836tprof_close(dev_t dev, int flags, int type, struct lwp *l) 837{ 838 839 KASSERT(minor(dev) == 0); 840 841 mutex_enter(&tprof_startstop_lock); 842 mutex_enter(&tprof_lock); 843 tprof_owner = NULL; 844 mutex_exit(&tprof_lock); 845 tprof_stop(TPROF_COUNTERMASK_ALL); 846 tprof_clear(); 847 848 tprof_backend_t *tb = tprof_backend; 849 if (tb != NULL) { 850 KASSERT(tb->tb_softc.sc_ctr_running_mask == 0); 851 tb->tb_softc.sc_ctr_configured_mask = 0; 852 tb->tb_softc.sc_ctr_prof_mask = 0; 853 tb->tb_softc.sc_ctr_ovf_mask = 0; 854 } 855 856 mutex_exit(&tprof_startstop_lock); 857 858 return 0; 859} 860 861static int 862tprof_poll(dev_t dev, int events, struct lwp *l) 863{ 864 int revents; 865 866 revents = events & (POLLIN | POLLRDNORM); 867 if (revents == 0) 868 return 0; 869 870 mutex_enter(&tprof_lock); 871 if (STAILQ_EMPTY(&tprof_list)) { 872 revents = 0; 873 selrecord(l, &tprof_selp); 874 } 875 mutex_exit(&tprof_lock); 876 877 return revents; 878} 879 880static void 881filt_tprof_read_detach(struct knote *kn) 882{ 883 mutex_enter(&tprof_lock); 884 selremove_knote(&tprof_selp, kn); 885 mutex_exit(&tprof_lock); 886} 887 888static int 889filt_tprof_read_event(struct knote *kn, long hint) 890{ 891 int rv = 0; 892 893 if ((hint & NOTE_SUBMIT) == 0) 894 mutex_enter(&tprof_lock); 895 896 if (!STAILQ_EMPTY(&tprof_list)) { 897 tprof_buf_t *buf; 898 int64_t n = 0; 899 900 STAILQ_FOREACH(buf, &tprof_list, b_list) { 901 n += buf->b_used; 902 } 903 kn->kn_data = n * sizeof(tprof_sample_t); 904 905 rv = 1; 906 } 907 908 if ((hint & NOTE_SUBMIT) == 0) 909 mutex_exit(&tprof_lock); 910 911 return rv; 912} 913 914static const struct filterops tprof_read_filtops = { 915 .f_flags = FILTEROP_ISFD | FILTEROP_MPSAFE, 916 .f_attach = NULL, 917 .f_detach = filt_tprof_read_detach, 918 .f_event = filt_tprof_read_event, 919}; 920 921static int 922tprof_kqfilter(dev_t dev, struct knote *kn) 923{ 924 switch (kn->kn_filter) { 925 case EVFILT_READ: 926 kn->kn_fop = &tprof_read_filtops; 927 mutex_enter(&tprof_lock); 928 selrecord_knote(&tprof_selp, kn); 929 mutex_exit(&tprof_lock); 930 break; 931 default: 932 return EINVAL; 933 } 934 935 return 0; 936} 937 938static int 939tprof_read(dev_t dev, struct uio *uio, int flags) 940{ 941 tprof_buf_t *buf; 942 size_t bytes; 943 size_t resid; 944 size_t done = 0; 945 int error = 0; 946 947 KASSERT(minor(dev) == 0); 948 mutex_enter(&tprof_reader_lock); 949 while (uio->uio_resid > 0 && error == 0) { 950 /* 951 * Take the first buffer from the list. 952 */ 953 mutex_enter(&tprof_lock); 954 buf = STAILQ_FIRST(&tprof_list); 955 if (buf == NULL) { 956 if (tprof_nworker == 0 || done != 0) { 957 mutex_exit(&tprof_lock); 958 error = 0; 959 break; 960 } 961 mutex_exit(&tprof_reader_lock); 962 error = cv_wait_sig(&tprof_reader_cv, &tprof_lock); 963 mutex_exit(&tprof_lock); 964 mutex_enter(&tprof_reader_lock); 965 continue; 966 } 967 STAILQ_REMOVE_HEAD(&tprof_list, b_list); 968 KASSERT(tprof_nbuf_on_list > 0); 969 tprof_nbuf_on_list--; 970 mutex_exit(&tprof_lock); 971 972 /* 973 * Copy it out. 974 */ 975 bytes = MIN(buf->b_used * sizeof(tprof_sample_t) - 976 tprof_reader_offset, uio->uio_resid); 977 resid = uio->uio_resid; 978 error = uiomove((char *)buf->b_data + tprof_reader_offset, 979 bytes, uio); 980 done = resid - uio->uio_resid; 981 tprof_reader_offset += done; 982 983 /* 984 * If we didn't consume the whole buffer, 985 * put it back to the list. 986 */ 987 if (tprof_reader_offset < 988 buf->b_used * sizeof(tprof_sample_t)) { 989 mutex_enter(&tprof_lock); 990 STAILQ_INSERT_HEAD(&tprof_list, buf, b_list); 991 tprof_nbuf_on_list++; 992 cv_broadcast(&tprof_reader_cv); 993 mutex_exit(&tprof_lock); 994 } else { 995 tprof_buf_free(buf); 996 tprof_reader_offset = 0; 997 } 998 } 999 mutex_exit(&tprof_reader_lock); 1000 1001 return error; 1002} 1003 1004static int 1005tprof_ioctl(dev_t dev, u_long cmd, void *data, int flags, struct lwp *l) 1006{ 1007 const tprof_param_t *param; 1008 tprof_counts_t *counts; 1009 int error = 0; 1010 1011 KASSERT(minor(dev) == 0); 1012 1013 switch (cmd) { 1014 case TPROF_IOC_GETINFO: 1015 mutex_enter(&tprof_startstop_lock); 1016 tprof_getinfo(data); 1017 mutex_exit(&tprof_startstop_lock); 1018 break; 1019 case TPROF_IOC_GETNCOUNTERS: 1020 mutex_enter(&tprof_lock); 1021 error = tprof_getncounters((u_int *)data); 1022 mutex_exit(&tprof_lock); 1023 break; 1024 case TPROF_IOC_START: 1025 mutex_enter(&tprof_startstop_lock); 1026 error = tprof_start(*(tprof_countermask_t *)data); 1027 mutex_exit(&tprof_startstop_lock); 1028 break; 1029 case TPROF_IOC_STOP: 1030 mutex_enter(&tprof_startstop_lock); 1031 tprof_stop(*(tprof_countermask_t *)data); 1032 mutex_exit(&tprof_startstop_lock); 1033 break; 1034 case TPROF_IOC_GETSTAT: 1035 mutex_enter(&tprof_lock); 1036 memcpy(data, &tprof_stat, sizeof(tprof_stat)); 1037 mutex_exit(&tprof_lock); 1038 break; 1039 case TPROF_IOC_CONFIGURE_EVENT: 1040 param = data; 1041 mutex_enter(&tprof_startstop_lock); 1042 error = tprof_configure_event(param); 1043 mutex_exit(&tprof_startstop_lock); 1044 break; 1045 case TPROF_IOC_GETCOUNTS: 1046 counts = data; 1047 mutex_enter(&tprof_startstop_lock); 1048 error = tprof_getcounts(counts); 1049 mutex_exit(&tprof_startstop_lock); 1050 break; 1051 default: 1052 error = EINVAL; 1053 break; 1054 } 1055 1056 return error; 1057} 1058 1059const struct cdevsw tprof_cdevsw = { 1060 .d_open = tprof_open, 1061 .d_close = tprof_close, 1062 .d_read = tprof_read, 1063 .d_write = nowrite, 1064 .d_ioctl = tprof_ioctl, 1065 .d_stop = nostop, 1066 .d_tty = notty, 1067 .d_poll = tprof_poll, 1068 .d_mmap = nommap, 1069 .d_kqfilter = tprof_kqfilter, 1070 .d_discard = nodiscard, 1071 .d_flag = D_OTHER | D_MPSAFE 1072}; 1073 1074void 1075tprofattach(int nunits) 1076{ 1077 1078 /* Nothing */ 1079} 1080 1081MODULE(MODULE_CLASS_DRIVER, tprof, NULL); 1082 1083static void 1084tprof_cpu_init(void *vcp, void *vcookie, struct cpu_info *ci) 1085{ 1086 tprof_cpu_t **cp = vcp, *c; 1087 1088 c = kmem_zalloc(sizeof(*c), KM_SLEEP); 1089 c->c_buf = NULL; 1090 c->c_cpuid = cpu_index(ci); 1091 *cp = c; 1092} 1093 1094static void 1095tprof_cpu_fini(void *vcp, void *vcookie, struct cpu_info *ci) 1096{ 1097 tprof_cpu_t **cp = vcp, *c; 1098 1099 c = *cp; 1100 KASSERT(c->c_cpuid == cpu_index(ci)); 1101 KASSERT(c->c_buf == NULL); 1102 kmem_free(c, sizeof(*c)); 1103 *cp = NULL; 1104} 1105 1106static void 1107tprof_driver_init(void) 1108{ 1109 1110 mutex_init(&tprof_lock, MUTEX_DEFAULT, IPL_NONE); 1111 mutex_init(&tprof_reader_lock, MUTEX_DEFAULT, IPL_NONE); 1112 mutex_init(&tprof_startstop_lock, MUTEX_DEFAULT, IPL_NONE); 1113 selinit(&tprof_selp); 1114 cv_init(&tprof_cv, "tprof"); 1115 cv_init(&tprof_reader_cv, "tprof_rd"); 1116 STAILQ_INIT(&tprof_list); 1117 tprof_cpus = percpu_create(sizeof(tprof_cpu_t *), 1118 tprof_cpu_init, tprof_cpu_fini, NULL); 1119} 1120 1121static void 1122tprof_driver_fini(void) 1123{ 1124 1125 percpu_free(tprof_cpus, sizeof(tprof_cpu_t *)); 1126 mutex_destroy(&tprof_lock); 1127 mutex_destroy(&tprof_reader_lock); 1128 mutex_destroy(&tprof_startstop_lock); 1129 seldestroy(&tprof_selp); 1130 cv_destroy(&tprof_cv); 1131 cv_destroy(&tprof_reader_cv); 1132} 1133 1134static int 1135tprof_modcmd(modcmd_t cmd, void *arg) 1136{ 1137 1138 switch (cmd) { 1139 case MODULE_CMD_INIT: 1140 tprof_driver_init(); 1141#if defined(_MODULE) 1142 { 1143 devmajor_t bmajor = NODEVMAJOR; 1144 devmajor_t cmajor = NODEVMAJOR; 1145 int error; 1146 1147 error = devsw_attach("tprof", NULL, &bmajor, 1148 &tprof_cdevsw, &cmajor); 1149 if (error) { 1150 tprof_driver_fini(); 1151 return error; 1152 } 1153 } 1154#endif /* defined(_MODULE) */ 1155 return 0; 1156 1157 case MODULE_CMD_FINI: 1158#if defined(_MODULE) 1159 devsw_detach(NULL, &tprof_cdevsw); 1160#endif /* defined(_MODULE) */ 1161 tprof_driver_fini(); 1162 return 0; 1163 1164 default: 1165 return ENOTTY; 1166 } 1167} 1168