/*	$NetBSD: subr_prof.c,v 1.50 2021/08/14 17:51:20 ryo Exp $	*/

/*-
 * Copyright (c) 1982, 1986, 1993
 *	The Regents of the University of California.  All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 *	@(#)subr_prof.c	8.4 (Berkeley) 2/14/95
 */

#include <sys/cdefs.h>
__KERNEL_RCSID(0, "$NetBSD: subr_prof.c,v 1.50 2021/08/14 17:51:20 ryo Exp $");

#ifdef _KERNEL_OPT
#include "opt_gprof.h"
#include "opt_multiprocessor.h"
#endif

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/kernel.h>
#include <sys/proc.h>
#include <sys/mount.h>
#include <sys/syscallargs.h>
#include <sys/sysctl.h>

#include <sys/cpu.h>

#ifdef GPROF
#include <sys/malloc.h>
#include <sys/gmon.h>
#include <sys/xcall.h>

MALLOC_DEFINE(M_GPROF, "gprof", "kernel profiling buffer");

static int sysctl_kern_profiling(SYSCTLFN_ARGS);
#ifdef MULTIPROCESSOR
void _gmonparam_merge(struct gmonparam *, struct gmonparam *);
#endif

/*
 * Froms is actually a bunch of unsigned shorts indexing tos.
 */
struct gmonparam _gmonparam = { .state = GMON_PROF_OFF };

/* Actual start of the kernel text segment. */
extern char kernel_text[];

extern char etext[];


void
kmstartup(void)
{
	char *cp;
	struct gmonparam *p = &_gmonparam;
	unsigned long size;
	/*
	 * Round lowpc and highpc to multiples of the density we're using
	 * so the rest of the scaling (here and in gprof) stays in ints.
	 */
	p->lowpc = rounddown(((u_long)kernel_text),
	    HISTFRACTION * sizeof(HISTCOUNTER));
	p->highpc = roundup((u_long)etext,
	    HISTFRACTION * sizeof(HISTCOUNTER));
	p->textsize = p->highpc - p->lowpc;
	printf("Profiling kernel, textsize=%ld [%lx..%lx]\n",
	    p->textsize, p->lowpc, p->highpc);
	p->kcountsize = p->textsize / HISTFRACTION;
	p->hashfraction = HASHFRACTION;
	p->fromssize = p->textsize / HASHFRACTION;
	p->tolimit = p->textsize * ARCDENSITY / 100;
	if (p->tolimit < MINARCS)
		p->tolimit = MINARCS;
	else if (p->tolimit > MAXARCS)
		p->tolimit = MAXARCS;
	p->tossize = p->tolimit * sizeof(struct tostruct);

	size = p->kcountsize + p->fromssize + p->tossize;
#ifdef MULTIPROCESSOR
	CPU_INFO_ITERATOR cii;
	struct cpu_info *ci;
	for (CPU_INFO_FOREACH(cii, ci)) {
		p = malloc(sizeof(struct gmonparam) + size, M_GPROF,
		    M_NOWAIT | M_ZERO);
		if (p == NULL) {
			printf("No memory for profiling on %s\n",
			    cpu_name(ci));
			/* cannot profile on this cpu */
			continue;
		}
		memcpy(p, &_gmonparam, sizeof(_gmonparam));
		ci->ci_gmon = p;

		/*
		 * To allow profiling to be controlled only by the global
		 * _gmonparam.state, set the default value for each CPU to
		 * GMON_PROF_ON.  If _gmonparam.state is not ON, mcount will
		 * not be executed.
		 * This is for compatibility with the kgmon(8) kmem interface.
		 */
		p->state = GMON_PROF_ON;

		cp = (char *)(p + 1);
		p->tos = (struct tostruct *)cp;
		p->kcount = (u_short *)(cp + p->tossize);
		p->froms = (u_short *)(cp + p->tossize + p->kcountsize);
	}

	sysctl_createv(NULL, 0, NULL, NULL,
	    0, CTLTYPE_NODE, "percpu",
	    SYSCTL_DESCR("per cpu profiling information"),
	    NULL, 0, NULL, 0,
	    CTL_KERN, KERN_PROF, GPROF_PERCPU, CTL_EOL);

	for (CPU_INFO_FOREACH(cii, ci)) {
		if (ci->ci_gmon == NULL)
			continue;

		sysctl_createv(NULL, 0, NULL, NULL,
		    0, CTLTYPE_NODE, cpu_name(ci),
		    NULL,
		    NULL, 0, NULL, 0,
		    CTL_KERN, KERN_PROF, GPROF_PERCPU, cpu_index(ci), CTL_EOL);

		sysctl_createv(NULL, 0, NULL, NULL,
		    CTLFLAG_READWRITE, CTLTYPE_INT, "state",
		    SYSCTL_DESCR("Profiling state"),
		    sysctl_kern_profiling, 0, (void *)ci, 0,
		    CTL_KERN, KERN_PROF, GPROF_PERCPU, cpu_index(ci),
		    GPROF_STATE, CTL_EOL);
		sysctl_createv(NULL, 0, NULL, NULL,
		    CTLFLAG_READWRITE, CTLTYPE_STRUCT, "count",
		    SYSCTL_DESCR("Array of statistical program counters"),
		    sysctl_kern_profiling, 0, (void *)ci, 0,
		    CTL_KERN, KERN_PROF, GPROF_PERCPU, cpu_index(ci),
		    GPROF_COUNT, CTL_EOL);
		sysctl_createv(NULL, 0, NULL, NULL,
		    CTLFLAG_READWRITE, CTLTYPE_STRUCT, "froms",
		    SYSCTL_DESCR("Array indexed by program counter of "
		    "call-from points"),
		    sysctl_kern_profiling, 0, (void *)ci, 0,
		    CTL_KERN, KERN_PROF, GPROF_PERCPU, cpu_index(ci),
		    GPROF_FROMS, CTL_EOL);
		sysctl_createv(NULL, 0, NULL, NULL,
		    CTLFLAG_READWRITE, CTLTYPE_STRUCT, "tos",
		    SYSCTL_DESCR("Array of structures describing "
		    "destination of calls and their counts"),
		    sysctl_kern_profiling, 0, (void *)ci, 0,
		    CTL_KERN, KERN_PROF, GPROF_PERCPU, cpu_index(ci),
		    GPROF_TOS, CTL_EOL);
		sysctl_createv(NULL, 0, NULL, NULL,
		    CTLFLAG_READWRITE, CTLTYPE_STRUCT, "gmonparam",
		    SYSCTL_DESCR("Structure giving the sizes of the above "
		    "arrays"),
		    sysctl_kern_profiling, 0, (void *)ci, 0,
		    CTL_KERN, KERN_PROF, GPROF_PERCPU, cpu_index(ci),
		    GPROF_GMONPARAM, CTL_EOL);
	}

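	/*
	 * The loops above give each CPU its own kern.profiling.percpu.<cpu>
	 * subtree with "state", "count", "froms", "tos" and "gmonparam"
	 * leaves.  All of them share sysctl_kern_profiling() as their
	 * handler and pass the CPU's cpu_info as sysctl_data, which is how
	 * the handler distinguishes a per-CPU request from one on the
	 * global nodes registered in SYSCTL_SETUP below.
	 */
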
	/*
	 * For minimal compatibility with the kgmon(8) kmem interface,
	 * _gmonparam and cpu0's ci_gmon share their buffers.
	 */
	p = curcpu()->ci_gmon;
	if (p != NULL) {
		_gmonparam.tos = p->tos;
		_gmonparam.kcount = p->kcount;
		_gmonparam.froms = p->froms;
	}
#else /* MULTIPROCESSOR */
	cp = malloc(size, M_GPROF, M_NOWAIT | M_ZERO);
	if (cp == NULL) {
		printf("No memory for profiling.\n");
		return;
	}
	p->tos = (struct tostruct *)cp;
	cp += p->tossize;
	p->kcount = (u_short *)cp;
	cp += p->kcountsize;
	p->froms = (u_short *)cp;
#endif /* MULTIPROCESSOR */
}

#ifdef MULTIPROCESSOR
static void
prof_set_state_xc(void *arg1, void *arg2 __unused)
{
	int state = PTRTOUINT64(arg1);
	struct gmonparam *gp = curcpu()->ci_gmon;

	if (gp != NULL)
		gp->state = state;
}
#endif /* MULTIPROCESSOR */

/*
 * Return kernel profiling information.
 *
 * sysctl helper routine for the kern.profiling subtree: enables/disables
 * kernel profiling and hands out copies of the profiling data.
 */
static int
sysctl_kern_profiling(SYSCTLFN_ARGS)
{
	struct sysctlnode node = *rnode;
	struct gmonparam *gp;
	int error;
#ifdef MULTIPROCESSOR
	CPU_INFO_ITERATOR cii;
	struct cpu_info *ci, *target_ci;
	uint64_t where;
	int state;
	bool prof_on, do_merge;

	target_ci = (struct cpu_info *)rnode->sysctl_data;
	do_merge = (oldp != NULL) && (target_ci == NULL) &&
	    ((node.sysctl_num == GPROF_COUNT) ||
	    (node.sysctl_num == GPROF_FROMS) ||
	    (node.sysctl_num == GPROF_TOS));

	if (do_merge) {
		/* kern.profiling.{count,froms,tos} */
		unsigned long size;
		char *cp;

		/* allocate a temporary gmonparam and merge the results of all CPUs */
		size = _gmonparam.kcountsize + _gmonparam.fromssize +
		    _gmonparam.tossize;
		gp = malloc(sizeof(struct gmonparam) + size, M_GPROF,
		    M_NOWAIT | M_ZERO);
		if (gp == NULL)
			return ENOMEM;
		memcpy(gp, &_gmonparam, sizeof(_gmonparam));
		cp = (char *)(gp + 1);
		gp->tos = (struct tostruct *)cp;
		gp->kcount = (u_short *)(cp + gp->tossize);
		gp->froms = (u_short *)(cp + gp->tossize + gp->kcountsize);

		for (CPU_INFO_FOREACH(cii, ci)) {
			if (ci->ci_gmon == NULL)
				continue;
			_gmonparam_merge(gp, ci->ci_gmon);
		}
	} else if (target_ci != NULL) {
		/* kern.profiling.percpu.* */
		gp = target_ci->ci_gmon;
	} else {
		/* kern.profiling.{state,gmonparam} */
		gp = &_gmonparam;
	}
#else /* MULTIPROCESSOR */
	gp = &_gmonparam;
#endif

	switch (node.sysctl_num) {
	case GPROF_STATE:
#ifdef MULTIPROCESSOR
		/*
		 * If _gmonparam.state is OFF, the state of each CPU is
		 * considered to be OFF, even if it is actually ON.
		 */
		if (_gmonparam.state == GMON_PROF_OFF ||
		    gp->state == GMON_PROF_OFF)
			state = GMON_PROF_OFF;
		else
			state = GMON_PROF_ON;
		node.sysctl_data = &state;
#else
		node.sysctl_data = &gp->state;
#endif
		break;
	case GPROF_COUNT:
		node.sysctl_data = gp->kcount;
		node.sysctl_size = gp->kcountsize;
		break;
	case GPROF_FROMS:
		node.sysctl_data = gp->froms;
		node.sysctl_size = gp->fromssize;
		break;
	case GPROF_TOS:
		node.sysctl_data = gp->tos;
		node.sysctl_size = gp->tossize;
		break;
	case GPROF_GMONPARAM:
		node.sysctl_data = gp;
		node.sysctl_size = sizeof(*gp);
		break;
	default:
		return (EOPNOTSUPP);
	}

	error = sysctl_lookup(SYSCTLFN_CALL(&node));
	if (error || newp == NULL)
		goto done;

#ifdef MULTIPROCESSOR
	switch (node.sysctl_num) {
	case GPROF_STATE:
		if (target_ci != NULL) {
			where = xc_unicast(0, prof_set_state_xc,
			    UINT64TOPTR(state), NULL, target_ci);
			xc_wait(where);

			/* if even one CPU is being profiled, enable the profiling clock */
			prof_on = false;
			for (CPU_INFO_FOREACH(cii, ci)) {
				if (ci->ci_gmon == NULL)
					continue;
				if (ci->ci_gmon->state != GMON_PROF_OFF) {
					prof_on = true;
					break;
				}
			}
			mutex_spin_enter(&proc0.p_stmutex);
			if (prof_on)
				startprofclock(&proc0);
			else
				stopprofclock(&proc0);
			mutex_spin_exit(&proc0.p_stmutex);

			if (prof_on) {
				_gmonparam.state = GMON_PROF_ON;
			} else {
				_gmonparam.state = GMON_PROF_OFF;
				/*
				 * When _gmonparam.state and the gmon state of
				 * every CPU are OFF, set each CPU's state back
				 * to ON so that profiling of all CPUs can
				 * again be controlled by _gmonparam.state
				 * alone.
				 */
				for (CPU_INFO_FOREACH(cii, ci)) {
					if (ci->ci_gmon == NULL)
						continue;
					ci->ci_gmon->state = GMON_PROF_ON;
				}
			}
		} else {
			_gmonparam.state = state;
			where = xc_broadcast(0, prof_set_state_xc,
			    UINT64TOPTR(state), NULL);
			xc_wait(where);

			mutex_spin_enter(&proc0.p_stmutex);
			if (state == GMON_PROF_OFF)
				stopprofclock(&proc0);
			else
				startprofclock(&proc0);
			mutex_spin_exit(&proc0.p_stmutex);
		}
		break;
	case GPROF_COUNT:
		/*
		 * If 'kern.profiling.{count,froms,tos}' is written, the same
		 * data is also written to 'kern.profiling.percpu.cpuN.xxx'.
		 */
		if (target_ci == NULL) {
			for (CPU_INFO_FOREACH(cii, ci)) {
				if (ci->ci_gmon == NULL)
					continue;
				memmove(ci->ci_gmon->kcount, gp->kcount,
				    newlen);
			}
		}
		break;
	case GPROF_FROMS:
		if (target_ci == NULL) {
			for (CPU_INFO_FOREACH(cii, ci)) {
				if (ci->ci_gmon == NULL)
					continue;
				memmove(ci->ci_gmon->froms, gp->froms, newlen);
			}
		}
		break;
	case GPROF_TOS:
		if (target_ci == NULL) {
			for (CPU_INFO_FOREACH(cii, ci)) {
				if (ci->ci_gmon == NULL)
					continue;
				memmove(ci->ci_gmon->tos, gp->tos, newlen);
			}
		}
		break;
	}
#else
	if (node.sysctl_num == GPROF_STATE) {
		mutex_spin_enter(&proc0.p_stmutex);
		if (gp->state == GMON_PROF_OFF)
			stopprofclock(&proc0);
		else
			startprofclock(&proc0);
		mutex_spin_exit(&proc0.p_stmutex);
	}
#endif

 done:
#ifdef MULTIPROCESSOR
	if (do_merge)
		free(gp, M_GPROF);
#endif
	return error;
}

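/*
 * Illustrative sketch: a user program can read the profile through the
 * nodes registered below, roughly the way kgmon(8) does (error handling
 * omitted; "buf" is an assumed caller-supplied buffer of gp.kcountsize
 * bytes):
 *
 *	int mib[3] = { CTL_KERN, KERN_PROF, GPROF_GMONPARAM };
 *	struct gmonparam gp;
 *	size_t len = sizeof(gp);
 *	if (sysctl(mib, 3, &gp, &len, NULL, 0) == 0) {
 *		mib[2] = GPROF_COUNT;
 *		len = gp.kcountsize;
 *		sysctl(mib, 3, buf, &len, NULL, 0);
 *	}
 */
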
SYSCTL_SETUP(sysctl_kern_gprof_setup, "sysctl kern.profiling subtree setup")
{

	sysctl_createv(clog, 0, NULL, NULL,
	    CTLFLAG_PERMANENT,
	    CTLTYPE_NODE, "profiling",
	    SYSCTL_DESCR("Profiling information (available)"),
	    NULL, 0, NULL, 0,
	    CTL_KERN, KERN_PROF, CTL_EOL);

	sysctl_createv(clog, 0, NULL, NULL,
	    CTLFLAG_PERMANENT|CTLFLAG_READWRITE,
	    CTLTYPE_INT, "state",
	    SYSCTL_DESCR("Profiling state"),
	    sysctl_kern_profiling, 0, NULL, 0,
	    CTL_KERN, KERN_PROF, GPROF_STATE, CTL_EOL);
	sysctl_createv(clog, 0, NULL, NULL,
	    CTLFLAG_PERMANENT|CTLFLAG_READWRITE,
	    CTLTYPE_STRUCT, "count",
	    SYSCTL_DESCR("Array of statistical program counters"),
	    sysctl_kern_profiling, 0, NULL, 0,
	    CTL_KERN, KERN_PROF, GPROF_COUNT, CTL_EOL);
	sysctl_createv(clog, 0, NULL, NULL,
	    CTLFLAG_PERMANENT|CTLFLAG_READWRITE,
	    CTLTYPE_STRUCT, "froms",
	    SYSCTL_DESCR("Array indexed by program counter of "
	    "call-from points"),
	    sysctl_kern_profiling, 0, NULL, 0,
	    CTL_KERN, KERN_PROF, GPROF_FROMS, CTL_EOL);
	sysctl_createv(clog, 0, NULL, NULL,
	    CTLFLAG_PERMANENT|CTLFLAG_READWRITE,
	    CTLTYPE_STRUCT, "tos",
	    SYSCTL_DESCR("Array of structures describing "
	    "destination of calls and their counts"),
	    sysctl_kern_profiling, 0, NULL, 0,
	    CTL_KERN, KERN_PROF, GPROF_TOS, CTL_EOL);
	sysctl_createv(clog, 0, NULL, NULL,
	    CTLFLAG_PERMANENT,
	    CTLTYPE_STRUCT, "gmonparam",
	    SYSCTL_DESCR("Structure giving the sizes of the above "
	    "arrays"),
	    sysctl_kern_profiling, 0, NULL, 0,
	    CTL_KERN, KERN_PROF, GPROF_GMONPARAM, CTL_EOL);
}
#endif /* GPROF */

/*
 * Profiling system call.
 *
 * The scale factor is a fixed point number with 16 bits of fraction, so that
 * 1.0 is represented as 0x10000.  A scale factor of 0 turns off profiling.
 */
/* ARGSUSED */
int
sys_profil(struct lwp *l, const struct sys_profil_args *uap, register_t *retval)
{
	/* {
		syscallarg(char *) samples;
		syscallarg(size_t) size;
		syscallarg(u_long) offset;
		syscallarg(u_int) scale;
	} */
	struct proc *p = l->l_proc;
	struct uprof *upp;

	if (SCARG(uap, scale) > (1 << 16))
		return (EINVAL);
	if (SCARG(uap, scale) == 0) {
		mutex_spin_enter(&p->p_stmutex);
		stopprofclock(p);
		mutex_spin_exit(&p->p_stmutex);
		return (0);
	}
	upp = &p->p_stats->p_prof;

	/* Block profile interrupts while changing state. */
	mutex_spin_enter(&p->p_stmutex);
	upp->pr_off = SCARG(uap, offset);
	upp->pr_scale = SCARG(uap, scale);
	upp->pr_base = SCARG(uap, samples);
	upp->pr_size = SCARG(uap, size);
	startprofclock(p);
	mutex_spin_exit(&p->p_stmutex);

	return (0);
}

/*
 * Scale is a fixed-point number with the binary point 16 bits
 * into the value, and is <= 1.0.  pc is at most 32 bits, so the
 * intermediate result is at most 48 bits.
 */
#define	PC_TO_INDEX(pc, prof) \
	((int)(((u_quad_t)((pc) - (prof)->pr_off) * \
	    (u_quad_t)((prof)->pr_scale)) >> 16) & ~1)

/*
 * Collect user-level profiling statistics; called on a profiling tick,
 * when a process is running in user-mode.  This routine may be called
 * from an interrupt context.  We schedule an AST that will vector us
 * to trap() with a context in which copyin and copyout will work.
 * Trap will then call addupc_task().
 *
 * XXX We could use ufetch/ustore here if the profile buffers were
 * wired.
 *
 * Note that we may (rarely) not get around to the AST soon enough, and
 * lose profile ticks when the next tick overwrites this one, but in this
 * case the system is overloaded and the profile is probably already
 * inaccurate.
 */
void
addupc_intr(struct lwp *l, u_long pc)
{
	struct uprof *prof;
	struct proc *p;
	u_int i;

	p = l->l_proc;

	KASSERT(mutex_owned(&p->p_stmutex));

	prof = &p->p_stats->p_prof;
	if (pc < prof->pr_off ||
	    (i = PC_TO_INDEX(pc, prof)) >= prof->pr_size)
		return;			/* out of range; ignore */

	mutex_spin_exit(&p->p_stmutex);

	/* XXXSMP */
	prof->pr_addr = pc;
	prof->pr_ticks++;
	cpu_need_proftick(l);

	mutex_spin_enter(&p->p_stmutex);
}

/*
 * Much like before, but we can afford to take faults here.  If the
 * update fails, we simply turn off profiling.
 */
void
addupc_task(struct lwp *l, u_long pc, u_int ticks)
{
	struct uprof *prof;
	struct proc *p;
	void *addr;
	int error;
	u_int i;
	u_short v;

	p = l->l_proc;

	if (ticks == 0)
		return;

	mutex_spin_enter(&p->p_stmutex);
	prof = &p->p_stats->p_prof;

	/* Testing PST_PROFIL may be unnecessary, but is certainly safe. */
	if ((p->p_stflag & PST_PROFIL) == 0 || pc < prof->pr_off ||
	    (i = PC_TO_INDEX(pc, prof)) >= prof->pr_size) {
		mutex_spin_exit(&p->p_stmutex);
		return;
	}

	addr = prof->pr_base + i;
	mutex_spin_exit(&p->p_stmutex);
	if ((error = copyin(addr, (void *)&v, sizeof(v))) == 0) {
		v += ticks;
		error = copyout((void *)&v, addr, sizeof(v));
	}
	if (error != 0) {
		mutex_spin_enter(&p->p_stmutex);
		stopprofclock(p);
		mutex_spin_exit(&p->p_stmutex);
	}
}
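
/*
 * Worked example of PC_TO_INDEX above (illustrative numbers only): with
 * pr_scale = 0x8000 (i.e. 0.5) and pr_off = 0x1000, a sample at
 * pc = 0x1064 yields ((0x64 * 0x8000) >> 16) & ~1 = 0x32, i.e. the
 * u_short counter 0x32 bytes into pr_base, so every 4 bytes of text
 * share one 2-byte counter.  With pr_scale = 0x10000 (1.0) each 2 bytes
 * of text get a counter of their own.
 */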