/* kern_clock.c — FreeBSD stable/11, revision 330897 */
1/*- 2 * SPDX-License-Identifier: BSD-3-Clause 3 * 4 * Copyright (c) 1982, 1986, 1991, 1993 5 * The Regents of the University of California. All rights reserved. 6 * (c) UNIX System Laboratories, Inc. 7 * All or some portions of this file are derived from material licensed 8 * to the University of California by American Telephone and Telegraph 9 * Co. or Unix System Laboratories, Inc. and are reproduced herein with 10 * the permission of UNIX System Laboratories, Inc. 11 * 12 * Redistribution and use in source and binary forms, with or without 13 * modification, are permitted provided that the following conditions 14 * are met: 15 * 1. Redistributions of source code must retain the above copyright 16 * notice, this list of conditions and the following disclaimer. 17 * 2. Redistributions in binary form must reproduce the above copyright 18 * notice, this list of conditions and the following disclaimer in the 19 * documentation and/or other materials provided with the distribution. 20 * 4. Neither the name of the University nor the names of its contributors 21 * may be used to endorse or promote products derived from this software 22 * without specific prior written permission. 23 * 24 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 25 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 26 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 27 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 28 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 29 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 30 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 31 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 32 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 33 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 34 * SUCH DAMAGE. 
35 * 36 * @(#)kern_clock.c 8.5 (Berkeley) 1/21/94 37 */ 38 39#include <sys/cdefs.h> 40__FBSDID("$FreeBSD: stable/11/sys/kern/kern_clock.c 330897 2018-03-14 03:19:51Z eadler $"); 41 42#include "opt_kdb.h" 43#include "opt_device_polling.h" 44#include "opt_hwpmc_hooks.h" 45#include "opt_ntp.h" 46#include "opt_watchdog.h" 47 48#include <sys/param.h> 49#include <sys/systm.h> 50#include <sys/callout.h> 51#include <sys/kdb.h> 52#include <sys/kernel.h> 53#include <sys/kthread.h> 54#include <sys/ktr.h> 55#include <sys/lock.h> 56#include <sys/mutex.h> 57#include <sys/proc.h> 58#include <sys/resource.h> 59#include <sys/resourcevar.h> 60#include <sys/sched.h> 61#include <sys/sdt.h> 62#include <sys/signalvar.h> 63#include <sys/sleepqueue.h> 64#include <sys/smp.h> 65#include <vm/vm.h> 66#include <vm/pmap.h> 67#include <vm/vm_map.h> 68#include <sys/sysctl.h> 69#include <sys/bus.h> 70#include <sys/interrupt.h> 71#include <sys/limits.h> 72#include <sys/timetc.h> 73 74#ifdef GPROF 75#include <sys/gmon.h> 76#endif 77 78#ifdef HWPMC_HOOKS 79#include <sys/pmckern.h> 80PMC_SOFT_DEFINE( , , clock, hard); 81PMC_SOFT_DEFINE( , , clock, stat); 82PMC_SOFT_DEFINE_EX( , , clock, prof, \ 83 cpu_startprofclock, cpu_stopprofclock); 84#endif 85 86#ifdef DEVICE_POLLING 87extern void hardclock_device_poll(void); 88#endif /* DEVICE_POLLING */ 89 90static void initclocks(void *dummy); 91SYSINIT(clocks, SI_SUB_CLOCKS, SI_ORDER_FIRST, initclocks, NULL); 92 93/* Spin-lock protecting profiling statistics. 
*/ 94static struct mtx time_lock; 95 96SDT_PROVIDER_DECLARE(sched); 97SDT_PROBE_DEFINE2(sched, , , tick, "struct thread *", "struct proc *"); 98 99static int 100sysctl_kern_cp_time(SYSCTL_HANDLER_ARGS) 101{ 102 int error; 103 long cp_time[CPUSTATES]; 104#ifdef SCTL_MASK32 105 int i; 106 unsigned int cp_time32[CPUSTATES]; 107#endif 108 109 read_cpu_time(cp_time); 110#ifdef SCTL_MASK32 111 if (req->flags & SCTL_MASK32) { 112 if (!req->oldptr) 113 return SYSCTL_OUT(req, 0, sizeof(cp_time32)); 114 for (i = 0; i < CPUSTATES; i++) 115 cp_time32[i] = (unsigned int)cp_time[i]; 116 error = SYSCTL_OUT(req, cp_time32, sizeof(cp_time32)); 117 } else 118#endif 119 { 120 if (!req->oldptr) 121 return SYSCTL_OUT(req, 0, sizeof(cp_time)); 122 error = SYSCTL_OUT(req, cp_time, sizeof(cp_time)); 123 } 124 return error; 125} 126 127SYSCTL_PROC(_kern, OID_AUTO, cp_time, CTLTYPE_LONG|CTLFLAG_RD|CTLFLAG_MPSAFE, 128 0,0, sysctl_kern_cp_time, "LU", "CPU time statistics"); 129 130static long empty[CPUSTATES]; 131 132static int 133sysctl_kern_cp_times(SYSCTL_HANDLER_ARGS) 134{ 135 struct pcpu *pcpu; 136 int error; 137 int c; 138 long *cp_time; 139#ifdef SCTL_MASK32 140 unsigned int cp_time32[CPUSTATES]; 141 int i; 142#endif 143 144 if (!req->oldptr) { 145#ifdef SCTL_MASK32 146 if (req->flags & SCTL_MASK32) 147 return SYSCTL_OUT(req, 0, sizeof(cp_time32) * (mp_maxid + 1)); 148 else 149#endif 150 return SYSCTL_OUT(req, 0, sizeof(long) * CPUSTATES * (mp_maxid + 1)); 151 } 152 for (error = 0, c = 0; error == 0 && c <= mp_maxid; c++) { 153 if (!CPU_ABSENT(c)) { 154 pcpu = pcpu_find(c); 155 cp_time = pcpu->pc_cp_time; 156 } else { 157 cp_time = empty; 158 } 159#ifdef SCTL_MASK32 160 if (req->flags & SCTL_MASK32) { 161 for (i = 0; i < CPUSTATES; i++) 162 cp_time32[i] = (unsigned int)cp_time[i]; 163 error = SYSCTL_OUT(req, cp_time32, sizeof(cp_time32)); 164 } else 165#endif 166 error = SYSCTL_OUT(req, cp_time, sizeof(long) * CPUSTATES); 167 } 168 return error; 169} 170 171SYSCTL_PROC(_kern, OID_AUTO, 
cp_times, CTLTYPE_LONG|CTLFLAG_RD|CTLFLAG_MPSAFE, 172 0,0, sysctl_kern_cp_times, "LU", "per-CPU time statistics"); 173 174#ifdef DEADLKRES 175static const char *blessed[] = { 176 "getblk", 177 "so_snd_sx", 178 "so_rcv_sx", 179 NULL 180}; 181static int slptime_threshold = 1800; 182static int blktime_threshold = 900; 183static int sleepfreq = 3; 184 185static void 186deadlkres(void) 187{ 188 struct proc *p; 189 struct thread *td; 190 void *wchan; 191 int blkticks, i, slpticks, slptype, tryl, tticks; 192 193 tryl = 0; 194 for (;;) { 195 blkticks = blktime_threshold * hz; 196 slpticks = slptime_threshold * hz; 197 198 /* 199 * Avoid to sleep on the sx_lock in order to avoid a possible 200 * priority inversion problem leading to starvation. 201 * If the lock can't be held after 100 tries, panic. 202 */ 203 if (!sx_try_slock(&allproc_lock)) { 204 if (tryl > 100) 205 panic("%s: possible deadlock detected on allproc_lock\n", 206 __func__); 207 tryl++; 208 pause("allproc", sleepfreq * hz); 209 continue; 210 } 211 tryl = 0; 212 FOREACH_PROC_IN_SYSTEM(p) { 213 PROC_LOCK(p); 214 if (p->p_state == PRS_NEW) { 215 PROC_UNLOCK(p); 216 continue; 217 } 218 FOREACH_THREAD_IN_PROC(p, td) { 219 220 thread_lock(td); 221 if (TD_ON_LOCK(td)) { 222 223 /* 224 * The thread should be blocked on a 225 * turnstile, simply check if the 226 * turnstile channel is in good state. 227 */ 228 MPASS(td->td_blocked != NULL); 229 230 tticks = ticks - td->td_blktick; 231 thread_unlock(td); 232 if (tticks > blkticks) { 233 234 /* 235 * Accordingly with provided 236 * thresholds, this thread is 237 * stuck for too long on a 238 * turnstile. 239 */ 240 PROC_UNLOCK(p); 241 sx_sunlock(&allproc_lock); 242 panic("%s: possible deadlock detected for %p, blocked for %d ticks\n", 243 __func__, td, tticks); 244 } 245 } else if (TD_IS_SLEEPING(td) && 246 TD_ON_SLEEPQ(td)) { 247 248 /* 249 * Check if the thread is sleeping on a 250 * lock, otherwise skip the check. 
251 * Drop the thread lock in order to 252 * avoid a LOR with the sleepqueue 253 * spinlock. 254 */ 255 wchan = td->td_wchan; 256 tticks = ticks - td->td_slptick; 257 thread_unlock(td); 258 slptype = sleepq_type(wchan); 259 if ((slptype == SLEEPQ_SX || 260 slptype == SLEEPQ_LK) && 261 tticks > slpticks) { 262 263 /* 264 * Accordingly with provided 265 * thresholds, this thread is 266 * stuck for too long on a 267 * sleepqueue. 268 * However, being on a 269 * sleepqueue, we might still 270 * check for the blessed 271 * list. 272 */ 273 tryl = 0; 274 for (i = 0; blessed[i] != NULL; 275 i++) { 276 if (!strcmp(blessed[i], 277 td->td_wmesg)) { 278 tryl = 1; 279 break; 280 } 281 } 282 if (tryl != 0) { 283 tryl = 0; 284 continue; 285 } 286 PROC_UNLOCK(p); 287 sx_sunlock(&allproc_lock); 288 panic("%s: possible deadlock detected for %p, blocked for %d ticks\n", 289 __func__, td, tticks); 290 } 291 } else 292 thread_unlock(td); 293 } 294 PROC_UNLOCK(p); 295 } 296 sx_sunlock(&allproc_lock); 297 298 /* Sleep for sleepfreq seconds. 
*/ 299 pause("-", sleepfreq * hz); 300 } 301} 302 303static struct kthread_desc deadlkres_kd = { 304 "deadlkres", 305 deadlkres, 306 (struct thread **)NULL 307}; 308 309SYSINIT(deadlkres, SI_SUB_CLOCKS, SI_ORDER_ANY, kthread_start, &deadlkres_kd); 310 311static SYSCTL_NODE(_debug, OID_AUTO, deadlkres, CTLFLAG_RW, 0, 312 "Deadlock resolver"); 313SYSCTL_INT(_debug_deadlkres, OID_AUTO, slptime_threshold, CTLFLAG_RW, 314 &slptime_threshold, 0, 315 "Number of seconds within is valid to sleep on a sleepqueue"); 316SYSCTL_INT(_debug_deadlkres, OID_AUTO, blktime_threshold, CTLFLAG_RW, 317 &blktime_threshold, 0, 318 "Number of seconds within is valid to block on a turnstile"); 319SYSCTL_INT(_debug_deadlkres, OID_AUTO, sleepfreq, CTLFLAG_RW, &sleepfreq, 0, 320 "Number of seconds between any deadlock resolver thread run"); 321#endif /* DEADLKRES */ 322 323void 324read_cpu_time(long *cp_time) 325{ 326 struct pcpu *pc; 327 int i, j; 328 329 /* Sum up global cp_time[]. */ 330 bzero(cp_time, sizeof(long) * CPUSTATES); 331 CPU_FOREACH(i) { 332 pc = pcpu_find(i); 333 for (j = 0; j < CPUSTATES; j++) 334 cp_time[j] += pc->pc_cp_time[j]; 335 } 336} 337 338#include <sys/watchdog.h> 339 340static int watchdog_ticks; 341static int watchdog_enabled; 342static void watchdog_fire(void); 343static void watchdog_config(void *, u_int, int *); 344 345static void 346watchdog_attach(void) 347{ 348 EVENTHANDLER_REGISTER(watchdog_list, watchdog_config, NULL, 0); 349} 350 351/* 352 * Clock handling routines. 353 * 354 * This code is written to operate with two timers that run independently of 355 * each other. 356 * 357 * The main timer, running hz times per second, is used to trigger interval 358 * timers, timeouts and rescheduling as needed. 359 * 360 * The second timer handles kernel and user profiling, 361 * and does resource use estimation. If the second timer is programmable, 362 * it is randomized to avoid aliasing between the two clocks. 
For example, 363 * the randomization prevents an adversary from always giving up the cpu 364 * just before its quantum expires. Otherwise, it would never accumulate 365 * cpu ticks. The mean frequency of the second timer is stathz. 366 * 367 * If no second timer exists, stathz will be zero; in this case we drive 368 * profiling and statistics off the main clock. This WILL NOT be accurate; 369 * do not do it unless absolutely necessary. 370 * 371 * The statistics clock may (or may not) be run at a higher rate while 372 * profiling. This profile clock runs at profhz. We require that profhz 373 * be an integral multiple of stathz. 374 * 375 * If the statistics clock is running fast, it must be divided by the ratio 376 * profhz/stathz for statistics. (For profiling, every tick counts.) 377 * 378 * Time-of-day is maintained using a "timecounter", which may or may 379 * not be related to the hardware generating the above mentioned 380 * interrupts. 381 */ 382 383int stathz; 384int profhz; 385int profprocs; 386volatile int ticks; 387int psratio; 388 389static DPCPU_DEFINE(int, pcputicks); /* Per-CPU version of ticks. */ 390#ifdef DEVICE_POLLING 391static int devpoll_run = 0; 392#endif 393 394/* 395 * Initialize clock frequencies and start both clocks running. 396 */ 397/* ARGSUSED*/ 398static void 399initclocks(dummy) 400 void *dummy; 401{ 402 register int i; 403 404 /* 405 * Set divisors to 1 (normal case) and let the machine-specific 406 * code do its bit. 407 */ 408 mtx_init(&time_lock, "time lock", NULL, MTX_DEF); 409 cpu_initclocks(); 410 411 /* 412 * Compute profhz/stathz, and fix profhz if needed. 413 */ 414 i = stathz ? stathz : hz; 415 if (profhz == 0) 416 profhz = i; 417 psratio = profhz / i; 418 419#ifdef SW_WATCHDOG 420 /* Enable hardclock watchdog now, even if a hardware watchdog exists. */ 421 watchdog_attach(); 422#else 423 /* Volunteer to run a software watchdog. 
*/ 424 if (wdog_software_attach == NULL) 425 wdog_software_attach = watchdog_attach; 426#endif 427} 428 429/* 430 * Each time the real-time timer fires, this function is called on all CPUs. 431 * Note that hardclock() calls hardclock_cpu() for the boot CPU, so only 432 * the other CPUs in the system need to call this function. 433 */ 434void 435hardclock_cpu(int usermode) 436{ 437 struct pstats *pstats; 438 struct thread *td = curthread; 439 struct proc *p = td->td_proc; 440 int flags; 441 442 /* 443 * Run current process's virtual and profile time, as needed. 444 */ 445 pstats = p->p_stats; 446 flags = 0; 447 if (usermode && 448 timevalisset(&pstats->p_timer[ITIMER_VIRTUAL].it_value)) { 449 PROC_ITIMLOCK(p); 450 if (itimerdecr(&pstats->p_timer[ITIMER_VIRTUAL], tick) == 0) 451 flags |= TDF_ALRMPEND | TDF_ASTPENDING; 452 PROC_ITIMUNLOCK(p); 453 } 454 if (timevalisset(&pstats->p_timer[ITIMER_PROF].it_value)) { 455 PROC_ITIMLOCK(p); 456 if (itimerdecr(&pstats->p_timer[ITIMER_PROF], tick) == 0) 457 flags |= TDF_PROFPEND | TDF_ASTPENDING; 458 PROC_ITIMUNLOCK(p); 459 } 460 thread_lock(td); 461 td->td_flags |= flags; 462 thread_unlock(td); 463 464#ifdef HWPMC_HOOKS 465 if (PMC_CPU_HAS_SAMPLES(PCPU_GET(cpuid))) 466 PMC_CALL_HOOK_UNLOCKED(curthread, PMC_FN_DO_SAMPLES, NULL); 467 if (td->td_intr_frame != NULL) 468 PMC_SOFT_CALL_TF( , , clock, hard, td->td_intr_frame); 469#endif 470 callout_process(sbinuptime()); 471} 472 473/* 474 * The real-time timer, interrupting hz times per second. 475 */ 476void 477hardclock(int usermode, uintfptr_t pc) 478{ 479 480 atomic_add_int(&ticks, 1); 481 hardclock_cpu(usermode); 482 tc_ticktock(1); 483 cpu_tick_calibration(); 484 /* 485 * If no separate statistics clock is available, run it from here. 
486 * 487 * XXX: this only works for UP 488 */ 489 if (stathz == 0) { 490 profclock(usermode, pc); 491 statclock(usermode); 492 } 493#ifdef DEVICE_POLLING 494 hardclock_device_poll(); /* this is very short and quick */ 495#endif /* DEVICE_POLLING */ 496 if (watchdog_enabled > 0 && --watchdog_ticks <= 0) 497 watchdog_fire(); 498} 499 500void 501hardclock_cnt(int cnt, int usermode) 502{ 503 struct pstats *pstats; 504 struct thread *td = curthread; 505 struct proc *p = td->td_proc; 506 int *t = DPCPU_PTR(pcputicks); 507 int flags, global, newticks; 508 int i; 509 510 /* 511 * Update per-CPU and possibly global ticks values. 512 */ 513 *t += cnt; 514 do { 515 global = ticks; 516 newticks = *t - global; 517 if (newticks <= 0) { 518 if (newticks < -1) 519 *t = global - 1; 520 newticks = 0; 521 break; 522 } 523 } while (!atomic_cmpset_int(&ticks, global, *t)); 524 525 /* 526 * Run current process's virtual and profile time, as needed. 527 */ 528 pstats = p->p_stats; 529 flags = 0; 530 if (usermode && 531 timevalisset(&pstats->p_timer[ITIMER_VIRTUAL].it_value)) { 532 PROC_ITIMLOCK(p); 533 if (itimerdecr(&pstats->p_timer[ITIMER_VIRTUAL], 534 tick * cnt) == 0) 535 flags |= TDF_ALRMPEND | TDF_ASTPENDING; 536 PROC_ITIMUNLOCK(p); 537 } 538 if (timevalisset(&pstats->p_timer[ITIMER_PROF].it_value)) { 539 PROC_ITIMLOCK(p); 540 if (itimerdecr(&pstats->p_timer[ITIMER_PROF], 541 tick * cnt) == 0) 542 flags |= TDF_PROFPEND | TDF_ASTPENDING; 543 PROC_ITIMUNLOCK(p); 544 } 545 if (flags != 0) { 546 thread_lock(td); 547 td->td_flags |= flags; 548 thread_unlock(td); 549 } 550 551#ifdef HWPMC_HOOKS 552 if (PMC_CPU_HAS_SAMPLES(PCPU_GET(cpuid))) 553 PMC_CALL_HOOK_UNLOCKED(curthread, PMC_FN_DO_SAMPLES, NULL); 554 if (td->td_intr_frame != NULL) 555 PMC_SOFT_CALL_TF( , , clock, hard, td->td_intr_frame); 556#endif 557 /* We are in charge to handle this tick duty. 
*/ 558 if (newticks > 0) { 559 tc_ticktock(newticks); 560#ifdef DEVICE_POLLING 561 /* Dangerous and no need to call these things concurrently. */ 562 if (atomic_cmpset_acq_int(&devpoll_run, 0, 1)) { 563 /* This is very short and quick. */ 564 hardclock_device_poll(); 565 atomic_store_rel_int(&devpoll_run, 0); 566 } 567#endif /* DEVICE_POLLING */ 568 if (watchdog_enabled > 0) { 569 i = atomic_fetchadd_int(&watchdog_ticks, -newticks); 570 if (i > 0 && i <= newticks) 571 watchdog_fire(); 572 } 573 } 574 if (curcpu == CPU_FIRST()) 575 cpu_tick_calibration(); 576} 577 578void 579hardclock_sync(int cpu) 580{ 581 int *t = DPCPU_ID_PTR(cpu, pcputicks); 582 583 *t = ticks; 584} 585 586/* 587 * Compute number of ticks in the specified amount of time. 588 */ 589int 590tvtohz(tv) 591 struct timeval *tv; 592{ 593 register unsigned long ticks; 594 register long sec, usec; 595 596 /* 597 * If the number of usecs in the whole seconds part of the time 598 * difference fits in a long, then the total number of usecs will 599 * fit in an unsigned long. Compute the total and convert it to 600 * ticks, rounding up and adding 1 to allow for the current tick 601 * to expire. Rounding also depends on unsigned long arithmetic 602 * to avoid overflow. 603 * 604 * Otherwise, if the number of ticks in the whole seconds part of 605 * the time difference fits in a long, then convert the parts to 606 * ticks separately and add, using similar rounding methods and 607 * overflow avoidance. This method would work in the previous 608 * case but it is slightly slower and assumes that hz is integral. 609 * 610 * Otherwise, round the time difference down to the maximum 611 * representable value. 612 * 613 * If ints have 32 bits, then the maximum value for any timeout in 614 * 10ms ticks is 248 days. 
615 */ 616 sec = tv->tv_sec; 617 usec = tv->tv_usec; 618 if (usec < 0) { 619 sec--; 620 usec += 1000000; 621 } 622 if (sec < 0) { 623#ifdef DIAGNOSTIC 624 if (usec > 0) { 625 sec++; 626 usec -= 1000000; 627 } 628 printf("tvotohz: negative time difference %ld sec %ld usec\n", 629 sec, usec); 630#endif 631 ticks = 1; 632 } else if (sec <= LONG_MAX / 1000000) 633 ticks = howmany(sec * 1000000 + (unsigned long)usec, tick) + 1; 634 else if (sec <= LONG_MAX / hz) 635 ticks = sec * hz 636 + howmany((unsigned long)usec, tick) + 1; 637 else 638 ticks = LONG_MAX; 639 if (ticks > INT_MAX) 640 ticks = INT_MAX; 641 return ((int)ticks); 642} 643 644/* 645 * Start profiling on a process. 646 * 647 * Kernel profiling passes proc0 which never exits and hence 648 * keeps the profile clock running constantly. 649 */ 650void 651startprofclock(p) 652 register struct proc *p; 653{ 654 655 PROC_LOCK_ASSERT(p, MA_OWNED); 656 if (p->p_flag & P_STOPPROF) 657 return; 658 if ((p->p_flag & P_PROFIL) == 0) { 659 p->p_flag |= P_PROFIL; 660 mtx_lock(&time_lock); 661 if (++profprocs == 1) 662 cpu_startprofclock(); 663 mtx_unlock(&time_lock); 664 } 665} 666 667/* 668 * Stop profiling on a process. 669 */ 670void 671stopprofclock(p) 672 register struct proc *p; 673{ 674 675 PROC_LOCK_ASSERT(p, MA_OWNED); 676 if (p->p_flag & P_PROFIL) { 677 if (p->p_profthreads != 0) { 678 while (p->p_profthreads != 0) { 679 p->p_flag |= P_STOPPROF; 680 msleep(&p->p_profthreads, &p->p_mtx, PPAUSE, 681 "stopprof", 0); 682 } 683 } 684 if ((p->p_flag & P_PROFIL) == 0) 685 return; 686 p->p_flag &= ~P_PROFIL; 687 mtx_lock(&time_lock); 688 if (--profprocs == 0) 689 cpu_stopprofclock(); 690 mtx_unlock(&time_lock); 691 } 692} 693 694/* 695 * Statistics clock. Updates rusage information and calls the scheduler 696 * to adjust priorities of the active thread. 697 * 698 * This should be called by all active processors. 
699 */ 700void 701statclock(int usermode) 702{ 703 704 statclock_cnt(1, usermode); 705} 706 707void 708statclock_cnt(int cnt, int usermode) 709{ 710 struct rusage *ru; 711 struct vmspace *vm; 712 struct thread *td; 713 struct proc *p; 714 long rss; 715 long *cp_time; 716 717 td = curthread; 718 p = td->td_proc; 719 720 cp_time = (long *)PCPU_PTR(cp_time); 721 if (usermode) { 722 /* 723 * Charge the time as appropriate. 724 */ 725 td->td_uticks += cnt; 726 if (p->p_nice > NZERO) 727 cp_time[CP_NICE] += cnt; 728 else 729 cp_time[CP_USER] += cnt; 730 } else { 731 /* 732 * Came from kernel mode, so we were: 733 * - handling an interrupt, 734 * - doing syscall or trap work on behalf of the current 735 * user process, or 736 * - spinning in the idle loop. 737 * Whichever it is, charge the time as appropriate. 738 * Note that we charge interrupts to the current process, 739 * regardless of whether they are ``for'' that process, 740 * so that we know how much of its real time was spent 741 * in ``non-process'' (i.e., interrupt) work. 742 */ 743 if ((td->td_pflags & TDP_ITHREAD) || 744 td->td_intr_nesting_level >= 2) { 745 td->td_iticks += cnt; 746 cp_time[CP_INTR] += cnt; 747 } else { 748 td->td_pticks += cnt; 749 td->td_sticks += cnt; 750 if (!TD_IS_IDLETHREAD(td)) 751 cp_time[CP_SYS] += cnt; 752 else 753 cp_time[CP_IDLE] += cnt; 754 } 755 } 756 757 /* Update resource usage integrals and maximums. 
*/ 758 MPASS(p->p_vmspace != NULL); 759 vm = p->p_vmspace; 760 ru = &td->td_ru; 761 ru->ru_ixrss += pgtok(vm->vm_tsize) * cnt; 762 ru->ru_idrss += pgtok(vm->vm_dsize) * cnt; 763 ru->ru_isrss += pgtok(vm->vm_ssize) * cnt; 764 rss = pgtok(vmspace_resident_count(vm)); 765 if (ru->ru_maxrss < rss) 766 ru->ru_maxrss = rss; 767 KTR_POINT2(KTR_SCHED, "thread", sched_tdname(td), "statclock", 768 "prio:%d", td->td_priority, "stathz:%d", (stathz)?stathz:hz); 769 SDT_PROBE2(sched, , , tick, td, td->td_proc); 770 thread_lock_flags(td, MTX_QUIET); 771 for ( ; cnt > 0; cnt--) 772 sched_clock(td); 773 thread_unlock(td); 774#ifdef HWPMC_HOOKS 775 if (td->td_intr_frame != NULL) 776 PMC_SOFT_CALL_TF( , , clock, stat, td->td_intr_frame); 777#endif 778} 779 780void 781profclock(int usermode, uintfptr_t pc) 782{ 783 784 profclock_cnt(1, usermode, pc); 785} 786 787void 788profclock_cnt(int cnt, int usermode, uintfptr_t pc) 789{ 790 struct thread *td; 791#ifdef GPROF 792 struct gmonparam *g; 793 uintfptr_t i; 794#endif 795 796 td = curthread; 797 if (usermode) { 798 /* 799 * Came from user mode; CPU was in user state. 800 * If this process is being profiled, record the tick. 801 * if there is no related user location yet, don't 802 * bother trying to count it. 803 */ 804 if (td->td_proc->p_flag & P_PROFIL) 805 addupc_intr(td, pc, cnt); 806 } 807#ifdef GPROF 808 else { 809 /* 810 * Kernel statistics are just like addupc_intr, only easier. 811 */ 812 g = &_gmonparam; 813 if (g->state == GMON_PROF_ON && pc >= g->lowpc) { 814 i = PC_TO_I(g, pc); 815 if (i < g->textsize) { 816 KCOUNT(g, i) += cnt; 817 } 818 } 819 } 820#endif 821#ifdef HWPMC_HOOKS 822 if (td->td_intr_frame != NULL) 823 PMC_SOFT_CALL_TF( , , clock, prof, td->td_intr_frame); 824#endif 825} 826 827/* 828 * Return information about system clocks. 829 */ 830static int 831sysctl_kern_clockrate(SYSCTL_HANDLER_ARGS) 832{ 833 struct clockinfo clkinfo; 834 /* 835 * Construct clockinfo structure. 
836 */ 837 bzero(&clkinfo, sizeof(clkinfo)); 838 clkinfo.hz = hz; 839 clkinfo.tick = tick; 840 clkinfo.profhz = profhz; 841 clkinfo.stathz = stathz ? stathz : hz; 842 return (sysctl_handle_opaque(oidp, &clkinfo, sizeof clkinfo, req)); 843} 844 845SYSCTL_PROC(_kern, KERN_CLOCKRATE, clockrate, 846 CTLTYPE_STRUCT|CTLFLAG_RD|CTLFLAG_MPSAFE, 847 0, 0, sysctl_kern_clockrate, "S,clockinfo", 848 "Rate and period of various kernel clocks"); 849 850static void 851watchdog_config(void *unused __unused, u_int cmd, int *error) 852{ 853 u_int u; 854 855 u = cmd & WD_INTERVAL; 856 if (u >= WD_TO_1SEC) { 857 watchdog_ticks = (1 << (u - WD_TO_1SEC)) * hz; 858 watchdog_enabled = 1; 859 *error = 0; 860 } else { 861 watchdog_enabled = 0; 862 } 863} 864 865/* 866 * Handle a watchdog timeout by dumping interrupt information and 867 * then either dropping to DDB or panicking. 868 */ 869static void 870watchdog_fire(void) 871{ 872 int nintr; 873 uint64_t inttotal; 874 u_long *curintr; 875 char *curname; 876 877 curintr = intrcnt; 878 curname = intrnames; 879 inttotal = 0; 880 nintr = sintrcnt / sizeof(u_long); 881 882 printf("interrupt total\n"); 883 while (--nintr >= 0) { 884 if (*curintr) 885 printf("%-12s %20lu\n", curname, *curintr); 886 curname += strlen(curname) + 1; 887 inttotal += *curintr++; 888 } 889 printf("Total %20ju\n", (uintmax_t)inttotal); 890 891#if defined(KDB) && !defined(KDB_UNATTENDED) 892 kdb_backtrace(); 893 kdb_enter(KDB_WHY_WATCHDOG, "watchdog timeout"); 894#else 895 panic("watchdog timeout"); 896#endif 897} 898