kern_timeout.c revision 2320
/*-
 * Copyright (c) 1982, 1986, 1991, 1993
 *	The Regents of the University of California.  All rights reserved.
 * (c) UNIX System Laboratories, Inc.
 * All or some portions of this file are derived from material licensed
 * to the University of California by American Telephone and Telegraph
 * Co. or Unix System Laboratories, Inc. and are reproduced herein with
 * the permission of UNIX System Laboratories, Inc.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. All advertising materials mentioning features or use of this software
 *    must display the following acknowledgement:
 *	This product includes software developed by the University of
 *	California, Berkeley and its contributors.
 * 4. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 *	@(#)kern_clock.c	8.5 (Berkeley) 1/21/94
 * $Id: kern_clock.c,v 1.4 1994/08/18 22:34:58 wollman Exp $
 */

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/dkstat.h>
#include <sys/callout.h>
#include <sys/kernel.h>
#include <sys/proc.h>
#include <sys/resourcevar.h>
#include <vm/vm.h>

#include <machine/cpu.h>

#ifdef GPROF
#include <sys/gmon.h>
#endif

/*
 * Callout (timeout) table state:
 *   callfree  - head of the free list of callout structures
 *   callout   - the backing array of callout structures
 *   calltodo  - dummy head of the sorted pending-timeout queue
 *   ncallout  - number of entries in the callout[] array
 */
/* Does anybody else really care about these? */
struct callout *callfree, *callout, calltodo;
int ncallout;

/* Some of these don't belong here, but it's easiest to concentrate them. */
long cp_time[CPUSTATES];	/* statclock ticks per cpu state (user/nice/sys/intr/idle) */
long dk_seek[DK_NDRIVE];	/* per-drive disk statistics... */
long dk_time[DK_NDRIVE];	/* ...dk_time is bumped each statclock tick a drive is busy */
long dk_wds[DK_NDRIVE];
long dk_wpms[DK_NDRIVE];
long dk_xfer[DK_NDRIVE];

int dk_busy;			/* bitmask: bit i set while drive i is busy */
int dk_ndrive = DK_NDRIVE;

/* tty input/output character accounting */
long tk_cancc;
long tk_nin;
long tk_nout;
long tk_rawcc;

/*
 * Clock handling routines.
 *
 * This code is written to operate with two timers that run independently of
 * each other. The main clock, running hz times per second, is used to keep
 * track of real time.  The second timer handles kernel and user profiling,
 * and does resource use estimation.  If the second timer is programmable,
 * it is randomized to avoid aliasing between the two clocks.  For example,
 * the randomization prevents an adversary from always giving up the cpu
 * just before its quantum expires.
Otherwise, it would never accumulate 87 * cpu ticks. The mean frequency of the second timer is stathz. 88 * 89 * If no second timer exists, stathz will be zero; in this case we drive 90 * profiling and statistics off the main clock. This WILL NOT be accurate; 91 * do not do it unless absolutely necessary. 92 * 93 * The statistics clock may (or may not) be run at a higher rate while 94 * profiling. This profile clock runs at profhz. We require that profhz 95 * be an integral multiple of stathz. 96 * 97 * If the statistics clock is running fast, it must be divided by the ratio 98 * profhz/stathz for statistics. (For profiling, every tick counts.) 99 */ 100 101/* 102 * TODO: 103 * allocate more timeout table slots when table overflows. 104 */ 105 106/* 107 * Bump a timeval by a small number of usec's. 108 */ 109#define BUMPTIME(t, usec) { \ 110 register volatile struct timeval *tp = (t); \ 111 register long us; \ 112 \ 113 tp->tv_usec = us = tp->tv_usec + (usec); \ 114 if (us >= 1000000) { \ 115 tp->tv_usec = us - 1000000; \ 116 tp->tv_sec++; \ 117 } \ 118} 119 120int stathz; 121int profhz; 122int profprocs; 123int ticks; 124static int psdiv, pscnt; /* prof => stat divider */ 125int psratio; /* ratio: prof / stat */ 126 127volatile struct timeval time; 128volatile struct timeval mono_time; 129 130/* 131 * Initialize clock frequencies and start both clocks running. 132 */ 133void 134initclocks() 135{ 136 register int i; 137 138 /* 139 * Set divisors to 1 (normal case) and let the machine-specific 140 * code do its bit. 141 */ 142 psdiv = pscnt = 1; 143 cpu_initclocks(); 144 145 /* 146 * Compute profhz/stathz, and fix profhz if needed. 147 */ 148 i = stathz ? stathz : hz; 149 if (profhz == 0) 150 profhz = i; 151 psratio = profhz / i; 152} 153 154/* 155 * The real-time timer, interrupting hz times per second. 
 */
void
hardclock(frame)
	register struct clockframe *frame;
{
	register struct callout *p1;
	register struct proc *p;
	register int delta, needsoft;
	extern int tickdelta;	/* per-tick adjustment while adjtime() is active */
	extern long timedelta;	/* total outstanding adjtime() correction, in usec */

	/*
	 * Update real-time timeout queue.
	 * At front of queue are some number of events which are ``due''.
	 * The time to these is <= 0 and if negative represents the
	 * number of ticks which have passed since it was supposed to happen.
	 * The rest of the q elements (times > 0) are events yet to happen,
	 * where the time for each is given as a delta from the previous.
	 * Decrementing just the first of these serves to decrement the time
	 * to all events.
	 */
	needsoft = 0;
	for (p1 = calltodo.c_next; p1 != NULL; p1 = p1->c_next) {
		/* Stop at the first entry that is still in the future. */
		if (--p1->c_time > 0)
			break;
		needsoft = 1;	/* at least one callout is due; schedule softclock */
		if (p1->c_time == 0)
			break;
	}

	p = curproc;
	if (p) {
		register struct pstats *pstats;

		/*
		 * Run current process's virtual and profile time, as needed.
		 * ITIMER_VIRTUAL counts only user-mode ticks; ITIMER_PROF
		 * counts every tick.  itimerdecr() returning 0 means the
		 * timer just expired, so deliver the corresponding signal.
		 */
		pstats = p->p_stats;
		if (CLKF_USERMODE(frame) &&
		    timerisset(&pstats->p_timer[ITIMER_VIRTUAL].it_value) &&
		    itimerdecr(&pstats->p_timer[ITIMER_VIRTUAL], tick) == 0)
			psignal(p, SIGVTALRM);
		if (timerisset(&pstats->p_timer[ITIMER_PROF].it_value) &&
		    itimerdecr(&pstats->p_timer[ITIMER_PROF], tick) == 0)
			psignal(p, SIGPROF);
	}

	/*
	 * If no separate statistics clock is available, run it from here.
	 */
	if (stathz == 0)
		statclock(frame);

	/*
	 * Increment the time-of-day.  The increment is just ``tick'' unless
	 * we are still adjusting the clock; see adjtime().
	 */
	ticks++;
	if (timedelta == 0)
		delta = tick;
	else {
		/* Fold one tick's worth of the adjtime() correction in. */
		delta = tick + tickdelta;
		timedelta -= tickdelta;
	}
	BUMPTIME(&time, delta);
	BUMPTIME(&mono_time, delta);

	/*
	 * Process callouts at a very low cpu priority, so we don't keep the
	 * relatively high clock interrupt priority any longer than necessary.
	 */
	if (needsoft) {
		if (CLKF_BASEPRI(frame)) {
			/*
			 * Save the overhead of a software interrupt;
			 * it will happen as soon as we return, so do it now.
			 */
			(void)splsoftclock();
			softclock();
		} else
			setsoftclock();
	}
}

/*
 * Software (low priority) clock interrupt.
 * Run periodic events from timeout queue.
 *
 * The queue is protected by splhigh(); the priority is dropped around
 * each callout function invocation so the handler itself does not run
 * at clock priority (and may itself call timeout()/untimeout()).
 */
/*ARGSUSED*/
void
softclock()
{
	register struct callout *c;
	register void *arg;
	register void (*func) __P((void *));
	register int s;

	s = splhigh();
	while ((c = calltodo.c_next) != NULL && c->c_time <= 0) {
		/* Unlink the due entry and return it to the free list
		 * before calling the handler, while still at splhigh. */
		func = c->c_func;
		arg = c->c_arg;
		calltodo.c_next = c->c_next;
		c->c_next = callfree;
		callfree = c;
		splx(s);
		(*func)(arg);
		(void) splhigh();
	}
	splx(s);
}

/*
 * timeout --
 *	Execute a function after a specified length of time.
 *
 * untimeout --
 *	Cancel previous timeout function call.
 *
 * See AT&T BCI Driver Reference Manual for specification.  This
 * implementation differs from that one in that no identification
 * value is returned from timeout, rather, the original arguments
 * to timeout are used to identify entries for untimeout.
 */
void
timeout(ftn, arg, ticks)
	timeout_t ftn;
	void *arg;
	register int ticks;	/* NOTE(review): shadows the global `ticks' counter */
{
	register struct callout *new, *p, *t;
	register int s;

	/* A non-positive delay still fires, on the next tick. */
	if (ticks <= 0)
		ticks = 1;

	/* Lock out the clock. */
	s = splhigh();

	/* Fill in the next free callout structure.
	 */
	if (callfree == NULL)
		panic("timeout table full");
	new = callfree;
	callfree = new->c_next;
	new->c_arg = arg;
	new->c_func = ftn;

	/*
	 * The time for each event is stored as a difference from the time
	 * of the previous event on the queue.  Walk the queue, correcting
	 * the ticks argument for queue entries passed.  Correct the ticks
	 * value for the queue entry immediately after the insertion point
	 * as well.  Watch out for negative c_time values; these represent
	 * overdue events.
	 */
	for (p = &calltodo;
	    (t = p->c_next) != NULL && ticks > t->c_time; p = t)
		if (t->c_time > 0)	/* overdue entries don't consume delay */
			ticks -= t->c_time;
	new->c_time = ticks;
	if (t != NULL)
		t->c_time -= ticks;	/* successor's delta is now relative to us */

	/* Insert the new entry into the queue. */
	p->c_next = new;
	new->c_next = t;
	splx(s);
}

/*
 * Cancel a pending timeout, identified by the (function, argument) pair
 * originally passed to timeout().  Only the first matching entry is
 * removed; a no-op if no match is found.
 */
void
untimeout(ftn, arg)
	timeout_t ftn;
	void *arg;
{
	register struct callout *p, *t;
	register int s;

	s = splhigh();
	for (p = &calltodo; (t = p->c_next) != NULL; p = t)
		if (t->c_func == ftn && t->c_arg == arg) {
			/* Increment next entry's tick count. */
			if (t->c_next && t->c_time > 0)
				t->c_next->c_time += t->c_time;

			/* Move entry from callout queue to callfree queue. */
			p->c_next = t->c_next;
			t->c_next = callfree;
			callfree = t;
			break;
		}
	splx(s);
}

/*
 * Compute number of hz until specified time.  Used to
 * compute third argument to timeout() from an absolute time.
 */
int
hzto(tv)
	struct timeval *tv;
{
	register long ticks, sec;
	int s;

	/*
	 * If number of milliseconds will fit in 32 bit arithmetic,
	 * then compute number of milliseconds to time and scale to
	 * ticks.  Otherwise just compute number of hz in time, rounding
	 * times greater than representible to maximum value.
	 *
	 * Delta times less than 25 days can be computed ``exactly''.
366 * Maximum value for any timeout in 10ms ticks is 250 days. 367 */ 368 s = splhigh(); 369 sec = tv->tv_sec - time.tv_sec; 370 if (sec <= 0x7fffffff / 1000 - 1000) 371 ticks = ((tv->tv_sec - time.tv_sec) * 1000 + 372 (tv->tv_usec - time.tv_usec) / 1000) / (tick / 1000); 373 else if (sec <= 0x7fffffff / hz) 374 ticks = sec * hz; 375 else 376 ticks = 0x7fffffff; 377 splx(s); 378 return (ticks); 379} 380 381/* 382 * Start profiling on a process. 383 * 384 * Kernel profiling passes proc0 which never exits and hence 385 * keeps the profile clock running constantly. 386 */ 387void 388startprofclock(p) 389 register struct proc *p; 390{ 391 int s; 392 393 if ((p->p_flag & P_PROFIL) == 0) { 394 p->p_flag |= P_PROFIL; 395 if (++profprocs == 1 && stathz != 0) { 396 s = splstatclock(); 397 psdiv = pscnt = psratio; 398 setstatclockrate(profhz); 399 splx(s); 400 } 401 } 402} 403 404/* 405 * Stop profiling on a process. 406 */ 407void 408stopprofclock(p) 409 register struct proc *p; 410{ 411 int s; 412 413 if (p->p_flag & P_PROFIL) { 414 p->p_flag &= ~P_PROFIL; 415 if (--profprocs == 0 && stathz != 0) { 416 s = splstatclock(); 417 psdiv = pscnt = 1; 418 setstatclockrate(stathz); 419 splx(s); 420 } 421 } 422} 423 424/* 425 * Statistics clock. Grab profile sample, and if divider reaches 0, 426 * do process and kernel statistics. 
 */
void
statclock(frame)
	register struct clockframe *frame;
{
#ifdef GPROF
	register struct gmonparam *g;
#endif
	register struct proc *p = curproc;
	register int i;

	if (p) {
		struct pstats *pstats;
		struct rusage *ru;
		struct vmspace *vm;

		/* bump the resource usage of integral space use */
		/* (text/data/stack sizes charged in kilobytes per tick;
		 * ru_maxrss tracks the high-water resident set size) */
		if ((pstats = p->p_stats) && (ru = &pstats->p_ru) && (vm = p->p_vmspace)) {
			ru->ru_ixrss += vm->vm_tsize * PAGE_SIZE / 1024;
			ru->ru_idrss += vm->vm_dsize * PAGE_SIZE / 1024;
			ru->ru_isrss += vm->vm_ssize * PAGE_SIZE / 1024;
			if ((vm->vm_pmap.pm_stats.resident_count * PAGE_SIZE / 1024) >
			    ru->ru_maxrss) {
				ru->ru_maxrss =
				    vm->vm_pmap.pm_stats.resident_count * PAGE_SIZE / 1024;
			}
		}
	}

	if (CLKF_USERMODE(frame)) {
		/*
		 * NOTE(review): p is dereferenced here without the NULL
		 * check the kernel-mode branch performs -- presumably
		 * curproc is always non-NULL when the trap came from user
		 * mode; verify against the machine-dependent interrupt code.
		 */
		if (p->p_flag & P_PROFIL)
			addupc_intr(p, CLKF_PC(frame), 1);
		/* Only every psdiv-th tick counts toward statistics. */
		if (--pscnt > 0)
			return;
		/*
		 * Came from user mode; CPU was in user state.
		 * If this process is being profiled record the tick.
		 */
		p->p_uticks++;
		if (p->p_nice > NZERO)
			cp_time[CP_NICE]++;
		else
			cp_time[CP_USER]++;
	} else {
#ifdef GPROF
		/*
		 * Kernel statistics are just like addupc_intr, only easier.
		 */
		g = &_gmonparam;
		if (g->state == GMON_PROF_ON) {
			i = CLKF_PC(frame) - g->lowpc;
			if (i < g->textsize) {
				i /= HISTFRACTION * sizeof(*g->kcount);
				g->kcount[i]++;
			}
		}
#endif
		if (--pscnt > 0)
			return;
		/*
		 * Came from kernel mode, so we were:
		 * - handling an interrupt,
		 * - doing syscall or trap work on behalf of the current
		 *   user process, or
		 * - spinning in the idle loop.
		 * Whichever it is, charge the time as appropriate.
		 * Note that we charge interrupts to the current process,
		 * regardless of whether they are ``for'' that process,
		 * so that we know how much of its real time was spent
		 * in ``non-process'' (i.e., interrupt) work.
		 */
		if (CLKF_INTR(frame)) {
			if (p != NULL)
				p->p_iticks++;
			cp_time[CP_INTR]++;
		} else if (p != NULL) {
			p->p_sticks++;
			cp_time[CP_SYS]++;
		} else
			cp_time[CP_IDLE]++;
	}
	pscnt = psdiv;	/* reload the prof=>stat divider for the next round */

	/*
	 * We maintain statistics shown by user-level statistics
	 * programs:  the amount of time in each cpu state, and
	 * the amount of time each of DK_NDRIVE ``drives'' is busy.
	 *
	 * XXX	should either run linked list of drives, or (better)
	 *	grab timestamps in the start & done code.
	 */
	for (i = 0; i < DK_NDRIVE; i++)
		if (dk_busy & (1 << i))
			dk_time[i]++;

	/*
	 * We adjust the priority of the current process.  The priority of
	 * a process gets worse as it accumulates CPU time.  The cpu usage
	 * estimator (p_estcpu) is increased here.  The formula for computing
	 * priorities (in kern_synch.c) will compute a different value each
	 * time p_estcpu increases by 4.  The cpu usage estimator ramps up
	 * quite quickly when the process is running (linearly), and decays
	 * away exponentially, at a rate which is proportionally slower when
	 * the system is busy.  The basic principal is that the system will
	 * 90% forget that the process used a lot of CPU time in 5 * loadav
	 * seconds.  This causes the system to favor processes which haven't
	 * run much recently, and to round-robin among other processes.
	 */
	if (p != NULL) {
		p->p_cpticks++;
		/* Saturate p_estcpu at its maximum instead of wrapping to 0. */
		if (++p->p_estcpu == 0)
			p->p_estcpu--;
		/* Recompute priority every fourth increment of p_estcpu. */
		if ((p->p_estcpu & 3) == 0) {
			resetpriority(p);
			if (p->p_priority >= PUSER)
				p->p_priority = p->p_usrpri;
		}
	}
}

/*
 * Return information about system clocks.
 *
 * Fills a struct clockinfo with the current hz/tick/profhz/stathz values
 * (reporting hz for stathz when there is no separate statistics clock)
 * and copies it out via sysctl_rdstruct(); returns its error status.
 */
int
sysctl_clockrate(where, sizep)
	register char *where;
	size_t *sizep;
{
	struct clockinfo clkinfo;

	/*
	 * Construct clockinfo structure.
	 */
	clkinfo.hz = hz;
	clkinfo.tick = tick;
	clkinfo.profhz = profhz;
	clkinfo.stathz = stathz ? stathz : hz;
	return (sysctl_rdstruct(where, sizep, NULL, &clkinfo, sizeof(clkinfo)));
}