kern_tc.c revision 1817
1/*- 2 * Copyright (c) 1982, 1986, 1991, 1993 3 * The Regents of the University of California. All rights reserved. 4 * (c) UNIX System Laboratories, Inc. 5 * All or some portions of this file are derived from material licensed 6 * to the University of California by American Telephone and Telegraph 7 * Co. or Unix System Laboratories, Inc. and are reproduced herein with 8 * the permission of UNIX System Laboratories, Inc. 9 * 10 * Redistribution and use in source and binary forms, with or without 11 * modification, are permitted provided that the following conditions 12 * are met: 13 * 1. Redistributions of source code must retain the above copyright 14 * notice, this list of conditions and the following disclaimer. 15 * 2. Redistributions in binary form must reproduce the above copyright 16 * notice, this list of conditions and the following disclaimer in the 17 * documentation and/or other materials provided with the distribution. 18 * 3. All advertising materials mentioning features or use of this software 19 * must display the following acknowledgement: 20 * This product includes software developed by the University of 21 * California, Berkeley and its contributors. 22 * 4. Neither the name of the University nor the names of its contributors 23 * may be used to endorse or promote products derived from this software 24 * without specific prior written permission. 25 * 26 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 27 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 28 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 29 * ARE DISCLAIMED. 
IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 30 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 31 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 32 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 33 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 34 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 35 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 36 * SUCH DAMAGE. 37 * 38 * @(#)kern_clock.c 8.5 (Berkeley) 1/21/94 39 * $Id$ 40 */ 41 42#include <sys/param.h> 43#include <sys/systm.h> 44#include <sys/dkstat.h> 45#include <sys/callout.h> 46#include <sys/kernel.h> 47#include <sys/proc.h> 48#include <sys/resourcevar.h> 49 50#include <machine/cpu.h> 51 52#ifdef GPROF 53#include <sys/gmon.h> 54#endif 55 56/* 57 * Clock handling routines. 58 * 59 * This code is written to operate with two timers that run independently of 60 * each other. The main clock, running hz times per second, is used to keep 61 * track of real time. The second timer handles kernel and user profiling, 62 * and does resource use estimation. If the second timer is programmable, 63 * it is randomized to avoid aliasing between the two clocks. For example, 64 * the randomization prevents an adversary from always giving up the cpu 65 * just before its quantum expires. Otherwise, it would never accumulate 66 * cpu ticks. The mean frequency of the second timer is stathz. 67 * 68 * If no second timer exists, stathz will be zero; in this case we drive 69 * profiling and statistics off the main clock. This WILL NOT be accurate; 70 * do not do it unless absolutely necessary. 71 * 72 * The statistics clock may (or may not) be run at a higher rate while 73 * profiling. This profile clock runs at profhz. We require that profhz 74 * be an integral multiple of stathz. 
 *
 * If the statistics clock is running fast, it must be divided by the ratio
 * profhz/stathz for statistics.  (For profiling, every tick counts.)
 */

/*
 * TODO:
 *	allocate more timeout table slots when table overflows.
 */

/*
 * Bump a timeval by a small number of usec's.
 * Assumes the increment is less than one second: at most one carry
 * into tv_sec is performed, so (usec) must be < 1000000.
 */
#define BUMPTIME(t, usec) { \
	register volatile struct timeval *tp = (t); \
	register long us; \
 \
	tp->tv_usec = us = tp->tv_usec + (usec); \
	if (us >= 1000000) { \
		tp->tv_usec = us - 1000000; \
		tp->tv_sec++; \
	} \
}

int	stathz;			/* statistics clock rate; 0 if no separate clock */
int	profhz;			/* profiling clock rate; fixed up in initclocks() */
int	profprocs;		/* number of processes being profiled */
int	ticks;			/* hardclock ticks since boot */
static int psdiv, pscnt;	/* prof => stat divider */
int	psratio;		/* ratio: prof / stat */

volatile struct	timeval time;		/* time of day; adjusted by adjtime() */
volatile struct	timeval mono_time;	/* monotonically increasing copy */

/*
 * Initialize clock frequencies and start both clocks running.
 * Called once at boot, before any clock interrupt is taken.
 */
void
initclocks()
{
	register int i;

	/*
	 * Set divisors to 1 (normal case) and let the machine-specific
	 * code do its bit.  cpu_initclocks() is expected to set hz,
	 * stathz and profhz as appropriate for the hardware.
	 */
	psdiv = pscnt = 1;
	cpu_initclocks();

	/*
	 * Compute profhz/stathz, and fix profhz if needed.  When there
	 * is no statistics clock (stathz == 0) the main clock stands in.
	 */
	i = stathz ? stathz : hz;
	if (profhz == 0)
		profhz = i;
	psratio = profhz / i;
}

/*
 * The real-time timer, interrupting hz times per second.
 * Runs at (high) clock interrupt priority.
 */
void
hardclock(frame)
	register struct clockframe *frame;
{
	register struct callout *p1;
	register struct proc *p;
	register int delta, needsoft;
	extern int tickdelta;		/* see adjtime(); set in kern_time.c */
	extern long timedelta;

	/*
	 * Update real-time timeout queue.
	 * At front of queue are some number of events which are ``due''.
	 * The time to these is <= 0 and if negative represents the
	 * number of ticks which have passed since it was supposed to happen.
	 * The rest of the q elements (times > 0) are events yet to happen,
	 * where the time for each is given as a delta from the previous.
	 * Decrementing just the first of these serves to decrement the time
	 * to all events.
	 *
	 * The scan therefore only walks the already-due prefix: it stops
	 * as soon as a decremented entry is still positive, or exactly
	 * reaches zero (newly due this tick).
	 */
	needsoft = 0;
	for (p1 = calltodo.c_next; p1 != NULL; p1 = p1->c_next) {
		if (--p1->c_time > 0)
			break;
		needsoft = 1;
		if (p1->c_time == 0)
			break;
	}

	p = curproc;
	if (p) {
		register struct pstats *pstats;

		/*
		 * Run current process's virtual and profile time, as needed.
		 * ITIMER_VIRTUAL counts only while in user mode;
		 * ITIMER_PROF counts in both user and system mode.
		 * itimerdecr() returning 0 means the timer expired.
		 */
		pstats = p->p_stats;
		if (CLKF_USERMODE(frame) &&
		    timerisset(&pstats->p_timer[ITIMER_VIRTUAL].it_value) &&
		    itimerdecr(&pstats->p_timer[ITIMER_VIRTUAL], tick) == 0)
			psignal(p, SIGVTALRM);
		if (timerisset(&pstats->p_timer[ITIMER_PROF].it_value) &&
		    itimerdecr(&pstats->p_timer[ITIMER_PROF], tick) == 0)
			psignal(p, SIGPROF);
	}

	/*
	 * If no separate statistics clock is available, run it from here.
	 */
	if (stathz == 0)
		statclock(frame);

	/*
	 * Increment the time-of-day.  The increment is just ``tick'' unless
	 * we are still adjusting the clock; see adjtime().  While adjusting,
	 * tickdelta is applied each tick until timedelta is consumed.
	 */
	ticks++;
	if (timedelta == 0)
		delta = tick;
	else {
		delta = tick + tickdelta;
		timedelta -= tickdelta;
	}
	BUMPTIME(&time, delta);
	BUMPTIME(&mono_time, delta);

	/*
	 * Process callouts at a very low cpu priority, so we don't keep the
	 * relatively high clock interrupt priority any longer than necessary.
	 */
	if (needsoft) {
		if (CLKF_BASEPRI(frame)) {
			/*
			 * Save the overhead of a software interrupt;
			 * it will happen as soon as we return, so do it now.
			 */
			(void)splsoftclock();
			softclock();
		} else
			setsoftclock();
	}
}

/*
 * Software (low priority) clock interrupt.
 * Run periodic events from timeout queue.
 */
/*ARGSUSED*/
void
softclock()
{
	register struct callout *c;
	register void *arg;
	register int s;
	register void (*func) __P((void *));

	/*
	 * Pop each due entry (c_time <= 0) off the front of the queue and
	 * return it to the free list BEFORE calling its handler: the
	 * priority is dropped (splx) around the call, so the handler may
	 * itself call timeout()/untimeout() and must not find the entry
	 * still linked on the active queue.
	 */
	s = splhigh();
	while ((c = calltodo.c_next) != NULL && c->c_time <= 0) {
		func = c->c_func;
		arg = c->c_arg;
		calltodo.c_next = c->c_next;
		c->c_next = callfree;
		callfree = c;
		splx(s);
		(*func)(arg);
		(void) splhigh();
	}
	splx(s);
}

/*
 * timeout --
 *	Execute a function after a specified length of time.
 *
 * untimeout --
 *	Cancel previous timeout function call.
 *
 * See AT&T BCI Driver Reference Manual for specification.  This
 * implementation differs from that one in that no identification
 * value is returned from timeout, rather, the original arguments
 * to timeout are used to identify entries for untimeout.
 */
void
timeout(ftn, arg, ticks)
	void (*ftn) __P((void *));
	void *arg;
	register int ticks;
{
	register struct callout *new, *p, *t;
	register int s;

	/* A non-positive delay fires on the next tick. */
	if (ticks <= 0)
		ticks = 1;

	/* Lock out the clock. */
	s = splhigh();

	/* Fill in the next free callout structure. */
	if (callfree == NULL)
		panic("timeout table full");
	new = callfree;
	callfree = new->c_next;
	new->c_arg = arg;
	new->c_func = ftn;

	/*
	 * The time for each event is stored as a difference from the time
	 * of the previous event on the queue.  Walk the queue, correcting
	 * the ticks argument for queue entries passed.  Correct the ticks
	 * value for the queue entry immediately after the insertion point
	 * as well.  Watch out for negative c_time values; these represent
	 * overdue events.
	 */
	for (p = &calltodo;
	    (t = p->c_next) != NULL && ticks > t->c_time; p = t)
		if (t->c_time > 0)
			ticks -= t->c_time;
	new->c_time = ticks;
	if (t != NULL)
		t->c_time -= ticks;

	/* Insert the new entry into the queue. */
	p->c_next = new;
	new->c_next = t;
	splx(s);
}

void
untimeout(ftn, arg)
	void (*ftn) __P((void *));
	void *arg;
{
	register struct callout *p, *t;
	register int s;

	/*
	 * Find the first entry matching both function and argument;
	 * only that one is removed.  A non-existent entry is silently
	 * ignored (no error is reported to the caller).
	 */
	s = splhigh();
	for (p = &calltodo; (t = p->c_next) != NULL; p = t)
		if (t->c_func == ftn && t->c_arg == arg) {
			/* Increment next entry's tick count. */
			if (t->c_next && t->c_time > 0)
				t->c_next->c_time += t->c_time;

			/* Move entry from callout queue to callfree queue. */
			p->c_next = t->c_next;
			t->c_next = callfree;
			callfree = t;
			break;
		}
	splx(s);
}

/*
 * Compute number of hz until specified time.  Used to
 * compute third argument to timeout() from an absolute time.
 */
int
hzto(tv)
	struct timeval *tv;
{
	register long ticks, sec;
	int s;

	/*
	 * If number of milliseconds will fit in 32 bit arithmetic,
	 * then compute number of milliseconds to time and scale to
	 * ticks.  Otherwise just compute number of hz in time, rounding
	 * times greater than representible to maximum value.
	 *
	 * Delta times less than 25 days can be computed ``exactly''.
	 * Maximum value for any timeout in 10ms ticks is 250 days.
	 *
	 * NOTE(review): the divisor (tick / 1000) assumes tick >= 1000
	 * usec, i.e. hz <= 1000; with a faster clock this is an integer
	 * division by zero -- confirm against the platform's hz.
	 */
	s = splhigh();
	sec = tv->tv_sec - time.tv_sec;
	if (sec <= 0x7fffffff / 1000 - 1000)
		ticks = ((tv->tv_sec - time.tv_sec) * 1000 +
			(tv->tv_usec - time.tv_usec) / 1000) / (tick / 1000);
	else if (sec <= 0x7fffffff / hz)
		ticks = sec * hz;
	else
		ticks = 0x7fffffff;
	splx(s);
	return (ticks);
}

/*
 * Start profiling on a process.
 *
 * Kernel profiling passes proc0 which never exits and hence
 * keeps the profile clock running constantly.
 */
void
startprofclock(p)
	register struct proc *p;
{
	int s;

	/*
	 * Only the transition 0 -> 1 profiled processes speeds up the
	 * statistics clock (to profhz) and widens the prof=>stat divider;
	 * further starters just bump the count.
	 */
	if ((p->p_flag & P_PROFIL) == 0) {
		p->p_flag |= P_PROFIL;
		if (++profprocs == 1 && stathz != 0) {
			s = splstatclock();
			psdiv = pscnt = psratio;
			setstatclockrate(profhz);
			splx(s);
		}
	}
}

/*
 * Stop profiling on a process.
 */
void
stopprofclock(p)
	register struct proc *p;
{
	int s;

	/*
	 * Mirror of startprofclock(): when the last profiled process
	 * stops, drop the statistics clock back to stathz and reset
	 * the divider to 1.
	 */
	if (p->p_flag & P_PROFIL) {
		p->p_flag &= ~P_PROFIL;
		if (--profprocs == 0 && stathz != 0) {
			s = splstatclock();
			psdiv = pscnt = 1;
			setstatclockrate(stathz);
			splx(s);
		}
	}
}

int	dk_ndrive = DK_NDRIVE;		/* number of ``drives'' for dk_time[] */

/*
 * Statistics clock.  Grab profile sample, and if divider reaches 0,
 * do process and kernel statistics.
 *
 * While any process is being profiled the clock runs at profhz and
 * pscnt/psdiv divide it back down so that statistics are still
 * accumulated at stathz (see startprofclock()); profiling samples,
 * in contrast, are taken on every tick.
 */
void
statclock(frame)
	register struct clockframe *frame;
{
#ifdef GPROF
	register struct gmonparam *g;
#endif
	register struct proc *p;
	register int i;

	if (CLKF_USERMODE(frame)) {
		p = curproc;
		if (p->p_flag & P_PROFIL)
			addupc_intr(p, CLKF_PC(frame), 1);
		/* Divider not yet expired: profile sample only, no stats. */
		if (--pscnt > 0)
			return;
		/*
		 * Came from user mode; CPU was in user state.
		 * If this process is being profiled record the tick.
		 */
		p->p_uticks++;
		if (p->p_nice > NZERO)
			cp_time[CP_NICE]++;
		else
			cp_time[CP_USER]++;
	} else {
#ifdef GPROF
		/*
		 * Kernel statistics are just like addupc_intr, only easier.
		 * Bucket the sampled PC into the kernel profile histogram.
		 */
		g = &_gmonparam;
		if (g->state == GMON_PROF_ON) {
			i = CLKF_PC(frame) - g->lowpc;
			if (i < g->textsize) {
				i /= HISTFRACTION * sizeof(*g->kcount);
				g->kcount[i]++;
			}
		}
#endif
		if (--pscnt > 0)
			return;
		/*
		 * Came from kernel mode, so we were:
		 * - handling an interrupt,
		 * - doing syscall or trap work on behalf of the current
		 *   user process, or
		 * - spinning in the idle loop.
		 * Whichever it is, charge the time as appropriate.
		 * Note that we charge interrupts to the current process,
		 * regardless of whether they are ``for'' that process,
		 * so that we know how much of its real time was spent
		 * in ``non-process'' (i.e., interrupt) work.
		 */
		p = curproc;
		if (CLKF_INTR(frame)) {
			if (p != NULL)
				p->p_iticks++;
			cp_time[CP_INTR]++;
		} else if (p != NULL) {
			p->p_sticks++;
			cp_time[CP_SYS]++;
		} else
			cp_time[CP_IDLE]++;
	}
	/* Divider expired: reload it for the next statistics interval. */
	pscnt = psdiv;

	/*
	 * We maintain statistics shown by user-level statistics
	 * programs:  the amount of time in each cpu state, and
	 * the amount of time each of DK_NDRIVE ``drives'' is busy.
	 *
	 * XXX	should either run linked list of drives, or (better)
	 *	grab timestamps in the start & done code.
	 */
	for (i = 0; i < DK_NDRIVE; i++)
		if (dk_busy & (1 << i))
			dk_time[i]++;

	/*
	 * We adjust the priority of the current process.  The priority of
	 * a process gets worse as it accumulates CPU time.  The cpu usage
	 * estimator (p_estcpu) is increased here.  The formula for computing
	 * priorities (in kern_synch.c) will compute a different value each
	 * time p_estcpu increases by 4.  The cpu usage estimator ramps up
	 * quite quickly when the process is running (linearly), and decays
	 * away exponentially, at a rate which is proportionally slower when
	 * the system is busy.  The basic principle is that the system will
	 * 90% forget that the process used a lot of CPU time in 5 * loadav
	 * seconds.  This causes the system to favor processes which haven't
	 * run much recently, and to round-robin among other processes.
	 */
	if (p != NULL) {
		p->p_cpticks++;
		/* Saturate p_estcpu at its maximum rather than wrapping. */
		if (++p->p_estcpu == 0)
			p->p_estcpu--;
		if ((p->p_estcpu & 3) == 0) {
			resetpriority(p);
			if (p->p_priority >= PUSER)
				p->p_priority = p->p_usrpri;
		}
	}
}

/*
 * Return information about system clocks.
514 */ 515int 516sysctl_clockrate(where, sizep) 517 register char *where; 518 size_t *sizep; 519{ 520 struct clockinfo clkinfo; 521 522 /* 523 * Construct clockinfo structure. 524 */ 525 clkinfo.hz = hz; 526 clkinfo.tick = tick; 527 clkinfo.profhz = profhz; 528 clkinfo.stathz = stathz ? stathz : hz; 529 return (sysctl_rdstruct(where, sizep, NULL, &clkinfo, sizeof(clkinfo))); 530} 531