kern_timeout.c revision 29680
150477Speter/*- 21817Sdg * Copyright (c) 1982, 1986, 1991, 1993 31817Sdg * The Regents of the University of California. All rights reserved. 41541Srgrimes * (c) UNIX System Laboratories, Inc. 51541Srgrimes * All or some portions of this file are derived from material licensed 61541Srgrimes * to the University of California by American Telephone and Telegraph 7160798Sjhb * Co. or Unix System Laboratories, Inc. and are reproduced herein with 81541Srgrimes * the permission of UNIX System Laboratories, Inc. 9146806Srwatson * 10146806Srwatson * Redistribution and use in source and binary forms, with or without 11146806Srwatson * modification, are permitted provided that the following conditions 12146806Srwatson * are met: 13146806Srwatson * 1. Redistributions of source code must retain the above copyright 14194390Sjhb * notice, this list of conditions and the following disclaimer. 15203660Sed * 2. Redistributions in binary form must reproduce the above copyright 16194390Sjhb * notice, this list of conditions and the following disclaimer in the 17194390Sjhb * documentation and/or other materials provided with the distribution. 1811294Sswallace * 3. All advertising materials mentioning features or use of this software 1910905Sbde * must display the following acknowledgement: 201541Srgrimes * This product includes software developed by the University of 2110905Sbde * California, Berkeley and its contributors. 2210905Sbde * 4. Neither the name of the University nor the names of its contributors 231541Srgrimes * may be used to endorse or promote products derived from this software 241541Srgrimes * without specific prior written permission. 251541Srgrimes * 261541Srgrimes * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 271541Srgrimes * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 2899855Salfred * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 29194645Sjhb * ARE DISCLAIMED. 
IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 30194833Sjhb * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 311541Srgrimes * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 321541Srgrimes * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 3369449Salfred * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 34194383Sjhb * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 35160797Sjhb * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 36181972Sobrien * SUCH DAMAGE. 37181972Sobrien * 38183361Sjhb * @(#)kern_clock.c 8.5 (Berkeley) 1/21/94 39181972Sobrien * $Id: kern_clock.c,v 1.40 1997/09/07 05:25:43 bde Exp $ 40181972Sobrien */ 41181972Sobrien 42181972Sobrien/* Portions of this software are covered by the following: */ 43211838Skib/****************************************************************************** 44104747Srwatson * * 45104747Srwatson * Copyright (c) David L. Mills 1993, 1994 * 46123408Speter * * 47123408Speter * Permission to use, copy, modify, and distribute this software and its * 481541Srgrimes * documentation for any purpose and without fee is hereby granted, provided * 491541Srgrimes * that the above copyright notice appears in all copies and that both the * 5011294Sswallace * copyright notice and this permission notice appear in supporting * 5111294Sswallace * documentation, and that the name University of Delaware not be used in * 5211294Sswallace * advertising or publicity pertaining to distribution of the software * 5311294Sswallace * without specific, written prior permission. The University of Delaware * 541541Srgrimes * makes no representations about the suitability this software for any * 551541Srgrimes * purpose. It is provided "as is" without express or implied warranty. 
* 561541Srgrimes * * 571541Srgrimes *****************************************************************************/ 581541Srgrimes 591541Srgrimes#include "opt_cpu.h" /* XXX */ 60160798Sjhb 61160798Sjhb#include <sys/param.h> 62146806Srwatson#include <sys/systm.h> 63160798Sjhb#include <sys/dkstat.h> 64160798Sjhb#include <sys/callout.h> 65146806Srwatson#include <sys/kernel.h> 66160798Sjhb#include <sys/proc.h> 67146806Srwatson#include <sys/resourcevar.h> 68160798Sjhb#include <sys/signalvar.h> 6912216Sbde#include <sys/timex.h> 7012216Sbde#include <vm/vm.h> 7112216Sbde#include <sys/lock.h> 72160798Sjhb#include <vm/pmap.h> 73160798Sjhb#include <vm/vm_map.h> 74146806Srwatson#include <sys/sysctl.h> 75146806Srwatson 76162991Srwatson#include <machine/cpu.h> 77160798Sjhb#define CLOCK_HAIR /* XXX */ 78160798Sjhb#include <machine/clock.h> 79146806Srwatson#include <machine/limits.h> 80160798Sjhb 81160798Sjhb#ifdef GPROF 82160798Sjhb#include <sys/gmon.h> 83160798Sjhb#endif 84160798Sjhb 85160798Sjhbstatic void initclocks __P((void *dummy)); 86146806SrwatsonSYSINIT(clocks, SI_SUB_CLOCKS, SI_ORDER_FIRST, initclocks, NULL) 87160798Sjhb 88146806Srwatson/* Exported to machdep.c. */ 89160798Sjhbstruct callout *callout; 90146806Srwatsonstruct callout_list callfree; 91160798Sjhbint callwheelsize, callwheelbits, callwheelmask; 92160798Sjhbstruct callout_tailq *callwheel; 93146806Srwatson 9412216Sbde 95160798Sjhb/* Some of these don't belong here, but it's easiest to concentrate them. 
*/ 96160798Sjhbstatic long cp_time[CPUSTATES]; 97160798Sjhblong dk_seek[DK_NDRIVE]; 98160798Sjhbstatic long dk_time[DK_NDRIVE]; /* time busy (in statclock ticks) */ 99160798Sjhblong dk_wds[DK_NDRIVE]; 100146806Srwatsonlong dk_wpms[DK_NDRIVE]; 101160798Sjhblong dk_xfer[DK_NDRIVE]; 102146806Srwatson 103160798Sjhbint dk_busy; 104146806Srwatsonint dk_ndrive = 0; 105160798Sjhbchar dk_names[DK_NDRIVE][DK_NAMELEN]; 106146806Srwatson 107146806Srwatsonlong tk_cancc; 108146806Srwatsonlong tk_nin; 109160798Sjhblong tk_nout; 110146806Srwatsonlong tk_rawcc; 111146806Srwatson 112160798Sjhb/* 113146806Srwatson * Clock handling routines. 114146806Srwatson * 115160798Sjhb * This code is written to operate with two timers that run independently of 116146806Srwatson * each other. The main clock, running hz times per second, is used to keep 117146806Srwatson * track of real time. The second timer handles kernel and user profiling, 118160798Sjhb * and does resource use estimation. If the second timer is programmable, 119160798Sjhb * it is randomized to avoid aliasing between the two clocks. For example, 120160798Sjhb * the randomization prevents an adversary from always giving up the cpu 121160798Sjhb * just before its quantum expires. Otherwise, it would never accumulate 122160798Sjhb * cpu ticks. The mean frequency of the second timer is stathz. 123160798Sjhb * 124160798Sjhb * If no second timer exists, stathz will be zero; in this case we drive 125160798Sjhb * profiling and statistics off the main clock. This WILL NOT be accurate; 126160798Sjhb * do not do it unless absolutely necessary. 127160798Sjhb * 128160798Sjhb * The statistics clock may (or may not) be run at a higher rate while 129160798Sjhb * profiling. This profile clock runs at profhz. We require that profhz 130146806Srwatson * be an integral multiple of stathz. 131160798Sjhb * 132146806Srwatson * If the statistics clock is running fast, it must be divided by the ratio 133160798Sjhb * profhz/stathz for statistics. 
(For profiling, every tick counts.) 134146806Srwatson */ 135146806Srwatson 136160798Sjhb/* 137160798Sjhb * TODO: 13821776Sbde * allocate more timeout table slots when table overflows. 13921776Sbde */ 14021776Sbde 141160798Sjhb/* 142146806Srwatson * Bump a timeval by a small number of usec's. 143160798Sjhb */ 144160798Sjhb#define BUMPTIME(t, usec) { \ 145160798Sjhb register volatile struct timeval *tp = (t); \ 146162373Srwatson register long us; \ 147146806Srwatson \ 148160798Sjhb tp->tv_usec = us = tp->tv_usec + (usec); \ 149146806Srwatson if (us >= 1000000) { \ 150160798Sjhb tp->tv_usec = us - 1000000; \ 151160798Sjhb tp->tv_sec++; \ 152160798Sjhb } \ 153176215Sru} 154176215Sru 155160798Sjhbint stathz; 156146806Srwatsonint profhz; 157160798Sjhbstatic int profprocs; 158146806Srwatsonint ticks; 159160798Sjhbstatic int softticks; /* Like ticks, but for softclock(). */ 160160798Sjhbstatic struct callout *nextsoftcheck; /* Next callout to be checked. */ 161160798Sjhbstatic int psdiv, pscnt; /* prof => stat divider */ 162146806Srwatsonint psratio; /* ratio: prof / stat */ 163146806Srwatson 164162991Srwatsonvolatile struct timeval time; 165146806Srwatsonvolatile struct timeval mono_time; 166160798Sjhb 167146806Srwatson/* 168160798Sjhb * Phase/frequency-lock loop (PLL/FLL) definitions 169146806Srwatson * 170146806Srwatson * The following variables are read and set by the ntp_adjtime() system 171160798Sjhb * call. 172160798Sjhb * 173160798Sjhb * time_state shows the state of the system clock, with values defined 174146806Srwatson * in the timex.h header file. 175160798Sjhb * 176146806Srwatson * time_status shows the status of the system clock, with bits defined 177160798Sjhb * in the timex.h header file. 178160798Sjhb * 179146806Srwatson * time_offset is used by the PLL/FLL to adjust the system time in small 180160798Sjhb * increments. 181146806Srwatson * 182146806Srwatson * time_constant determines the bandwidth or "stiffness" of the PLL. 
183146806Srwatson * 184160798Sjhb * time_tolerance determines maximum frequency error or tolerance of the 185146806Srwatson * CPU clock oscillator and is a property of the architecture; however, 186160798Sjhb * in principle it could change as result of the presence of external 187146806Srwatson * discipline signals, for instance. 188160798Sjhb * 189146806Srwatson * time_precision is usually equal to the kernel tick variable; however, 190160798Sjhb * in cases where a precision clock counter or external clock is 191160798Sjhb * available, the resolution can be much less than this and depend on 192160798Sjhb * whether the external clock is working or not. 193146806Srwatson * 194160798Sjhb * time_maxerror is initialized by a ntp_adjtime() call and increased by 195160798Sjhb * the kernel once each second to reflect the maximum error 196160798Sjhb * bound growth. 197146806Srwatson * 198160798Sjhb * time_esterror is set and read by the ntp_adjtime() call, but 199146806Srwatson * otherwise not used by the kernel. 200146806Srwatson */ 201160798Sjhbint time_status = STA_UNSYNC; /* clock status bits */ 202146806Srwatsonint time_state = TIME_OK; /* clock state */ 203146806Srwatsonlong time_offset = 0; /* time offset (us) */ 204160798Sjhblong time_constant = 0; /* pll time constant */ 205160798Sjhblong time_tolerance = MAXFREQ; /* frequency tolerance (scaled ppm) */ 206146806Srwatsonlong time_precision = 1; /* clock precision (us) */ 207160798Sjhblong time_maxerror = MAXPHASE; /* maximum error (us) */ 208123750Speterlong time_esterror = MAXPHASE; /* estimated error (us) */ 20912216Sbde 210160798Sjhb/* 211146806Srwatson * The following variables establish the state of the PLL/FLL and the 212146806Srwatson * residual time and frequency offset of the local clock. The scale 213160798Sjhb * factors are defined in the timex.h header file. 
214160798Sjhb * 215146806Srwatson * time_phase and time_freq are the phase increment and the frequency 216160798Sjhb * increment, respectively, of the kernel time variable at each tick of 217146806Srwatson * the clock. 218160798Sjhb * 219146806Srwatson * time_freq is set via ntp_adjtime() from a value stored in a file when 220194390Sjhb * the synchronization daemon is first started. Its value is retrieved 221146806Srwatson * via ntp_adjtime() and written to the file about once per hour by the 222160798Sjhb * daemon. 223160798Sjhb * 224146806Srwatson * time_adj is the adjustment added to the value of tick at each timer 225160798Sjhb * interrupt and is recomputed from time_phase and time_freq at each 226146806Srwatson * seconds rollover. 227160798Sjhb * 228146806Srwatson * time_reftime is the second's portion of the system time on the last 229160798Sjhb * call to ntp_adjtime(). It is used to adjust the time_freq variable 230146806Srwatson * and to increase the time_maxerror as the time since last update 231160798Sjhb * increases. 232146806Srwatson */ 233160798Sjhbstatic long time_phase = 0; /* phase offset (scaled us) */ 234146806Srwatsonlong time_freq = 0; /* frequency offset (scaled ppm) */ 235160798Sjhbstatic long time_adj = 0; /* tick adjust (scaled 1 / hz) */ 236146806Srwatsonstatic long time_reftime = 0; /* time at last adjustment (s) */ 237160798Sjhb 238160798Sjhb#ifdef PPS_SYNC 239160798Sjhb/* 24021776Sbde * The following variables are used only if the kernel PPS discipline 24121776Sbde * code is configured (PPS_SYNC). The scale factors are defined in the 242160798Sjhb * timex.h header file. 243146806Srwatson * 244160798Sjhb * pps_time contains the time at each calibration interval, as read by 245146806Srwatson * microtime(). pps_count counts the seconds of the calibration 246160798Sjhb * interval, the duration of which is nominally pps_shift in powers of 247146806Srwatson * two. 
248146806Srwatson * 249160798Sjhb * pps_offset is the time offset produced by the time median filter 250146806Srwatson * pps_tf[], while pps_jitter is the dispersion (jitter) measured by 251160798Sjhb * this filter. 252146806Srwatson * 253160798Sjhb * pps_freq is the frequency offset produced by the frequency median 254146806Srwatson * filter pps_ff[], while pps_stabil is the dispersion (wander) measured 255146806Srwatson * by this filter. 256160798Sjhb * 257146806Srwatson * pps_usec is latched from a high resolution counter or external clock 258160798Sjhb * at pps_time. Here we want the hardware counter contents only, not the 259146806Srwatson * contents plus the time_tv.usec as usual. 260160798Sjhb * 261146806Srwatson * pps_valid counts the number of seconds since the last PPS update. It 262160798Sjhb * is used as a watchdog timer to disable the PPS discipline should the 263160798Sjhb * PPS signal be lost. 264194390Sjhb * 265146806Srwatson * pps_glitch counts the number of seconds since the beginning of an 266146806Srwatson * offset burst more than tick/2 from current nominal offset. It is used 267146806Srwatson * mainly to suppress error bursts due to priority conflicts between the 268160798Sjhb * PPS interrupt and timer interrupt. 269160798Sjhb * 270160798Sjhb * pps_intcnt counts the calibration intervals for use in the interval- 271160798Sjhb * adaptation algorithm. It's just too complicated for words. 
272160798Sjhb */ 273160798Sjhbstruct timeval pps_time; /* kernel time at last interval */ 274160798Sjhblong pps_offset = 0; /* pps time offset (us) */ 275160798Sjhblong pps_jitter = MAXTIME; /* pps time dispersion (jitter) (us) */ 276146806Srwatsonlong pps_tf[] = {0, 0, 0}; /* pps time offset median filter (us) */ 277160798Sjhblong pps_freq = 0; /* frequency offset (scaled ppm) */ 278160798Sjhblong pps_stabil = MAXFREQ; /* frequency dispersion (scaled ppm) */ 279146806Srwatsonlong pps_ff[] = {0, 0, 0}; /* frequency offset median filter */ 280160798Sjhblong pps_usec = 0; /* microsec counter at last interval */ 281160798Sjhblong pps_valid = PPS_VALID; /* pps signal watchdog counter */ 282160798Sjhbint pps_glitch = 0; /* pps signal glitch counter */ 283146806Srwatsonint pps_count = 0; /* calibration interval counter (s) */ 284146806Srwatsonint pps_shift = PPS_SHIFT; /* interval duration (s) (shift) */ 285160798Sjhbint pps_intcnt = 0; /* intervals at current duration */ 286146806Srwatson 287160798Sjhb/* 288146806Srwatson * PPS signal quality monitors 289160798Sjhb * 290160798Sjhb * pps_jitcnt counts the seconds that have been discarded because the 291160798Sjhb * jitter measured by the time median filter exceeds the limit MAXTIME 292146806Srwatson * (100 us). 293160798Sjhb * 294146806Srwatson * pps_calcnt counts the frequency calibration intervals, which are 295160798Sjhb * variable from 4 s to 256 s. 296160798Sjhb * 297160798Sjhb * pps_errcnt counts the calibration intervals which have been discarded 298146806Srwatson * because the wander exceeds the limit MAXFREQ (100 ppm) or where the 299160798Sjhb * calibration interval jitter exceeds two ticks. 300194390Sjhb * 301146806Srwatson * pps_stbcnt counts the calibration intervals that have been discarded 302146806Srwatson * because the frequency wander exceeds the limit MAXFREQ / 4 (25 us). 
3031541Srgrimes */ 3041541Srgrimeslong pps_jitcnt = 0; /* jitter limit exceeded */ 3051541Srgrimeslong pps_calcnt = 0; /* calibration intervals */ 3061541Srgrimeslong pps_errcnt = 0; /* calibration errors */ 3071541Srgrimeslong pps_stbcnt = 0; /* stability limit exceeded */ 308146806Srwatson#endif /* PPS_SYNC */ 309146806Srwatson 310146806Srwatson/* XXX none of this stuff works under FreeBSD */ 311177633Sdfr#ifdef EXT_CLOCK 312177633Sdfr/* 31330740Sphk * External clock definitions 314161325Sjhb * 315160798Sjhb * The following definitions and declarations are used only if an 316146806Srwatson * external clock (HIGHBALL or TPRO) is configured on the system. 317160798Sjhb */ 318146806Srwatson#define CLOCK_INTERVAL 30 /* CPU clock update interval (s) */ 319160798Sjhb 320146806Srwatson/* 321146806Srwatson * The clock_count variable is set to CLOCK_INTERVAL at each PPS 322160798Sjhb * interrupt and decremented once each second. 323146806Srwatson */ 324160798Sjhbint clock_count = 0; /* CPU clock counter */ 325146806Srwatson 326184789Sed#ifdef HIGHBALL 327146806Srwatson/* 328184789Sed * The clock_offset and clock_cpu variables are used by the HIGHBALL 329146806Srwatson * interface. The clock_offset variable defines the offset between 330184789Sed * system time and the HIGHBALL counters. The clock_cpu variable contains 331161952Srwatson * the offset between the system clock and the HIGHBALL clock for use in 332161952Srwatson * disciplining the kernel time variable. 333146806Srwatson */ 334146806Srwatsonextern struct timeval clock_offset; /* Highball clock offset */ 335146806Srwatsonlong clock_cpu = 0; /* CPU clock adjust */ 336160798Sjhb#endif /* HIGHBALL */ 337146806Srwatson#endif /* EXT_CLOCK */ 338123750Speter 339160798Sjhb/* 340146806Srwatson * hardupdate() - local clock update 341123750Speter * 342160798Sjhb * This routine is called by ntp_adjtime() to update the local clock 343146806Srwatson * phase and frequency. 
The implementation is of an adaptive-parameter, 344123750Speter * hybrid phase/frequency-lock loop (PLL/FLL). The routine computes new 345146806Srwatson * time and frequency offset estimates for each call. If the kernel PPS 346171209Speter * discipline code is configured (PPS_SYNC), the PPS signal itself 347146806Srwatson * determines the new time offset, instead of the calling argument. 348171209Speter * Presumably, calls to ntp_adjtime() occur only when the caller 349171209Speter * believes the local clock is valid within some bound (+-128 ms with 350146806Srwatson * NTP). If the caller's time is far different than the PPS time, an 351178888Sjulian * argument will ensue, and it's not clear who will lose. 352161946Srwatson * 353146806Srwatson * For uncompensated quartz crystal oscillatores and nominal update 354146806Srwatson * intervals less than 1024 s, operation should be in phase-lock mode 355146806Srwatson * (STA_FLL = 0), where the loop is disciplined to phase. For update 356146806Srwatson * intervals greater than thiss, operation should be in frequency-lock 3571541Srgrimes * mode (STA_FLL = 1), where the loop is disciplined to frequency. 35849428Sjkh * 359160798Sjhb * Note: splclock() is in effect. 360160798Sjhb */ 361160798Sjhbvoid 362146806Srwatsonhardupdate(offset) 363146806Srwatson long offset; 364146806Srwatson{ 365146806Srwatson long ltemp, mtemp; 366160798Sjhb 367160798Sjhb if (!(time_status & STA_PLL) && !(time_status & STA_PPSTIME)) 368160798Sjhb return; 369160798Sjhb ltemp = offset; 370160798Sjhb#ifdef PPS_SYNC 371146806Srwatson if (time_status & STA_PPSTIME && time_status & STA_PPSSIGNAL) 372160798Sjhb ltemp = pps_offset; 373146806Srwatson#endif /* PPS_SYNC */ 374146806Srwatson 375160798Sjhb /* 376146806Srwatson * Scale the phase adjustment and clamp to the operating range. 
377146806Srwatson */ 378160798Sjhb if (ltemp > MAXPHASE) 379146806Srwatson time_offset = MAXPHASE << SHIFT_UPDATE; 380171209Speter else if (ltemp < -MAXPHASE) 381171209Speter time_offset = -(MAXPHASE << SHIFT_UPDATE); 382171209Speter else 383183361Sjhb time_offset = ltemp << SHIFT_UPDATE; 384146806Srwatson 385171209Speter /* 386171209Speter * Select whether the frequency is to be controlled and in which 387171209Speter * mode (PLL or FLL). Clamp to the operating range. Ugly 388146806Srwatson * multiply/divide should be replaced someday. 389171209Speter */ 390146806Srwatson if (time_status & STA_FREQHOLD || time_reftime == 0) 391160798Sjhb time_reftime = time.tv_sec; 392146806Srwatson mtemp = time.tv_sec - time_reftime; 393146806Srwatson time_reftime = time.tv_sec; 394160798Sjhb if (time_status & STA_FLL) { 395160798Sjhb if (mtemp >= MINSEC) { 396160798Sjhb ltemp = ((time_offset / mtemp) << (SHIFT_USEC - 397160798Sjhb SHIFT_UPDATE)); 398160798Sjhb if (ltemp < 0) 399146806Srwatson time_freq -= -ltemp >> SHIFT_KH; 400160798Sjhb else 401146806Srwatson time_freq += ltemp >> SHIFT_KH; 4022124Sdg } 4032124Sdg } else { 4042124Sdg if (mtemp < MAXSEC) { 4052124Sdg ltemp *= mtemp; 406209579Skib if (ltemp < 0) 407209579Skib time_freq -= -ltemp >> (time_constant + 408209579Skib time_constant + SHIFT_KF - 409209579Skib SHIFT_USEC); 410209579Skib else 411209579Skib time_freq += ltemp >> (time_constant + 412209579Skib time_constant + SHIFT_KF - 413209579Skib SHIFT_USEC); 414209579Skib } 415209579Skib } 41612864Speter if (time_freq > time_tolerance) 41712864Speter time_freq = time_tolerance; 41814215Speter else if (time_freq < -time_tolerance) 419194910Sjhb time_freq = -time_tolerance; 420194910Sjhb} 421160798Sjhb 422146806Srwatson 423160798Sjhb 424146806Srwatson/* 425146806Srwatson * Initialize clock frequencies and start both clocks running. 
426194910Sjhb */ 427194910Sjhb/* ARGSUSED*/ 428160798Sjhbstatic void 429160798Sjhbinitclocks(dummy) 430146806Srwatson void *dummy; 431160798Sjhb{ 432146806Srwatson register int i; 433160798Sjhb 434146806Srwatson /* 435194910Sjhb * Set divisors to 1 (normal case) and let the machine-specific 436194910Sjhb * code do its bit. 437160798Sjhb */ 438160798Sjhb psdiv = pscnt = 1; 439146806Srwatson cpu_initclocks(); 44014219Speter 441160798Sjhb /* 442146806Srwatson * Compute profhz/stathz, and fix profhz if needed. 443161952Srwatson */ 444161952Srwatson i = stathz ? stathz : hz; 445146806Srwatson if (profhz == 0) 446160798Sjhb profhz = i; 447146806Srwatson psratio = profhz / i; 448160798Sjhb} 449156134Sdavidxu 450160798Sjhb/* 451160798Sjhb * The real-time timer, interrupting hz times per second. 452151576Sdavidxu */ 453151576Sdavidxuvoid 454160798Sjhbhardclock(frame) 455151576Sdavidxu register struct clockframe *frame; 456160798Sjhb{ 457160798Sjhb register struct callout *p1; 458146806Srwatson register struct proc *p; 459146806Srwatson 460146806Srwatson p = curproc; 461146806Srwatson if (p) { 462146806Srwatson register struct pstats *pstats; 463146806Srwatson 464146806Srwatson /* 465146806Srwatson * Run current process's virtual and profile time, as needed. 466160798Sjhb */ 467146806Srwatson pstats = p->p_stats; 46814219Speter if (CLKF_USERMODE(frame) && 469160798Sjhb timerisset(&pstats->p_timer[ITIMER_VIRTUAL].it_value) && 470146806Srwatson itimerdecr(&pstats->p_timer[ITIMER_VIRTUAL], tick) == 0) 471160798Sjhb psignal(p, SIGVTALRM); 472160798Sjhb if (timerisset(&pstats->p_timer[ITIMER_PROF].it_value) && 473146806Srwatson itimerdecr(&pstats->p_timer[ITIMER_PROF], tick) == 0) 474160798Sjhb psignal(p, SIGPROF); 475160798Sjhb } 476160798Sjhb 477160798Sjhb /* 478160798Sjhb * If no separate statistics clock is available, run it from here. 
479151867Sdavidxu */ 480151867Sdavidxu if (stathz == 0) 481152845Sdavidxu statclock(frame); 482152845Sdavidxu 483152845Sdavidxu /* 484152845Sdavidxu * Increment the time-of-day. 485152845Sdavidxu */ 486152845Sdavidxu ticks++; 487146806Srwatson { 488146806Srwatson int time_update; 489146806Srwatson struct timeval newtime = time; 490146806Srwatson long ltemp; 491146806Srwatson 492146806Srwatson if (timedelta == 0) { 493146806Srwatson time_update = CPU_THISTICKLEN(tick); 494146806Srwatson } else { 495160798Sjhb time_update = CPU_THISTICKLEN(tick) + tickdelta; 496146806Srwatson timedelta -= tickdelta; 497146806Srwatson } 498160798Sjhb BUMPTIME(&mono_time, time_update); 499160798Sjhb 500146806Srwatson /* 501146806Srwatson * Compute the phase adjustment. If the low-order bits 502160798Sjhb * (time_phase) of the update overflow, bump the high-order bits 503146806Srwatson * (time_update). 504160798Sjhb */ 505146806Srwatson time_phase += time_adj; 506160798Sjhb if (time_phase <= -FINEUSEC) { 507160798Sjhb ltemp = -time_phase >> SHIFT_SCALE; 508160798Sjhb time_phase += ltemp << SHIFT_SCALE; 509146806Srwatson time_update -= ltemp; 510146806Srwatson } 511146806Srwatson else if (time_phase >= FINEUSEC) { 512146806Srwatson ltemp = time_phase >> SHIFT_SCALE; 513146806Srwatson time_phase -= ltemp << SHIFT_SCALE; 514146806Srwatson time_update += ltemp; 515146806Srwatson } 516146806Srwatson 517147813Sjhb newtime.tv_usec += time_update; 518161952Srwatson /* 519147813Sjhb * On rollover of the second the phase adjustment to be used for 520161952Srwatson * the next second is calculated. Also, the maximum error is 521147813Sjhb * increased by the tolerance. If the PPS frequency discipline 522146806Srwatson * code is present, the phase is increased to compensate for the 523146806Srwatson * CPU clock oscillator frequency error. 
524146806Srwatson * 525146806Srwatson * On a 32-bit machine and given parameters in the timex.h 526146806Srwatson * header file, the maximum phase adjustment is +-512 ms and 527146806Srwatson * maximum frequency offset is a tad less than) +-512 ppm. On a 52851138Salfred * 64-bit machine, you shouldn't need to ask. 529160798Sjhb */ 530146806Srwatson if (newtime.tv_usec >= 1000000) { 531146806Srwatson newtime.tv_usec -= 1000000; 532160798Sjhb newtime.tv_sec++; 533146806Srwatson time_maxerror += time_tolerance >> SHIFT_USEC; 534160798Sjhb 535146806Srwatson /* 53625537Sdfr * Compute the phase adjustment for the next second. In 537160798Sjhb * PLL mode, the offset is reduced by a fixed factor 538160798Sjhb * times the time constant. In FLL mode the offset is 539146806Srwatson * used directly. In either mode, the maximum phase 540160798Sjhb * adjustment for each second is clamped so as to spread 541160798Sjhb * the adjustment over not more than the number of 542160798Sjhb * seconds between updates. 
543160798Sjhb */ 544160798Sjhb if (time_offset < 0) { 545160798Sjhb ltemp = -time_offset; 546160798Sjhb if (!(time_status & STA_FLL)) 547146806Srwatson ltemp >>= SHIFT_KG + time_constant; 548160798Sjhb if (ltemp > (MAXPHASE / MINSEC) << SHIFT_UPDATE) 549160798Sjhb ltemp = (MAXPHASE / MINSEC) << 550160798Sjhb SHIFT_UPDATE; 551146806Srwatson time_offset += ltemp; 552160798Sjhb time_adj = -ltemp << (SHIFT_SCALE - SHIFT_HZ - 553146806Srwatson SHIFT_UPDATE); 554146806Srwatson } else { 555160798Sjhb ltemp = time_offset; 556160798Sjhb if (!(time_status & STA_FLL)) 557146806Srwatson ltemp >>= SHIFT_KG + time_constant; 558146806Srwatson if (ltemp > (MAXPHASE / MINSEC) << SHIFT_UPDATE) 559160798Sjhb ltemp = (MAXPHASE / MINSEC) << 560146806Srwatson SHIFT_UPDATE; 561160798Sjhb time_offset -= ltemp; 562160798Sjhb time_adj = ltemp << (SHIFT_SCALE - SHIFT_HZ - 563160798Sjhb SHIFT_UPDATE); 564160798Sjhb } 565151867Sdavidxu 566151867Sdavidxu /* 567160798Sjhb * Compute the frequency estimate and additional phase 568146806Srwatson * adjustment due to frequency error for the next 569146806Srwatson * second. When the PPS signal is engaged, gnaw on the 570160798Sjhb * watchdog counter and update the frequency computed by 571160798Sjhb * the pll and the PPS signal. 
572161952Srwatson */ 57334925Sdufault#ifdef PPS_SYNC 574160798Sjhb pps_valid++; 575146806Srwatson if (pps_valid == PPS_VALID) { 576160798Sjhb pps_jitter = MAXTIME; 577146806Srwatson pps_stabil = MAXFREQ; 57834925Sdufault time_status &= ~(STA_PPSSIGNAL | STA_PPSJITTER | 579160798Sjhb STA_PPSWANDER | STA_PPSERROR); 580146806Srwatson } 581146806Srwatson ltemp = time_freq + pps_freq; 582160798Sjhb#else 58334925Sdufault ltemp = time_freq; 584160798Sjhb#endif /* PPS_SYNC */ 585160798Sjhb if (ltemp < 0) 586160798Sjhb time_adj -= -ltemp >> 587160798Sjhb (SHIFT_USEC + SHIFT_HZ - SHIFT_SCALE); 588146806Srwatson else 589160798Sjhb time_adj += ltemp >> 590160798Sjhb (SHIFT_USEC + SHIFT_HZ - SHIFT_SCALE); 591146806Srwatson 592146806Srwatson#if SHIFT_HZ == 7 593146806Srwatson /* 594160798Sjhb * When the CPU clock oscillator frequency is not a 595146806Srwatson * power of two in Hz, the SHIFT_HZ is only an 596160798Sjhb * approximate scale factor. In the SunOS kernel, this 597211998Skib * results in a PLL gain factor of 1/1.28 = 0.78 what it 598211998Skib * should be. In the following code the overall gain is 599211998Skib * increased by a factor of 1.25, which results in a 600160798Sjhb * residual error less than 3 percent. 601146806Srwatson */ 602160798Sjhb /* Same thing applies for FreeBSD --GAW */ 603160798Sjhb if (hz == 100) { 604146806Srwatson if (time_adj < 0) 605146806Srwatson time_adj -= -time_adj >> 2; 606160798Sjhb else 607160798Sjhb time_adj += time_adj >> 2; 608146806Srwatson } 609160798Sjhb#endif /* SHIFT_HZ */ 610146806Srwatson 611146806Srwatson /* XXX - this is really bogus, but can't be fixed until 612160798Sjhb xntpd's idea of the system clock is fixed to know how 613146806Srwatson the user wants leap seconds handled; in the mean time, 614160798Sjhb we assume that users of NTP are running without proper 615146806Srwatson leap second support (this is now the default anyway) */ 616160798Sjhb /* 617146806Srwatson * Leap second processing. 
If in leap-insert state at 618160798Sjhb * the end of the day, the system clock is set back one 619146806Srwatson * second; if in leap-delete state, the system clock is 620160798Sjhb * set ahead one second. The microtime() routine or 621146806Srwatson * external clock driver will insure that reported time 622160798Sjhb * is always monotonic. The ugly divides should be 623146806Srwatson * replaced. 624160798Sjhb */ 625146806Srwatson switch (time_state) { 626160798Sjhb 627146806Srwatson case TIME_OK: 628160798Sjhb if (time_status & STA_INS) 629146806Srwatson time_state = TIME_INS; 630160798Sjhb else if (time_status & STA_DEL) 631146806Srwatson time_state = TIME_DEL; 632146806Srwatson break; 633160798Sjhb 634160111Swsalamon case TIME_INS: 635160111Swsalamon if (newtime.tv_sec % 86400 == 0) { 636160111Swsalamon newtime.tv_sec--; 637160798Sjhb time_state = TIME_OOP; 638160111Swsalamon } 639160111Swsalamon break; 640160111Swsalamon 641160798Sjhb case TIME_DEL: 642146806Srwatson if ((newtime.tv_sec + 1) % 86400 == 0) { 643146806Srwatson newtime.tv_sec++; 644160798Sjhb time_state = TIME_WAIT; 645146806Srwatson } 646146806Srwatson break; 647160798Sjhb 648146806Srwatson case TIME_OOP: 649160798Sjhb time_state = TIME_WAIT; 650146806Srwatson break; 651161952Srwatson 652160798Sjhb case TIME_WAIT: 653146806Srwatson if (!(time_status & (STA_INS | STA_DEL))) 654146806Srwatson time_state = TIME_OK; 655146806Srwatson } 656146806Srwatson } 657146806Srwatson CPU_CLOCKUPDATE(&time, &newtime); 658146806Srwatson } 659146806Srwatson 660146806Srwatson /* 661146806Srwatson * Process callouts at a very low cpu priority, so we don't keep the 662183361Sjhb * relatively high clock interrupt priority any longer than necessary. 
663160798Sjhb */ 664146806Srwatson if (TAILQ_FIRST(&callwheel[ticks & callwheelmask]) != NULL) { 665146806Srwatson if (CLKF_BASEPRI(frame)) { 666160798Sjhb /* 667146806Srwatson * Save the overhead of a software interrupt; 668146806Srwatson * it will happen as soon as we return, so do it now. 669160798Sjhb */ 670146806Srwatson (void)splsoftclock(); 671146806Srwatson softclock(); 672160798Sjhb } else 673194383Sjhb setsoftclock(); 674160798Sjhb } else if (softticks + 1 == ticks) { 675211998Skib ++softticks; 676211998Skib } 677211998Skib} 678160798Sjhb 679146806Srwatson/* 680177091Sjeff * The callout mechanism is based on the work of Adam M. Costello and 681177091Sjeff * George Varghese, published in a technical report entitled "Redesigning 682177091Sjeff * the BSD Callout and Timer Facilities" and modified slightly for inclusion 683177091Sjeff * in FreeBSD by Justin T. Gibbs. The original work on the data structures 684177091Sjeff * used in this implementation was published by G.Varghese and A. Lauck in 685160798Sjhb * the paper "Hashed and Hierarchical Timing Wheels: Data Structures for 686160798Sjhb * the Efficient Implementation of a Timer Facility" in the Proceedings of 687160798Sjhb * the 11th ACM Annual Symposium on Operating Systems Principles, 688146806Srwatson * Austin, Texas Nov 1987. 689160798Sjhb */ 690146806Srwatson/* 691160798Sjhb * Software (low priority) clock interrupt. 692146806Srwatson * Run periodic events from timeout queue. 693160798Sjhb */ 694146806Srwatson/*ARGSUSED*/ 695160798Sjhbvoid 696146806Srwatsonsoftclock() 697160798Sjhb{ 698160798Sjhb register struct callout *c; 699146806Srwatson register int s; 700160798Sjhb register int steps; /* 701146806Srwatson * Number of steps taken since 702146806Srwatson * we last allowed interrupts. 703160798Sjhb */ 704146806Srwatson 705160798Sjhb #ifndef MAX_SOFTCLOCK_STEPS 706146806Srwatson #define MAX_SOFTCLOCK_STEPS 100 /* Maximum allowed value of steps. 
*/ 707160798Sjhb #endif /* MAX_SOFTCLOCK_STEPS */ 708146806Srwatson 709160798Sjhb steps = 0; 710161952Srwatson s = splhigh(); 711146806Srwatson while (softticks != ticks) { 712146806Srwatson c = TAILQ_FIRST(&callwheel[++softticks & callwheelmask]); 713160798Sjhb while (c) { 714160798Sjhb if (c->c_time > 0) { 715160798Sjhb c->c_time--; 716160798Sjhb c = TAILQ_NEXT(c, c_links.tqe); 717160798Sjhb ++steps; 718146806Srwatson if (steps >= MAX_SOFTCLOCK_STEPS) { 719160798Sjhb nextsoftcheck = c; 720146806Srwatson splx(s); 721146806Srwatson /* Give hardclock() a chance. */ 722160798Sjhb s = splhigh(); 723160798Sjhb c = nextsoftcheck; 724160798Sjhb steps = 0; 725160798Sjhb } 726146806Srwatson } else { 727160798Sjhb void (*c_func)(void *); 728146806Srwatson void *c_arg; 729160798Sjhb 730146806Srwatson nextsoftcheck = TAILQ_NEXT(c, c_links.tqe); 731160798Sjhb TAILQ_REMOVE(c->c_bucket, c, c_links.tqe); 732160111Swsalamon c_func = c->c_func; 733160111Swsalamon c_arg = c->c_arg; 734160111Swsalamon c->c_func = NULL; 735160798Sjhb SLIST_INSERT_HEAD(&callfree, c, c_links.sle); 736160111Swsalamon splx(s); 737160111Swsalamon c_func(c_arg); 738160111Swsalamon s = splhigh(); 739160798Sjhb steps = 0; 740160111Swsalamon c = nextsoftcheck; 741146806Srwatson } 742160798Sjhb } 743146806Srwatson } 744160798Sjhb nextsoftcheck = NULL; 745146806Srwatson splx(s); 746146806Srwatson} 747160798Sjhb 748146806Srwatson/* 749146806Srwatson * timeout -- 750146806Srwatson * Execute a function after a specified length of time. 751146806Srwatson * 752160798Sjhb * untimeout -- 753160798Sjhb * Cancel previous timeout function call. 754146806Srwatson * 755160798Sjhb * callout_handle_init -- 756146806Srwatson * Initialize a handle so that using it with untimeout is benign. 757160798Sjhb * 758160798Sjhb * See AT&T BCI Driver Reference Manual for specification. 
This 759146806Srwatson * implementation differs from that one in that although an 760160798Sjhb * identification value is returned from timeout, the original 761146806Srwatson * arguments to timeout as well as the identifier are used to 762160798Sjhb * identify entries for untimeout. 763146806Srwatson */ 764160798Sjhbstruct callout_handle 765146806Srwatsontimeout(ftn, arg, to_ticks) 766160798Sjhb timeout_t ftn; 767146806Srwatson void *arg; 768160798Sjhb register int to_ticks; 769146806Srwatson{ 770160798Sjhb int s; 771160798Sjhb struct callout *new; 772160798Sjhb struct callout_handle handle; 773160798Sjhb 774160798Sjhb if (to_ticks <= 0) 775160798Sjhb to_ticks = 1; 776160798Sjhb 777146806Srwatson /* Lock out the clock. */ 778146806Srwatson s = splhigh(); 779160798Sjhb 780146806Srwatson /* Fill in the next free callout structure. */ 781146806Srwatson new = SLIST_FIRST(&callfree); 782160798Sjhb if (new == NULL) 783146806Srwatson /* XXX Attempt to malloc first */ 784146806Srwatson panic("timeout table full"); 785177091Sjeff 786160798Sjhb SLIST_REMOVE_HEAD(&callfree, c_links.sle); 787151445Sstefanf new->c_arg = arg; 788160798Sjhb new->c_func = ftn; 789146806Srwatson new->c_time = to_ticks >> callwheelbits; 790160798Sjhb new->c_bucket = &callwheel[(ticks + to_ticks) & callwheelmask]; 791161952Srwatson TAILQ_INSERT_TAIL(new->c_bucket, new, c_links.tqe); 792160798Sjhb 793146806Srwatson splx(s); 794160798Sjhb handle.callout = new; 795146806Srwatson return (handle); 796160798Sjhb} 797160798Sjhb 798160798Sjhbvoid 799160798Sjhbuntimeout(ftn, arg, handle) 800160798Sjhb timeout_t ftn; 801146806Srwatson void *arg; 802146806Srwatson struct callout_handle handle; 803160798Sjhb{ 804146806Srwatson register struct callout *p, *t; 805146806Srwatson register int s; 806160798Sjhb 807161678Sdavidxu /* 808163449Sdavidxu * Check for a handle that was initialized 809160798Sjhb * by callout_handle_init, but never used 810146806Srwatson * for a real timeout. 
811160798Sjhb */ 812160798Sjhb if (handle.callout == NULL) 813152845Sdavidxu return; 814160798Sjhb 815152845Sdavidxu s = splhigh(); 816152845Sdavidxu if ((handle.callout->c_func == ftn) 817160798Sjhb && (handle.callout->c_arg == arg)) { 818152845Sdavidxu if (nextsoftcheck == handle.callout) { 819152845Sdavidxu nextsoftcheck = TAILQ_NEXT(handle.callout, c_links.tqe); 820152845Sdavidxu } 821160798Sjhb TAILQ_REMOVE(handle.callout->c_bucket, 822152845Sdavidxu handle.callout, c_links.tqe); 823152845Sdavidxu handle.callout->c_func = NULL; 824152845Sdavidxu SLIST_INSERT_HEAD(&callfree, handle.callout, c_links.sle); 825160798Sjhb } 826152845Sdavidxu splx(s); 827160798Sjhb} 828160798Sjhb 829160798Sjhbvoid 830160798Sjhbcallout_handle_init(struct callout_handle *handle) 831162497Sdavidxu{ 832162497Sdavidxu handle->callout = NULL; 833162497Sdavidxu} 834162497Sdavidxu 835161367Spetervoid 836161367Spetergettime(struct timeval *tvp) 837163953Srrs{ 838163953Srrs int s; 839163953Srrs 840163953Srrs s = splclock(); 841163953Srrs /* XXX should use microtime() iff tv_usec is used. */ 842163953Srrs *tvp = time; 843163953Srrs splx(s); 844163953Srrs} 845163953Srrs 846163953Srrs/* 847171209Speter * Compute number of hz until specified time. Used to 848171209Speter * compute third argument to timeout() from an absolute time. 849171209Speter */ 850171209Speterint 851171209Speterhzto(tv) 852171209Speter struct timeval *tv; 853171209Speter{ 854171209Speter register unsigned long ticks; 855171209Speter register long sec, usec; 856171209Speter int s; 857171859Sdavidxu 858175517Srwatson /* 859175164Sjhb * If the number of usecs in the whole seconds part of the time 860175517Srwatson * difference fits in a long, then the total number of usecs will 861176730Sjeff * fit in an unsigned long. Compute the total and convert it to 862176730Sjeff * ticks, rounding up and adding 1 to allow for the current tick 863176730Sjeff * to expire. 
Rounding also depends on unsigned long arithmetic 864176730Sjeff * to avoid overflow. 865176730Sjeff * 866176730Sjeff * Otherwise, if the number of ticks in the whole seconds part of 867176730Sjeff * the time difference fits in a long, then convert the parts to 868177597Sru * ticks separately and add, using similar rounding methods and 869177597Sru * overflow avoidance. This method would work in the previous 870176730Sjeff * case but it is slightly slower and assumes that hz is integral. 871177597Sru * 872177597Sru * Otherwise, round the time difference down to the maximum 873177788Skib * representable value. 874177788Skib * 875177788Skib * If ints have 32 bits, then the maximum value for any timeout in 876177788Skib * 10ms ticks is 248 days. 877177788Skib */ 878177788Skib s = splclock(); 879177788Skib sec = tv->tv_sec - time.tv_sec; 880177788Skib usec = tv->tv_usec - time.tv_usec; 881177788Skib splx(s); 882177788Skib if (usec < 0) { 883177788Skib sec--; 884177788Skib usec += 1000000; 885177788Skib } 886177788Skib if (sec < 0) { 887177788Skib#ifdef DIAGNOSTIC 888177788Skib printf("hzto: negative time difference %ld sec %ld usec\n", 889177788Skib sec, usec); 890177788Skib#endif 891177788Skib ticks = 1; 892177788Skib } else if (sec <= LONG_MAX / 1000000) 893177788Skib ticks = (sec * 1000000 + (unsigned long)usec + (tick - 1)) 894177788Skib / tick + 1; 895177788Skib else if (sec <= LONG_MAX / hz) 896177788Skib ticks = sec * hz 897177788Skib + ((unsigned long)usec + (tick - 1)) / tick + 1; 898177788Skib else 899177788Skib ticks = LONG_MAX; 900177788Skib if (ticks > INT_MAX) 901182123Srwatson ticks = INT_MAX; 902184588Sdfr return (ticks); 903184588Sdfr} 904191673Sjamie 905191673Sjamie/* 906191673Sjamie * Start profiling on a process. 907191673Sjamie * 908191673Sjamie * Kernel profiling passes proc0 which never exits and hence 909194262Sjhb * keeps the profile clock running constantly. 
910194910Sjhb */ 911194910Sjhbvoid 912194910Sjhbstartprofclock(p) 913194910Sjhb register struct proc *p; 914194910Sjhb{ 915194910Sjhb int s; 916195458Strasz 917197636Srwatson if ((p->p_flag & P_PROFIL) == 0) { 918197636Srwatson p->p_flag |= P_PROFIL; 919197636Srwatson if (++profprocs == 1 && stathz != 0) { 920197636Srwatson s = splstatclock(); 921197636Srwatson psdiv = pscnt = psratio; 922197636Srwatson setstatclockrate(profhz); 923197636Srwatson splx(s); 924197636Srwatson } 925198508Skib } 926198508Skib} 927198508Skib 928198508Skib/* 929105144Speter * Stop profiling on a process. 930123408Speter */ 931void 932stopprofclock(p) 933 register struct proc *p; 934{ 935 int s; 936 937 if (p->p_flag & P_PROFIL) { 938 p->p_flag &= ~P_PROFIL; 939 if (--profprocs == 0 && stathz != 0) { 940 s = splstatclock(); 941 psdiv = pscnt = 1; 942 setstatclockrate(stathz); 943 splx(s); 944 } 945 } 946} 947 948/* 949 * Statistics clock. Grab profile sample, and if divider reaches 0, 950 * do process and kernel statistics. 951 */ 952void 953statclock(frame) 954 register struct clockframe *frame; 955{ 956#ifdef GPROF 957 register struct gmonparam *g; 958#endif 959 register struct proc *p; 960 register int i; 961 struct pstats *pstats; 962 long rss; 963 struct rusage *ru; 964 struct vmspace *vm; 965 966 if (CLKF_USERMODE(frame)) { 967 p = curproc; 968 if (p->p_flag & P_PROFIL) 969 addupc_intr(p, CLKF_PC(frame), 1); 970 if (--pscnt > 0) 971 return; 972 /* 973 * Came from user mode; CPU was in user state. 974 * If this process is being profiled record the tick. 975 */ 976 p->p_uticks++; 977 if (p->p_nice > NZERO) 978 cp_time[CP_NICE]++; 979 else 980 cp_time[CP_USER]++; 981 } else { 982#ifdef GPROF 983 /* 984 * Kernel statistics are just like addupc_intr, only easier. 
985 */ 986 g = &_gmonparam; 987 if (g->state == GMON_PROF_ON) { 988 i = CLKF_PC(frame) - g->lowpc; 989 if (i < g->textsize) { 990 i /= HISTFRACTION * sizeof(*g->kcount); 991 g->kcount[i]++; 992 } 993 } 994#endif 995 if (--pscnt > 0) 996 return; 997 /* 998 * Came from kernel mode, so we were: 999 * - handling an interrupt, 1000 * - doing syscall or trap work on behalf of the current 1001 * user process, or 1002 * - spinning in the idle loop. 1003 * Whichever it is, charge the time as appropriate. 1004 * Note that we charge interrupts to the current process, 1005 * regardless of whether they are ``for'' that process, 1006 * so that we know how much of its real time was spent 1007 * in ``non-process'' (i.e., interrupt) work. 1008 */ 1009 p = curproc; 1010 if (CLKF_INTR(frame)) { 1011 if (p != NULL) 1012 p->p_iticks++; 1013 cp_time[CP_INTR]++; 1014 } else if (p != NULL && !(p->p_flag & P_IDLEPROC)) { 1015 p->p_sticks++; 1016 cp_time[CP_SYS]++; 1017 } else 1018 cp_time[CP_IDLE]++; 1019 } 1020 pscnt = psdiv; 1021 1022 /* 1023 * We maintain statistics shown by user-level statistics 1024 * programs: the amount of time in each cpu state, and 1025 * the amount of time each of DK_NDRIVE ``drives'' is busy. 1026 * 1027 * XXX should either run linked list of drives, or (better) 1028 * grab timestamps in the start & done code. 1029 */ 1030 for (i = 0; i < DK_NDRIVE; i++) 1031 if (dk_busy & (1 << i)) 1032 dk_time[i]++; 1033 1034 /* 1035 * We adjust the priority of the current process. The priority of 1036 * a process gets worse as it accumulates CPU time. The cpu usage 1037 * estimator (p_estcpu) is increased here. The formula for computing 1038 * priorities (in kern_synch.c) will compute a different value each 1039 * time p_estcpu increases by 4. The cpu usage estimator ramps up 1040 * quite quickly when the process is running (linearly), and decays 1041 * away exponentially, at a rate which is proportionally slower when 1042 * the system is busy. 
The basic principal is that the system will 1043 * 90% forget that the process used a lot of CPU time in 5 * loadav 1044 * seconds. This causes the system to favor processes which haven't 1045 * run much recently, and to round-robin among other processes. 1046 */ 1047 if (p != NULL) { 1048 p->p_cpticks++; 1049 if (++p->p_estcpu == 0) 1050 p->p_estcpu--; 1051 if ((p->p_estcpu & 3) == 0) { 1052 resetpriority(p); 1053 if (p->p_priority >= PUSER) 1054 p->p_priority = p->p_usrpri; 1055 } 1056 1057 /* Update resource usage integrals and maximums. */ 1058 if ((pstats = p->p_stats) != NULL && 1059 (ru = &pstats->p_ru) != NULL && 1060 (vm = p->p_vmspace) != NULL) { 1061 ru->ru_ixrss += vm->vm_tsize * PAGE_SIZE / 1024; 1062 ru->ru_idrss += vm->vm_dsize * PAGE_SIZE / 1024; 1063 ru->ru_isrss += vm->vm_ssize * PAGE_SIZE / 1024; 1064 rss = vm->vm_pmap.pm_stats.resident_count * 1065 PAGE_SIZE / 1024; 1066 if (ru->ru_maxrss < rss) 1067 ru->ru_maxrss = rss; 1068 } 1069 } 1070} 1071 1072/* 1073 * Return information about system clocks. 1074 */ 1075static int 1076sysctl_kern_clockrate SYSCTL_HANDLER_ARGS 1077{ 1078 struct clockinfo clkinfo; 1079 /* 1080 * Construct clockinfo structure. 1081 */ 1082 clkinfo.hz = hz; 1083 clkinfo.tick = tick; 1084 clkinfo.tickadj = tickadj; 1085 clkinfo.profhz = profhz; 1086 clkinfo.stathz = stathz ? stathz : hz; 1087 return (sysctl_handle_opaque(oidp, &clkinfo, sizeof clkinfo, req)); 1088} 1089 1090SYSCTL_PROC(_kern, KERN_CLOCKRATE, clockrate, CTLTYPE_STRUCT|CTLFLAG_RD, 1091 0, 0, sysctl_kern_clockrate, "S,clockinfo",""); 1092 1093#ifdef PPS_SYNC 1094/* 1095 * hardpps() - discipline CPU clock oscillator to external PPS signal 1096 * 1097 * This routine is called at each PPS interrupt in order to discipline 1098 * the CPU clock oscillator to the PPS signal. It measures the PPS phase 1099 * and leaves it in a handy spot for the hardclock() routine. It 1100 * integrates successive PPS phase differences and calculates the 1101 * frequency offset. 
This is used in hardclock() to discipline the CPU 1102 * clock oscillator so that intrinsic frequency error is cancelled out. 1103 * The code requires the caller to capture the time and hardware counter 1104 * value at the on-time PPS signal transition. 1105 * 1106 * Note that, on some Unix systems, this routine runs at an interrupt 1107 * priority level higher than the timer interrupt routine hardclock(). 1108 * Therefore, the variables used are distinct from the hardclock() 1109 * variables, except for certain exceptions: The PPS frequency pps_freq 1110 * and phase pps_offset variables are determined by this routine and 1111 * updated atomically. The time_tolerance variable can be considered a 1112 * constant, since it is infrequently changed, and then only when the 1113 * PPS signal is disabled. The watchdog counter pps_valid is updated 1114 * once per second by hardclock() and is atomically cleared in this 1115 * routine. 1116 */ 1117void 1118hardpps(tvp, usec) 1119 struct timeval *tvp; /* time at PPS */ 1120 long usec; /* hardware counter at PPS */ 1121{ 1122 long u_usec, v_usec, bigtick; 1123 long cal_sec, cal_usec; 1124 1125 /* 1126 * An occasional glitch can be produced when the PPS interrupt 1127 * occurs in the hardclock() routine before the time variable is 1128 * updated. Here the offset is discarded when the difference 1129 * between it and the last one is greater than tick/2, but not 1130 * if the interval since the first discard exceeds 30 s. 
1131 */ 1132 time_status |= STA_PPSSIGNAL; 1133 time_status &= ~(STA_PPSJITTER | STA_PPSWANDER | STA_PPSERROR); 1134 pps_valid = 0; 1135 u_usec = -tvp->tv_usec; 1136 if (u_usec < -500000) 1137 u_usec += 1000000; 1138 v_usec = pps_offset - u_usec; 1139 if (v_usec < 0) 1140 v_usec = -v_usec; 1141 if (v_usec > (tick >> 1)) { 1142 if (pps_glitch > MAXGLITCH) { 1143 pps_glitch = 0; 1144 pps_tf[2] = u_usec; 1145 pps_tf[1] = u_usec; 1146 } else { 1147 pps_glitch++; 1148 u_usec = pps_offset; 1149 } 1150 } else 1151 pps_glitch = 0; 1152 1153 /* 1154 * A three-stage median filter is used to help deglitch the pps 1155 * time. The median sample becomes the time offset estimate; the 1156 * difference between the other two samples becomes the time 1157 * dispersion (jitter) estimate. 1158 */ 1159 pps_tf[2] = pps_tf[1]; 1160 pps_tf[1] = pps_tf[0]; 1161 pps_tf[0] = u_usec; 1162 if (pps_tf[0] > pps_tf[1]) { 1163 if (pps_tf[1] > pps_tf[2]) { 1164 pps_offset = pps_tf[1]; /* 0 1 2 */ 1165 v_usec = pps_tf[0] - pps_tf[2]; 1166 } else if (pps_tf[2] > pps_tf[0]) { 1167 pps_offset = pps_tf[0]; /* 2 0 1 */ 1168 v_usec = pps_tf[2] - pps_tf[1]; 1169 } else { 1170 pps_offset = pps_tf[2]; /* 0 2 1 */ 1171 v_usec = pps_tf[0] - pps_tf[1]; 1172 } 1173 } else { 1174 if (pps_tf[1] < pps_tf[2]) { 1175 pps_offset = pps_tf[1]; /* 2 1 0 */ 1176 v_usec = pps_tf[2] - pps_tf[0]; 1177 } else if (pps_tf[2] < pps_tf[0]) { 1178 pps_offset = pps_tf[0]; /* 1 0 2 */ 1179 v_usec = pps_tf[1] - pps_tf[2]; 1180 } else { 1181 pps_offset = pps_tf[2]; /* 1 2 0 */ 1182 v_usec = pps_tf[1] - pps_tf[0]; 1183 } 1184 } 1185 if (v_usec > MAXTIME) 1186 pps_jitcnt++; 1187 v_usec = (v_usec << PPS_AVG) - pps_jitter; 1188 if (v_usec < 0) 1189 pps_jitter -= -v_usec >> PPS_AVG; 1190 else 1191 pps_jitter += v_usec >> PPS_AVG; 1192 if (pps_jitter > (MAXTIME >> 1)) 1193 time_status |= STA_PPSJITTER; 1194 1195 /* 1196 * During the calibration interval adjust the starting time when 1197 * the tick overflows. 
At the end of the interval compute the 1198 * duration of the interval and the difference of the hardware 1199 * counters at the beginning and end of the interval. This code 1200 * is deliciously complicated by the fact valid differences may 1201 * exceed the value of tick when using long calibration 1202 * intervals and small ticks. Note that the counter can be 1203 * greater than tick if caught at just the wrong instant, but 1204 * the values returned and used here are correct. 1205 */ 1206 bigtick = (long)tick << SHIFT_USEC; 1207 pps_usec -= pps_freq; 1208 if (pps_usec >= bigtick) 1209 pps_usec -= bigtick; 1210 if (pps_usec < 0) 1211 pps_usec += bigtick; 1212 pps_time.tv_sec++; 1213 pps_count++; 1214 if (pps_count < (1 << pps_shift)) 1215 return; 1216 pps_count = 0; 1217 pps_calcnt++; 1218 u_usec = usec << SHIFT_USEC; 1219 v_usec = pps_usec - u_usec; 1220 if (v_usec >= bigtick >> 1) 1221 v_usec -= bigtick; 1222 if (v_usec < -(bigtick >> 1)) 1223 v_usec += bigtick; 1224 if (v_usec < 0) 1225 v_usec = -(-v_usec >> pps_shift); 1226 else 1227 v_usec = v_usec >> pps_shift; 1228 pps_usec = u_usec; 1229 cal_sec = tvp->tv_sec; 1230 cal_usec = tvp->tv_usec; 1231 cal_sec -= pps_time.tv_sec; 1232 cal_usec -= pps_time.tv_usec; 1233 if (cal_usec < 0) { 1234 cal_usec += 1000000; 1235 cal_sec--; 1236 } 1237 pps_time = *tvp; 1238 1239 /* 1240 * Check for lost interrupts, noise, excessive jitter and 1241 * excessive frequency error. The number of timer ticks during 1242 * the interval may vary +-1 tick. Add to this a margin of one 1243 * tick for the PPS signal jitter and maximum frequency 1244 * deviation. If the limits are exceeded, the calibration 1245 * interval is reset to the minimum and we start over. 
1246 */ 1247 u_usec = (long)tick << 1; 1248 if (!((cal_sec == -1 && cal_usec > (1000000 - u_usec)) 1249 || (cal_sec == 0 && cal_usec < u_usec)) 1250 || v_usec > time_tolerance || v_usec < -time_tolerance) { 1251 pps_errcnt++; 1252 pps_shift = PPS_SHIFT; 1253 pps_intcnt = 0; 1254 time_status |= STA_PPSERROR; 1255 return; 1256 } 1257 1258 /* 1259 * A three-stage median filter is used to help deglitch the pps 1260 * frequency. The median sample becomes the frequency offset 1261 * estimate; the difference between the other two samples 1262 * becomes the frequency dispersion (stability) estimate. 1263 */ 1264 pps_ff[2] = pps_ff[1]; 1265 pps_ff[1] = pps_ff[0]; 1266 pps_ff[0] = v_usec; 1267 if (pps_ff[0] > pps_ff[1]) { 1268 if (pps_ff[1] > pps_ff[2]) { 1269 u_usec = pps_ff[1]; /* 0 1 2 */ 1270 v_usec = pps_ff[0] - pps_ff[2]; 1271 } else if (pps_ff[2] > pps_ff[0]) { 1272 u_usec = pps_ff[0]; /* 2 0 1 */ 1273 v_usec = pps_ff[2] - pps_ff[1]; 1274 } else { 1275 u_usec = pps_ff[2]; /* 0 2 1 */ 1276 v_usec = pps_ff[0] - pps_ff[1]; 1277 } 1278 } else { 1279 if (pps_ff[1] < pps_ff[2]) { 1280 u_usec = pps_ff[1]; /* 2 1 0 */ 1281 v_usec = pps_ff[2] - pps_ff[0]; 1282 } else if (pps_ff[2] < pps_ff[0]) { 1283 u_usec = pps_ff[0]; /* 1 0 2 */ 1284 v_usec = pps_ff[1] - pps_ff[2]; 1285 } else { 1286 u_usec = pps_ff[2]; /* 1 2 0 */ 1287 v_usec = pps_ff[1] - pps_ff[0]; 1288 } 1289 } 1290 1291 /* 1292 * Here the frequency dispersion (stability) is updated. If it 1293 * is less than one-fourth the maximum (MAXFREQ), the frequency 1294 * offset is updated as well, but clamped to the tolerance. It 1295 * will be processed later by the hardclock() routine. 
1296 */ 1297 v_usec = (v_usec >> 1) - pps_stabil; 1298 if (v_usec < 0) 1299 pps_stabil -= -v_usec >> PPS_AVG; 1300 else 1301 pps_stabil += v_usec >> PPS_AVG; 1302 if (pps_stabil > MAXFREQ >> 2) { 1303 pps_stbcnt++; 1304 time_status |= STA_PPSWANDER; 1305 return; 1306 } 1307 if (time_status & STA_PPSFREQ) { 1308 if (u_usec < 0) { 1309 pps_freq -= -u_usec >> PPS_AVG; 1310 if (pps_freq < -time_tolerance) 1311 pps_freq = -time_tolerance; 1312 u_usec = -u_usec; 1313 } else { 1314 pps_freq += u_usec >> PPS_AVG; 1315 if (pps_freq > time_tolerance) 1316 pps_freq = time_tolerance; 1317 } 1318 } 1319 1320 /* 1321 * Here the calibration interval is adjusted. If the maximum 1322 * time difference is greater than tick / 4, reduce the interval 1323 * by half. If this is not the case for four consecutive 1324 * intervals, double the interval. 1325 */ 1326 if (u_usec << pps_shift > bigtick >> 2) { 1327 pps_intcnt = 0; 1328 if (pps_shift > PPS_SHIFT) 1329 pps_shift--; 1330 } else if (pps_intcnt >= 4) { 1331 pps_intcnt = 0; 1332 if (pps_shift < PPS_SHIFTMAX) 1333 pps_shift++; 1334 } else 1335 pps_intcnt++; 1336} 1337#endif /* PPS_SYNC */ 1338