kern_time.c revision 1.131
1/*	$NetBSD: kern_time.c,v 1.131 2007/11/15 20:12:04 ad Exp $	*/
2
3/*-
4 * Copyright (c) 2000, 2004, 2005, 2007 The NetBSD Foundation, Inc.
5 * All rights reserved.
6 *
7 * This code is derived from software contributed to The NetBSD Foundation
8 * by Christopher G. Demetriou.
9 *
10 * Redistribution and use in source and binary forms, with or without
11 * modification, are permitted provided that the following conditions
12 * are met:
13 * 1. Redistributions of source code must retain the above copyright
14 *    notice, this list of conditions and the following disclaimer.
15 * 2. Redistributions in binary form must reproduce the above copyright
16 *    notice, this list of conditions and the following disclaimer in the
17 *    documentation and/or other materials provided with the distribution.
18 * 3. All advertising materials mentioning features or use of this software
19 *    must display the following acknowledgement:
20 *	This product includes software developed by the NetBSD
21 *	Foundation, Inc. and its contributors.
22 * 4. Neither the name of The NetBSD Foundation nor the names of its
23 *    contributors may be used to endorse or promote products derived
24 *    from this software without specific prior written permission.
25 *
26 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
27 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
28 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
29 * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
30 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
31 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
32 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
33 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
34 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
35 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
36 * POSSIBILITY OF SUCH DAMAGE.
37 */
38
39/*
40 * Copyright (c) 1982, 1986, 1989, 1993
41 *	The Regents of the University of California.  All rights reserved.
42 *
43 * Redistribution and use in source and binary forms, with or without
44 * modification, are permitted provided that the following conditions
45 * are met:
46 * 1. Redistributions of source code must retain the above copyright
47 *    notice, this list of conditions and the following disclaimer.
48 * 2. Redistributions in binary form must reproduce the above copyright
49 *    notice, this list of conditions and the following disclaimer in the
50 *    documentation and/or other materials provided with the distribution.
51 * 3. Neither the name of the University nor the names of its contributors
52 *    may be used to endorse or promote products derived from this software
53 *    without specific prior written permission.
54 *
55 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
56 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
57 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
58 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
59 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
60 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
61 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
62 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
63 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
64 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
65 * SUCH DAMAGE.
66 *
67 *	@(#)kern_time.c	8.4 (Berkeley) 5/26/95
68 */
69
70#include <sys/cdefs.h>
71__KERNEL_RCSID(0, "$NetBSD: kern_time.c,v 1.131 2007/11/15 20:12:04 ad Exp $");
72
73#include <sys/param.h>
74#include <sys/resourcevar.h>
75#include <sys/kernel.h>
76#include <sys/systm.h>
77#include <sys/proc.h>
78#include <sys/vnode.h>
79#include <sys/signalvar.h>
80#include <sys/syslog.h>
81#include <sys/timetc.h>
82#ifndef __HAVE_TIMECOUNTER
83#include <sys/timevar.h>
84#endif /* !__HAVE_TIMECOUNTER */
85#include <sys/kauth.h>
86
87#include <sys/mount.h>
88#include <sys/syscallargs.h>
89
90#include <uvm/uvm_extern.h>
91
92#include <sys/cpu.h>
93
94kmutex_t	time_lock;
95
96POOL_INIT(ptimer_pool, sizeof(struct ptimer), 0, 0, 0, "ptimerpl",
97    &pool_allocator_nointr, IPL_NONE);
98POOL_INIT(ptimers_pool, sizeof(struct ptimers), 0, 0, 0, "ptimerspl",
99    &pool_allocator_nointr, IPL_NONE);
100
101/*
102 * Initialize timekeeping.
103 */
104void
105time_init(void)
106{
107
108	mutex_init(&time_lock, MUTEX_DEFAULT, IPL_NONE);
109}
110
111/* Time of day and interval timer support.
112 *
113 * These routines provide the kernel entry points to get and set
114 * the time-of-day and per-process interval timers.  Subroutines
115 * here provide support for adding and subtracting timeval structures
116 * and decrementing interval timers, optionally reloading the interval
117 * timers when they expire.
118 */
119
120/* This function is used by clock_settime and settimeofday */
121int
122settime(struct proc *p, struct timespec *ts)
123{
124	struct timeval delta, tv;
125#ifdef __HAVE_TIMECOUNTER
126	struct timeval now;
127	struct timespec ts1;
128#endif /* !__HAVE_TIMECOUNTER */
129	lwp_t *l;
130	int s;
131
132	/*
133	 * Don't allow the time to be set forward so far it will wrap
134	 * and become negative, thus allowing an attacker to bypass
135	 * the next check below.  The cutoff is 1 year before rollover
136	 * occurs, so even if the attacker uses adjtime(2) to move
137	 * the time past the cutoff, it will take a very long time
138	 * to get to the wrap point.
139	 *
140	 * XXX: we check against INT_MAX since on 64-bit
141	 *	platforms, sizeof(int) != sizeof(long) and
142	 *	time_t is 32 bits even when atv.tv_sec is 64 bits.
143	 */
144	if (ts->tv_sec > INT_MAX - 365*24*60*60) {
145		struct proc *pp;
146
147		mutex_enter(&proclist_lock);
148		pp = p->p_pptr;
149		mutex_enter(&pp->p_mutex);
150		log(LOG_WARNING, "pid %d (%s) "
151		    "invoked by uid %d ppid %d (%s) "
152		    "tried to set clock forward to %ld\n",
153		    p->p_pid, p->p_comm, kauth_cred_geteuid(pp->p_cred),
154		    pp->p_pid, pp->p_comm, (long)ts->tv_sec);
155		mutex_exit(&pp->p_mutex);
156		mutex_exit(&proclist_lock);
157		return (EPERM);
158	}
159	TIMESPEC_TO_TIMEVAL(&tv, ts);
160
161	/* WHAT DO WE DO ABOUT PENDING REAL-TIME TIMEOUTS??? */
162	s = splclock();
163#ifdef __HAVE_TIMECOUNTER
164	microtime(&now);
165	timersub(&tv, &now, &delta);
166#else /* !__HAVE_TIMECOUNTER */
167	timersub(&tv, &time, &delta);
168#endif /* !__HAVE_TIMECOUNTER */
169	if ((delta.tv_sec < 0 || delta.tv_usec < 0) &&
170	    kauth_authorize_system(p->p_cred, KAUTH_SYSTEM_TIME,
171	    KAUTH_REQ_SYSTEM_TIME_BACKWARDS, NULL, NULL, NULL)) {
172		splx(s);
173		return (EPERM);
174	}
175#ifdef notyet
176	if ((delta.tv_sec < 86400) && securelevel > 0) { /* XXX elad - notyet */
177		splx(s);
178		return (EPERM);
179	}
180#endif
181
182#ifdef __HAVE_TIMECOUNTER
183	TIMEVAL_TO_TIMESPEC(&tv, &ts1);
184	tc_setclock(&ts1);
185#else /* !__HAVE_TIMECOUNTER */
186	time = tv;
187#endif /* !__HAVE_TIMECOUNTER */
188
189	timeradd(&boottime, &delta, &boottime);
190
191	/*
192	 * XXXSMP: There is a short race between setting the time above
193	 * and adjusting LWP's run times.  Fixing this properly means
194	 * pausing all CPUs while we adjust the clock.
195	 */
196	mutex_enter(&proclist_lock);
197	LIST_FOREACH(l, &alllwp, l_list) {
198		lwp_lock(l);
199		timeradd(&l->l_stime, &delta, &l->l_stime);
200		lwp_unlock(l);
201	}
202	mutex_exit(&proclist_lock);
203	resettodr();
204	splx(s);
205
206	return (0);
207}
208
209/* ARGSUSED */
210int
211sys_clock_gettime(struct lwp *l, void *v, register_t *retval)
212{
213	struct sys_clock_gettime_args /* {
214		syscallarg(clockid_t) clock_id;
215		syscallarg(struct timespec *) tp;
216	} */ *uap = v;
217	clockid_t clock_id;
218	struct timespec ats;
219
220	clock_id = SCARG(uap, clock_id);
221	switch (clock_id) {
222	case CLOCK_REALTIME:
223		nanotime(&ats);
224		break;
225	case CLOCK_MONOTONIC:
226		nanouptime(&ats);
227		break;
228	default:
229		return (EINVAL);
230	}
231
232	return copyout(&ats, SCARG(uap, tp), sizeof(ats));
233}
234
235/* ARGSUSED */
236int
237sys_clock_settime(struct lwp *l, void *v, register_t *retval)
238{
239	struct sys_clock_settime_args /* {
240		syscallarg(clockid_t) clock_id;
241		syscallarg(const struct timespec *) tp;
242	} */ *uap = v;
243	int error;
244
245	if ((error = kauth_authorize_system(l->l_cred, KAUTH_SYSTEM_TIME,
246	    KAUTH_REQ_SYSTEM_TIME_SYSTEM, NULL, NULL, NULL)) != 0)
247		return (error);
248
249	return clock_settime1(l->l_proc, SCARG(uap, clock_id), SCARG(uap, tp));
250}
251
252
253int
254clock_settime1(struct proc *p, clockid_t clock_id, const struct timespec *tp)
255{
256	struct timespec ats;
257	int error;
258
259	if ((error = copyin(tp, &ats, sizeof(ats))) != 0)
260		return (error);
261
262	switch (clock_id) {
263	case CLOCK_REALTIME:
264		if ((error = settime(p, &ats)) != 0)
265			return (error);
266		break;
267	case CLOCK_MONOTONIC:
268		return (EINVAL);	/* read-only clock */
269	default:
270		return (EINVAL);
271	}
272
273	return 0;
274}
275
276int
277sys_clock_getres(struct lwp *l, void *v, register_t *retval)
278{
279	struct sys_clock_getres_args /* {
280		syscallarg(clockid_t) clock_id;
281		syscallarg(struct timespec *) tp;
282	} */ *uap = v;
283	clockid_t clock_id;
284	struct timespec ts;
285	int error = 0;
286
287	clock_id = SCARG(uap, clock_id);
288	switch (clock_id) {
289	case CLOCK_REALTIME:
290	case CLOCK_MONOTONIC:
291		ts.tv_sec = 0;
292		if (tc_getfrequency() > 1000000000)
293			ts.tv_nsec = 1;
294		else
295			ts.tv_nsec = 1000000000 / tc_getfrequency();
296		break;
297	default:
298		return (EINVAL);
299	}
300
301	if (SCARG(uap, tp))
302		error = copyout(&ts, SCARG(uap, tp), sizeof(ts));
303
304	return error;
305}
306
307/* ARGSUSED */
308int
309sys_nanosleep(struct lwp *l, void *v, register_t *retval)
310{
311	struct sys_nanosleep_args/* {
312		syscallarg(struct timespec *) rqtp;
313		syscallarg(struct timespec *) rmtp;
314	} */ *uap = v;
315	struct timespec rmt, rqt;
316	int error, error1;
317
318	error = copyin(SCARG(uap, rqtp), &rqt, sizeof(struct timespec));
319	if (error)
320		return (error);
321
322	error = nanosleep1(l, &rqt, SCARG(uap, rmtp) ? &rmt : NULL);
323	if (SCARG(uap, rmtp) == NULL || (error != 0 && error != EINTR))
324		return error;
325
326	error1 = copyout(&rmt, SCARG(uap, rmtp), sizeof(rmt));
327	return error1 ? error1 : error;
328}
329
330int
331nanosleep1(struct lwp *l, struct timespec *rqt, struct timespec *rmt)
332{
333#ifdef __HAVE_TIMECOUNTER
334	int error, timo;
335
336	if (itimespecfix(rqt))
337		return (EINVAL);
338
339	timo = tstohz(rqt);
340	/*
341	 * Avoid inadvertantly sleeping forever
342	 */
343	if (timo == 0)
344		timo = 1;
345
346	if (rmt != NULL)
347		getnanouptime(rmt);
348
349	error = kpause("nanoslp", true, timo, NULL);
350	if (error == ERESTART)
351		error = EINTR;
352	if (error == EWOULDBLOCK)
353		error = 0;
354
355	if (rmt!= NULL) {
356		struct timespec rmtend;
357
358		getnanouptime(&rmtend);
359
360		timespecsub(&rmtend, rmt, rmt);
361		timespecsub(rqt, rmt, rmt);
362		if (rmt->tv_sec < 0)
363			timespecclear(rmt);
364	}
365
366	return error;
367#else /* !__HAVE_TIMECOUNTER */
368	struct timeval atv, utv;
369	int error, s, timo;
370
371	TIMESPEC_TO_TIMEVAL(&atv, rqt);
372	if (itimerfix(&atv))
373		return (EINVAL);
374
375	s = splclock();
376	timeradd(&atv,&time,&atv);
377	timo = hzto(&atv);
378	/*
379	 * Avoid inadvertantly sleeping forever
380	 */
381	if (timo == 0)
382		timo = 1;
383	splx(s);
384
385	error = kpause("nanoslp", true, timo, NULL);
386	if (error == ERESTART)
387		error = EINTR;
388	if (error == EWOULDBLOCK)
389		error = 0;
390
391	if (rmt != NULL) {
392		s = splclock();
393		utv = time;
394		splx(s);
395
396		timersub(&atv, &utv, &utv);
397		if (utv.tv_sec < 0)
398			timerclear(&utv);
399
400		TIMEVAL_TO_TIMESPEC(&utv, rmt);
401	}
402
403	return error;
404#endif /* !__HAVE_TIMECOUNTER */
405}
406
407/* ARGSUSED */
408int
409sys_gettimeofday(struct lwp *l, void *v, register_t *retval)
410{
411	struct sys_gettimeofday_args /* {
412		syscallarg(struct timeval *) tp;
413		syscallarg(void *) tzp;		really "struct timezone *"
414	} */ *uap = v;
415	struct timeval atv;
416	int error = 0;
417	struct timezone tzfake;
418
419	if (SCARG(uap, tp)) {
420		microtime(&atv);
421		error = copyout(&atv, SCARG(uap, tp), sizeof(atv));
422		if (error)
423			return (error);
424	}
425	if (SCARG(uap, tzp)) {
426		/*
427		 * NetBSD has no kernel notion of time zone, so we just
428		 * fake up a timezone struct and return it if demanded.
429		 */
430		tzfake.tz_minuteswest = 0;
431		tzfake.tz_dsttime = 0;
432		error = copyout(&tzfake, SCARG(uap, tzp), sizeof(tzfake));
433	}
434	return (error);
435}
436
437/* ARGSUSED */
438int
439sys_settimeofday(struct lwp *l, void *v, register_t *retval)
440{
441	struct sys_settimeofday_args /* {
442		syscallarg(const struct timeval *) tv;
443		syscallarg(const void *) tzp;	really "const struct timezone *"
444	} */ *uap = v;
445
446	return settimeofday1(SCARG(uap, tv), true, SCARG(uap, tzp), l, true);
447}
448
449int
450settimeofday1(const struct timeval *utv, bool userspace,
451    const void *utzp, struct lwp *l, bool check_kauth)
452{
453	struct timeval atv;
454	struct timespec ts;
455	int error;
456
457	/* Verify all parameters before changing time. */
458
459	if (check_kauth) {
460		error = kauth_authorize_system(l->l_cred, KAUTH_SYSTEM_TIME,
461		    KAUTH_REQ_SYSTEM_TIME_SYSTEM, NULL, NULL, NULL);
462		if (error != 0)
463			return (error);
464	}
465
466	/*
467	 * NetBSD has no kernel notion of time zone, and only an
468	 * obsolete program would try to set it, so we log a warning.
469	 */
470	if (utzp)
471		log(LOG_WARNING, "pid %d attempted to set the "
472		    "(obsolete) kernel time zone\n", l->l_proc->p_pid);
473
474	if (utv == NULL)
475		return 0;
476
477	if (userspace) {
478		if ((error = copyin(utv, &atv, sizeof(atv))) != 0)
479			return error;
480		utv = &atv;
481	}
482
483	TIMEVAL_TO_TIMESPEC(utv, &ts);
484	return settime(l->l_proc, &ts);
485}
486
#ifndef __HAVE_TIMECOUNTER
/* State of a pending adjtime() correction; written by adjtime1(). */
int	tickdelta;			/* current clock skew, us. per tick */
long	timedelta;			/* unapplied time correction, us. */
long	bigadj = 1000000;		/* use 10x skew above bigadj us. */
#endif /* !__HAVE_TIMECOUNTER */

int	time_adjusted;			/* set if an adjustment is made */
494
495/* ARGSUSED */
496int
497sys_adjtime(struct lwp *l, void *v, register_t *retval)
498{
499	struct sys_adjtime_args /* {
500		syscallarg(const struct timeval *) delta;
501		syscallarg(struct timeval *) olddelta;
502	} */ *uap = v;
503	int error;
504
505	if ((error = kauth_authorize_system(l->l_cred, KAUTH_SYSTEM_TIME,
506	    KAUTH_REQ_SYSTEM_TIME_ADJTIME, NULL, NULL, NULL)) != 0)
507		return (error);
508
509	return adjtime1(SCARG(uap, delta), SCARG(uap, olddelta), l->l_proc);
510}
511
512int
513adjtime1(const struct timeval *delta, struct timeval *olddelta, struct proc *p)
514{
515	struct timeval atv;
516	int error = 0;
517
518#ifdef __HAVE_TIMECOUNTER
519	extern int64_t time_adjtime;  /* in kern_ntptime.c */
520#else /* !__HAVE_TIMECOUNTER */
521	long ndelta, ntickdelta, odelta;
522	int s;
523#endif /* !__HAVE_TIMECOUNTER */
524
525#ifdef __HAVE_TIMECOUNTER
526	if (olddelta) {
527		atv.tv_sec = time_adjtime / 1000000;
528		atv.tv_usec = time_adjtime % 1000000;
529		if (atv.tv_usec < 0) {
530			atv.tv_usec += 1000000;
531			atv.tv_sec--;
532		}
533		error = copyout(&atv, olddelta, sizeof(struct timeval));
534		if (error)
535			return (error);
536	}
537
538	if (delta) {
539		error = copyin(delta, &atv, sizeof(struct timeval));
540		if (error)
541			return (error);
542
543		time_adjtime = (int64_t)atv.tv_sec * 1000000 +
544			atv.tv_usec;
545
546		if (time_adjtime)
547			/* We need to save the system time during shutdown */
548			time_adjusted |= 1;
549	}
550#else /* !__HAVE_TIMECOUNTER */
551	error = copyin(delta, &atv, sizeof(struct timeval));
552	if (error)
553		return (error);
554
555	/*
556	 * Compute the total correction and the rate at which to apply it.
557	 * Round the adjustment down to a whole multiple of the per-tick
558	 * delta, so that after some number of incremental changes in
559	 * hardclock(), tickdelta will become zero, lest the correction
560	 * overshoot and start taking us away from the desired final time.
561	 */
562	ndelta = atv.tv_sec * 1000000 + atv.tv_usec;
563	if (ndelta > bigadj || ndelta < -bigadj)
564		ntickdelta = 10 * tickadj;
565	else
566		ntickdelta = tickadj;
567	if (ndelta % ntickdelta)
568		ndelta = ndelta / ntickdelta * ntickdelta;
569
570	/*
571	 * To make hardclock()'s job easier, make the per-tick delta negative
572	 * if we want time to run slower; then hardclock can simply compute
573	 * tick + tickdelta, and subtract tickdelta from timedelta.
574	 */
575	if (ndelta < 0)
576		ntickdelta = -ntickdelta;
577	if (ndelta != 0)
578		/* We need to save the system clock time during shutdown */
579		time_adjusted |= 1;
580	s = splclock();
581	odelta = timedelta;
582	timedelta = ndelta;
583	tickdelta = ntickdelta;
584	splx(s);
585
586	if (olddelta) {
587		atv.tv_sec = odelta / 1000000;
588		atv.tv_usec = odelta % 1000000;
589		error = copyout(&atv, olddelta, sizeof(struct timeval));
590	}
591#endif /* __HAVE_TIMECOUNTER */
592
593	return error;
594}
595
596/*
597 * Interval timer support. Both the BSD getitimer() family and the POSIX
598 * timer_*() family of routines are supported.
599 *
600 * All timers are kept in an array pointed to by p_timers, which is
601 * allocated on demand - many processes don't use timers at all. The
602 * first three elements in this array are reserved for the BSD timers:
603 * element 0 is ITIMER_REAL, element 1 is ITIMER_VIRTUAL, and element
604 * 2 is ITIMER_PROF. The rest may be allocated by the timer_create()
605 * syscall.
606 *
607 * Realtime timers are kept in the ptimer structure as an absolute
608 * time; virtual time timers are kept as a linked list of deltas.
609 * Virtual time timers are processed in the hardclock() routine of
610 * kern_clock.c.  The real time timer is processed by a callout
611 * routine, called from the softclock() routine.  Since a callout may
612 * be delayed in real time due to interrupt processing in the system,
613 * it is possible for the real time timeout routine (realtimerexpire,
614 * given below), to be delayed in real time past when it is supposed
615 * to occur.  It does not suffice, therefore, to reload the real timer
616 * .it_value from the real time timers .it_interval.  Rather, we
617 * compute the next time in absolute time the timer should go off.  */
618
619/* Allocate a POSIX realtime timer. */
620int
621sys_timer_create(struct lwp *l, void *v, register_t *retval)
622{
623	struct sys_timer_create_args /* {
624		syscallarg(clockid_t) clock_id;
625		syscallarg(struct sigevent *) evp;
626		syscallarg(timer_t *) timerid;
627	} */ *uap = v;
628
629	return timer_create1(SCARG(uap, timerid), SCARG(uap, clock_id),
630	    SCARG(uap, evp), copyin, l);
631}
632
633int
634timer_create1(timer_t *tid, clockid_t id, struct sigevent *evp,
635    copyin_t fetch_event, struct lwp *l)
636{
637	int error;
638	timer_t timerid;
639	struct ptimer *pt;
640	struct proc *p;
641
642	p = l->l_proc;
643
644	if (id < CLOCK_REALTIME ||
645	    id > CLOCK_PROF)
646		return (EINVAL);
647
648	if (p->p_timers == NULL)
649		timers_alloc(p);
650
651	/* Find a free timer slot, skipping those reserved for setitimer(). */
652	for (timerid = 3; timerid < TIMER_MAX; timerid++)
653		if (p->p_timers->pts_timers[timerid] == NULL)
654			break;
655
656	if (timerid == TIMER_MAX)
657		return EAGAIN;
658
659	pt = pool_get(&ptimer_pool, PR_WAITOK);
660	if (evp) {
661		if (((error =
662		    (*fetch_event)(evp, &pt->pt_ev, sizeof(pt->pt_ev))) != 0) ||
663		    ((pt->pt_ev.sigev_notify < SIGEV_NONE) ||
664			(pt->pt_ev.sigev_notify > SIGEV_SA))) {
665			pool_put(&ptimer_pool, pt);
666			return (error ? error : EINVAL);
667		}
668	} else {
669		pt->pt_ev.sigev_notify = SIGEV_SIGNAL;
670		switch (id) {
671		case CLOCK_REALTIME:
672			pt->pt_ev.sigev_signo = SIGALRM;
673			break;
674		case CLOCK_VIRTUAL:
675			pt->pt_ev.sigev_signo = SIGVTALRM;
676			break;
677		case CLOCK_PROF:
678			pt->pt_ev.sigev_signo = SIGPROF;
679			break;
680		}
681		pt->pt_ev.sigev_value.sival_int = timerid;
682	}
683	pt->pt_info.ksi_signo = pt->pt_ev.sigev_signo;
684	pt->pt_info.ksi_errno = 0;
685	pt->pt_info.ksi_code = 0;
686	pt->pt_info.ksi_pid = p->p_pid;
687	pt->pt_info.ksi_uid = kauth_cred_getuid(l->l_cred);
688	pt->pt_info.ksi_value = pt->pt_ev.sigev_value;
689
690	pt->pt_type = id;
691	pt->pt_proc = p;
692	pt->pt_overruns = 0;
693	pt->pt_poverruns = 0;
694	pt->pt_entry = timerid;
695	timerclear(&pt->pt_time.it_value);
696	if (id == CLOCK_REALTIME)
697		callout_init(&pt->pt_ch, 0);
698	else
699		pt->pt_active = 0;
700
701	p->p_timers->pts_timers[timerid] = pt;
702
703	return copyout(&timerid, tid, sizeof(timerid));
704}
705
706/* Delete a POSIX realtime timer */
707int
708sys_timer_delete(struct lwp *l, void *v, register_t *retval)
709{
710	struct sys_timer_delete_args /*  {
711		syscallarg(timer_t) timerid;
712	} */ *uap = v;
713	struct proc *p = l->l_proc;
714	timer_t timerid;
715	struct ptimer *pt, *ptn;
716	int s;
717
718	timerid = SCARG(uap, timerid);
719
720	if ((p->p_timers == NULL) ||
721	    (timerid < 2) || (timerid >= TIMER_MAX) ||
722	    ((pt = p->p_timers->pts_timers[timerid]) == NULL))
723		return (EINVAL);
724
725	if (pt->pt_type == CLOCK_REALTIME) {
726		callout_stop(&pt->pt_ch);
727		callout_destroy(&pt->pt_ch);
728	} else if (pt->pt_active) {
729		s = splclock();
730		ptn = LIST_NEXT(pt, pt_list);
731		LIST_REMOVE(pt, pt_list);
732		for ( ; ptn; ptn = LIST_NEXT(ptn, pt_list))
733			timeradd(&pt->pt_time.it_value, &ptn->pt_time.it_value,
734			    &ptn->pt_time.it_value);
735		splx(s);
736	}
737
738	p->p_timers->pts_timers[timerid] = NULL;
739	pool_put(&ptimer_pool, pt);
740
741	return (0);
742}
743
744/*
745 * Set up the given timer. The value in pt->pt_time.it_value is taken
746 * to be an absolute time for CLOCK_REALTIME timers and a relative
747 * time for virtual timers.
748 * Must be called at splclock().
749 */
750void
751timer_settime(struct ptimer *pt)
752{
753	struct ptimer *ptn, *pptn;
754	struct ptlist *ptl;
755
756	if (pt->pt_type == CLOCK_REALTIME) {
757		callout_stop(&pt->pt_ch);
758		if (timerisset(&pt->pt_time.it_value)) {
759			/*
760			 * Don't need to check hzto() return value, here.
761			 * callout_reset() does it for us.
762			 */
763			callout_reset(&pt->pt_ch, hzto(&pt->pt_time.it_value),
764			    realtimerexpire, pt);
765		}
766	} else {
767		if (pt->pt_active) {
768			ptn = LIST_NEXT(pt, pt_list);
769			LIST_REMOVE(pt, pt_list);
770			for ( ; ptn; ptn = LIST_NEXT(ptn, pt_list))
771				timeradd(&pt->pt_time.it_value,
772				    &ptn->pt_time.it_value,
773				    &ptn->pt_time.it_value);
774		}
775		if (timerisset(&pt->pt_time.it_value)) {
776			if (pt->pt_type == CLOCK_VIRTUAL)
777				ptl = &pt->pt_proc->p_timers->pts_virtual;
778			else
779				ptl = &pt->pt_proc->p_timers->pts_prof;
780
781			for (ptn = LIST_FIRST(ptl), pptn = NULL;
782			     ptn && timercmp(&pt->pt_time.it_value,
783				 &ptn->pt_time.it_value, >);
784			     pptn = ptn, ptn = LIST_NEXT(ptn, pt_list))
785				timersub(&pt->pt_time.it_value,
786				    &ptn->pt_time.it_value,
787				    &pt->pt_time.it_value);
788
789			if (pptn)
790				LIST_INSERT_AFTER(pptn, pt, pt_list);
791			else
792				LIST_INSERT_HEAD(ptl, pt, pt_list);
793
794			for ( ; ptn ; ptn = LIST_NEXT(ptn, pt_list))
795				timersub(&ptn->pt_time.it_value,
796				    &pt->pt_time.it_value,
797				    &ptn->pt_time.it_value);
798
799			pt->pt_active = 1;
800		} else
801			pt->pt_active = 0;
802	}
803}
804
805void
806timer_gettime(struct ptimer *pt, struct itimerval *aitv)
807{
808#ifdef __HAVE_TIMECOUNTER
809	struct timeval now;
810#endif
811	struct ptimer *ptn;
812
813	*aitv = pt->pt_time;
814	if (pt->pt_type == CLOCK_REALTIME) {
815		/*
816		 * Convert from absolute to relative time in .it_value
817		 * part of real time timer.  If time for real time
818		 * timer has passed return 0, else return difference
819		 * between current time and time for the timer to go
820		 * off.
821		 */
822		if (timerisset(&aitv->it_value)) {
823#ifdef __HAVE_TIMECOUNTER
824			getmicrotime(&now);
825			if (timercmp(&aitv->it_value, &now, <))
826				timerclear(&aitv->it_value);
827			else
828				timersub(&aitv->it_value, &now,
829				    &aitv->it_value);
830#else /* !__HAVE_TIMECOUNTER */
831			if (timercmp(&aitv->it_value, &time, <))
832				timerclear(&aitv->it_value);
833			else
834				timersub(&aitv->it_value, &time,
835				    &aitv->it_value);
836#endif /* !__HAVE_TIMECOUNTER */
837		}
838	} else if (pt->pt_active) {
839		if (pt->pt_type == CLOCK_VIRTUAL)
840			ptn = LIST_FIRST(&pt->pt_proc->p_timers->pts_virtual);
841		else
842			ptn = LIST_FIRST(&pt->pt_proc->p_timers->pts_prof);
843		for ( ; ptn && ptn != pt; ptn = LIST_NEXT(ptn, pt_list))
844			timeradd(&aitv->it_value,
845			    &ptn->pt_time.it_value, &aitv->it_value);
846		KASSERT(ptn != NULL); /* pt should be findable on the list */
847	} else
848		timerclear(&aitv->it_value);
849}
850
851
852
853/* Set and arm a POSIX realtime timer */
854int
855sys_timer_settime(struct lwp *l, void *v, register_t *retval)
856{
857	struct sys_timer_settime_args /* {
858		syscallarg(timer_t) timerid;
859		syscallarg(int) flags;
860		syscallarg(const struct itimerspec *) value;
861		syscallarg(struct itimerspec *) ovalue;
862	} */ *uap = v;
863	int error;
864	struct itimerspec value, ovalue, *ovp = NULL;
865
866	if ((error = copyin(SCARG(uap, value), &value,
867	    sizeof(struct itimerspec))) != 0)
868		return (error);
869
870	if (SCARG(uap, ovalue))
871		ovp = &ovalue;
872
873	if ((error = dotimer_settime(SCARG(uap, timerid), &value, ovp,
874	    SCARG(uap, flags), l->l_proc)) != 0)
875		return error;
876
877	if (ovp)
878		return copyout(&ovalue, SCARG(uap, ovalue),
879		    sizeof(struct itimerspec));
880	return 0;
881}
882
883int
884dotimer_settime(int timerid, struct itimerspec *value,
885    struct itimerspec *ovalue, int flags, struct proc *p)
886{
887#ifdef __HAVE_TIMECOUNTER
888	struct timeval now;
889#endif
890	struct itimerval val, oval;
891	struct ptimer *pt;
892	int s;
893
894	if ((p->p_timers == NULL) ||
895	    (timerid < 2) || (timerid >= TIMER_MAX) ||
896	    ((pt = p->p_timers->pts_timers[timerid]) == NULL))
897		return (EINVAL);
898
899	TIMESPEC_TO_TIMEVAL(&val.it_value, &value->it_value);
900	TIMESPEC_TO_TIMEVAL(&val.it_interval, &value->it_interval);
901	if (itimerfix(&val.it_value) || itimerfix(&val.it_interval))
902		return (EINVAL);
903
904	oval = pt->pt_time;
905	pt->pt_time = val;
906
907	s = splclock();
908	/*
909	 * If we've been passed a relative time for a realtime timer,
910	 * convert it to absolute; if an absolute time for a virtual
911	 * timer, convert it to relative and make sure we don't set it
912	 * to zero, which would cancel the timer, or let it go
913	 * negative, which would confuse the comparison tests.
914	 */
915	if (timerisset(&pt->pt_time.it_value)) {
916		if (pt->pt_type == CLOCK_REALTIME) {
917#ifdef __HAVE_TIMECOUNTER
918			if ((flags & TIMER_ABSTIME) == 0) {
919				getmicrotime(&now);
920				timeradd(&pt->pt_time.it_value, &now,
921				    &pt->pt_time.it_value);
922			}
923#else /* !__HAVE_TIMECOUNTER */
924			if ((flags & TIMER_ABSTIME) == 0)
925				timeradd(&pt->pt_time.it_value, &time,
926				    &pt->pt_time.it_value);
927#endif /* !__HAVE_TIMECOUNTER */
928		} else {
929			if ((flags & TIMER_ABSTIME) != 0) {
930#ifdef __HAVE_TIMECOUNTER
931				getmicrotime(&now);
932				timersub(&pt->pt_time.it_value, &now,
933				    &pt->pt_time.it_value);
934#else /* !__HAVE_TIMECOUNTER */
935				timersub(&pt->pt_time.it_value, &time,
936				    &pt->pt_time.it_value);
937#endif /* !__HAVE_TIMECOUNTER */
938				if (!timerisset(&pt->pt_time.it_value) ||
939				    pt->pt_time.it_value.tv_sec < 0) {
940					pt->pt_time.it_value.tv_sec = 0;
941					pt->pt_time.it_value.tv_usec = 1;
942				}
943			}
944		}
945	}
946
947	timer_settime(pt);
948	splx(s);
949
950	if (ovalue) {
951		TIMEVAL_TO_TIMESPEC(&oval.it_value, &ovalue->it_value);
952		TIMEVAL_TO_TIMESPEC(&oval.it_interval, &ovalue->it_interval);
953	}
954
955	return (0);
956}
957
958/* Return the time remaining until a POSIX timer fires. */
959int
960sys_timer_gettime(struct lwp *l, void *v, register_t *retval)
961{
962	struct sys_timer_gettime_args /* {
963		syscallarg(timer_t) timerid;
964		syscallarg(struct itimerspec *) value;
965	} */ *uap = v;
966	struct itimerspec its;
967	int error;
968
969	if ((error = dotimer_gettime(SCARG(uap, timerid), l->l_proc,
970	    &its)) != 0)
971		return error;
972
973	return copyout(&its, SCARG(uap, value), sizeof(its));
974}
975
976int
977dotimer_gettime(int timerid, struct proc *p, struct itimerspec *its)
978{
979	int s;
980	struct ptimer *pt;
981	struct itimerval aitv;
982
983	if ((p->p_timers == NULL) ||
984	    (timerid < 2) || (timerid >= TIMER_MAX) ||
985	    ((pt = p->p_timers->pts_timers[timerid]) == NULL))
986		return (EINVAL);
987
988	s = splclock();
989	timer_gettime(pt, &aitv);
990	splx(s);
991
992	TIMEVAL_TO_TIMESPEC(&aitv.it_interval, &its->it_interval);
993	TIMEVAL_TO_TIMESPEC(&aitv.it_value, &its->it_value);
994
995	return 0;
996}
997
998/*
999 * Return the count of the number of times a periodic timer expired
1000 * while a notification was already pending. The counter is reset when
1001 * a timer expires and a notification can be posted.
1002 */
1003int
1004sys_timer_getoverrun(struct lwp *l, void *v, register_t *retval)
1005{
1006	struct sys_timer_getoverrun_args /* {
1007		syscallarg(timer_t) timerid;
1008	} */ *uap = v;
1009	struct proc *p = l->l_proc;
1010	int timerid;
1011	struct ptimer *pt;
1012
1013	timerid = SCARG(uap, timerid);
1014
1015	if ((p->p_timers == NULL) ||
1016	    (timerid < 2) || (timerid >= TIMER_MAX) ||
1017	    ((pt = p->p_timers->pts_timers[timerid]) == NULL))
1018		return (EINVAL);
1019
1020	*retval = pt->pt_poverruns;
1021
1022	return (0);
1023}
1024
1025/*
1026 * Real interval timer expired:
1027 * send process whose timer expired an alarm signal.
1028 * If time is not set up to reload, then just return.
1029 * Else compute next time timer should go off which is > current time.
1030 * This is where delay in processing this timeout causes multiple
1031 * SIGALRM calls to be compressed into one.
1032 */
1033void
1034realtimerexpire(void *arg)
1035{
1036#ifdef __HAVE_TIMECOUNTER
1037	struct timeval now;
1038#endif
1039	struct ptimer *pt;
1040	int s;
1041
1042	pt = (struct ptimer *)arg;
1043
1044	itimerfire(pt);
1045
1046	if (!timerisset(&pt->pt_time.it_interval)) {
1047		timerclear(&pt->pt_time.it_value);
1048		return;
1049	}
1050#ifdef __HAVE_TIMECOUNTER
1051	for (;;) {
1052		s = splclock();	/* XXX need spl now? */
1053		timeradd(&pt->pt_time.it_value,
1054		    &pt->pt_time.it_interval, &pt->pt_time.it_value);
1055		getmicrotime(&now);
1056		if (timercmp(&pt->pt_time.it_value, &now, >)) {
1057			/*
1058			 * Don't need to check hzto() return value, here.
1059			 * callout_reset() does it for us.
1060			 */
1061			callout_reset(&pt->pt_ch, hzto(&pt->pt_time.it_value),
1062			    realtimerexpire, pt);
1063			splx(s);
1064			return;
1065		}
1066		splx(s);
1067		pt->pt_overruns++;
1068	}
1069#else /* !__HAVE_TIMECOUNTER */
1070	for (;;) {
1071		s = splclock();
1072		timeradd(&pt->pt_time.it_value,
1073		    &pt->pt_time.it_interval, &pt->pt_time.it_value);
1074		if (timercmp(&pt->pt_time.it_value, &time, >)) {
1075			/*
1076			 * Don't need to check hzto() return value, here.
1077			 * callout_reset() does it for us.
1078			 */
1079			callout_reset(&pt->pt_ch, hzto(&pt->pt_time.it_value),
1080			    realtimerexpire, pt);
1081			splx(s);
1082			return;
1083		}
1084		splx(s);
1085		pt->pt_overruns++;
1086	}
1087#endif /* !__HAVE_TIMECOUNTER */
1088}
1089
1090/* BSD routine to get the value of an interval timer. */
1091/* ARGSUSED */
1092int
1093sys_getitimer(struct lwp *l, void *v, register_t *retval)
1094{
1095	struct sys_getitimer_args /* {
1096		syscallarg(int) which;
1097		syscallarg(struct itimerval *) itv;
1098	} */ *uap = v;
1099	struct proc *p = l->l_proc;
1100	struct itimerval aitv;
1101	int error;
1102
1103	error = dogetitimer(p, SCARG(uap, which), &aitv);
1104	if (error)
1105		return error;
1106	return (copyout(&aitv, SCARG(uap, itv), sizeof(struct itimerval)));
1107}
1108
1109int
1110dogetitimer(struct proc *p, int which, struct itimerval *itvp)
1111{
1112	int s;
1113
1114	if ((u_int)which > ITIMER_PROF)
1115		return (EINVAL);
1116
1117	if ((p->p_timers == NULL) || (p->p_timers->pts_timers[which] == NULL)){
1118		timerclear(&itvp->it_value);
1119		timerclear(&itvp->it_interval);
1120	} else {
1121		s = splclock();
1122		timer_gettime(p->p_timers->pts_timers[which], itvp);
1123		splx(s);
1124	}
1125
1126	return 0;
1127}
1128
1129/* BSD routine to set/arm an interval timer. */
1130/* ARGSUSED */
1131int
1132sys_setitimer(struct lwp *l, void *v, register_t *retval)
1133{
1134	struct sys_setitimer_args /* {
1135		syscallarg(int) which;
1136		syscallarg(const struct itimerval *) itv;
1137		syscallarg(struct itimerval *) oitv;
1138	} */ *uap = v;
1139	struct proc *p = l->l_proc;
1140	int which = SCARG(uap, which);
1141	struct sys_getitimer_args getargs;
1142	const struct itimerval *itvp;
1143	struct itimerval aitv;
1144	int error;
1145
1146	if ((u_int)which > ITIMER_PROF)
1147		return (EINVAL);
1148	itvp = SCARG(uap, itv);
1149	if (itvp &&
1150	    (error = copyin(itvp, &aitv, sizeof(struct itimerval)) != 0))
1151		return (error);
1152	if (SCARG(uap, oitv) != NULL) {
1153		SCARG(&getargs, which) = which;
1154		SCARG(&getargs, itv) = SCARG(uap, oitv);
1155		if ((error = sys_getitimer(l, &getargs, retval)) != 0)
1156			return (error);
1157	}
1158	if (itvp == 0)
1159		return (0);
1160
1161	return dosetitimer(p, which, &aitv);
1162}
1163
1164int
1165dosetitimer(struct proc *p, int which, struct itimerval *itvp)
1166{
1167#ifdef __HAVE_TIMECOUNTER
1168	struct timeval now;
1169#endif
1170	struct ptimer *pt;
1171	int s;
1172
1173	if (itimerfix(&itvp->it_value) || itimerfix(&itvp->it_interval))
1174		return (EINVAL);
1175
1176	/*
1177	 * Don't bother allocating data structures if the process just
1178	 * wants to clear the timer.
1179	 */
1180	if (!timerisset(&itvp->it_value) &&
1181	    ((p->p_timers == NULL) ||(p->p_timers->pts_timers[which] == NULL)))
1182		return (0);
1183
1184	if (p->p_timers == NULL)
1185		timers_alloc(p);
1186	if (p->p_timers->pts_timers[which] == NULL) {
1187		pt = pool_get(&ptimer_pool, PR_WAITOK);
1188		pt->pt_ev.sigev_notify = SIGEV_SIGNAL;
1189		pt->pt_ev.sigev_value.sival_int = which;
1190		pt->pt_overruns = 0;
1191		pt->pt_proc = p;
1192		pt->pt_type = which;
1193		pt->pt_entry = which;
1194		switch (which) {
1195		case ITIMER_REAL:
1196			callout_init(&pt->pt_ch, 0);
1197			pt->pt_ev.sigev_signo = SIGALRM;
1198			break;
1199		case ITIMER_VIRTUAL:
1200			pt->pt_active = 0;
1201			pt->pt_ev.sigev_signo = SIGVTALRM;
1202			break;
1203		case ITIMER_PROF:
1204			pt->pt_active = 0;
1205			pt->pt_ev.sigev_signo = SIGPROF;
1206			break;
1207		}
1208	} else
1209		pt = p->p_timers->pts_timers[which];
1210
1211	pt->pt_time = *itvp;
1212	p->p_timers->pts_timers[which] = pt;
1213
1214	s = splclock();
1215	if ((which == ITIMER_REAL) && timerisset(&pt->pt_time.it_value)) {
1216		/* Convert to absolute time */
1217#ifdef __HAVE_TIMECOUNTER
1218		/* XXX need to wrap in splclock for timecounters case? */
1219		getmicrotime(&now);
1220		timeradd(&pt->pt_time.it_value, &now, &pt->pt_time.it_value);
1221#else /* !__HAVE_TIMECOUNTER */
1222		timeradd(&pt->pt_time.it_value, &time, &pt->pt_time.it_value);
1223#endif /* !__HAVE_TIMECOUNTER */
1224	}
1225	timer_settime(pt);
1226	splx(s);
1227
1228	return (0);
1229}
1230
1231/* Utility routines to manage the array of pointers to timers. */
1232void
1233timers_alloc(struct proc *p)
1234{
1235	int i;
1236	struct ptimers *pts;
1237
1238	pts = pool_get(&ptimers_pool, PR_WAITOK);
1239	LIST_INIT(&pts->pts_virtual);
1240	LIST_INIT(&pts->pts_prof);
1241	for (i = 0; i < TIMER_MAX; i++)
1242		pts->pts_timers[i] = NULL;
1243	pts->pts_fired = 0;
1244	p->p_timers = pts;
1245}
1246
1247/*
1248 * Clean up the per-process timers. If "which" is set to TIMERS_ALL,
1249 * then clean up all timers and free all the data structures. If
1250 * "which" is set to TIMERS_POSIX, only clean up the timers allocated
1251 * by timer_create(), not the BSD setitimer() timers, and only free the
1252 * structure if none of those remain.
1253 */
1254void
1255timers_free(struct proc *p, int which)
1256{
1257	int i, s;
1258	struct ptimers *pts;
1259	struct ptimer *pt, *ptn;
1260	struct timeval tv;
1261
1262	if (p->p_timers) {
1263		pts = p->p_timers;
1264		if (which == TIMERS_ALL)
1265			i = 0;
1266		else {
1267			s = splclock();
1268			timerclear(&tv);
1269			for (ptn = LIST_FIRST(&p->p_timers->pts_virtual);
1270			     ptn && ptn != pts->pts_timers[ITIMER_VIRTUAL];
1271			     ptn = LIST_NEXT(ptn, pt_list))
1272				timeradd(&tv, &ptn->pt_time.it_value, &tv);
1273			LIST_FIRST(&p->p_timers->pts_virtual) = NULL;
1274			if (ptn) {
1275				timeradd(&tv, &ptn->pt_time.it_value,
1276				    &ptn->pt_time.it_value);
1277				LIST_INSERT_HEAD(&p->p_timers->pts_virtual,
1278				    ptn, pt_list);
1279			}
1280
1281			timerclear(&tv);
1282			for (ptn = LIST_FIRST(&p->p_timers->pts_prof);
1283			     ptn && ptn != pts->pts_timers[ITIMER_PROF];
1284			     ptn = LIST_NEXT(ptn, pt_list))
1285				timeradd(&tv, &ptn->pt_time.it_value, &tv);
1286			LIST_FIRST(&p->p_timers->pts_prof) = NULL;
1287			if (ptn) {
1288				timeradd(&tv, &ptn->pt_time.it_value,
1289				    &ptn->pt_time.it_value);
1290				LIST_INSERT_HEAD(&p->p_timers->pts_prof, ptn,
1291				    pt_list);
1292			}
1293			splx(s);
1294			i = 3;
1295		}
1296		for ( ; i < TIMER_MAX; i++)
1297			if ((pt = pts->pts_timers[i]) != NULL) {
1298				if (pt->pt_type == CLOCK_REALTIME) {
1299					callout_stop(&pt->pt_ch);
1300					callout_destroy(&pt->pt_ch);
1301				}
1302				pts->pts_timers[i] = NULL;
1303				pool_put(&ptimer_pool, pt);
1304			}
1305		if ((pts->pts_timers[0] == NULL) &&
1306		    (pts->pts_timers[1] == NULL) &&
1307		    (pts->pts_timers[2] == NULL)) {
1308			p->p_timers = NULL;
1309			pool_put(&ptimers_pool, pts);
1310		}
1311	}
1312}
1313
1314/*
1315 * Decrement an interval timer by a specified number
1316 * of microseconds, which must be less than a second,
1317 * i.e. < 1000000.  If the timer expires, then reload
1318 * it.  In this case, carry over (usec - old value) to
1319 * reduce the value reloaded into the timer so that
1320 * the timer does not drift.  This routine assumes
1321 * that it is called in a context where the timers
1322 * on which it is operating cannot change in value.
1323 */
1324int
1325itimerdecr(struct ptimer *pt, int usec)
1326{
1327	struct itimerval *itp;
1328
1329	itp = &pt->pt_time;
1330	if (itp->it_value.tv_usec < usec) {
1331		if (itp->it_value.tv_sec == 0) {
1332			/* expired, and already in next interval */
1333			usec -= itp->it_value.tv_usec;
1334			goto expire;
1335		}
1336		itp->it_value.tv_usec += 1000000;
1337		itp->it_value.tv_sec--;
1338	}
1339	itp->it_value.tv_usec -= usec;
1340	usec = 0;
1341	if (timerisset(&itp->it_value))
1342		return (1);
1343	/* expired, exactly at end of interval */
1344expire:
1345	if (timerisset(&itp->it_interval)) {
1346		itp->it_value = itp->it_interval;
1347		itp->it_value.tv_usec -= usec;
1348		if (itp->it_value.tv_usec < 0) {
1349			itp->it_value.tv_usec += 1000000;
1350			itp->it_value.tv_sec--;
1351		}
1352		timer_settime(pt);
1353	} else
1354		itp->it_value.tv_usec = 0;		/* sec is already 0 */
1355	return (0);
1356}
1357
1358void
1359itimerfire(struct ptimer *pt)
1360{
1361	struct proc *p = pt->pt_proc;
1362
1363	if (pt->pt_ev.sigev_notify == SIGEV_SIGNAL) {
1364		/*
1365		 * No RT signal infrastructure exists at this time;
1366		 * just post the signal number and throw away the
1367		 * value.
1368		 */
1369		if (sigismember(&p->p_sigpend.sp_set, pt->pt_ev.sigev_signo))
1370			pt->pt_overruns++;
1371		else {
1372			ksiginfo_t ksi;
1373			KSI_INIT(&ksi);
1374			ksi.ksi_signo = pt->pt_ev.sigev_signo;
1375			ksi.ksi_code = SI_TIMER;
1376			ksi.ksi_value = pt->pt_ev.sigev_value;
1377			pt->pt_poverruns = pt->pt_overruns;
1378			pt->pt_overruns = 0;
1379			mutex_enter(&proclist_mutex);
1380			kpsignal(p, &ksi, NULL);
1381			mutex_exit(&proclist_mutex);
1382		}
1383	}
1384}
1385
1386/*
1387 * ratecheck(): simple time-based rate-limit checking.  see ratecheck(9)
1388 * for usage and rationale.
1389 */
1390int
1391ratecheck(struct timeval *lasttime, const struct timeval *mininterval)
1392{
1393	struct timeval tv, delta;
1394	int rv = 0;
1395#ifndef __HAVE_TIMECOUNTER
1396	int s;
1397#endif
1398
1399#ifdef __HAVE_TIMECOUNTER
1400	getmicrouptime(&tv);
1401#else /* !__HAVE_TIMECOUNTER */
1402	s = splclock();
1403	tv = mono_time;
1404	splx(s);
1405#endif /* !__HAVE_TIMECOUNTER */
1406	timersub(&tv, lasttime, &delta);
1407
1408	/*
1409	 * check for 0,0 is so that the message will be seen at least once,
1410	 * even if interval is huge.
1411	 */
1412	if (timercmp(&delta, mininterval, >=) ||
1413	    (lasttime->tv_sec == 0 && lasttime->tv_usec == 0)) {
1414		*lasttime = tv;
1415		rv = 1;
1416	}
1417
1418	return (rv);
1419}
1420
1421/*
1422 * ppsratecheck(): packets (or events) per second limitation.
1423 */
1424int
1425ppsratecheck(struct timeval *lasttime, int *curpps, int maxpps)
1426{
1427	struct timeval tv, delta;
1428	int rv;
1429#ifndef __HAVE_TIMECOUNTER
1430	int s;
1431#endif
1432
1433#ifdef __HAVE_TIMECOUNTER
1434	getmicrouptime(&tv);
1435#else /* !__HAVE_TIMECOUNTER */
1436	s = splclock();
1437	tv = mono_time;
1438	splx(s);
1439#endif /* !__HAVE_TIMECOUNTER */
1440	timersub(&tv, lasttime, &delta);
1441
1442	/*
1443	 * check for 0,0 is so that the message will be seen at least once.
1444	 * if more than one second have passed since the last update of
1445	 * lasttime, reset the counter.
1446	 *
1447	 * we do increment *curpps even in *curpps < maxpps case, as some may
1448	 * try to use *curpps for stat purposes as well.
1449	 */
1450	if ((lasttime->tv_sec == 0 && lasttime->tv_usec == 0) ||
1451	    delta.tv_sec >= 1) {
1452		*lasttime = tv;
1453		*curpps = 0;
1454	}
1455	if (maxpps < 0)
1456		rv = 1;
1457	else if (*curpps < maxpps)
1458		rv = 1;
1459	else
1460		rv = 0;
1461
1462#if 1 /*DIAGNOSTIC?*/
1463	/* be careful about wrap-around */
1464	if (*curpps + 1 > *curpps)
1465		*curpps = *curpps + 1;
1466#else
1467	/*
1468	 * assume that there's not too many calls to this function.
1469	 * not sure if the assumption holds, as it depends on *caller's*
1470	 * behavior, not the behavior of this function.
1471	 * IMHO it is wrong to make assumption on the caller's behavior,
1472	 * so the above #if is #if 1, not #ifdef DIAGNOSTIC.
1473	 */
1474	*curpps = *curpps + 1;
1475#endif
1476
1477	return (rv);
1478}
1479