kern_time.c revision 1.171
1/*	$NetBSD: kern_time.c,v 1.171 2011/12/18 22:30:25 christos Exp $	*/
2
3/*-
4 * Copyright (c) 2000, 2004, 2005, 2007, 2008, 2009 The NetBSD Foundation, Inc.
5 * All rights reserved.
6 *
7 * This code is derived from software contributed to The NetBSD Foundation
8 * by Christopher G. Demetriou, and by Andrew Doran.
9 *
10 * Redistribution and use in source and binary forms, with or without
11 * modification, are permitted provided that the following conditions
12 * are met:
13 * 1. Redistributions of source code must retain the above copyright
14 *    notice, this list of conditions and the following disclaimer.
15 * 2. Redistributions in binary form must reproduce the above copyright
16 *    notice, this list of conditions and the following disclaimer in the
17 *    documentation and/or other materials provided with the distribution.
18 *
19 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
20 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
21 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
22 * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
23 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
24 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
25 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
26 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
27 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
28 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
29 * POSSIBILITY OF SUCH DAMAGE.
30 */
31
32/*
33 * Copyright (c) 1982, 1986, 1989, 1993
34 *	The Regents of the University of California.  All rights reserved.
35 *
36 * Redistribution and use in source and binary forms, with or without
37 * modification, are permitted provided that the following conditions
38 * are met:
39 * 1. Redistributions of source code must retain the above copyright
40 *    notice, this list of conditions and the following disclaimer.
41 * 2. Redistributions in binary form must reproduce the above copyright
42 *    notice, this list of conditions and the following disclaimer in the
43 *    documentation and/or other materials provided with the distribution.
44 * 3. Neither the name of the University nor the names of its contributors
45 *    may be used to endorse or promote products derived from this software
46 *    without specific prior written permission.
47 *
48 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
49 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
50 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
51 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
52 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
53 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
54 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
55 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
56 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
57 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
58 * SUCH DAMAGE.
59 *
60 *	@(#)kern_time.c	8.4 (Berkeley) 5/26/95
61 */
62
63#include <sys/cdefs.h>
64__KERNEL_RCSID(0, "$NetBSD: kern_time.c,v 1.171 2011/12/18 22:30:25 christos Exp $");
65
66#include <sys/param.h>
67#include <sys/resourcevar.h>
68#include <sys/kernel.h>
69#include <sys/systm.h>
70#include <sys/proc.h>
71#include <sys/vnode.h>
72#include <sys/signalvar.h>
73#include <sys/syslog.h>
74#include <sys/timetc.h>
75#include <sys/timex.h>
76#include <sys/kauth.h>
77#include <sys/mount.h>
78#include <sys/sa.h>
79#include <sys/savar.h>
80#include <sys/syscallargs.h>
81#include <sys/cpu.h>
82
83#include "opt_sa.h"
84
85static void	timer_intr(void *);
86static void	itimerfire(struct ptimer *);
87static void	itimerfree(struct ptimers *, int);
88
89kmutex_t	timer_lock;
90
91static void	*timer_sih;
92static TAILQ_HEAD(, ptimer) timer_queue;
93
94struct pool ptimer_pool, ptimers_pool;
95
/*
 * True when clockid is CLOCK_VIRTUAL or CLOCK_PROF.  Timers on these
 * clocks are kept on the per-process pts_virtual/pts_prof lists instead
 * of being driven by a callout (see timer_settime()).
 */
#define	CLOCK_VIRTUAL_P(clockid)					\
	(((clockid) == CLOCK_VIRTUAL) || ((clockid) == CLOCK_PROF))
98
99CTASSERT(ITIMER_REAL == CLOCK_REALTIME);
100CTASSERT(ITIMER_VIRTUAL == CLOCK_VIRTUAL);
101CTASSERT(ITIMER_PROF == CLOCK_PROF);
102CTASSERT(ITIMER_MONOTONIC == CLOCK_MONOTONIC);
103
104/*
105 * Initialize timekeeping.
106 */
107void
108time_init(void)
109{
110
111	pool_init(&ptimer_pool, sizeof(struct ptimer), 0, 0, 0, "ptimerpl",
112	    &pool_allocator_nointr, IPL_NONE);
113	pool_init(&ptimers_pool, sizeof(struct ptimers), 0, 0, 0, "ptimerspl",
114	    &pool_allocator_nointr, IPL_NONE);
115}
116
117void
118time_init2(void)
119{
120
121	TAILQ_INIT(&timer_queue);
122	mutex_init(&timer_lock, MUTEX_DEFAULT, IPL_SCHED);
123	timer_sih = softint_establish(SOFTINT_CLOCK | SOFTINT_MPSAFE,
124	    timer_intr, NULL);
125}
126
127/* Time of day and interval timer support.
128 *
129 * These routines provide the kernel entry points to get and set
130 * the time-of-day and per-process interval timers.  Subroutines
131 * here provide support for adding and subtracting timeval structures
132 * and decrementing interval timers, optionally reloading the interval
133 * timers when they expire.
134 */
135
136/* This function is used by clock_settime and settimeofday */
137static int
138settime1(struct proc *p, const struct timespec *ts, bool check_kauth)
139{
140	struct timespec delta, now;
141	int s;
142
143	/* WHAT DO WE DO ABOUT PENDING REAL-TIME TIMEOUTS??? */
144	s = splclock();
145	nanotime(&now);
146	timespecsub(ts, &now, &delta);
147
148	if (check_kauth && kauth_authorize_system(kauth_cred_get(),
149	    KAUTH_SYSTEM_TIME, KAUTH_REQ_SYSTEM_TIME_SYSTEM, __UNCONST(ts),
150	    &delta, KAUTH_ARG(check_kauth ? false : true)) != 0) {
151		splx(s);
152		return (EPERM);
153	}
154
155#ifdef notyet
156	if ((delta.tv_sec < 86400) && securelevel > 0) { /* XXX elad - notyet */
157		splx(s);
158		return (EPERM);
159	}
160#endif
161
162	tc_setclock(ts);
163
164	timespecadd(&boottime, &delta, &boottime);
165
166	resettodr();
167	splx(s);
168
169	return (0);
170}
171
172int
173settime(struct proc *p, struct timespec *ts)
174{
175	return (settime1(p, ts, true));
176}
177
178/* ARGSUSED */
179int
180sys___clock_gettime50(struct lwp *l,
181    const struct sys___clock_gettime50_args *uap, register_t *retval)
182{
183	/* {
184		syscallarg(clockid_t) clock_id;
185		syscallarg(struct timespec *) tp;
186	} */
187	int error;
188	struct timespec ats;
189
190	error = clock_gettime1(SCARG(uap, clock_id), &ats);
191	if (error != 0)
192		return error;
193
194	return copyout(&ats, SCARG(uap, tp), sizeof(ats));
195}
196
197int
198clock_gettime1(clockid_t clock_id, struct timespec *ts)
199{
200
201	switch (clock_id) {
202	case CLOCK_REALTIME:
203		nanotime(ts);
204		break;
205	case CLOCK_MONOTONIC:
206		nanouptime(ts);
207		break;
208	default:
209		return EINVAL;
210	}
211
212	return 0;
213}
214
215/* ARGSUSED */
216int
217sys___clock_settime50(struct lwp *l,
218    const struct sys___clock_settime50_args *uap, register_t *retval)
219{
220	/* {
221		syscallarg(clockid_t) clock_id;
222		syscallarg(const struct timespec *) tp;
223	} */
224	int error;
225	struct timespec ats;
226
227	if ((error = copyin(SCARG(uap, tp), &ats, sizeof(ats))) != 0)
228		return error;
229
230	return clock_settime1(l->l_proc, SCARG(uap, clock_id), &ats, true);
231}
232
233
234int
235clock_settime1(struct proc *p, clockid_t clock_id, const struct timespec *tp,
236    bool check_kauth)
237{
238	int error;
239
240	switch (clock_id) {
241	case CLOCK_REALTIME:
242		if ((error = settime1(p, tp, check_kauth)) != 0)
243			return (error);
244		break;
245	case CLOCK_MONOTONIC:
246		return (EINVAL);	/* read-only clock */
247	default:
248		return (EINVAL);
249	}
250
251	return 0;
252}
253
254int
255sys___clock_getres50(struct lwp *l, const struct sys___clock_getres50_args *uap,
256    register_t *retval)
257{
258	/* {
259		syscallarg(clockid_t) clock_id;
260		syscallarg(struct timespec *) tp;
261	} */
262	struct timespec ts;
263	int error = 0;
264
265	if ((error = clock_getres1(SCARG(uap, clock_id), &ts)) != 0)
266		return error;
267
268	if (SCARG(uap, tp))
269		error = copyout(&ts, SCARG(uap, tp), sizeof(ts));
270
271	return error;
272}
273
274int
275clock_getres1(clockid_t clock_id, struct timespec *ts)
276{
277
278	switch (clock_id) {
279	case CLOCK_REALTIME:
280	case CLOCK_MONOTONIC:
281		ts->tv_sec = 0;
282		if (tc_getfrequency() > 1000000000)
283			ts->tv_nsec = 1;
284		else
285			ts->tv_nsec = 1000000000 / tc_getfrequency();
286		break;
287	default:
288		return EINVAL;
289	}
290
291	return 0;
292}
293
294/* ARGSUSED */
295int
296sys___nanosleep50(struct lwp *l, const struct sys___nanosleep50_args *uap,
297    register_t *retval)
298{
299	/* {
300		syscallarg(struct timespec *) rqtp;
301		syscallarg(struct timespec *) rmtp;
302	} */
303	struct timespec rmt, rqt;
304	int error, error1;
305
306	error = copyin(SCARG(uap, rqtp), &rqt, sizeof(struct timespec));
307	if (error)
308		return (error);
309
310	error = nanosleep1(l, &rqt, SCARG(uap, rmtp) ? &rmt : NULL);
311	if (SCARG(uap, rmtp) == NULL || (error != 0 && error != EINTR))
312		return error;
313
314	error1 = copyout(&rmt, SCARG(uap, rmtp), sizeof(rmt));
315	return error1 ? error1 : error;
316}
317
318int
319nanosleep1(struct lwp *l, struct timespec *rqt, struct timespec *rmt)
320{
321	struct timespec rmtstart;
322	int error, timo;
323
324	if ((error = itimespecfix(rqt)) != 0)
325		return error;
326
327	timo = tstohz(rqt);
328	/*
329	 * Avoid inadvertantly sleeping forever
330	 */
331	if (timo == 0)
332		timo = 1;
333	getnanouptime(&rmtstart);
334again:
335	error = kpause("nanoslp", true, timo, NULL);
336	if (rmt != NULL || error == 0) {
337		struct timespec rmtend;
338		struct timespec t0;
339		struct timespec *t;
340
341		getnanouptime(&rmtend);
342		t = (rmt != NULL) ? rmt : &t0;
343		timespecsub(&rmtend, &rmtstart, t);
344		timespecsub(rqt, t, t);
345		if (t->tv_sec < 0)
346			timespecclear(t);
347		if (error == 0) {
348			timo = tstohz(t);
349			if (timo > 0)
350				goto again;
351		}
352	}
353
354	if (error == ERESTART)
355		error = EINTR;
356	if (error == EWOULDBLOCK)
357		error = 0;
358
359	return error;
360}
361
362/* ARGSUSED */
363int
364sys___gettimeofday50(struct lwp *l, const struct sys___gettimeofday50_args *uap,
365    register_t *retval)
366{
367	/* {
368		syscallarg(struct timeval *) tp;
369		syscallarg(void *) tzp;		really "struct timezone *";
370	} */
371	struct timeval atv;
372	int error = 0;
373	struct timezone tzfake;
374
375	if (SCARG(uap, tp)) {
376		microtime(&atv);
377		error = copyout(&atv, SCARG(uap, tp), sizeof(atv));
378		if (error)
379			return (error);
380	}
381	if (SCARG(uap, tzp)) {
382		/*
383		 * NetBSD has no kernel notion of time zone, so we just
384		 * fake up a timezone struct and return it if demanded.
385		 */
386		tzfake.tz_minuteswest = 0;
387		tzfake.tz_dsttime = 0;
388		error = copyout(&tzfake, SCARG(uap, tzp), sizeof(tzfake));
389	}
390	return (error);
391}
392
393/* ARGSUSED */
394int
395sys___settimeofday50(struct lwp *l, const struct sys___settimeofday50_args *uap,
396    register_t *retval)
397{
398	/* {
399		syscallarg(const struct timeval *) tv;
400		syscallarg(const void *) tzp; really "const struct timezone *";
401	} */
402
403	return settimeofday1(SCARG(uap, tv), true, SCARG(uap, tzp), l, true);
404}
405
406int
407settimeofday1(const struct timeval *utv, bool userspace,
408    const void *utzp, struct lwp *l, bool check_kauth)
409{
410	struct timeval atv;
411	struct timespec ts;
412	int error;
413
414	/* Verify all parameters before changing time. */
415
416	/*
417	 * NetBSD has no kernel notion of time zone, and only an
418	 * obsolete program would try to set it, so we log a warning.
419	 */
420	if (utzp)
421		log(LOG_WARNING, "pid %d attempted to set the "
422		    "(obsolete) kernel time zone\n", l->l_proc->p_pid);
423
424	if (utv == NULL)
425		return 0;
426
427	if (userspace) {
428		if ((error = copyin(utv, &atv, sizeof(atv))) != 0)
429			return error;
430		utv = &atv;
431	}
432
433	TIMEVAL_TO_TIMESPEC(utv, &ts);
434	return settime1(l->l_proc, &ts, check_kauth);
435}
436
437int	time_adjusted;			/* set if an adjustment is made */
438
439/* ARGSUSED */
440int
441sys___adjtime50(struct lwp *l, const struct sys___adjtime50_args *uap,
442    register_t *retval)
443{
444	/* {
445		syscallarg(const struct timeval *) delta;
446		syscallarg(struct timeval *) olddelta;
447	} */
448	int error = 0;
449	struct timeval atv, oldatv;
450
451	if ((error = kauth_authorize_system(l->l_cred, KAUTH_SYSTEM_TIME,
452	    KAUTH_REQ_SYSTEM_TIME_ADJTIME, NULL, NULL, NULL)) != 0)
453		return error;
454
455	if (SCARG(uap, delta)) {
456		error = copyin(SCARG(uap, delta), &atv,
457		    sizeof(*SCARG(uap, delta)));
458		if (error)
459			return (error);
460	}
461	adjtime1(SCARG(uap, delta) ? &atv : NULL,
462	    SCARG(uap, olddelta) ? &oldatv : NULL, l->l_proc);
463	if (SCARG(uap, olddelta))
464		error = copyout(&oldatv, SCARG(uap, olddelta),
465		    sizeof(*SCARG(uap, olddelta)));
466	return error;
467}
468
469void
470adjtime1(const struct timeval *delta, struct timeval *olddelta, struct proc *p)
471{
472	extern int64_t time_adjtime;  /* in kern_ntptime.c */
473
474	if (olddelta) {
475		mutex_spin_enter(&timecounter_lock);
476		olddelta->tv_sec = time_adjtime / 1000000;
477		olddelta->tv_usec = time_adjtime % 1000000;
478		if (olddelta->tv_usec < 0) {
479			olddelta->tv_usec += 1000000;
480			olddelta->tv_sec--;
481		}
482		mutex_spin_exit(&timecounter_lock);
483	}
484
485	if (delta) {
486		mutex_spin_enter(&timecounter_lock);
487		time_adjtime = delta->tv_sec * 1000000 + delta->tv_usec;
488
489		if (time_adjtime) {
490			/* We need to save the system time during shutdown */
491			time_adjusted |= 1;
492		}
493		mutex_spin_exit(&timecounter_lock);
494	}
495}
496
497/*
498 * Interval timer support. Both the BSD getitimer() family and the POSIX
499 * timer_*() family of routines are supported.
500 *
501 * All timers are kept in an array pointed to by p_timers, which is
502 * allocated on demand - many processes don't use timers at all. The
503 * first four elements in this array are reserved for the BSD timers:
504 * element 0 is ITIMER_REAL, element 1 is ITIMER_VIRTUAL, element
505 * 2 is ITIMER_PROF, and element 3 is ITIMER_MONOTONIC. The rest may be
506 * allocated by the timer_create() syscall.
507 *
508 * Realtime timers are kept in the ptimer structure as an absolute
509 * time; virtual time timers are kept as a linked list of deltas.
510 * Virtual time timers are processed in the hardclock() routine of
511 * kern_clock.c.  The real time timer is processed by a callout
512 * routine, called from the softclock() routine.  Since a callout may
513 * be delayed in real time due to interrupt processing in the system,
514 * it is possible for the real time timeout routine (realtimerexpire,
515 * given below), to be delayed in real time past when it is supposed
516 * to occur.  It does not suffice, therefore, to reload the real timer
517 * .it_value from the real time timers .it_interval.  Rather, we
518 * compute the next time in absolute time the timer should go off.  */
519
520/* Allocate a POSIX realtime timer. */
521int
522sys_timer_create(struct lwp *l, const struct sys_timer_create_args *uap,
523    register_t *retval)
524{
525	/* {
526		syscallarg(clockid_t) clock_id;
527		syscallarg(struct sigevent *) evp;
528		syscallarg(timer_t *) timerid;
529	} */
530
531	return timer_create1(SCARG(uap, timerid), SCARG(uap, clock_id),
532	    SCARG(uap, evp), copyin, l);
533}
534
535int
536timer_create1(timer_t *tid, clockid_t id, struct sigevent *evp,
537    copyin_t fetch_event, struct lwp *l)
538{
539	int error;
540	timer_t timerid;
541	struct ptimers *pts;
542	struct ptimer *pt;
543	struct proc *p;
544
545	p = l->l_proc;
546
547	if ((u_int)id > CLOCK_MONOTONIC)
548		return (EINVAL);
549
550	if ((pts = p->p_timers) == NULL)
551		pts = timers_alloc(p);
552
553	pt = pool_get(&ptimer_pool, PR_WAITOK);
554	if (evp != NULL) {
555		if (((error =
556		    (*fetch_event)(evp, &pt->pt_ev, sizeof(pt->pt_ev))) != 0) ||
557		    ((pt->pt_ev.sigev_notify < SIGEV_NONE) ||
558			(pt->pt_ev.sigev_notify > SIGEV_SA)) ||
559			(pt->pt_ev.sigev_notify == SIGEV_SIGNAL &&
560			 (pt->pt_ev.sigev_signo <= 0 ||
561			  pt->pt_ev.sigev_signo >= NSIG))) {
562			pool_put(&ptimer_pool, pt);
563			return (error ? error : EINVAL);
564		}
565	}
566
567	/* Find a free timer slot, skipping those reserved for setitimer(). */
568	mutex_spin_enter(&timer_lock);
569	for (timerid = 3; timerid < TIMER_MAX; timerid++)
570		if (pts->pts_timers[timerid] == NULL)
571			break;
572	if (timerid == TIMER_MAX) {
573		mutex_spin_exit(&timer_lock);
574		pool_put(&ptimer_pool, pt);
575		return EAGAIN;
576	}
577	if (evp == NULL) {
578		pt->pt_ev.sigev_notify = SIGEV_SIGNAL;
579		switch (id) {
580		case CLOCK_REALTIME:
581		case CLOCK_MONOTONIC:
582			pt->pt_ev.sigev_signo = SIGALRM;
583			break;
584		case CLOCK_VIRTUAL:
585			pt->pt_ev.sigev_signo = SIGVTALRM;
586			break;
587		case CLOCK_PROF:
588			pt->pt_ev.sigev_signo = SIGPROF;
589			break;
590		}
591		pt->pt_ev.sigev_value.sival_int = timerid;
592	}
593	pt->pt_info.ksi_signo = pt->pt_ev.sigev_signo;
594	pt->pt_info.ksi_errno = 0;
595	pt->pt_info.ksi_code = 0;
596	pt->pt_info.ksi_pid = p->p_pid;
597	pt->pt_info.ksi_uid = kauth_cred_getuid(l->l_cred);
598	pt->pt_info.ksi_value = pt->pt_ev.sigev_value;
599	pt->pt_type = id;
600	pt->pt_proc = p;
601	pt->pt_overruns = 0;
602	pt->pt_poverruns = 0;
603	pt->pt_entry = timerid;
604	pt->pt_queued = false;
605	timespecclear(&pt->pt_time.it_value);
606	if (!CLOCK_VIRTUAL_P(id))
607		callout_init(&pt->pt_ch, CALLOUT_MPSAFE);
608	else
609		pt->pt_active = 0;
610
611	pts->pts_timers[timerid] = pt;
612	mutex_spin_exit(&timer_lock);
613
614	return copyout(&timerid, tid, sizeof(timerid));
615}
616
617/* Delete a POSIX realtime timer */
618int
619sys_timer_delete(struct lwp *l, const struct sys_timer_delete_args *uap,
620    register_t *retval)
621{
622	/* {
623		syscallarg(timer_t) timerid;
624	} */
625	struct proc *p = l->l_proc;
626	timer_t timerid;
627	struct ptimers *pts;
628	struct ptimer *pt, *ptn;
629
630	timerid = SCARG(uap, timerid);
631	pts = p->p_timers;
632
633	if (pts == NULL || timerid < 2 || timerid >= TIMER_MAX)
634		return (EINVAL);
635
636	mutex_spin_enter(&timer_lock);
637	if ((pt = pts->pts_timers[timerid]) == NULL) {
638		mutex_spin_exit(&timer_lock);
639		return (EINVAL);
640	}
641	if (CLOCK_VIRTUAL_P(pt->pt_type)) {
642		if (pt->pt_active) {
643			ptn = LIST_NEXT(pt, pt_list);
644			LIST_REMOVE(pt, pt_list);
645			for ( ; ptn; ptn = LIST_NEXT(ptn, pt_list))
646				timespecadd(&pt->pt_time.it_value,
647				    &ptn->pt_time.it_value,
648				    &ptn->pt_time.it_value);
649			pt->pt_active = 0;
650		}
651	}
652	itimerfree(pts, timerid);
653
654	return (0);
655}
656
657/*
658 * Set up the given timer. The value in pt->pt_time.it_value is taken
659 * to be an absolute time for CLOCK_REALTIME/CLOCK_MONOTONIC timers and
660 * a relative time for CLOCK_VIRTUAL/CLOCK_PROF timers.
661 */
662void
663timer_settime(struct ptimer *pt)
664{
665	struct ptimer *ptn, *pptn;
666	struct ptlist *ptl;
667
668	KASSERT(mutex_owned(&timer_lock));
669
670	if (!CLOCK_VIRTUAL_P(pt->pt_type)) {
671		callout_halt(&pt->pt_ch, &timer_lock);
672		if (timespecisset(&pt->pt_time.it_value)) {
673			/*
674			 * Don't need to check tshzto() return value, here.
675			 * callout_reset() does it for us.
676			 */
677			callout_reset(&pt->pt_ch,
678			    pt->pt_type == CLOCK_MONOTONIC ?
679			    tshztoup(&pt->pt_time.it_value) :
680			    tshzto(&pt->pt_time.it_value),
681			    realtimerexpire, pt);
682		}
683	} else {
684		if (pt->pt_active) {
685			ptn = LIST_NEXT(pt, pt_list);
686			LIST_REMOVE(pt, pt_list);
687			for ( ; ptn; ptn = LIST_NEXT(ptn, pt_list))
688				timespecadd(&pt->pt_time.it_value,
689				    &ptn->pt_time.it_value,
690				    &ptn->pt_time.it_value);
691		}
692		if (timespecisset(&pt->pt_time.it_value)) {
693			if (pt->pt_type == CLOCK_VIRTUAL)
694				ptl = &pt->pt_proc->p_timers->pts_virtual;
695			else
696				ptl = &pt->pt_proc->p_timers->pts_prof;
697
698			for (ptn = LIST_FIRST(ptl), pptn = NULL;
699			     ptn && timespeccmp(&pt->pt_time.it_value,
700				 &ptn->pt_time.it_value, >);
701			     pptn = ptn, ptn = LIST_NEXT(ptn, pt_list))
702				timespecsub(&pt->pt_time.it_value,
703				    &ptn->pt_time.it_value,
704				    &pt->pt_time.it_value);
705
706			if (pptn)
707				LIST_INSERT_AFTER(pptn, pt, pt_list);
708			else
709				LIST_INSERT_HEAD(ptl, pt, pt_list);
710
711			for ( ; ptn ; ptn = LIST_NEXT(ptn, pt_list))
712				timespecsub(&ptn->pt_time.it_value,
713				    &pt->pt_time.it_value,
714				    &ptn->pt_time.it_value);
715
716			pt->pt_active = 1;
717		} else
718			pt->pt_active = 0;
719	}
720}
721
722void
723timer_gettime(struct ptimer *pt, struct itimerspec *aits)
724{
725	struct timespec now;
726	struct ptimer *ptn;
727
728	KASSERT(mutex_owned(&timer_lock));
729
730	*aits = pt->pt_time;
731	if (!CLOCK_VIRTUAL_P(pt->pt_type)) {
732		/*
733		 * Convert from absolute to relative time in .it_value
734		 * part of real time timer.  If time for real time
735		 * timer has passed return 0, else return difference
736		 * between current time and time for the timer to go
737		 * off.
738		 */
739		if (timespecisset(&aits->it_value)) {
740			if (pt->pt_type == CLOCK_REALTIME) {
741				getnanotime(&now);
742			} else { /* CLOCK_MONOTONIC */
743				getnanouptime(&now);
744			}
745			if (timespeccmp(&aits->it_value, &now, <))
746				timespecclear(&aits->it_value);
747			else
748				timespecsub(&aits->it_value, &now,
749				    &aits->it_value);
750		}
751	} else if (pt->pt_active) {
752		if (pt->pt_type == CLOCK_VIRTUAL)
753			ptn = LIST_FIRST(&pt->pt_proc->p_timers->pts_virtual);
754		else
755			ptn = LIST_FIRST(&pt->pt_proc->p_timers->pts_prof);
756		for ( ; ptn && ptn != pt; ptn = LIST_NEXT(ptn, pt_list))
757			timespecadd(&aits->it_value,
758			    &ptn->pt_time.it_value, &aits->it_value);
759		KASSERT(ptn != NULL); /* pt should be findable on the list */
760	} else
761		timespecclear(&aits->it_value);
762}
763
764
765
766/* Set and arm a POSIX realtime timer */
767int
768sys___timer_settime50(struct lwp *l,
769    const struct sys___timer_settime50_args *uap,
770    register_t *retval)
771{
772	/* {
773		syscallarg(timer_t) timerid;
774		syscallarg(int) flags;
775		syscallarg(const struct itimerspec *) value;
776		syscallarg(struct itimerspec *) ovalue;
777	} */
778	int error;
779	struct itimerspec value, ovalue, *ovp = NULL;
780
781	if ((error = copyin(SCARG(uap, value), &value,
782	    sizeof(struct itimerspec))) != 0)
783		return (error);
784
785	if (SCARG(uap, ovalue))
786		ovp = &ovalue;
787
788	if ((error = dotimer_settime(SCARG(uap, timerid), &value, ovp,
789	    SCARG(uap, flags), l->l_proc)) != 0)
790		return error;
791
792	if (ovp)
793		return copyout(&ovalue, SCARG(uap, ovalue),
794		    sizeof(struct itimerspec));
795	return 0;
796}
797
798int
799dotimer_settime(int timerid, struct itimerspec *value,
800    struct itimerspec *ovalue, int flags, struct proc *p)
801{
802	struct timespec now;
803	struct itimerspec val, oval;
804	struct ptimers *pts;
805	struct ptimer *pt;
806	int error;
807
808	pts = p->p_timers;
809
810	if (pts == NULL || timerid < 2 || timerid >= TIMER_MAX)
811		return EINVAL;
812	val = *value;
813	if ((error = itimespecfix(&val.it_value)) != 0 ||
814	    (error = itimespecfix(&val.it_interval)) != 0)
815		return error;
816
817	mutex_spin_enter(&timer_lock);
818	if ((pt = pts->pts_timers[timerid]) == NULL) {
819		mutex_spin_exit(&timer_lock);
820		return EINVAL;
821	}
822
823	oval = pt->pt_time;
824	pt->pt_time = val;
825
826	/*
827	 * If we've been passed a relative time for a realtime timer,
828	 * convert it to absolute; if an absolute time for a virtual
829	 * timer, convert it to relative and make sure we don't set it
830	 * to zero, which would cancel the timer, or let it go
831	 * negative, which would confuse the comparison tests.
832	 */
833	if (timespecisset(&pt->pt_time.it_value)) {
834		if (!CLOCK_VIRTUAL_P(pt->pt_type)) {
835			if ((flags & TIMER_ABSTIME) == 0) {
836				if (pt->pt_type == CLOCK_REALTIME) {
837					getnanotime(&now);
838				} else { /* CLOCK_MONOTONIC */
839					getnanouptime(&now);
840				}
841				timespecadd(&pt->pt_time.it_value, &now,
842				    &pt->pt_time.it_value);
843			}
844		} else {
845			if ((flags & TIMER_ABSTIME) != 0) {
846				getnanotime(&now);
847				timespecsub(&pt->pt_time.it_value, &now,
848				    &pt->pt_time.it_value);
849				if (!timespecisset(&pt->pt_time.it_value) ||
850				    pt->pt_time.it_value.tv_sec < 0) {
851					pt->pt_time.it_value.tv_sec = 0;
852					pt->pt_time.it_value.tv_nsec = 1;
853				}
854			}
855		}
856	}
857
858	timer_settime(pt);
859	mutex_spin_exit(&timer_lock);
860
861	if (ovalue)
862		*ovalue = oval;
863
864	return (0);
865}
866
867/* Return the time remaining until a POSIX timer fires. */
868int
869sys___timer_gettime50(struct lwp *l,
870    const struct sys___timer_gettime50_args *uap, register_t *retval)
871{
872	/* {
873		syscallarg(timer_t) timerid;
874		syscallarg(struct itimerspec *) value;
875	} */
876	struct itimerspec its;
877	int error;
878
879	if ((error = dotimer_gettime(SCARG(uap, timerid), l->l_proc,
880	    &its)) != 0)
881		return error;
882
883	return copyout(&its, SCARG(uap, value), sizeof(its));
884}
885
886int
887dotimer_gettime(int timerid, struct proc *p, struct itimerspec *its)
888{
889	struct ptimer *pt;
890	struct ptimers *pts;
891
892	pts = p->p_timers;
893	if (pts == NULL || timerid < 2 || timerid >= TIMER_MAX)
894		return (EINVAL);
895	mutex_spin_enter(&timer_lock);
896	if ((pt = pts->pts_timers[timerid]) == NULL) {
897		mutex_spin_exit(&timer_lock);
898		return (EINVAL);
899	}
900	timer_gettime(pt, its);
901	mutex_spin_exit(&timer_lock);
902
903	return 0;
904}
905
906/*
907 * Return the count of the number of times a periodic timer expired
908 * while a notification was already pending. The counter is reset when
909 * a timer expires and a notification can be posted.
910 */
911int
912sys_timer_getoverrun(struct lwp *l, const struct sys_timer_getoverrun_args *uap,
913    register_t *retval)
914{
915	/* {
916		syscallarg(timer_t) timerid;
917	} */
918	struct proc *p = l->l_proc;
919	struct ptimers *pts;
920	int timerid;
921	struct ptimer *pt;
922
923	timerid = SCARG(uap, timerid);
924
925	pts = p->p_timers;
926	if (pts == NULL || timerid < 2 || timerid >= TIMER_MAX)
927		return (EINVAL);
928	mutex_spin_enter(&timer_lock);
929	if ((pt = pts->pts_timers[timerid]) == NULL) {
930		mutex_spin_exit(&timer_lock);
931		return (EINVAL);
932	}
933	*retval = pt->pt_poverruns;
934	mutex_spin_exit(&timer_lock);
935
936	return (0);
937}
938
939#ifdef KERN_SA
940/* Glue function that triggers an upcall; called from userret(). */
941void
942timerupcall(struct lwp *l)
943{
944	struct ptimers *pt = l->l_proc->p_timers;
945	struct proc *p = l->l_proc;
946	unsigned int i, fired, done;
947
948	KDASSERT(l->l_proc->p_sa);
949	/* Bail out if we do not own the virtual processor */
950	if (l->l_savp->savp_lwp != l)
951		return ;
952
953	mutex_enter(p->p_lock);
954
955	fired = pt->pts_fired;
956	done = 0;
957	while ((i = ffs(fired)) != 0) {
958		siginfo_t *si;
959		int mask = 1 << --i;
960		int f;
961
962		f = ~l->l_pflag & LP_SA_NOBLOCK;
963		l->l_pflag |= LP_SA_NOBLOCK;
964		si = siginfo_alloc(PR_WAITOK);
965		si->_info = pt->pts_timers[i]->pt_info.ksi_info;
966		if (sa_upcall(l, SA_UPCALL_SIGEV | SA_UPCALL_DEFER, NULL, l,
967		    sizeof(*si), si, siginfo_free) != 0) {
968			siginfo_free(si);
969			/* XXX What do we do here?? */
970		} else
971			done |= mask;
972		fired &= ~mask;
973		l->l_pflag ^= f;
974	}
975	pt->pts_fired &= ~done;
976	if (pt->pts_fired == 0)
977		l->l_proc->p_timerpend = 0;
978
979	mutex_exit(p->p_lock);
980}
981#endif /* KERN_SA */
982
983/*
984 * Real interval timer expired:
985 * send process whose timer expired an alarm signal.
986 * If time is not set up to reload, then just return.
987 * Else compute next time timer should go off which is > current time.
988 * This is where delay in processing this timeout causes multiple
989 * SIGALRM calls to be compressed into one.
990 */
991void
992realtimerexpire(void *arg)
993{
994	uint64_t last_val, next_val, interval, now_ns;
995	struct timespec now, next;
996	struct ptimer *pt;
997	int backwards;
998
999	pt = arg;
1000
1001	mutex_spin_enter(&timer_lock);
1002	itimerfire(pt);
1003
1004	if (!timespecisset(&pt->pt_time.it_interval)) {
1005		timespecclear(&pt->pt_time.it_value);
1006		mutex_spin_exit(&timer_lock);
1007		return;
1008	}
1009
1010	if (pt->pt_type == CLOCK_MONOTONIC) {
1011		getnanouptime(&now);
1012	} else {
1013		getnanotime(&now);
1014	}
1015	backwards = (timespeccmp(&pt->pt_time.it_value, &now, >));
1016	timespecadd(&pt->pt_time.it_value, &pt->pt_time.it_interval, &next);
1017	/* Handle the easy case of non-overflown timers first. */
1018	if (!backwards && timespeccmp(&next, &now, >)) {
1019		pt->pt_time.it_value = next;
1020	} else {
1021		now_ns = timespec2ns(&now);
1022		last_val = timespec2ns(&pt->pt_time.it_value);
1023		interval = timespec2ns(&pt->pt_time.it_interval);
1024
1025		next_val = now_ns +
1026		    (now_ns - last_val + interval - 1) % interval;
1027
1028		if (backwards)
1029			next_val += interval;
1030		else
1031			pt->pt_overruns += (now_ns - last_val) / interval;
1032
1033		pt->pt_time.it_value.tv_sec = next_val / 1000000000;
1034		pt->pt_time.it_value.tv_nsec = next_val % 1000000000;
1035	}
1036
1037	/*
1038	 * Don't need to check tshzto() return value, here.
1039	 * callout_reset() does it for us.
1040	 */
1041	callout_reset(&pt->pt_ch, pt->pt_type == CLOCK_MONOTONIC ?
1042	    tshztoup(&pt->pt_time.it_value) : tshzto(&pt->pt_time.it_value),
1043	    realtimerexpire, pt);
1044	mutex_spin_exit(&timer_lock);
1045}
1046
1047/* BSD routine to get the value of an interval timer. */
1048/* ARGSUSED */
1049int
1050sys___getitimer50(struct lwp *l, const struct sys___getitimer50_args *uap,
1051    register_t *retval)
1052{
1053	/* {
1054		syscallarg(int) which;
1055		syscallarg(struct itimerval *) itv;
1056	} */
1057	struct proc *p = l->l_proc;
1058	struct itimerval aitv;
1059	int error;
1060
1061	error = dogetitimer(p, SCARG(uap, which), &aitv);
1062	if (error)
1063		return error;
1064	return (copyout(&aitv, SCARG(uap, itv), sizeof(struct itimerval)));
1065}
1066
1067int
1068dogetitimer(struct proc *p, int which, struct itimerval *itvp)
1069{
1070	struct ptimers *pts;
1071	struct ptimer *pt;
1072	struct itimerspec its;
1073
1074	if ((u_int)which > ITIMER_MONOTONIC)
1075		return (EINVAL);
1076
1077	mutex_spin_enter(&timer_lock);
1078	pts = p->p_timers;
1079	if (pts == NULL || (pt = pts->pts_timers[which]) == NULL) {
1080		timerclear(&itvp->it_value);
1081		timerclear(&itvp->it_interval);
1082	} else {
1083		timer_gettime(pt, &its);
1084		TIMESPEC_TO_TIMEVAL(&itvp->it_value, &its.it_value);
1085		TIMESPEC_TO_TIMEVAL(&itvp->it_interval, &its.it_interval);
1086	}
1087	mutex_spin_exit(&timer_lock);
1088
1089	return 0;
1090}
1091
1092/* BSD routine to set/arm an interval timer. */
1093/* ARGSUSED */
1094int
1095sys___setitimer50(struct lwp *l, const struct sys___setitimer50_args *uap,
1096    register_t *retval)
1097{
1098	/* {
1099		syscallarg(int) which;
1100		syscallarg(const struct itimerval *) itv;
1101		syscallarg(struct itimerval *) oitv;
1102	} */
1103	struct proc *p = l->l_proc;
1104	int which = SCARG(uap, which);
1105	struct sys___getitimer50_args getargs;
1106	const struct itimerval *itvp;
1107	struct itimerval aitv;
1108	int error;
1109
1110	if ((u_int)which > ITIMER_MONOTONIC)
1111		return (EINVAL);
1112	itvp = SCARG(uap, itv);
1113	if (itvp &&
1114	    (error = copyin(itvp, &aitv, sizeof(struct itimerval)) != 0))
1115		return (error);
1116	if (SCARG(uap, oitv) != NULL) {
1117		SCARG(&getargs, which) = which;
1118		SCARG(&getargs, itv) = SCARG(uap, oitv);
1119		if ((error = sys___getitimer50(l, &getargs, retval)) != 0)
1120			return (error);
1121	}
1122	if (itvp == 0)
1123		return (0);
1124
1125	return dosetitimer(p, which, &aitv);
1126}
1127
1128int
1129dosetitimer(struct proc *p, int which, struct itimerval *itvp)
1130{
1131	struct timespec now;
1132	struct ptimers *pts;
1133	struct ptimer *pt, *spare;
1134
1135	KASSERT((u_int)which <= CLOCK_MONOTONIC);
1136	if (itimerfix(&itvp->it_value) || itimerfix(&itvp->it_interval))
1137		return (EINVAL);
1138
1139	/*
1140	 * Don't bother allocating data structures if the process just
1141	 * wants to clear the timer.
1142	 */
1143	spare = NULL;
1144	pts = p->p_timers;
1145 retry:
1146	if (!timerisset(&itvp->it_value) && (pts == NULL ||
1147	    pts->pts_timers[which] == NULL))
1148		return (0);
1149	if (pts == NULL)
1150		pts = timers_alloc(p);
1151	mutex_spin_enter(&timer_lock);
1152	pt = pts->pts_timers[which];
1153	if (pt == NULL) {
1154		if (spare == NULL) {
1155			mutex_spin_exit(&timer_lock);
1156			spare = pool_get(&ptimer_pool, PR_WAITOK);
1157			goto retry;
1158		}
1159		pt = spare;
1160		spare = NULL;
1161		pt->pt_ev.sigev_notify = SIGEV_SIGNAL;
1162		pt->pt_ev.sigev_value.sival_int = which;
1163		pt->pt_overruns = 0;
1164		pt->pt_proc = p;
1165		pt->pt_type = which;
1166		pt->pt_entry = which;
1167		pt->pt_queued = false;
1168		if (pt->pt_type == CLOCK_REALTIME)
1169			callout_init(&pt->pt_ch, CALLOUT_MPSAFE);
1170		else
1171			pt->pt_active = 0;
1172
1173		switch (which) {
1174		case ITIMER_REAL:
1175		case ITIMER_MONOTONIC:
1176			pt->pt_ev.sigev_signo = SIGALRM;
1177			break;
1178		case ITIMER_VIRTUAL:
1179			pt->pt_ev.sigev_signo = SIGVTALRM;
1180			break;
1181		case ITIMER_PROF:
1182			pt->pt_ev.sigev_signo = SIGPROF;
1183			break;
1184		}
1185		pts->pts_timers[which] = pt;
1186	}
1187
1188	TIMEVAL_TO_TIMESPEC(&itvp->it_value, &pt->pt_time.it_value);
1189	TIMEVAL_TO_TIMESPEC(&itvp->it_interval, &pt->pt_time.it_interval);
1190
1191	if (timespecisset(&pt->pt_time.it_value)) {
1192		/* Convert to absolute time */
1193		/* XXX need to wrap in splclock for timecounters case? */
1194		switch (which) {
1195		case ITIMER_REAL:
1196			getnanotime(&now);
1197			timespecadd(&pt->pt_time.it_value, &now,
1198			    &pt->pt_time.it_value);
1199			break;
1200		case ITIMER_MONOTONIC:
1201			getnanouptime(&now);
1202			timespecadd(&pt->pt_time.it_value, &now,
1203			    &pt->pt_time.it_value);
1204			break;
1205		default:
1206			break;
1207		}
1208	}
1209	timer_settime(pt);
1210	mutex_spin_exit(&timer_lock);
1211	if (spare != NULL)
1212		pool_put(&ptimer_pool, spare);
1213
1214	return (0);
1215}
1216
1217/* Utility routines to manage the array of pointers to timers. */
1218struct ptimers *
1219timers_alloc(struct proc *p)
1220{
1221	struct ptimers *pts;
1222	int i;
1223
1224	pts = pool_get(&ptimers_pool, PR_WAITOK);
1225	LIST_INIT(&pts->pts_virtual);
1226	LIST_INIT(&pts->pts_prof);
1227	for (i = 0; i < TIMER_MAX; i++)
1228		pts->pts_timers[i] = NULL;
1229	pts->pts_fired = 0;
1230	mutex_spin_enter(&timer_lock);
1231	if (p->p_timers == NULL) {
1232		p->p_timers = pts;
1233		mutex_spin_exit(&timer_lock);
1234		return pts;
1235	}
1236	mutex_spin_exit(&timer_lock);
1237	pool_put(&ptimers_pool, pts);
1238	return p->p_timers;
1239}
1240
1241/*
1242 * Clean up the per-process timers. If "which" is set to TIMERS_ALL,
1243 * then clean up all timers and free all the data structures. If
1244 * "which" is set to TIMERS_POSIX, only clean up the timers allocated
1245 * by timer_create(), not the BSD setitimer() timers, and only free the
1246 * structure if none of those remain.
1247 */
1248void
1249timers_free(struct proc *p, int which)
1250{
1251	struct ptimers *pts;
1252	struct ptimer *ptn;
1253	struct timespec ts;
1254	int i;
1255
1256	if (p->p_timers == NULL)
1257		return;
1258
1259	pts = p->p_timers;
1260	mutex_spin_enter(&timer_lock);
1261	if (which == TIMERS_ALL) {
1262		p->p_timers = NULL;
1263		i = 0;
1264	} else {
1265		timespecclear(&ts);
1266		for (ptn = LIST_FIRST(&pts->pts_virtual);
1267		     ptn && ptn != pts->pts_timers[ITIMER_VIRTUAL];
1268		     ptn = LIST_NEXT(ptn, pt_list)) {
1269			KASSERT(ptn->pt_type == CLOCK_VIRTUAL);
1270			timespecadd(&ts, &ptn->pt_time.it_value, &ts);
1271		}
1272		LIST_FIRST(&pts->pts_virtual) = NULL;
1273		if (ptn) {
1274			KASSERT(ptn->pt_type == CLOCK_VIRTUAL);
1275			timespecadd(&ts, &ptn->pt_time.it_value,
1276			    &ptn->pt_time.it_value);
1277			LIST_INSERT_HEAD(&pts->pts_virtual, ptn, pt_list);
1278		}
1279		timespecclear(&ts);
1280		for (ptn = LIST_FIRST(&pts->pts_prof);
1281		     ptn && ptn != pts->pts_timers[ITIMER_PROF];
1282		     ptn = LIST_NEXT(ptn, pt_list)) {
1283			KASSERT(ptn->pt_type == CLOCK_PROF);
1284			timespecadd(&ts, &ptn->pt_time.it_value, &ts);
1285		}
1286		LIST_FIRST(&pts->pts_prof) = NULL;
1287		if (ptn) {
1288			KASSERT(ptn->pt_type == CLOCK_PROF);
1289			timespecadd(&ts, &ptn->pt_time.it_value,
1290			    &ptn->pt_time.it_value);
1291			LIST_INSERT_HEAD(&pts->pts_prof, ptn, pt_list);
1292		}
1293		i = 3;
1294	}
1295	for ( ; i < TIMER_MAX; i++) {
1296		if (pts->pts_timers[i] != NULL) {
1297			itimerfree(pts, i);
1298			mutex_spin_enter(&timer_lock);
1299		}
1300	}
1301	if (pts->pts_timers[0] == NULL && pts->pts_timers[1] == NULL &&
1302	    pts->pts_timers[2] == NULL) {
1303		p->p_timers = NULL;
1304		mutex_spin_exit(&timer_lock);
1305		pool_put(&ptimers_pool, pts);
1306	} else
1307		mutex_spin_exit(&timer_lock);
1308}
1309
1310static void
1311itimerfree(struct ptimers *pts, int index)
1312{
1313	struct ptimer *pt;
1314
1315	KASSERT(mutex_owned(&timer_lock));
1316
1317	pt = pts->pts_timers[index];
1318	pts->pts_timers[index] = NULL;
1319	if (!CLOCK_VIRTUAL_P(pt->pt_type))
1320		callout_halt(&pt->pt_ch, &timer_lock);
1321	if (pt->pt_queued)
1322		TAILQ_REMOVE(&timer_queue, pt, pt_chain);
1323	mutex_spin_exit(&timer_lock);
1324	if (!CLOCK_VIRTUAL_P(pt->pt_type))
1325		callout_destroy(&pt->pt_ch);
1326	pool_put(&ptimer_pool, pt);
1327}
1328
1329/*
1330 * Decrement an interval timer by a specified number
1331 * of nanoseconds, which must be less than a second,
1332 * i.e. < 1000000000.  If the timer expires, then reload
1333 * it.  In this case, carry over (nsec - old value) to
1334 * reduce the value reloaded into the timer so that
1335 * the timer does not drift.  This routine assumes
1336 * that it is called in a context where the timers
1337 * on which it is operating cannot change in value.
1338 */
1339static int
1340itimerdecr(struct ptimer *pt, int nsec)
1341{
1342	struct itimerspec *itp;
1343
1344	KASSERT(mutex_owned(&timer_lock));
1345	KASSERT(CLOCK_VIRTUAL_P(pt->pt_type));
1346
1347	itp = &pt->pt_time;
1348	if (itp->it_value.tv_nsec < nsec) {
1349		if (itp->it_value.tv_sec == 0) {
1350			/* expired, and already in next interval */
1351			nsec -= itp->it_value.tv_nsec;
1352			goto expire;
1353		}
1354		itp->it_value.tv_nsec += 1000000000;
1355		itp->it_value.tv_sec--;
1356	}
1357	itp->it_value.tv_nsec -= nsec;
1358	nsec = 0;
1359	if (timespecisset(&itp->it_value))
1360		return (1);
1361	/* expired, exactly at end of interval */
1362expire:
1363	if (timespecisset(&itp->it_interval)) {
1364		itp->it_value = itp->it_interval;
1365		itp->it_value.tv_nsec -= nsec;
1366		if (itp->it_value.tv_nsec < 0) {
1367			itp->it_value.tv_nsec += 1000000000;
1368			itp->it_value.tv_sec--;
1369		}
1370		timer_settime(pt);
1371	} else
1372		itp->it_value.tv_nsec = 0;		/* sec is already 0 */
1373	return (0);
1374}
1375
1376static void
1377itimerfire(struct ptimer *pt)
1378{
1379
1380	KASSERT(mutex_owned(&timer_lock));
1381
1382	/*
1383	 * XXX Can overrun, but we don't do signal queueing yet, anyway.
1384	 * XXX Relying on the clock interrupt is stupid.
1385	 */
1386	if ((pt->pt_ev.sigev_notify == SIGEV_SA && pt->pt_proc->p_sa == NULL) ||
1387	    (pt->pt_ev.sigev_notify != SIGEV_SIGNAL &&
1388	    pt->pt_ev.sigev_notify != SIGEV_SA) || pt->pt_queued)
1389		return;
1390	TAILQ_INSERT_TAIL(&timer_queue, pt, pt_chain);
1391	pt->pt_queued = true;
1392	softint_schedule(timer_sih);
1393}
1394
1395void
1396timer_tick(lwp_t *l, bool user)
1397{
1398	struct ptimers *pts;
1399	struct ptimer *pt;
1400	proc_t *p;
1401
1402	p = l->l_proc;
1403	if (p->p_timers == NULL)
1404		return;
1405
1406	mutex_spin_enter(&timer_lock);
1407	if ((pts = l->l_proc->p_timers) != NULL) {
1408		/*
1409		 * Run current process's virtual and profile time, as needed.
1410		 */
1411		if (user && (pt = LIST_FIRST(&pts->pts_virtual)) != NULL)
1412			if (itimerdecr(pt, tick * 1000) == 0)
1413				itimerfire(pt);
1414		if ((pt = LIST_FIRST(&pts->pts_prof)) != NULL)
1415			if (itimerdecr(pt, tick * 1000) == 0)
1416				itimerfire(pt);
1417	}
1418	mutex_spin_exit(&timer_lock);
1419}
1420
#ifdef KERN_SA
/*
 * timer_sa_intr:
 *
 *	SIGEV_SA handling for timer_intr(). We are called (and return)
 * with the timer lock held. We know that the process had SA enabled
 * when this timer was enqueued. As timer_intr() is a soft interrupt
 * handler, SA should still be enabled by the time we get here.
 *
 *	Each timer owns one bit, selected by pt_entry, in the process's
 * pts_fired mask.
 */
static void
timer_sa_intr(struct ptimer *pt, proc_t *p)
{
	const unsigned int	bit = 1 << pt->pt_entry;
	struct sadata		*sa;
	struct sadata_vp	*vp;

	if (p->p_timerpend) {
		/*
		 * An upcall is already pending.  Either add this timer
		 * to the set that fired, or, if its bit is set already,
		 * count an overrun.
		 */
		if ((p->p_timers->pts_fired & bit) != 0) {
			pt->pt_overruns++;
		} else {
			pt->pt_poverruns = pt->pt_overruns;
			pt->pt_overruns = 0;
			p->p_timers->pts_fired |= bit;
		}
		return;
	}

	/* Cause the process to generate an upcall when it returns. */

	/*
	 * XXX stop signals can be processed inside tsleep,
	 * which can be inside sa_yield's inner loop, which
	 * makes testing for sa_idle alone insufficient to
	 * determine if we really should call setrunnable.
	 */
	pt->pt_poverruns = pt->pt_overruns;
	pt->pt_overruns = 0;
	p->p_timers->pts_fired = bit;
	p->p_timerpend = 1;

	sa = p->p_sa;
	mutex_enter(&sa->sa_mutex);
	SLIST_FOREACH(vp, &sa->sa_vps, savp_next) {
		struct lwp *target = vp->savp_lwp;

		lwp_lock(target);
		lwp_need_userret(target);
		if ((target->l_flag & LW_SA_IDLE) != 0) {
			/*
			 * Found an idle VP: kick it and stop looking.
			 * No lwp_unlock() on this path.
			 */
			target->l_flag &= ~LW_SA_IDLE;
			lwp_unsleep(target, true);
			break;
		}
		lwp_unlock(target);
	}
	mutex_exit(&sa->sa_mutex);
}
#endif /* KERN_SA */
1476
1477static void
1478timer_intr(void *cookie)
1479{
1480	ksiginfo_t ksi;
1481	struct ptimer *pt;
1482	proc_t *p;
1483
1484	mutex_enter(proc_lock);
1485	mutex_spin_enter(&timer_lock);
1486	while ((pt = TAILQ_FIRST(&timer_queue)) != NULL) {
1487		TAILQ_REMOVE(&timer_queue, pt, pt_chain);
1488		KASSERT(pt->pt_queued);
1489		pt->pt_queued = false;
1490
1491		if (pt->pt_proc->p_timers == NULL) {
1492			/* Process is dying. */
1493			continue;
1494		}
1495		p = pt->pt_proc;
1496#ifdef KERN_SA
1497		if (pt->pt_ev.sigev_notify == SIGEV_SA) {
1498			timer_sa_intr(pt, p);
1499			continue;
1500		}
1501#endif /* KERN_SA */
1502		if (pt->pt_ev.sigev_notify != SIGEV_SIGNAL)
1503			continue;
1504		if (sigismember(&p->p_sigpend.sp_set, pt->pt_ev.sigev_signo)) {
1505			pt->pt_overruns++;
1506			continue;
1507		}
1508
1509		KSI_INIT(&ksi);
1510		ksi.ksi_signo = pt->pt_ev.sigev_signo;
1511		ksi.ksi_code = SI_TIMER;
1512		ksi.ksi_value = pt->pt_ev.sigev_value;
1513		pt->pt_poverruns = pt->pt_overruns;
1514		pt->pt_overruns = 0;
1515		mutex_spin_exit(&timer_lock);
1516		kpsignal(p, &ksi, NULL);
1517		mutex_spin_enter(&timer_lock);
1518	}
1519	mutex_spin_exit(&timer_lock);
1520	mutex_exit(proc_lock);
1521}
1522
/*
 * Check if the time will wrap if set to ts.
 *
 * ts - timespec describing the new time
 * delta - the delta between the current time and ts
 *
 * Returns true if the new time must be refused: either ts lies within
 * one year (365 days) of LLONG_MAX seconds, or delta has a negative
 * component.
 */
bool
time_wraps(struct timespec *ts, struct timespec *delta)
{
	/*
	 * Don't allow the time to be set forward so far it will wrap
	 * and become negative.  The cutoff is 1 year before rollover
	 * occurs, so even if an attacker uses adjtime(2) to move the
	 * time past the cutoff, it will take a very long time to get
	 * to the wrap point.
	 */
	const bool near_rollover = ts->tv_sec > LLONG_MAX - 365*24*60*60;
	const bool negative_delta = delta->tv_sec < 0 || delta->tv_nsec < 0;

	return near_rollover || negative_delta;
}
1548