kern_time.c revision 1.138
1/*	$NetBSD: kern_time.c,v 1.138 2008/01/20 18:09:12 joerg Exp $	*/
2
3/*-
4 * Copyright (c) 2000, 2004, 2005, 2007 The NetBSD Foundation, Inc.
5 * All rights reserved.
6 *
7 * This code is derived from software contributed to The NetBSD Foundation
8 * by Christopher G. Demetriou.
9 *
10 * Redistribution and use in source and binary forms, with or without
11 * modification, are permitted provided that the following conditions
12 * are met:
13 * 1. Redistributions of source code must retain the above copyright
14 *    notice, this list of conditions and the following disclaimer.
15 * 2. Redistributions in binary form must reproduce the above copyright
16 *    notice, this list of conditions and the following disclaimer in the
17 *    documentation and/or other materials provided with the distribution.
18 * 3. All advertising materials mentioning features or use of this software
19 *    must display the following acknowledgement:
20 *	This product includes software developed by the NetBSD
21 *	Foundation, Inc. and its contributors.
22 * 4. Neither the name of The NetBSD Foundation nor the names of its
23 *    contributors may be used to endorse or promote products derived
24 *    from this software without specific prior written permission.
25 *
26 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
27 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
28 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
29 * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
30 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
31 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
32 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
33 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
34 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
35 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
36 * POSSIBILITY OF SUCH DAMAGE.
37 */
38
39/*
40 * Copyright (c) 1982, 1986, 1989, 1993
41 *	The Regents of the University of California.  All rights reserved.
42 *
43 * Redistribution and use in source and binary forms, with or without
44 * modification, are permitted provided that the following conditions
45 * are met:
46 * 1. Redistributions of source code must retain the above copyright
47 *    notice, this list of conditions and the following disclaimer.
48 * 2. Redistributions in binary form must reproduce the above copyright
49 *    notice, this list of conditions and the following disclaimer in the
50 *    documentation and/or other materials provided with the distribution.
51 * 3. Neither the name of the University nor the names of its contributors
52 *    may be used to endorse or promote products derived from this software
53 *    without specific prior written permission.
54 *
55 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
56 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
57 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
58 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
59 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
60 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
61 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
62 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
63 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
64 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
65 * SUCH DAMAGE.
66 *
67 *	@(#)kern_time.c	8.4 (Berkeley) 5/26/95
68 */
69
70#include <sys/cdefs.h>
71__KERNEL_RCSID(0, "$NetBSD: kern_time.c,v 1.138 2008/01/20 18:09:12 joerg Exp $");
72
73#include <sys/param.h>
74#include <sys/resourcevar.h>
75#include <sys/kernel.h>
76#include <sys/systm.h>
77#include <sys/proc.h>
78#include <sys/vnode.h>
79#include <sys/signalvar.h>
80#include <sys/syslog.h>
81#include <sys/timetc.h>
82#include <sys/kauth.h>
83
84#include <sys/mount.h>
85#include <sys/syscallargs.h>
86
87#include <uvm/uvm_extern.h>
88
89#include <sys/cpu.h>
90
91kmutex_t	time_lock;
92
93POOL_INIT(ptimer_pool, sizeof(struct ptimer), 0, 0, 0, "ptimerpl",
94    &pool_allocator_nointr, IPL_NONE);
95POOL_INIT(ptimers_pool, sizeof(struct ptimers), 0, 0, 0, "ptimerspl",
96    &pool_allocator_nointr, IPL_NONE);
97
98/*
99 * Initialize timekeeping.
100 */
101void
102time_init(void)
103{
104
105	mutex_init(&time_lock, MUTEX_DEFAULT, IPL_NONE);
106}
107
108/* Time of day and interval timer support.
109 *
110 * These routines provide the kernel entry points to get and set
111 * the time-of-day and per-process interval timers.  Subroutines
112 * here provide support for adding and subtracting timeval structures
113 * and decrementing interval timers, optionally reloading the interval
114 * timers when they expire.
115 */
116
117/* This function is used by clock_settime and settimeofday */
118static int
119settime1(struct proc *p, struct timespec *ts, bool check_kauth)
120{
121	struct timeval delta, tv;
122	struct timeval now;
123	struct timespec ts1;
124	struct bintime btdelta;
125	lwp_t *l;
126	int s;
127
128	TIMESPEC_TO_TIMEVAL(&tv, ts);
129
130	/* WHAT DO WE DO ABOUT PENDING REAL-TIME TIMEOUTS??? */
131	s = splclock();
132	microtime(&now);
133	timersub(&tv, &now, &delta);
134
135	if (check_kauth && kauth_authorize_system(kauth_cred_get(),
136	    KAUTH_SYSTEM_TIME, KAUTH_REQ_SYSTEM_TIME_SYSTEM, ts, &delta,
137	    KAUTH_ARG(check_kauth ? false : true)) != 0) {
138		splx(s);
139		return (EPERM);
140	}
141
142#ifdef notyet
143	if ((delta.tv_sec < 86400) && securelevel > 0) { /* XXX elad - notyet */
144		splx(s);
145		return (EPERM);
146	}
147#endif
148
149	TIMEVAL_TO_TIMESPEC(&tv, &ts1);
150	tc_setclock(&ts1);
151
152	timeradd(&boottime, &delta, &boottime);
153
154	/*
155	 * XXXSMP: There is a short race between setting the time above
156	 * and adjusting LWP's run times.  Fixing this properly means
157	 * pausing all CPUs while we adjust the clock.
158	 */
159	timeval2bintime(&delta, &btdelta);
160	mutex_enter(&proclist_lock);
161	LIST_FOREACH(l, &alllwp, l_list) {
162		lwp_lock(l);
163		bintime_add(&l->l_stime, &btdelta);
164		lwp_unlock(l);
165	}
166	mutex_exit(&proclist_lock);
167	resettodr();
168	splx(s);
169
170	return (0);
171}
172
173int
174settime(struct proc *p, struct timespec *ts)
175{
176	return (settime1(p, ts, true));
177}
178
179/* ARGSUSED */
180int
181sys_clock_gettime(struct lwp *l, const struct sys_clock_gettime_args *uap, register_t *retval)
182{
183	/* {
184		syscallarg(clockid_t) clock_id;
185		syscallarg(struct timespec *) tp;
186	} */
187	clockid_t clock_id;
188	struct timespec ats;
189
190	clock_id = SCARG(uap, clock_id);
191	switch (clock_id) {
192	case CLOCK_REALTIME:
193		nanotime(&ats);
194		break;
195	case CLOCK_MONOTONIC:
196		nanouptime(&ats);
197		break;
198	default:
199		return (EINVAL);
200	}
201
202	return copyout(&ats, SCARG(uap, tp), sizeof(ats));
203}
204
205/* ARGSUSED */
206int
207sys_clock_settime(struct lwp *l, const struct sys_clock_settime_args *uap, register_t *retval)
208{
209	/* {
210		syscallarg(clockid_t) clock_id;
211		syscallarg(const struct timespec *) tp;
212	} */
213
214	return clock_settime1(l->l_proc, SCARG(uap, clock_id), SCARG(uap, tp),
215	    true);
216}
217
218
219int
220clock_settime1(struct proc *p, clockid_t clock_id, const struct timespec *tp,
221    bool check_kauth)
222{
223	struct timespec ats;
224	int error;
225
226	if ((error = copyin(tp, &ats, sizeof(ats))) != 0)
227		return (error);
228
229	switch (clock_id) {
230	case CLOCK_REALTIME:
231		if ((error = settime1(p, &ats, check_kauth)) != 0)
232			return (error);
233		break;
234	case CLOCK_MONOTONIC:
235		return (EINVAL);	/* read-only clock */
236	default:
237		return (EINVAL);
238	}
239
240	return 0;
241}
242
243int
244sys_clock_getres(struct lwp *l, const struct sys_clock_getres_args *uap, register_t *retval)
245{
246	/* {
247		syscallarg(clockid_t) clock_id;
248		syscallarg(struct timespec *) tp;
249	} */
250	clockid_t clock_id;
251	struct timespec ts;
252	int error = 0;
253
254	clock_id = SCARG(uap, clock_id);
255	switch (clock_id) {
256	case CLOCK_REALTIME:
257	case CLOCK_MONOTONIC:
258		ts.tv_sec = 0;
259		if (tc_getfrequency() > 1000000000)
260			ts.tv_nsec = 1;
261		else
262			ts.tv_nsec = 1000000000 / tc_getfrequency();
263		break;
264	default:
265		return (EINVAL);
266	}
267
268	if (SCARG(uap, tp))
269		error = copyout(&ts, SCARG(uap, tp), sizeof(ts));
270
271	return error;
272}
273
274/* ARGSUSED */
275int
276sys_nanosleep(struct lwp *l, const struct sys_nanosleep_args *uap, register_t *retval)
277{
278	/* {
279		syscallarg(struct timespec *) rqtp;
280		syscallarg(struct timespec *) rmtp;
281	} */
282	struct timespec rmt, rqt;
283	int error, error1;
284
285	error = copyin(SCARG(uap, rqtp), &rqt, sizeof(struct timespec));
286	if (error)
287		return (error);
288
289	error = nanosleep1(l, &rqt, SCARG(uap, rmtp) ? &rmt : NULL);
290	if (SCARG(uap, rmtp) == NULL || (error != 0 && error != EINTR))
291		return error;
292
293	error1 = copyout(&rmt, SCARG(uap, rmtp), sizeof(rmt));
294	return error1 ? error1 : error;
295}
296
297int
298nanosleep1(struct lwp *l, struct timespec *rqt, struct timespec *rmt)
299{
300	int error, timo;
301
302	if (itimespecfix(rqt))
303		return (EINVAL);
304
305	timo = tstohz(rqt);
306	/*
307	 * Avoid inadvertantly sleeping forever
308	 */
309	if (timo == 0)
310		timo = 1;
311
312	if (rmt != NULL)
313		getnanouptime(rmt);
314
315	error = kpause("nanoslp", true, timo, NULL);
316	if (error == ERESTART)
317		error = EINTR;
318	if (error == EWOULDBLOCK)
319		error = 0;
320
321	if (rmt!= NULL) {
322		struct timespec rmtend;
323
324		getnanouptime(&rmtend);
325
326		timespecsub(&rmtend, rmt, rmt);
327		timespecsub(rqt, rmt, rmt);
328		if (rmt->tv_sec < 0)
329			timespecclear(rmt);
330	}
331
332	return error;
333}
334
335/* ARGSUSED */
336int
337sys_gettimeofday(struct lwp *l, const struct sys_gettimeofday_args *uap, register_t *retval)
338{
339	/* {
340		syscallarg(struct timeval *) tp;
341		syscallarg(void *) tzp;		really "struct timezone *";
342	} */
343	struct timeval atv;
344	int error = 0;
345	struct timezone tzfake;
346
347	if (SCARG(uap, tp)) {
348		microtime(&atv);
349		error = copyout(&atv, SCARG(uap, tp), sizeof(atv));
350		if (error)
351			return (error);
352	}
353	if (SCARG(uap, tzp)) {
354		/*
355		 * NetBSD has no kernel notion of time zone, so we just
356		 * fake up a timezone struct and return it if demanded.
357		 */
358		tzfake.tz_minuteswest = 0;
359		tzfake.tz_dsttime = 0;
360		error = copyout(&tzfake, SCARG(uap, tzp), sizeof(tzfake));
361	}
362	return (error);
363}
364
365/* ARGSUSED */
366int
367sys_settimeofday(struct lwp *l, const struct sys_settimeofday_args *uap, register_t *retval)
368{
369	/* {
370		syscallarg(const struct timeval *) tv;
371		syscallarg(const void *) tzp;	really "const struct timezone *";
372	} */
373
374	return settimeofday1(SCARG(uap, tv), true, SCARG(uap, tzp), l, true);
375}
376
377int
378settimeofday1(const struct timeval *utv, bool userspace,
379    const void *utzp, struct lwp *l, bool check_kauth)
380{
381	struct timeval atv;
382	struct timespec ts;
383	int error;
384
385	/* Verify all parameters before changing time. */
386
387	/*
388	 * NetBSD has no kernel notion of time zone, and only an
389	 * obsolete program would try to set it, so we log a warning.
390	 */
391	if (utzp)
392		log(LOG_WARNING, "pid %d attempted to set the "
393		    "(obsolete) kernel time zone\n", l->l_proc->p_pid);
394
395	if (utv == NULL)
396		return 0;
397
398	if (userspace) {
399		if ((error = copyin(utv, &atv, sizeof(atv))) != 0)
400			return error;
401		utv = &atv;
402	}
403
404	TIMEVAL_TO_TIMESPEC(utv, &ts);
405	return settime1(l->l_proc, &ts, check_kauth);
406}
407
/* Low bit is set by adjtime1() when a non-zero adjustment is requested. */
int	time_adjusted;
409
410/* ARGSUSED */
411int
412sys_adjtime(struct lwp *l, const struct sys_adjtime_args *uap, register_t *retval)
413{
414	/* {
415		syscallarg(const struct timeval *) delta;
416		syscallarg(struct timeval *) olddelta;
417	} */
418	int error;
419
420	if ((error = kauth_authorize_system(l->l_cred, KAUTH_SYSTEM_TIME,
421	    KAUTH_REQ_SYSTEM_TIME_ADJTIME, NULL, NULL, NULL)) != 0)
422		return (error);
423
424	return adjtime1(SCARG(uap, delta), SCARG(uap, olddelta), l->l_proc);
425}
426
427int
428adjtime1(const struct timeval *delta, struct timeval *olddelta, struct proc *p)
429{
430	struct timeval atv;
431	int error = 0;
432
433	extern int64_t time_adjtime;  /* in kern_ntptime.c */
434
435	if (olddelta) {
436		atv.tv_sec = time_adjtime / 1000000;
437		atv.tv_usec = time_adjtime % 1000000;
438		if (atv.tv_usec < 0) {
439			atv.tv_usec += 1000000;
440			atv.tv_sec--;
441		}
442		error = copyout(&atv, olddelta, sizeof(struct timeval));
443		if (error)
444			return (error);
445	}
446
447	if (delta) {
448		error = copyin(delta, &atv, sizeof(struct timeval));
449		if (error)
450			return (error);
451
452		time_adjtime = (int64_t)atv.tv_sec * 1000000 +
453			atv.tv_usec;
454
455		if (time_adjtime)
456			/* We need to save the system time during shutdown */
457			time_adjusted |= 1;
458	}
459
460	return error;
461}
462
463/*
464 * Interval timer support. Both the BSD getitimer() family and the POSIX
465 * timer_*() family of routines are supported.
466 *
467 * All timers are kept in an array pointed to by p_timers, which is
468 * allocated on demand - many processes don't use timers at all. The
469 * first three elements in this array are reserved for the BSD timers:
470 * element 0 is ITIMER_REAL, element 1 is ITIMER_VIRTUAL, and element
471 * 2 is ITIMER_PROF. The rest may be allocated by the timer_create()
472 * syscall.
473 *
 * Realtime timers are kept in the ptimer structure as an absolute
 * time; virtual time timers are kept as a linked list of deltas.
 * Virtual time timers are processed in the hardclock() routine of
 * kern_clock.c.  The real time timer is processed by a callout
 * routine, called from the softclock() routine.  Since a callout may
 * be delayed in real time due to interrupt processing in the system,
 * it is possible for the real time timeout routine (realtimerexpire,
 * given below) to be delayed in real time past when it is supposed
 * to occur.  It does not suffice, therefore, to reload the real timer's
 * .it_value from the real time timer's .it_interval.  Rather, we
 * compute the next time in absolute time the timer should go off.  */
485
486/* Allocate a POSIX realtime timer. */
487int
488sys_timer_create(struct lwp *l, const struct sys_timer_create_args *uap, register_t *retval)
489{
490	/* {
491		syscallarg(clockid_t) clock_id;
492		syscallarg(struct sigevent *) evp;
493		syscallarg(timer_t *) timerid;
494	} */
495
496	return timer_create1(SCARG(uap, timerid), SCARG(uap, clock_id),
497	    SCARG(uap, evp), copyin, l);
498}
499
500int
501timer_create1(timer_t *tid, clockid_t id, struct sigevent *evp,
502    copyin_t fetch_event, struct lwp *l)
503{
504	int error;
505	timer_t timerid;
506	struct ptimer *pt;
507	struct proc *p;
508
509	p = l->l_proc;
510
511	if (id < CLOCK_REALTIME ||
512	    id > CLOCK_PROF)
513		return (EINVAL);
514
515	if (p->p_timers == NULL)
516		timers_alloc(p);
517
518	/* Find a free timer slot, skipping those reserved for setitimer(). */
519	for (timerid = 3; timerid < TIMER_MAX; timerid++)
520		if (p->p_timers->pts_timers[timerid] == NULL)
521			break;
522
523	if (timerid == TIMER_MAX)
524		return EAGAIN;
525
526	pt = pool_get(&ptimer_pool, PR_WAITOK);
527	if (evp) {
528		if (((error =
529		    (*fetch_event)(evp, &pt->pt_ev, sizeof(pt->pt_ev))) != 0) ||
530		    ((pt->pt_ev.sigev_notify < SIGEV_NONE) ||
531			(pt->pt_ev.sigev_notify > SIGEV_SA))) {
532			pool_put(&ptimer_pool, pt);
533			return (error ? error : EINVAL);
534		}
535	} else {
536		pt->pt_ev.sigev_notify = SIGEV_SIGNAL;
537		switch (id) {
538		case CLOCK_REALTIME:
539			pt->pt_ev.sigev_signo = SIGALRM;
540			break;
541		case CLOCK_VIRTUAL:
542			pt->pt_ev.sigev_signo = SIGVTALRM;
543			break;
544		case CLOCK_PROF:
545			pt->pt_ev.sigev_signo = SIGPROF;
546			break;
547		}
548		pt->pt_ev.sigev_value.sival_int = timerid;
549	}
550	pt->pt_info.ksi_signo = pt->pt_ev.sigev_signo;
551	pt->pt_info.ksi_errno = 0;
552	pt->pt_info.ksi_code = 0;
553	pt->pt_info.ksi_pid = p->p_pid;
554	pt->pt_info.ksi_uid = kauth_cred_getuid(l->l_cred);
555	pt->pt_info.ksi_value = pt->pt_ev.sigev_value;
556
557	pt->pt_type = id;
558	pt->pt_proc = p;
559	pt->pt_overruns = 0;
560	pt->pt_poverruns = 0;
561	pt->pt_entry = timerid;
562	timerclear(&pt->pt_time.it_value);
563	if (id == CLOCK_REALTIME)
564		callout_init(&pt->pt_ch, 0);
565	else
566		pt->pt_active = 0;
567
568	p->p_timers->pts_timers[timerid] = pt;
569
570	return copyout(&timerid, tid, sizeof(timerid));
571}
572
573/* Delete a POSIX realtime timer */
574int
575sys_timer_delete(struct lwp *l, const struct sys_timer_delete_args *uap, register_t *retval)
576{
577	/* {
578		syscallarg(timer_t) timerid;
579	} */
580	struct proc *p = l->l_proc;
581	timer_t timerid;
582	struct ptimer *pt, *ptn;
583	int s;
584
585	timerid = SCARG(uap, timerid);
586
587	if ((p->p_timers == NULL) ||
588	    (timerid < 2) || (timerid >= TIMER_MAX) ||
589	    ((pt = p->p_timers->pts_timers[timerid]) == NULL))
590		return (EINVAL);
591
592	if (pt->pt_type == CLOCK_REALTIME) {
593		callout_stop(&pt->pt_ch);
594		callout_destroy(&pt->pt_ch);
595	} else if (pt->pt_active) {
596		s = splclock();
597		ptn = LIST_NEXT(pt, pt_list);
598		LIST_REMOVE(pt, pt_list);
599		for ( ; ptn; ptn = LIST_NEXT(ptn, pt_list))
600			timeradd(&pt->pt_time.it_value, &ptn->pt_time.it_value,
601			    &ptn->pt_time.it_value);
602		splx(s);
603	}
604
605	p->p_timers->pts_timers[timerid] = NULL;
606	pool_put(&ptimer_pool, pt);
607
608	return (0);
609}
610
611/*
612 * Set up the given timer. The value in pt->pt_time.it_value is taken
613 * to be an absolute time for CLOCK_REALTIME timers and a relative
614 * time for virtual timers.
615 * Must be called at splclock().
616 */
617void
618timer_settime(struct ptimer *pt)
619{
620	struct ptimer *ptn, *pptn;
621	struct ptlist *ptl;
622
623	if (pt->pt_type == CLOCK_REALTIME) {
624		callout_stop(&pt->pt_ch);
625		if (timerisset(&pt->pt_time.it_value)) {
626			/*
627			 * Don't need to check hzto() return value, here.
628			 * callout_reset() does it for us.
629			 */
630			callout_reset(&pt->pt_ch, hzto(&pt->pt_time.it_value),
631			    realtimerexpire, pt);
632		}
633	} else {
634		if (pt->pt_active) {
635			ptn = LIST_NEXT(pt, pt_list);
636			LIST_REMOVE(pt, pt_list);
637			for ( ; ptn; ptn = LIST_NEXT(ptn, pt_list))
638				timeradd(&pt->pt_time.it_value,
639				    &ptn->pt_time.it_value,
640				    &ptn->pt_time.it_value);
641		}
642		if (timerisset(&pt->pt_time.it_value)) {
643			if (pt->pt_type == CLOCK_VIRTUAL)
644				ptl = &pt->pt_proc->p_timers->pts_virtual;
645			else
646				ptl = &pt->pt_proc->p_timers->pts_prof;
647
648			for (ptn = LIST_FIRST(ptl), pptn = NULL;
649			     ptn && timercmp(&pt->pt_time.it_value,
650				 &ptn->pt_time.it_value, >);
651			     pptn = ptn, ptn = LIST_NEXT(ptn, pt_list))
652				timersub(&pt->pt_time.it_value,
653				    &ptn->pt_time.it_value,
654				    &pt->pt_time.it_value);
655
656			if (pptn)
657				LIST_INSERT_AFTER(pptn, pt, pt_list);
658			else
659				LIST_INSERT_HEAD(ptl, pt, pt_list);
660
661			for ( ; ptn ; ptn = LIST_NEXT(ptn, pt_list))
662				timersub(&ptn->pt_time.it_value,
663				    &pt->pt_time.it_value,
664				    &ptn->pt_time.it_value);
665
666			pt->pt_active = 1;
667		} else
668			pt->pt_active = 0;
669	}
670}
671
672void
673timer_gettime(struct ptimer *pt, struct itimerval *aitv)
674{
675	struct timeval now;
676	struct ptimer *ptn;
677
678	*aitv = pt->pt_time;
679	if (pt->pt_type == CLOCK_REALTIME) {
680		/*
681		 * Convert from absolute to relative time in .it_value
682		 * part of real time timer.  If time for real time
683		 * timer has passed return 0, else return difference
684		 * between current time and time for the timer to go
685		 * off.
686		 */
687		if (timerisset(&aitv->it_value)) {
688			getmicrotime(&now);
689			if (timercmp(&aitv->it_value, &now, <))
690				timerclear(&aitv->it_value);
691			else
692				timersub(&aitv->it_value, &now,
693				    &aitv->it_value);
694		}
695	} else if (pt->pt_active) {
696		if (pt->pt_type == CLOCK_VIRTUAL)
697			ptn = LIST_FIRST(&pt->pt_proc->p_timers->pts_virtual);
698		else
699			ptn = LIST_FIRST(&pt->pt_proc->p_timers->pts_prof);
700		for ( ; ptn && ptn != pt; ptn = LIST_NEXT(ptn, pt_list))
701			timeradd(&aitv->it_value,
702			    &ptn->pt_time.it_value, &aitv->it_value);
703		KASSERT(ptn != NULL); /* pt should be findable on the list */
704	} else
705		timerclear(&aitv->it_value);
706}
707
708
709
710/* Set and arm a POSIX realtime timer */
711int
712sys_timer_settime(struct lwp *l, const struct sys_timer_settime_args *uap, register_t *retval)
713{
714	/* {
715		syscallarg(timer_t) timerid;
716		syscallarg(int) flags;
717		syscallarg(const struct itimerspec *) value;
718		syscallarg(struct itimerspec *) ovalue;
719	} */
720	int error;
721	struct itimerspec value, ovalue, *ovp = NULL;
722
723	if ((error = copyin(SCARG(uap, value), &value,
724	    sizeof(struct itimerspec))) != 0)
725		return (error);
726
727	if (SCARG(uap, ovalue))
728		ovp = &ovalue;
729
730	if ((error = dotimer_settime(SCARG(uap, timerid), &value, ovp,
731	    SCARG(uap, flags), l->l_proc)) != 0)
732		return error;
733
734	if (ovp)
735		return copyout(&ovalue, SCARG(uap, ovalue),
736		    sizeof(struct itimerspec));
737	return 0;
738}
739
740int
741dotimer_settime(int timerid, struct itimerspec *value,
742    struct itimerspec *ovalue, int flags, struct proc *p)
743{
744	struct timeval now;
745	struct itimerval val, oval;
746	struct ptimer *pt;
747	int s;
748
749	if ((p->p_timers == NULL) ||
750	    (timerid < 2) || (timerid >= TIMER_MAX) ||
751	    ((pt = p->p_timers->pts_timers[timerid]) == NULL))
752		return (EINVAL);
753
754	TIMESPEC_TO_TIMEVAL(&val.it_value, &value->it_value);
755	TIMESPEC_TO_TIMEVAL(&val.it_interval, &value->it_interval);
756	if (itimerfix(&val.it_value) || itimerfix(&val.it_interval))
757		return (EINVAL);
758
759	oval = pt->pt_time;
760	pt->pt_time = val;
761
762	s = splclock();
763	/*
764	 * If we've been passed a relative time for a realtime timer,
765	 * convert it to absolute; if an absolute time for a virtual
766	 * timer, convert it to relative and make sure we don't set it
767	 * to zero, which would cancel the timer, or let it go
768	 * negative, which would confuse the comparison tests.
769	 */
770	if (timerisset(&pt->pt_time.it_value)) {
771		if (pt->pt_type == CLOCK_REALTIME) {
772			if ((flags & TIMER_ABSTIME) == 0) {
773				getmicrotime(&now);
774				timeradd(&pt->pt_time.it_value, &now,
775				    &pt->pt_time.it_value);
776			}
777		} else {
778			if ((flags & TIMER_ABSTIME) != 0) {
779				getmicrotime(&now);
780				timersub(&pt->pt_time.it_value, &now,
781				    &pt->pt_time.it_value);
782				if (!timerisset(&pt->pt_time.it_value) ||
783				    pt->pt_time.it_value.tv_sec < 0) {
784					pt->pt_time.it_value.tv_sec = 0;
785					pt->pt_time.it_value.tv_usec = 1;
786				}
787			}
788		}
789	}
790
791	timer_settime(pt);
792	splx(s);
793
794	if (ovalue) {
795		TIMEVAL_TO_TIMESPEC(&oval.it_value, &ovalue->it_value);
796		TIMEVAL_TO_TIMESPEC(&oval.it_interval, &ovalue->it_interval);
797	}
798
799	return (0);
800}
801
802/* Return the time remaining until a POSIX timer fires. */
803int
804sys_timer_gettime(struct lwp *l, const struct sys_timer_gettime_args *uap, register_t *retval)
805{
806	/* {
807		syscallarg(timer_t) timerid;
808		syscallarg(struct itimerspec *) value;
809	} */
810	struct itimerspec its;
811	int error;
812
813	if ((error = dotimer_gettime(SCARG(uap, timerid), l->l_proc,
814	    &its)) != 0)
815		return error;
816
817	return copyout(&its, SCARG(uap, value), sizeof(its));
818}
819
820int
821dotimer_gettime(int timerid, struct proc *p, struct itimerspec *its)
822{
823	int s;
824	struct ptimer *pt;
825	struct itimerval aitv;
826
827	if ((p->p_timers == NULL) ||
828	    (timerid < 2) || (timerid >= TIMER_MAX) ||
829	    ((pt = p->p_timers->pts_timers[timerid]) == NULL))
830		return (EINVAL);
831
832	s = splclock();
833	timer_gettime(pt, &aitv);
834	splx(s);
835
836	TIMEVAL_TO_TIMESPEC(&aitv.it_interval, &its->it_interval);
837	TIMEVAL_TO_TIMESPEC(&aitv.it_value, &its->it_value);
838
839	return 0;
840}
841
842/*
843 * Return the count of the number of times a periodic timer expired
844 * while a notification was already pending. The counter is reset when
845 * a timer expires and a notification can be posted.
846 */
847int
848sys_timer_getoverrun(struct lwp *l, const struct sys_timer_getoverrun_args *uap, register_t *retval)
849{
850	/* {
851		syscallarg(timer_t) timerid;
852	} */
853	struct proc *p = l->l_proc;
854	int timerid;
855	struct ptimer *pt;
856
857	timerid = SCARG(uap, timerid);
858
859	if ((p->p_timers == NULL) ||
860	    (timerid < 2) || (timerid >= TIMER_MAX) ||
861	    ((pt = p->p_timers->pts_timers[timerid]) == NULL))
862		return (EINVAL);
863
864	*retval = pt->pt_poverruns;
865
866	return (0);
867}
868
869/*
870 * Real interval timer expired:
871 * send process whose timer expired an alarm signal.
872 * If time is not set up to reload, then just return.
873 * Else compute next time timer should go off which is > current time.
874 * This is where delay in processing this timeout causes multiple
875 * SIGALRM calls to be compressed into one.
876 */
877void
878realtimerexpire(void *arg)
879{
880	struct timeval now;
881	struct ptimer *pt;
882	int s;
883
884	pt = (struct ptimer *)arg;
885
886	itimerfire(pt);
887
888	if (!timerisset(&pt->pt_time.it_interval)) {
889		timerclear(&pt->pt_time.it_value);
890		return;
891	}
892	for (;;) {
893		s = splclock();	/* XXX need spl now? */
894		timeradd(&pt->pt_time.it_value,
895		    &pt->pt_time.it_interval, &pt->pt_time.it_value);
896		getmicrotime(&now);
897		if (timercmp(&pt->pt_time.it_value, &now, >)) {
898			/*
899			 * Don't need to check hzto() return value, here.
900			 * callout_reset() does it for us.
901			 */
902			callout_reset(&pt->pt_ch, hzto(&pt->pt_time.it_value),
903			    realtimerexpire, pt);
904			splx(s);
905			return;
906		}
907		splx(s);
908		pt->pt_overruns++;
909	}
910}
911
912/* BSD routine to get the value of an interval timer. */
913/* ARGSUSED */
914int
915sys_getitimer(struct lwp *l, const struct sys_getitimer_args *uap, register_t *retval)
916{
917	/* {
918		syscallarg(int) which;
919		syscallarg(struct itimerval *) itv;
920	} */
921	struct proc *p = l->l_proc;
922	struct itimerval aitv;
923	int error;
924
925	error = dogetitimer(p, SCARG(uap, which), &aitv);
926	if (error)
927		return error;
928	return (copyout(&aitv, SCARG(uap, itv), sizeof(struct itimerval)));
929}
930
931int
932dogetitimer(struct proc *p, int which, struct itimerval *itvp)
933{
934	int s;
935
936	if ((u_int)which > ITIMER_PROF)
937		return (EINVAL);
938
939	if ((p->p_timers == NULL) || (p->p_timers->pts_timers[which] == NULL)){
940		timerclear(&itvp->it_value);
941		timerclear(&itvp->it_interval);
942	} else {
943		s = splclock();
944		timer_gettime(p->p_timers->pts_timers[which], itvp);
945		splx(s);
946	}
947
948	return 0;
949}
950
951/* BSD routine to set/arm an interval timer. */
952/* ARGSUSED */
953int
954sys_setitimer(struct lwp *l, const struct sys_setitimer_args *uap, register_t *retval)
955{
956	/* {
957		syscallarg(int) which;
958		syscallarg(const struct itimerval *) itv;
959		syscallarg(struct itimerval *) oitv;
960	} */
961	struct proc *p = l->l_proc;
962	int which = SCARG(uap, which);
963	struct sys_getitimer_args getargs;
964	const struct itimerval *itvp;
965	struct itimerval aitv;
966	int error;
967
968	if ((u_int)which > ITIMER_PROF)
969		return (EINVAL);
970	itvp = SCARG(uap, itv);
971	if (itvp &&
972	    (error = copyin(itvp, &aitv, sizeof(struct itimerval)) != 0))
973		return (error);
974	if (SCARG(uap, oitv) != NULL) {
975		SCARG(&getargs, which) = which;
976		SCARG(&getargs, itv) = SCARG(uap, oitv);
977		if ((error = sys_getitimer(l, &getargs, retval)) != 0)
978			return (error);
979	}
980	if (itvp == 0)
981		return (0);
982
983	return dosetitimer(p, which, &aitv);
984}
985
986int
987dosetitimer(struct proc *p, int which, struct itimerval *itvp)
988{
989	struct timeval now;
990	struct ptimer *pt;
991	int s;
992
993	if (itimerfix(&itvp->it_value) || itimerfix(&itvp->it_interval))
994		return (EINVAL);
995
996	/*
997	 * Don't bother allocating data structures if the process just
998	 * wants to clear the timer.
999	 */
1000	if (!timerisset(&itvp->it_value) &&
1001	    ((p->p_timers == NULL) ||(p->p_timers->pts_timers[which] == NULL)))
1002		return (0);
1003
1004	if (p->p_timers == NULL)
1005		timers_alloc(p);
1006	if (p->p_timers->pts_timers[which] == NULL) {
1007		pt = pool_get(&ptimer_pool, PR_WAITOK);
1008		pt->pt_ev.sigev_notify = SIGEV_SIGNAL;
1009		pt->pt_ev.sigev_value.sival_int = which;
1010		pt->pt_overruns = 0;
1011		pt->pt_proc = p;
1012		pt->pt_type = which;
1013		pt->pt_entry = which;
1014		switch (which) {
1015		case ITIMER_REAL:
1016			callout_init(&pt->pt_ch, 0);
1017			pt->pt_ev.sigev_signo = SIGALRM;
1018			break;
1019		case ITIMER_VIRTUAL:
1020			pt->pt_active = 0;
1021			pt->pt_ev.sigev_signo = SIGVTALRM;
1022			break;
1023		case ITIMER_PROF:
1024			pt->pt_active = 0;
1025			pt->pt_ev.sigev_signo = SIGPROF;
1026			break;
1027		}
1028	} else
1029		pt = p->p_timers->pts_timers[which];
1030
1031	pt->pt_time = *itvp;
1032	p->p_timers->pts_timers[which] = pt;
1033
1034	s = splclock();
1035	if ((which == ITIMER_REAL) && timerisset(&pt->pt_time.it_value)) {
1036		/* Convert to absolute time */
1037		/* XXX need to wrap in splclock for timecounters case? */
1038		getmicrotime(&now);
1039		timeradd(&pt->pt_time.it_value, &now, &pt->pt_time.it_value);
1040	}
1041	timer_settime(pt);
1042	splx(s);
1043
1044	return (0);
1045}
1046
1047/* Utility routines to manage the array of pointers to timers. */
1048void
1049timers_alloc(struct proc *p)
1050{
1051	int i;
1052	struct ptimers *pts;
1053
1054	pts = pool_get(&ptimers_pool, PR_WAITOK);
1055	LIST_INIT(&pts->pts_virtual);
1056	LIST_INIT(&pts->pts_prof);
1057	for (i = 0; i < TIMER_MAX; i++)
1058		pts->pts_timers[i] = NULL;
1059	pts->pts_fired = 0;
1060	p->p_timers = pts;
1061}
1062
1063/*
1064 * Clean up the per-process timers. If "which" is set to TIMERS_ALL,
1065 * then clean up all timers and free all the data structures. If
1066 * "which" is set to TIMERS_POSIX, only clean up the timers allocated
1067 * by timer_create(), not the BSD setitimer() timers, and only free the
1068 * structure if none of those remain.
1069 */
1070void
1071timers_free(struct proc *p, int which)
1072{
1073	int i, s;
1074	struct ptimers *pts;
1075	struct ptimer *pt, *ptn;
1076	struct timeval tv;
1077
1078	if (p->p_timers) {
1079		pts = p->p_timers;
1080		if (which == TIMERS_ALL)
1081			i = 0;
1082		else {
1083			s = splclock();
1084			timerclear(&tv);
1085			for (ptn = LIST_FIRST(&p->p_timers->pts_virtual);
1086			     ptn && ptn != pts->pts_timers[ITIMER_VIRTUAL];
1087			     ptn = LIST_NEXT(ptn, pt_list))
1088				timeradd(&tv, &ptn->pt_time.it_value, &tv);
1089			LIST_FIRST(&p->p_timers->pts_virtual) = NULL;
1090			if (ptn) {
1091				timeradd(&tv, &ptn->pt_time.it_value,
1092				    &ptn->pt_time.it_value);
1093				LIST_INSERT_HEAD(&p->p_timers->pts_virtual,
1094				    ptn, pt_list);
1095			}
1096
1097			timerclear(&tv);
1098			for (ptn = LIST_FIRST(&p->p_timers->pts_prof);
1099			     ptn && ptn != pts->pts_timers[ITIMER_PROF];
1100			     ptn = LIST_NEXT(ptn, pt_list))
1101				timeradd(&tv, &ptn->pt_time.it_value, &tv);
1102			LIST_FIRST(&p->p_timers->pts_prof) = NULL;
1103			if (ptn) {
1104				timeradd(&tv, &ptn->pt_time.it_value,
1105				    &ptn->pt_time.it_value);
1106				LIST_INSERT_HEAD(&p->p_timers->pts_prof, ptn,
1107				    pt_list);
1108			}
1109			splx(s);
1110			i = 3;
1111		}
1112		for ( ; i < TIMER_MAX; i++)
1113			if ((pt = pts->pts_timers[i]) != NULL) {
1114				if (pt->pt_type == CLOCK_REALTIME) {
1115					callout_stop(&pt->pt_ch);
1116					callout_destroy(&pt->pt_ch);
1117				}
1118				pts->pts_timers[i] = NULL;
1119				pool_put(&ptimer_pool, pt);
1120			}
1121		if ((pts->pts_timers[0] == NULL) &&
1122		    (pts->pts_timers[1] == NULL) &&
1123		    (pts->pts_timers[2] == NULL)) {
1124			p->p_timers = NULL;
1125			pool_put(&ptimers_pool, pts);
1126		}
1127	}
1128}
1129
1130/*
1131 * Decrement an interval timer by a specified number
1132 * of microseconds, which must be less than a second,
1133 * i.e. < 1000000.  If the timer expires, then reload
1134 * it.  In this case, carry over (usec - old value) to
1135 * reduce the value reloaded into the timer so that
1136 * the timer does not drift.  This routine assumes
1137 * that it is called in a context where the timers
1138 * on which it is operating cannot change in value.
1139 */
1140int
1141itimerdecr(struct ptimer *pt, int usec)
1142{
1143	struct itimerval *itp;
1144
1145	itp = &pt->pt_time;
1146	if (itp->it_value.tv_usec < usec) {
1147		if (itp->it_value.tv_sec == 0) {
1148			/* expired, and already in next interval */
1149			usec -= itp->it_value.tv_usec;
1150			goto expire;
1151		}
1152		itp->it_value.tv_usec += 1000000;
1153		itp->it_value.tv_sec--;
1154	}
1155	itp->it_value.tv_usec -= usec;
1156	usec = 0;
1157	if (timerisset(&itp->it_value))
1158		return (1);
1159	/* expired, exactly at end of interval */
1160expire:
1161	if (timerisset(&itp->it_interval)) {
1162		itp->it_value = itp->it_interval;
1163		itp->it_value.tv_usec -= usec;
1164		if (itp->it_value.tv_usec < 0) {
1165			itp->it_value.tv_usec += 1000000;
1166			itp->it_value.tv_sec--;
1167		}
1168		timer_settime(pt);
1169	} else
1170		itp->it_value.tv_usec = 0;		/* sec is already 0 */
1171	return (0);
1172}
1173
1174void
1175itimerfire(struct ptimer *pt)
1176{
1177	struct proc *p = pt->pt_proc;
1178
1179	if (pt->pt_ev.sigev_notify == SIGEV_SIGNAL) {
1180		/*
1181		 * No RT signal infrastructure exists at this time;
1182		 * just post the signal number and throw away the
1183		 * value.
1184		 */
1185		if (sigismember(&p->p_sigpend.sp_set, pt->pt_ev.sigev_signo))
1186			pt->pt_overruns++;
1187		else {
1188			ksiginfo_t ksi;
1189			KSI_INIT(&ksi);
1190			ksi.ksi_signo = pt->pt_ev.sigev_signo;
1191			ksi.ksi_code = SI_TIMER;
1192			ksi.ksi_value = pt->pt_ev.sigev_value;
1193			pt->pt_poverruns = pt->pt_overruns;
1194			pt->pt_overruns = 0;
1195			mutex_enter(&proclist_mutex);
1196			kpsignal(p, &ksi, NULL);
1197			mutex_exit(&proclist_mutex);
1198		}
1199	}
1200}
1201
1202/*
1203 * ratecheck(): simple time-based rate-limit checking.  see ratecheck(9)
1204 * for usage and rationale.
1205 */
1206int
1207ratecheck(struct timeval *lasttime, const struct timeval *mininterval)
1208{
1209	struct timeval tv, delta;
1210	int rv = 0;
1211
1212	getmicrouptime(&tv);
1213	timersub(&tv, lasttime, &delta);
1214
1215	/*
1216	 * check for 0,0 is so that the message will be seen at least once,
1217	 * even if interval is huge.
1218	 */
1219	if (timercmp(&delta, mininterval, >=) ||
1220	    (lasttime->tv_sec == 0 && lasttime->tv_usec == 0)) {
1221		*lasttime = tv;
1222		rv = 1;
1223	}
1224
1225	return (rv);
1226}
1227
/*
 * ppsratecheck(): packets (or events) per second limitation.
 *
 * lasttime	start of the current counting window, as read from
 *		getmicrouptime(); 0,0 means no window has started yet.
 * curpps	number of events counted in the current window; updated
 *		on every call.
 * maxpps	events allowed per window; a negative value disables
 *		the limit.
 *
 * Returns 1 if the event is within the limit, 0 if it exceeds it.
 */
int
ppsratecheck(struct timeval *lasttime, int *curpps, int maxpps)
{
	/*
	 * Largest value of an int, derived from unsigned arithmetic so
	 * that this function needs no extra header.  Assumes int and
	 * unsigned int have no padding bits.
	 */
	const int curpps_max = (int)(~0U >> 1);
	struct timeval tv, delta;
	int rv;

	getmicrouptime(&tv);
	timersub(&tv, lasttime, &delta);

	/*
	 * check for 0,0 is so that the message will be seen at least once.
	 * if one second or more has passed since the last update of
	 * lasttime, start a new window and reset the counter.
	 */
	if ((lasttime->tv_sec == 0 && lasttime->tv_usec == 0) ||
	    delta.tv_sec >= 1) {
		*lasttime = tv;
		*curpps = 0;
	}

	rv = (maxpps < 0 || *curpps < maxpps);

	/*
	 * we do increment *curpps even in *curpps < maxpps case, as some may
	 * try to use *curpps for stat purposes as well.
	 *
	 * The counter saturates instead of wrapping around.  This is done
	 * unconditionally (not only under DIAGNOSTIC) because how often
	 * this function is called depends on the caller, and no assumption
	 * is made about that.  The limit is tested before incrementing:
	 * evaluating "*curpps + 1" at the maximum would be a signed
	 * overflow, which is undefined behaviour and lets the compiler
	 * discard an after-the-fact comparison.
	 */
	if (*curpps < curpps_max)
		*curpps = *curpps + 1;

	return (rv);
}
1277