/*	$NetBSD: kern_time.c,v 1.71 2003/07/17 18:16:59 fvdl Exp $	*/

/*-
 * Copyright (c) 2000 The NetBSD Foundation, Inc.
 * All rights reserved.
 *
 * This code is derived from software contributed to The NetBSD Foundation
 * by Christopher G. Demetriou.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. All advertising materials mentioning features or use of this software
 *    must display the following acknowledgement:
 *	This product includes software developed by the NetBSD
 *	Foundation, Inc. and its contributors.
 * 4. Neither the name of The NetBSD Foundation nor the names of its
 *    contributors may be used to endorse or promote products derived
 *    from this software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 */

/*
 * Copyright (c) 1982, 1986, 1989, 1993
 *	The Regents of the University of California.  All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. All advertising materials mentioning features or use of this software
 *    must display the following acknowledgement:
 *	This product includes software developed by the University of
 *	California, Berkeley and its contributors.
 * 4. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 *	@(#)kern_time.c	8.4 (Berkeley) 5/26/95
 */

#include <sys/cdefs.h>
__KERNEL_RCSID(0, "$NetBSD: kern_time.c,v 1.71 2003/07/17 18:16:59 fvdl Exp $");

#include "fs_nfs.h"
#include "opt_nfs.h"
#include "opt_nfsserver.h"

#include <sys/param.h>
#include <sys/resourcevar.h>
#include <sys/kernel.h>
#include <sys/systm.h>
#include <sys/malloc.h>
#include <sys/proc.h>
#include <sys/sa.h>
#include <sys/savar.h>
#include <sys/vnode.h>
#include <sys/signalvar.h>
#include <sys/syslog.h>

#include <sys/mount.h>
#include <sys/syscallargs.h>

#include <uvm/uvm_extern.h>

#if defined(NFS) || defined(NFSSERVER)
#include <nfs/rpcv2.h>
#include <nfs/nfsproto.h>
#include <nfs/nfs_var.h>
#endif

#include <machine/cpu.h>

static void timerupcall(struct lwp *, void *);


/* Time of day and interval timer support.
 *
 * These routines provide the kernel entry points to get and set
 * the time-of-day and per-process interval timers.  Subroutines
 * here provide support for adding and subtracting timeval structures
 * and decrementing interval timers, optionally reloading the interval
 * timers when they expire.
 */

/* This function is used by clock_settime and settimeofday */
int
settime(struct timeval *tv)
{
	struct timeval delta;
	struct cpu_info *ci;
	int s;

	/* WHAT DO WE DO ABOUT PENDING REAL-TIME TIMEOUTS??? */
	s = splclock();
	timersub(tv, &time, &delta);
	if ((delta.tv_sec < 0 || delta.tv_usec < 0) && securelevel > 1) {
		splx(s);
		return (EPERM);
	}
#ifdef notyet
	if ((delta.tv_sec < 86400) && securelevel > 0) {
		splx(s);
		return (EPERM);
	}
#endif
	time = *tv;
	(void) spllowersoftclock();
	timeradd(&boottime, &delta, &boottime);
	/*
	 * XXXSMP
	 * This is wrong.  We should traverse a list of all
	 * CPUs and add the delta to the runtime of those
	 * CPUs which have a process on them.
	 */
	ci = curcpu();
	timeradd(&ci->ci_schedstate.spc_runtime, &delta,
	    &ci->ci_schedstate.spc_runtime);
#	if (defined(NFS) && !defined (NFS_V2_ONLY)) || defined(NFSSERVER)
		nqnfs_lease_updatetime(delta.tv_sec);
#	endif
	splx(s);
	resettodr();
	return (0);
}

/* ARGSUSED */
int
sys_clock_gettime(struct lwp *l, void *v, register_t *retval)
{
	struct sys_clock_gettime_args /* {
		syscallarg(clockid_t) clock_id;
		syscallarg(struct timespec *) tp;
	} */ *uap = v;
	clockid_t clock_id;
	struct timeval atv;
	struct timespec ats;
	int s;

	clock_id = SCARG(uap, clock_id);
	switch (clock_id) {
	case CLOCK_REALTIME:
		microtime(&atv);
		TIMEVAL_TO_TIMESPEC(&atv,&ats);
		break;
	case CLOCK_MONOTONIC:
		/* XXX "hz" granularity */
		s = splclock();
		atv = mono_time;
		splx(s);
		TIMEVAL_TO_TIMESPEC(&atv,&ats);
		break;
	default:
		return (EINVAL);
	}

	return copyout(&ats, SCARG(uap, tp), sizeof(ats));
}

/* ARGSUSED */
int
sys_clock_settime(l, v, retval)
	struct lwp *l;
	void *v;
	register_t *retval;
{
	struct sys_clock_settime_args /* {
		syscallarg(clockid_t) clock_id;
		syscallarg(const struct timespec *) tp;
	} */ *uap = v;
	struct proc *p = l->l_proc;
	int error;

	if ((error = suser(p->p_ucred, &p->p_acflag)) != 0)
		return (error);

	return (clock_settime1(SCARG(uap, clock_id), SCARG(uap, tp)));
}


int
clock_settime1(clock_id, tp)
	clockid_t clock_id;
	const struct timespec *tp;
{
	struct timespec ats;
	struct timeval atv;
	int error;

	if ((error = copyin(tp, &ats, sizeof(ats))) != 0)
		return (error);

	switch (clock_id) {
	case CLOCK_REALTIME:
		TIMESPEC_TO_TIMEVAL(&atv, &ats);
		if ((error = settime(&atv)) != 0)
			return (error);
		break;
	case CLOCK_MONOTONIC:
		return (EINVAL);	/* read-only clock */
	default:
		return (EINVAL);
	}

	return 0;
}

int
sys_clock_getres(struct lwp *l, void *v, register_t *retval)
{
	struct sys_clock_getres_args /* {
		syscallarg(clockid_t) clock_id;
		syscallarg(struct timespec *) tp;
	} */ *uap = v;
	clockid_t clock_id;
	struct timespec ts;
	int error = 0;

	clock_id = SCARG(uap, clock_id);
	switch (clock_id) {
	case CLOCK_REALTIME:
	case CLOCK_MONOTONIC:
		ts.tv_sec = 0;
		ts.tv_nsec = 1000000000 / hz;
		break;
	default:
		return (EINVAL);
	}

	if (SCARG(uap, tp))
		error = copyout(&ts, SCARG(uap, tp), sizeof(ts));

	return error;
}
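
/*
 * Example: assuming hz = 100 (a common configuration of the era), the
 * resolution reported above is 1000000000 / 100 = 10000000 ns, i.e. 10 ms;
 * with hz = 1000 it would be 1 ms.  The actual figure depends entirely on
 * the kernel's configured hz.
 */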

/* ARGSUSED */
int
sys_nanosleep(struct lwp *l, void *v, register_t *retval)
{
	static int nanowait;
	struct sys_nanosleep_args/* {
		syscallarg(struct timespec *) rqtp;
		syscallarg(struct timespec *) rmtp;
	} */ *uap = v;
	struct timespec rqt;
	struct timespec rmt;
	struct timeval atv, utv;
	int error, s, timo;

	error = copyin((caddr_t)SCARG(uap, rqtp), (caddr_t)&rqt,
		       sizeof(struct timespec));
	if (error)
		return (error);

	TIMESPEC_TO_TIMEVAL(&atv,&rqt);
	if (itimerfix(&atv) || atv.tv_sec > 1000000000)
		return (EINVAL);

	s = splclock();
	timeradd(&atv,&time,&atv);
	timo = hzto(&atv);
	/*
	 * Avoid inadvertently sleeping forever.
	 */
	if (timo == 0)
		timo = 1;
	splx(s);

	error = tsleep(&nanowait, PWAIT | PCATCH, "nanosleep", timo);
	if (error == ERESTART)
		error = EINTR;
	if (error == EWOULDBLOCK)
		error = 0;

	if (SCARG(uap, rmtp)) {
		int error;

		s = splclock();
		utv = time;
		splx(s);

		timersub(&atv, &utv, &utv);
		if (utv.tv_sec < 0)
			timerclear(&utv);

		TIMEVAL_TO_TIMESPEC(&utv,&rmt);
		error = copyout((caddr_t)&rmt, (caddr_t)SCARG(uap,rmtp),
			sizeof(rmt));
		if (error)
			return (error);
	}

	return error;
}
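
/*
 * Example: a request of rqt = { 0, 500000000 } (half a second) converts
 * to atv = { 0, 500000 }; assuming hz = 100 for illustration, hzto() on
 * "now + atv" yields a timeout of roughly 50 ticks, and rmtp receives
 * whatever part of the interval had not yet elapsed if the sleep was
 * interrupted early.
 */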

/* ARGSUSED */
int
sys_gettimeofday(struct lwp *l, void *v, register_t *retval)
{
	struct sys_gettimeofday_args /* {
		syscallarg(struct timeval *) tp;
		syscallarg(struct timezone *) tzp;
	} */ *uap = v;
	struct timeval atv;
	int error = 0;
	struct timezone tzfake;

	if (SCARG(uap, tp)) {
		microtime(&atv);
		error = copyout(&atv, SCARG(uap, tp), sizeof(atv));
		if (error)
			return (error);
	}
	if (SCARG(uap, tzp)) {
		/*
		 * NetBSD has no kernel notion of time zone, so we just
		 * fake up a timezone struct and return it if demanded.
		 */
		tzfake.tz_minuteswest = 0;
		tzfake.tz_dsttime = 0;
		error = copyout(&tzfake, SCARG(uap, tzp), sizeof(tzfake));
	}
	return (error);
}

/* ARGSUSED */
int
sys_settimeofday(struct lwp *l, void *v, register_t *retval)
{
	struct sys_settimeofday_args /* {
		syscallarg(const struct timeval *) tv;
		syscallarg(const struct timezone *) tzp;
	} */ *uap = v;
	struct proc *p = l->l_proc;
	int error;

	if ((error = suser(p->p_ucred, &p->p_acflag)) != 0)
		return (error);

	return settimeofday1(SCARG(uap, tv), SCARG(uap, tzp), p);
}

int
settimeofday1(utv, utzp, p)
	const struct timeval *utv;
	const struct timezone *utzp;
	struct proc *p;
{
	struct timeval atv;
	struct timezone atz;
	struct timeval *tv = NULL;
	struct timezone *tzp = NULL;
	int error;

	/* Verify all parameters before changing time. */
	if (utv) {
		if ((error = copyin(utv, &atv, sizeof(atv))) != 0)
			return (error);
		tv = &atv;
	}
	/* XXX since we don't use tz, probably no point in doing copyin. */
	if (utzp) {
		if ((error = copyin(utzp, &atz, sizeof(atz))) != 0)
			return (error);
		tzp = &atz;
	}

	if (tv)
		if ((error = settime(tv)) != 0)
			return (error);
	/*
	 * NetBSD has no kernel notion of time zone, and only an
	 * obsolete program would try to set it, so we log a warning.
	 */
	if (tzp)
		log(LOG_WARNING, "pid %d attempted to set the "
		    "(obsolete) kernel time zone\n", p->p_pid);
	return (0);
}

int	tickdelta;			/* current clock skew, us. per tick */
long	timedelta;			/* unapplied time correction, us. */
long	bigadj = 1000000;		/* use 10x skew above bigadj us. */
int	time_adjusted;			/* set if an adjustment is made */

/* ARGSUSED */
int
sys_adjtime(struct lwp *l, void *v, register_t *retval)
{
	struct sys_adjtime_args /* {
		syscallarg(const struct timeval *) delta;
		syscallarg(struct timeval *) olddelta;
	} */ *uap = v;
	struct proc *p = l->l_proc;
	int error;

	if ((error = suser(p->p_ucred, &p->p_acflag)) != 0)
		return (error);

	return adjtime1(SCARG(uap, delta), SCARG(uap, olddelta), p);
}

int
adjtime1(delta, olddelta, p)
	const struct timeval *delta;
	struct timeval *olddelta;
	struct proc *p;
{
	struct timeval atv;
	long ndelta, ntickdelta, odelta;
	int error;
	int s;

	error = copyin(delta, &atv, sizeof(struct timeval));
	if (error)
		return (error);

	if (olddelta != NULL) {
		if (uvm_useracc((caddr_t)olddelta,
		    sizeof(struct timeval), B_WRITE) == FALSE)
			return (EFAULT);
	}

	/*
	 * Compute the total correction and the rate at which to apply it.
	 * Round the adjustment down to a whole multiple of the per-tick
	 * delta, so that after some number of incremental changes in
	 * hardclock(), tickdelta will become zero, lest the correction
	 * overshoot and start taking us away from the desired final time.
	 */
	ndelta = atv.tv_sec * 1000000 + atv.tv_usec;
	if (ndelta > bigadj || ndelta < -bigadj)
		ntickdelta = 10 * tickadj;
	else
		ntickdelta = tickadj;
	if (ndelta % ntickdelta)
		ndelta = ndelta / ntickdelta * ntickdelta;

	/*
	 * To make hardclock()'s job easier, make the per-tick delta negative
	 * if we want time to run slower; then hardclock can simply compute
	 * tick + tickdelta, and subtract tickdelta from timedelta.
	 */
	if (ndelta < 0)
		ntickdelta = -ntickdelta;
	if (ndelta != 0)
		/* We need to save the system clock time during shutdown */
		time_adjusted |= 1;
	s = splclock();
	odelta = timedelta;
	timedelta = ndelta;
	tickdelta = ntickdelta;
	splx(s);

	if (olddelta) {
		atv.tv_sec = odelta / 1000000;
		atv.tv_usec = odelta % 1000000;
		(void) copyout(&atv, olddelta, sizeof(struct timeval));
	}
	return (0);
}
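
/*
 * Worked example of the rounding above, assuming hz = 100 and the usual
 * tickadj of 40 us for that hz: a requested correction of +123456 us is
 * below bigadj, so ntickdelta = 40; 123456 is not a multiple of 40 and is
 * rounded down to 123440 us, which hardclock() then applies in 3086 steps
 * of 40 us each, reaching exactly zero without overshooting.
 */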

/*
 * Interval timer support. Both the BSD getitimer() family and the POSIX
 * timer_*() family of routines are supported.
 *
 * All timers are kept in an array pointed to by p_timers, which is
 * allocated on demand - many processes don't use timers at all. The
 * first three elements in this array are reserved for the BSD timers:
 * element 0 is ITIMER_REAL, element 1 is ITIMER_VIRTUAL, and element
 * 2 is ITIMER_PROF. The rest may be allocated by the timer_create()
 * syscall.
 *
 * Realtime timers are kept in the ptimer structure as an absolute
 * time; virtual time timers are kept as a linked list of deltas.
 * Virtual time timers are processed in the hardclock() routine of
 * kern_clock.c.  The real time timer is processed by a callout
 * routine, called from the softclock() routine.  Since a callout may
 * be delayed in real time due to interrupt processing in the system,
 * it is possible for the real time timeout routine (realtimerexpire,
 * given below) to be delayed in real time past when it is supposed
 * to occur.  It does not suffice, therefore, to reload the real timer
 * .it_value from the real time timer's .it_interval.  Rather, we
 * compute the next time in absolute time the timer should go off.
 */
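
/*
 * Example of why the absolute recomputation matters: a 10 ms periodic
 * timer whose callout is serviced 3 ms late would, if simply reloaded
 * from .it_interval, fire 3 ms later every period and drift without
 * bound.  Adding .it_interval to the previous absolute .it_value keeps
 * the expirations on the original 10 ms grid; if the callout falls more
 * than a full period behind, the missed expirations are counted as
 * overruns in realtimerexpire() below.
 */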

/* Allocate a POSIX realtime timer. */
int
sys_timer_create(struct lwp *l, void *v, register_t *retval)
{
	struct sys_timer_create_args /* {
		syscallarg(clockid_t) clock_id;
		syscallarg(struct sigevent *) evp;
		syscallarg(timer_t *) timerid;
	} */ *uap = v;
	struct proc *p = l->l_proc;
	clockid_t id;
	struct sigevent *evp;
	struct ptimer *pt;
	timer_t timerid;
	int error;

	id = SCARG(uap, clock_id);
	if (id < CLOCK_REALTIME ||
	    id > CLOCK_PROF)
		return (EINVAL);

	if (p->p_timers == NULL)
		timers_alloc(p);

	/* Find a free timer slot, skipping those reserved for setitimer(). */
	for (timerid = 3; timerid < TIMER_MAX; timerid++)
		if (p->p_timers->pts_timers[timerid] == NULL)
			break;

	if (timerid == TIMER_MAX)
		return EAGAIN;

	pt = pool_get(&ptimer_pool, PR_WAITOK);
	evp = SCARG(uap, evp);
	if (evp) {
		if (((error =
		    copyin(evp, &pt->pt_ev, sizeof (pt->pt_ev))) != 0) ||
		    ((pt->pt_ev.sigev_notify < SIGEV_NONE) ||
			(pt->pt_ev.sigev_notify > SIGEV_SA))) {
			pool_put(&ptimer_pool, pt);
			return (error ? error : EINVAL);
		}
	} else {
		pt->pt_ev.sigev_notify = SIGEV_SIGNAL;
		switch (id) {
		case CLOCK_REALTIME:
			pt->pt_ev.sigev_signo = SIGALRM;
			break;
		case CLOCK_VIRTUAL:
			pt->pt_ev.sigev_signo = SIGVTALRM;
			break;
		case CLOCK_PROF:
			pt->pt_ev.sigev_signo = SIGPROF;
			break;
		}
		pt->pt_ev.sigev_value.sival_int = timerid;
	}
	pt->pt_info.si_signo = pt->pt_ev.sigev_signo;
	pt->pt_info.si_errno = 0;
	pt->pt_info.si_code = 0;
	pt->pt_info.si_pid = p->p_pid;
	pt->pt_info.si_uid = p->p_cred->p_ruid;
	pt->pt_info.si_sigval = pt->pt_ev.sigev_value;

	pt->pt_type = id;
	pt->pt_proc = p;
	pt->pt_overruns = 0;
	pt->pt_poverruns = 0;
	pt->pt_entry = timerid;
	timerclear(&pt->pt_time.it_value);
	if (id == CLOCK_REALTIME)
		callout_init(&pt->pt_ch);
	else
		pt->pt_active = 0;

	p->p_timers->pts_timers[timerid] = pt;

	return copyout(&timerid, SCARG(uap, timerid), sizeof(timerid));
}
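
/*
 * Sketch of the corresponding userland call (illustrative only, not part
 * of this file): a process asking for SIGALRM delivery would do roughly
 *
 *	struct sigevent ev;
 *	timer_t tid;
 *
 *	ev.sigev_notify = SIGEV_SIGNAL;
 *	ev.sigev_signo = SIGALRM;
 *	ev.sigev_value.sival_int = 0;
 *	timer_create(CLOCK_REALTIME, &ev, &tid);
 *
 * which arrives here with evp non-NULL; passing evp == NULL instead gives
 * the per-clock default signal selected in the switch above.
 */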


/* Delete a POSIX realtime timer */
int
sys_timer_delete(struct lwp *l, void *v, register_t *retval)
{
	struct sys_timer_delete_args /*  {
		syscallarg(timer_t) timerid;
	} */ *uap = v;
	struct proc *p = l->l_proc;
	timer_t timerid;
	struct ptimer *pt, *ptn;
	int s;

	timerid = SCARG(uap, timerid);

	if ((p->p_timers == NULL) ||
	    (timerid < 2) || (timerid >= TIMER_MAX) ||
	    ((pt = p->p_timers->pts_timers[timerid]) == NULL))
		return (EINVAL);

	if (pt->pt_type == CLOCK_REALTIME)
		callout_stop(&pt->pt_ch);
	else if (pt->pt_active) {
		s = splclock();
		ptn = LIST_NEXT(pt, pt_list);
		LIST_REMOVE(pt, pt_list);
		for ( ; ptn; ptn = LIST_NEXT(ptn, pt_list))
			timeradd(&pt->pt_time.it_value, &ptn->pt_time.it_value,
			    &ptn->pt_time.it_value);
		splx(s);
	}

	p->p_timers->pts_timers[timerid] = NULL;
	pool_put(&ptimer_pool, pt);

	return (0);
}

/*
 * Set up the given timer. The value in pt->pt_time.it_value is taken
 * to be an absolute time for CLOCK_REALTIME timers and a relative
 * time for virtual timers.
 * Must be called at splclock().
 */
void
timer_settime(struct ptimer *pt)
{
	struct ptimer *ptn, *pptn;
	struct ptlist *ptl;

	if (pt->pt_type == CLOCK_REALTIME) {
		callout_stop(&pt->pt_ch);
		if (timerisset(&pt->pt_time.it_value)) {
			/*
			 * Don't need to check hzto() return value, here.
			 * callout_reset() does it for us.
			 */
			callout_reset(&pt->pt_ch, hzto(&pt->pt_time.it_value),
			    realtimerexpire, pt);
		}
	} else {
		if (pt->pt_active) {
			ptn = LIST_NEXT(pt, pt_list);
			LIST_REMOVE(pt, pt_list);
			for ( ; ptn; ptn = LIST_NEXT(ptn, pt_list))
				timeradd(&pt->pt_time.it_value,
				    &ptn->pt_time.it_value,
				    &ptn->pt_time.it_value);
		}
		if (timerisset(&pt->pt_time.it_value)) {
			if (pt->pt_type == CLOCK_VIRTUAL)
				ptl = &pt->pt_proc->p_timers->pts_virtual;
			else
				ptl = &pt->pt_proc->p_timers->pts_prof;

			for (ptn = LIST_FIRST(ptl), pptn = NULL;
			     ptn && timercmp(&pt->pt_time.it_value,
				 &ptn->pt_time.it_value, >);
			     pptn = ptn, ptn = LIST_NEXT(ptn, pt_list))
				timersub(&pt->pt_time.it_value,
				    &ptn->pt_time.it_value,
				    &pt->pt_time.it_value);

			if (pptn)
				LIST_INSERT_AFTER(pptn, pt, pt_list);
			else
				LIST_INSERT_HEAD(ptl, pt, pt_list);

			for ( ; ptn ; ptn = LIST_NEXT(ptn, pt_list))
				timersub(&ptn->pt_time.it_value,
				    &pt->pt_time.it_value,
				    &ptn->pt_time.it_value);

			pt->pt_active = 1;
		} else
			pt->pt_active = 0;
	}
}
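
/*
 * Example of the delta-list bookkeeping above: three virtual timers due
 * after 5, 12 and 20 seconds of process virtual time are kept on
 * pts_virtual as the deltas 5, 7 and 8.  Inserting a new timer due at 15
 * seconds walks past the 5 and 7 entries (15 - 5 - 7 = 3), inserts it
 * with value 3, and shrinks the following entry's delta from 8 to 5.
 */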

void
timer_gettime(struct ptimer *pt, struct itimerval *aitv)
{
	struct ptimer *ptn;

	*aitv = pt->pt_time;
	if (pt->pt_type == CLOCK_REALTIME) {
		/*
		 * Convert from absolute to relative time in the .it_value
		 * part of the real time timer.  If the time for the real
		 * time timer has passed, return 0; else return the
		 * difference between the current time and the time the
		 * timer is set to go off.
		 */
		if (timerisset(&aitv->it_value)) {
			if (timercmp(&aitv->it_value, &time, <))
				timerclear(&aitv->it_value);
			else
				timersub(&aitv->it_value, &time,
				    &aitv->it_value);
		}
	} else if (pt->pt_active) {
		if (pt->pt_type == CLOCK_VIRTUAL)
			ptn = LIST_FIRST(&pt->pt_proc->p_timers->pts_virtual);
		else
			ptn = LIST_FIRST(&pt->pt_proc->p_timers->pts_prof);
		for ( ; ptn && ptn != pt; ptn = LIST_NEXT(ptn, pt_list))
			timeradd(&aitv->it_value,
			    &ptn->pt_time.it_value, &aitv->it_value);
		KASSERT(ptn != NULL); /* pt should be findable on the list */
	} else
		timerclear(&aitv->it_value);
}



/* Set and arm a POSIX realtime timer */
int
sys_timer_settime(struct lwp *l, void *v, register_t *retval)
{
	struct sys_timer_settime_args /* {
		syscallarg(timer_t) timerid;
		syscallarg(int) flags;
		syscallarg(const struct itimerspec *) value;
		syscallarg(struct itimerspec *) ovalue;
	} */ *uap = v;
	struct proc *p = l->l_proc;
	int error, s, timerid;
	struct itimerval val, oval;
	struct itimerspec value, ovalue;
	struct ptimer *pt;

	timerid = SCARG(uap, timerid);

	if ((p->p_timers == NULL) ||
	    (timerid < 2) || (timerid >= TIMER_MAX) ||
	    ((pt = p->p_timers->pts_timers[timerid]) == NULL))
		return (EINVAL);

	if ((error = copyin(SCARG(uap, value), &value,
	    sizeof(struct itimerspec))) != 0)
		return (error);

	TIMESPEC_TO_TIMEVAL(&val.it_value, &value.it_value);
	TIMESPEC_TO_TIMEVAL(&val.it_interval, &value.it_interval);
	if (itimerfix(&val.it_value) || itimerfix(&val.it_interval))
		return (EINVAL);

	oval = pt->pt_time;
	pt->pt_time = val;

	s = splclock();
	/*
	 * If we've been passed a relative time for a realtime timer,
	 * convert it to absolute; if an absolute time for a virtual
	 * timer, convert it to relative and make sure we don't set it
	 * to zero, which would cancel the timer, or let it go
	 * negative, which would confuse the comparison tests.
	 */
	if (timerisset(&pt->pt_time.it_value)) {
		if (pt->pt_type == CLOCK_REALTIME) {
			if ((SCARG(uap, flags) & TIMER_ABSTIME) == 0)
				timeradd(&pt->pt_time.it_value, &time,
				    &pt->pt_time.it_value);
		} else {
			if ((SCARG(uap, flags) & TIMER_ABSTIME) != 0) {
				timersub(&pt->pt_time.it_value, &time,
				    &pt->pt_time.it_value);
				if (!timerisset(&pt->pt_time.it_value) ||
				    pt->pt_time.it_value.tv_sec < 0) {
					pt->pt_time.it_value.tv_sec = 0;
					pt->pt_time.it_value.tv_usec = 1;
				}
			}
		}
	}

	timer_settime(pt);
	splx(s);

	if (SCARG(uap, ovalue)) {
		TIMEVAL_TO_TIMESPEC(&oval.it_value, &ovalue.it_value);
		TIMEVAL_TO_TIMESPEC(&oval.it_interval, &ovalue.it_interval);
		return copyout(&ovalue, SCARG(uap, ovalue),
		    sizeof(struct itimerspec));
	}

	return (0);
}
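
/*
 * Example: arming a CLOCK_REALTIME timer with it_value = { 2, 0 } and no
 * TIMER_ABSTIME flag fires 2 seconds from now; with TIMER_ABSTIME the
 * same it_value names an absolute time and is used unchanged.  For a
 * virtual timer the conversion goes the other way, and an absolute time
 * already in the past is clamped to 1 microsecond so the timer still
 * fires instead of being silently cancelled.
 */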

/* Return the time remaining until a POSIX timer fires. */
int
sys_timer_gettime(struct lwp *l, void *v, register_t *retval)
{
	struct sys_timer_gettime_args /* {
		syscallarg(timer_t) timerid;
		syscallarg(struct itimerspec *) value;
	} */ *uap = v;
	struct itimerval aitv;
	struct itimerspec its;
	struct proc *p = l->l_proc;
	int s, timerid;
	struct ptimer *pt;

	timerid = SCARG(uap, timerid);

	if ((p->p_timers == NULL) ||
	    (timerid < 2) || (timerid >= TIMER_MAX) ||
	    ((pt = p->p_timers->pts_timers[timerid]) == NULL))
		return (EINVAL);

	s = splclock();
	timer_gettime(pt, &aitv);
	splx(s);

	TIMEVAL_TO_TIMESPEC(&aitv.it_interval, &its.it_interval);
	TIMEVAL_TO_TIMESPEC(&aitv.it_value, &its.it_value);

	return copyout(&its, SCARG(uap, value), sizeof(its));
}

/*
 * Return the number of times a periodic timer expired while a
 * notification was already pending.  The counter is reset when a
 * timer expires and a notification can be posted.
 */
int
sys_timer_getoverrun(struct lwp *l, void *v, register_t *retval)
{
	struct sys_timer_getoverrun_args /* {
		syscallarg(timer_t) timerid;
	} */ *uap = v;
	struct proc *p = l->l_proc;
	int timerid;
	struct ptimer *pt;

	timerid = SCARG(uap, timerid);

	if ((p->p_timers == NULL) ||
	    (timerid < 2) || (timerid >= TIMER_MAX) ||
	    ((pt = p->p_timers->pts_timers[timerid]) == NULL))
		return (EINVAL);

	*retval = pt->pt_poverruns;

	return (0);
}

/* Glue function that triggers an upcall; called from userret(). */
static void
timerupcall(struct lwp *l, void *arg)
{
	struct ptimers *pt = (struct ptimers *)arg;
	unsigned int i, fired, done;
	KERNEL_PROC_LOCK(l);

	{
		struct proc	*p = l->l_proc;
		struct sadata *sa = p->p_sa;

		/* Bail out if we do not own the virtual processor */
		if (sa->sa_vp != l) {
			KERNEL_PROC_UNLOCK(l);
			return;
		}
	}

	fired = pt->pts_fired;
	done = 0;
	while ((i = ffs(fired)) != 0) {
		i--;
		if (sa_upcall(l, SA_UPCALL_SIGEV | SA_UPCALL_DEFER, NULL, l,
		    sizeof(siginfo_t), &pt->pts_timers[i]->pt_info) == 0)
			done |= 1 << i;
		fired &= ~(1 << i);
	}
	pt->pts_fired &= ~done;
	if (pt->pts_fired == 0)
		l->l_proc->p_userret = NULL;

	KERNEL_PROC_UNLOCK(l);
}


/*
 * Real interval timer expired:
 * send the process whose timer expired an alarm signal.
 * If the timer is not set up to reload, just return.
 * Else compute the next time the timer should go off, which must be
 * later than the current time.
 * This is where delay in processing this timeout causes multiple
 * SIGALRM calls to be compressed into one.
 */
void
realtimerexpire(void *arg)
{
	struct ptimer *pt;
	int s;

	pt = (struct ptimer *)arg;

	itimerfire(pt);

	if (!timerisset(&pt->pt_time.it_interval)) {
		timerclear(&pt->pt_time.it_value);
		return;
	}
	for (;;) {
		s = splclock();
		timeradd(&pt->pt_time.it_value,
		    &pt->pt_time.it_interval, &pt->pt_time.it_value);
		if (timercmp(&pt->pt_time.it_value, &time, >)) {
			/*
			 * Don't need to check hzto() return value, here.
			 * callout_reset() does it for us.
			 */
			callout_reset(&pt->pt_ch, hzto(&pt->pt_time.it_value),
			    realtimerexpire, pt);
			splx(s);
			return;
		}
		splx(s);
		pt->pt_overruns++;
	}
}
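
/*
 * Example: a timer with a 10 ms interval whose callout runs 35 ms late
 * goes around the loop above three times (each pass adds 10 ms and finds
 * the result still in the past), records 3 overruns, and then schedules
 * the next expiration at the first multiple of the interval that lies in
 * the future.
 */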

/* BSD routine to get the value of an interval timer. */
/* ARGSUSED */
int
sys_getitimer(struct lwp *l, void *v, register_t *retval)
{
	struct sys_getitimer_args /* {
		syscallarg(int) which;
		syscallarg(struct itimerval *) itv;
	} */ *uap = v;
	struct proc *p = l->l_proc;
	struct itimerval aitv;
	int s, which;

	which = SCARG(uap, which);

	if ((u_int)which > ITIMER_PROF)
		return (EINVAL);

	if ((p->p_timers == NULL) || (p->p_timers->pts_timers[which] == NULL)){
		timerclear(&aitv.it_value);
		timerclear(&aitv.it_interval);
	} else {
		s = splclock();
		timer_gettime(p->p_timers->pts_timers[which], &aitv);
		splx(s);
	}

	return (copyout(&aitv, SCARG(uap, itv), sizeof(struct itimerval)));

}

/* BSD routine to set/arm an interval timer. */
/* ARGSUSED */
int
sys_setitimer(struct lwp *l, void *v, register_t *retval)
{
	struct sys_setitimer_args /* {
		syscallarg(int) which;
		syscallarg(const struct itimerval *) itv;
		syscallarg(struct itimerval *) oitv;
	} */ *uap = v;
	struct proc *p = l->l_proc;
	int which = SCARG(uap, which);
	struct sys_getitimer_args getargs;
	struct itimerval aitv;
	const struct itimerval *itvp;
	struct ptimer *pt;
	int s, error;

	if ((u_int)which > ITIMER_PROF)
		return (EINVAL);
	itvp = SCARG(uap, itv);
	if (itvp &&
	    (error = copyin(itvp, &aitv, sizeof(struct itimerval))) != 0)
		return (error);
	if (SCARG(uap, oitv) != NULL) {
		SCARG(&getargs, which) = which;
		SCARG(&getargs, itv) = SCARG(uap, oitv);
		if ((error = sys_getitimer(l, &getargs, retval)) != 0)
			return (error);
	}
	if (itvp == 0)
		return (0);
	if (itimerfix(&aitv.it_value) || itimerfix(&aitv.it_interval))
		return (EINVAL);

	/*
	 * Don't bother allocating data structures if the process just
	 * wants to clear the timer.
	 */
	if (!timerisset(&aitv.it_value) &&
	    ((p->p_timers == NULL) ||(p->p_timers->pts_timers[which] == NULL)))
		return (0);

	if (p->p_timers == NULL)
		timers_alloc(p);
	if (p->p_timers->pts_timers[which] == NULL) {
		pt = pool_get(&ptimer_pool, PR_WAITOK);
		pt->pt_ev.sigev_notify = SIGEV_SIGNAL;
		pt->pt_overruns = 0;
		pt->pt_proc = p;
		pt->pt_type = which;
		pt->pt_entry = which;
		switch (which) {
		case ITIMER_REAL:
			callout_init(&pt->pt_ch);
			pt->pt_ev.sigev_signo = SIGALRM;
			break;
		case ITIMER_VIRTUAL:
			pt->pt_active = 0;
			pt->pt_ev.sigev_signo = SIGVTALRM;
			break;
		case ITIMER_PROF:
			pt->pt_active = 0;
			pt->pt_ev.sigev_signo = SIGPROF;
			break;
		}
	} else
		pt = p->p_timers->pts_timers[which];

	pt->pt_time = aitv;
	p->p_timers->pts_timers[which] = pt;

	s = splclock();
	if ((which == ITIMER_REAL) && timerisset(&pt->pt_time.it_value)) {
		/* Convert to absolute time */
		timeradd(&pt->pt_time.it_value, &time, &pt->pt_time.it_value);
	}
	timer_settime(pt);
	splx(s);

	return (0);
}

/* Utility routines to manage the array of pointers to timers. */
void
timers_alloc(struct proc *p)
{
	int i;
	struct ptimers *pts;

	pts = malloc(sizeof (struct ptimers), M_SUBPROC, 0);
	LIST_INIT(&pts->pts_virtual);
	LIST_INIT(&pts->pts_prof);
	for (i = 0; i < TIMER_MAX; i++)
		pts->pts_timers[i] = NULL;
	pts->pts_fired = 0;
	p->p_timers = pts;
}

/*
 * Clean up the per-process timers. If "which" is set to TIMERS_ALL,
 * then clean up all timers and free all the data structures. If
 * "which" is set to TIMERS_POSIX, only clean up the timers allocated
 * by timer_create(), not the BSD setitimer() timers, and only free the
 * structure if none of those remain.
 */
void
timers_free(struct proc *p, int which)
{
	int i, s;
	struct ptimers *pts;
	struct ptimer *pt, *ptn;
	struct timeval tv;

	if (p->p_timers) {
		pts = p->p_timers;
		if (which == TIMERS_ALL)
			i = 0;
		else {
			s = splclock();
			timerclear(&tv);
			for (ptn = LIST_FIRST(&p->p_timers->pts_virtual);
			     ptn && ptn != pts->pts_timers[ITIMER_VIRTUAL];
			     ptn = LIST_NEXT(ptn, pt_list))
				timeradd(&tv, &ptn->pt_time.it_value, &tv);
			LIST_FIRST(&p->p_timers->pts_virtual) = NULL;
			if (ptn) {
				timeradd(&tv, &ptn->pt_time.it_value,
				    &ptn->pt_time.it_value);
				LIST_INSERT_HEAD(&p->p_timers->pts_virtual,
				    ptn, pt_list);
			}

			timerclear(&tv);
			for (ptn = LIST_FIRST(&p->p_timers->pts_prof);
			     ptn && ptn != pts->pts_timers[ITIMER_PROF];
			     ptn = LIST_NEXT(ptn, pt_list))
				timeradd(&tv, &ptn->pt_time.it_value, &tv);
			LIST_FIRST(&p->p_timers->pts_prof) = NULL;
			if (ptn) {
				timeradd(&tv, &ptn->pt_time.it_value,
				    &ptn->pt_time.it_value);
				LIST_INSERT_HEAD(&p->p_timers->pts_prof, ptn,
				    pt_list);
			}
			splx(s);
			i = 3;
		}
		for ( ; i < TIMER_MAX; i++)
			if ((pt = pts->pts_timers[i]) != NULL) {
				if (pt->pt_type == CLOCK_REALTIME)
					callout_stop(&pt->pt_ch);
				pts->pts_timers[i] = NULL;
				pool_put(&ptimer_pool, pt);
			}
		if ((pts->pts_timers[0] == NULL) &&
		    (pts->pts_timers[1] == NULL) &&
		    (pts->pts_timers[2] == NULL)) {
			p->p_timers = NULL;
			free(pts, M_SUBPROC);
		}
	}
}

/*
 * Check that a proposed value to load into the .it_value or
 * .it_interval part of an interval timer is acceptable, and
 * fix it to have at least a minimal value (i.e., if it is less
 * than the resolution of the clock, round it up).
 */
int
itimerfix(struct timeval *tv)
{

	if (tv->tv_sec < 0 || tv->tv_usec < 0 || tv->tv_usec >= 1000000)
		return (EINVAL);
	if (tv->tv_sec == 0 && tv->tv_usec != 0 && tv->tv_usec < tick)
		tv->tv_usec = tick;
	return (0);
}
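
/*
 * Example: assuming hz = 100, tick is 10000 us, so a requested value of
 * { 0, 1 } is rounded up to { 0, 10000 }; a value such as { 0, 1000001 }
 * is rejected with EINVAL because tv_usec must stay below one million.
 */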

/*
 * Decrement an interval timer by a specified number
 * of microseconds, which must be less than a second,
 * i.e. < 1000000.  If the timer expires, then reload
 * it.  In this case, carry over (usec - old value) to
 * reduce the value reloaded into the timer so that
 * the timer does not drift.  This routine assumes
 * that it is called in a context where the timers
 * on which it is operating cannot change in value.
 */
int
itimerdecr(struct ptimer *pt, int usec)
{
	struct itimerval *itp;

	itp = &pt->pt_time;
	if (itp->it_value.tv_usec < usec) {
		if (itp->it_value.tv_sec == 0) {
			/* expired, and already in next interval */
			usec -= itp->it_value.tv_usec;
			goto expire;
		}
		itp->it_value.tv_usec += 1000000;
		itp->it_value.tv_sec--;
	}
	itp->it_value.tv_usec -= usec;
	usec = 0;
	if (timerisset(&itp->it_value))
		return (1);
	/* expired, exactly at end of interval */
expire:
	if (timerisset(&itp->it_interval)) {
		itp->it_value = itp->it_interval;
		itp->it_value.tv_usec -= usec;
		if (itp->it_value.tv_usec < 0) {
			itp->it_value.tv_usec += 1000000;
			itp->it_value.tv_sec--;
		}
		timer_settime(pt);
	} else
		itp->it_value.tv_usec = 0;		/* sec is already 0 */
	return (0);
}
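
/*
 * Example of the carry-over above: suppose usec = 10000 (one tick at an
 * assumed hz = 100), it_value has 3000 us left and it_interval is
 * 50000 us.  The timer expires with usec reduced to 7000, and the reload
 * becomes 50000 - 7000 = 43000 us, so the 7000 us overshoot is absorbed
 * and the long-run period stays at 50000 us.
 */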

void
itimerfire(struct ptimer *pt)
{
	struct proc *p = pt->pt_proc;
#if 0
	int s;
#endif
	if (pt->pt_ev.sigev_notify == SIGEV_SIGNAL) {
		/*
		 * No RT signal infrastructure exists at this time;
		 * just post the signal number and throw away the
		 * value.
		 */
		if (sigismember(&p->p_sigctx.ps_siglist, pt->pt_ev.sigev_signo))
			pt->pt_overruns++;
		else {
			pt->pt_poverruns = pt->pt_overruns;
			pt->pt_overruns = 0;
			psignal(p, pt->pt_ev.sigev_signo);
		}
	} else if (pt->pt_ev.sigev_notify == SIGEV_SA && (p->p_flag & P_SA)) {
		/* Cause the process to generate an upcall when it returns. */
		struct sadata *sa = p->p_sa;
		unsigned int i;

		if (p->p_userret == NULL) {
			/*
			 * XXX stop signals can be processed inside tsleep,
			 * which can be inside sa_yield's inner loop, which
			 * makes testing for sa_idle alone insufficient to
			 * determine if we really should call setrunnable.
			 */
#if 0

		        if ((sa->sa_idle) && (p->p_stat != SSTOP)) {
				SCHED_LOCK(s);
				setrunnable(sa->sa_idle);
				SCHED_UNLOCK(s);
			}
#endif
			pt->pt_poverruns = pt->pt_overruns;
			pt->pt_overruns = 0;
			i = 1 << pt->pt_entry;
			p->p_timers->pts_fired = i;
			p->p_userret = timerupcall;
			p->p_userret_arg = p->p_timers;

			if (sa->sa_idle)
				wakeup(sa->sa_idle);

		} else if (p->p_userret == timerupcall) {
			i = 1 << pt->pt_entry;
			if ((p->p_timers->pts_fired & i) == 0) {
				pt->pt_poverruns = pt->pt_overruns;
				pt->pt_overruns = 0;
				p->p_timers->pts_fired |= i;
			} else
				pt->pt_overruns++;
		} else {
			pt->pt_overruns++;
			printf("itimerfire(%d): overrun %d on timer %x (userret is %p)\n",
			    p->p_pid, pt->pt_overruns,
			    pt->pt_ev.sigev_value.sival_int,
			    p->p_userret);
		}
	}

}

/*
 * ratecheck(): simple time-based rate-limit checking.  see ratecheck(9)
 * for usage and rationale.
 */
int
ratecheck(struct timeval *lasttime, const struct timeval *mininterval)
{
	struct timeval tv, delta;
	int s, rv = 0;

	s = splclock();
	tv = mono_time;
	splx(s);

	timersub(&tv, lasttime, &delta);

	/*
	 * The check for 0,0 is so that the message will be seen at least
	 * once, even if the interval is huge.
	 */
	if (timercmp(&delta, mininterval, >=) ||
	    (lasttime->tv_sec == 0 && lasttime->tv_usec == 0)) {
		*lasttime = tv;
		rv = 1;
	}

	return (rv);
}
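
/*
 * Typical usage, as a sketch in the spirit of ratecheck(9):
 *
 *	static struct timeval lasttime;
 *	static const struct timeval interval = { 5, 0 };
 *
 *	if (ratecheck(&lasttime, &interval))
 *		log(LOG_NOTICE, "at most one of these every 5 seconds\n");
 *
 * The caller owns lasttime; ratecheck() updates it only when it returns 1.
 */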

/*
 * ppsratecheck(): packets (or events) per second limitation.
 */
int
ppsratecheck(struct timeval *lasttime, int *curpps, int maxpps)
{
	struct timeval tv, delta;
	int s, rv;

	s = splclock();
	tv = mono_time;
	splx(s);

	timersub(&tv, lasttime, &delta);

	/*
	 * The check for 0,0 is so that the message will be seen at least
	 * once.  If more than one second has passed since the last update
	 * of lasttime, reset the counter.
	 *
	 * We increment *curpps even in the *curpps < maxpps case, as some
	 * callers may want to use *curpps for statistics as well.
	 */
	if ((lasttime->tv_sec == 0 && lasttime->tv_usec == 0) ||
	    delta.tv_sec >= 1) {
		*lasttime = tv;
		*curpps = 0;
	}
	if (maxpps < 0)
		rv = 1;
	else if (*curpps < maxpps)
		rv = 1;
	else
		rv = 0;

#if 1 /*DIAGNOSTIC?*/
	/* be careful about wrap-around */
	if (*curpps + 1 > *curpps)
		*curpps = *curpps + 1;
#else
	/*
	 * Assume that there are not too many calls to this function.
	 * It is not clear that the assumption holds, as it depends on the
	 * *caller's* behavior, not the behavior of this function.
	 * Since it is wrong to make assumptions about the caller's
	 * behavior, the above #if is #if 1, not #ifdef DIAGNOSTIC.
	 */
	*curpps = *curpps + 1;
#endif

	return (rv);
}
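
/*
 * Typical usage, as a sketch in the spirit of ppsratecheck(9), to emit at
 * most one diagnostic per second while still counting every event:
 *
 *	static struct timeval lasttime;
 *	static int curpps;
 *
 *	if (ppsratecheck(&lasttime, &curpps, 1))
 *		printf("rate-limited message\n");
 *
 * A negative maxpps makes the check always pass; maxpps == 0 suppresses
 * the message entirely while curpps keeps counting.
 */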