/*
 * Copyright (c) 1995-1998 John Birrell <jb@cimlogic.com.au>
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. All advertising materials mentioning features or use of this software
 *    must display the following acknowledgement:
 *	This product includes software developed by John Birrell.
 * 4. Neither the name of the author nor the names of any co-contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY JOHN BIRRELL AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 * $FreeBSD: head/lib/libkse/thread/thr_kern.c 107201 2002-11-24 06:43:21Z mini $
 *
 */
#include <errno.h>
#include <limits.h>		/* UINT_MAX, for tick-counter rollover */
#include <poll.h>
#include <stdlib.h>
#include <stdarg.h>
#include <string.h>
#include <unistd.h>
#include <sys/param.h>
#include <sys/types.h>
#include <sys/signalvar.h>
#include <sys/stat.h>
#include <sys/time.h>
#include <sys/socket.h>
#include <sys/uio.h>
#include <sys/syscall.h>
#include <fcntl.h>
#include <pthread.h>
#include "thr_private.h"

/* #define DEBUG_THREAD_KERN */
#ifdef DEBUG_THREAD_KERN
#define DBG_MSG		stdout_debug
#else
#define DBG_MSG(x...)
#endif

static int _kern_idle_running = 0;
static struct timeval _kern_idle_timeout;

/* Static function prototype definitions: */
static inline void
thread_run_switch_hook(pthread_t thread_out, pthread_t thread_in);

/* Static variables: */
static int	last_tick = 0;

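/*
 * Give up the processor from a user thread.  Enters a critical
 * section by clearing km_curthread, charges the thread for the CPU
 * time it has used, requeues it according to its state, and upcalls
 * into the user thread scheduler via _thread_enter_uts().  Control
 * returns here when the scheduler later switches the thread back in.
 */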
void
_thread_kern_sched(void)
{
	struct timespec	ts;
	struct timeval	tv;
	struct pthread	*curthread = _get_curthread();
	unsigned int	current_tick;

	/* Get the current time of day. */
	GET_CURRENT_TOD(tv);
	TIMEVAL_TO_TIMESPEC(&tv, &ts);
	current_tick = _sched_ticks;

	/*
	 * Enter a critical section.
	 */
	_thread_kern_kse_mailbox.km_curthread = NULL;

	/*
	 * If this thread is becoming inactive, make note of the
	 * time.
	 */
	if (curthread->state != PS_RUNNING) {
		/*
		 * Save the current time as the time that the
		 * thread became inactive:
		 */
		curthread->last_inactive = (long)current_tick;
		if (curthread->last_inactive <
		    curthread->last_active) {
			/* Account for a rollover: */
			curthread->last_inactive +=
			    (long)UINT_MAX + 1;
		}
	}

	/*
	 * Place this thread into the appropriate queue(s).
	 */
	switch (curthread->state) {
	case PS_DEAD:
	case PS_STATE_MAX: /* XXX: silences -Wall */
	case PS_SUSPENDED:
		/* Dead or suspended threads are not placed in any queue. */
		break;
	case PS_RUNNING:
		/*
		 * Save the current time as the time that the
		 * thread became inactive:
		 */
		current_tick = _sched_ticks;
		curthread->last_inactive = (long)current_tick;
		if (curthread->last_inactive <
		    curthread->last_active) {
			/* Account for a rollover: */
			curthread->last_inactive += (long)UINT_MAX + 1;
		}

		if ((curthread->slice_usec != -1) &&
		   (curthread->attr.sched_policy != SCHED_FIFO)) {
			/*
			 * Accumulate the number of microseconds for
			 * which the current thread has run:
			 */
			curthread->slice_usec +=
			    (curthread->last_inactive -
			    curthread->last_active) *
			    (long)_clock_res_usec;
			/* Check for time quantum exceeded: */
			if (curthread->slice_usec > TIMESLICE_USEC)
				curthread->slice_usec = -1;
		}

		if (curthread->slice_usec == -1) {
			/*
			 * The thread exceeded its time
			 * quantum or it yielded the CPU;
			 * place it at the tail of the
			 * queue for its priority.
			 */
			PTHREAD_PRIOQ_INSERT_TAIL(curthread);
		} else {
			/*
			 * The thread hasn't exceeded its
			 * interval.  Place it at the head
			 * of the queue for its priority.
			 */
			PTHREAD_PRIOQ_INSERT_HEAD(curthread);
		}
		break;
	case PS_SPINBLOCK:
		/* Increment spinblock count. */
		_spinblock_count++;
		/*FALLTHROUGH*/
	case PS_DEADLOCK:
	case PS_JOIN:
	case PS_MUTEX_WAIT:
	case PS_WAIT_WAIT:
		/* No timeouts for these states. */
		curthread->wakeup_time.tv_sec = -1;
		curthread->wakeup_time.tv_nsec = -1;

		/* Restart the time slice. */
		curthread->slice_usec = -1;

		/* Insert into the waiting queue. */
		PTHREAD_WAITQ_INSERT(curthread);
		break;

	case PS_COND_WAIT:
	case PS_SLEEP_WAIT:
		/* These states can timeout. */
		/* Restart the time slice. */
		curthread->slice_usec = -1;

		/* Insert into the waiting queue. */
		PTHREAD_WAITQ_INSERT(curthread);
		break;
	}

	/* Switch into the scheduler's context. */
	DBG_MSG("Calling _thread_enter_uts()\n");
	_thread_enter_uts(&curthread->mailbox, &_thread_kern_kse_mailbox);
	DBG_MSG("Returned from _thread_enter_uts, thread %p\n", curthread);

	/*
	 * This point is reached when _thread_switch() is called
	 * to restore the state of a thread.
	 *
	 * This is the normal way out of the scheduler (for synchronous
	 * switches).
	 */

	/* XXXKSE: Do this inside _thread_kern_scheduler() */
	if (curthread->sig_defer_count == 0) {
		if (((curthread->cancelflags &
		    PTHREAD_AT_CANCEL_POINT) == 0) &&
		    ((curthread->cancelflags &
		    PTHREAD_CANCEL_ASYNCHRONOUS) != 0))
			/*
			 * Stick a cancellation point at the
			 * start of each async-cancellable
			 * thread's resumption.
			 *
			 * We allow threads woken at cancel
			 * points to do their own checks.
			 */
			pthread_testcancel();
	}

	if (_sched_switch_hook != NULL) {
		/* Run the installed switch hook: */
		thread_run_switch_hook(_last_user_thread, curthread);
	}
}

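/*
 * The user thread scheduler (UTS) main loop, run on the KSE's own
 * context via upcalls from the kernel.  Each pass picks up threads
 * whose kernel traps have completed, releases spinblocked threads
 * whose locks have become available, wakes threads whose timeouts
 * have expired, and switches in the highest priority runnable
 * thread.  When nothing is runnable it either spins, releases the
 * KSE, or switches to the idle thread until the nearest wakeup time.
 */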
void
_thread_kern_scheduler(struct kse_mailbox *km)
{
	struct timespec	ts;
	pthread_t	pthread, pthread_h;
	unsigned int	current_tick;
	struct kse_thr_mailbox	*tm, *p;

	DBG_MSG("entering\n");
	while (!TAILQ_EMPTY(&_thread_list)) {

		/* Get the current time of day. */
		ts = km->km_timeofday;
		TIMESPEC_TO_TIMEVAL(&_sched_tod, &ts);
		current_tick = _sched_ticks;

		/*
		 * Pick up threads that had blocked in the kernel and
		 * have now completed their trap (syscall, vm fault, etc).
		 * These threads were PS_RUNNING (and still are), but they
		 * need to be added to the run queue so that they can be
		 * scheduled again.
		 */
		DBG_MSG("Picking up km_completed\n");
		p = km->km_completed;
		km->km_completed = NULL;	/* XXX: Atomic xchg here. */
		while ((tm = p) != NULL) {
			p = tm->tm_next;
			tm->tm_next = NULL;
			if (tm->tm_udata == NULL) {
				DBG_MSG("\tidle context\n");
				_kern_idle_running = 0;
				continue;
			}
			DBG_MSG("\tmailbox=%p pthread=%p\n", tm, tm->tm_udata);
			PTHREAD_PRIOQ_INSERT_TAIL((pthread_t)tm->tm_udata);
		}

		/* Deliver posted signals. */
		/* XXX: Not yet. */
		DBG_MSG("Picking up signals\n");

		if (_spinblock_count != 0) {
			/*
			 * Enter a loop to look for threads waiting on
			 * a spinlock that is now available.
			 */
			PTHREAD_WAITQ_SETACTIVE();
			TAILQ_FOREACH(pthread, &_workq, qe) {
				if (pthread->state == PS_SPINBLOCK) {
					/*
					 * If the lock is available, let the
					 * thread run.
					 */
					if (pthread->data.spinlock->
					    access_lock == 0) {
						PTHREAD_WAITQ_CLEARACTIVE();
						PTHREAD_WORKQ_REMOVE(pthread);
						PTHREAD_NEW_STATE(pthread,
						    PS_RUNNING);
						PTHREAD_WAITQ_SETACTIVE();

						/*
						 * One less thread in a
						 * spinblock state:
						 */
						_spinblock_count--;
					}
				}
			}
			PTHREAD_WAITQ_CLEARACTIVE();
		}

		/* Wake up threads that have timed out.  */
		DBG_MSG("setactive\n");
		PTHREAD_WAITQ_SETACTIVE();
		DBG_MSG("Picking up timeouts (%p)\n", TAILQ_FIRST(&_waitingq));
		while (((pthread = TAILQ_FIRST(&_waitingq)) != NULL) &&
		    (pthread->wakeup_time.tv_sec != -1) &&
		    (((pthread->wakeup_time.tv_sec == 0) &&
		    (pthread->wakeup_time.tv_nsec == 0)) ||
		    (pthread->wakeup_time.tv_sec < ts.tv_sec) ||
		    ((pthread->wakeup_time.tv_sec == ts.tv_sec) &&
		    (pthread->wakeup_time.tv_nsec <= ts.tv_nsec)))) {
			DBG_MSG("\t...\n");
			/*
			 * Remove this thread from the waiting queue
			 * (and work queue if necessary) and place it
			 * in the ready queue.
			 */
			PTHREAD_WAITQ_CLEARACTIVE();
			if (pthread->flags & PTHREAD_FLAGS_IN_WORKQ)
				PTHREAD_WORKQ_REMOVE(pthread);
			DBG_MSG("\twaking thread\n");
			PTHREAD_NEW_STATE(pthread, PS_RUNNING);
			PTHREAD_WAITQ_SETACTIVE();
			/*
			 * Flag the timeout in the thread structure:
			 */
			pthread->timeout = 1;
		}
		DBG_MSG("clearactive\n");
		PTHREAD_WAITQ_CLEARACTIVE();

		/*
		 * Get the highest priority thread in the ready queue.
		 */
		DBG_MSG("Selecting thread\n");
		pthread_h = PTHREAD_PRIOQ_FIRST();

		/* Check if there is a thread ready to run: */
		if (pthread_h) {
			DBG_MSG("Scheduling thread\n");
			/* Remove the thread from the ready queue: */
			PTHREAD_PRIOQ_REMOVE(pthread_h);

			/* Make the selected thread the current thread: */
			_set_curthread(pthread_h);

			/*
			 * Save the current time as the time that the thread
			 * became active:
			 */
			current_tick = _sched_ticks;
			pthread_h->last_active = (long) current_tick;

			/*
			 * Check if this thread is running for the first time
			 * or running again after using its full time slice
			 * allocation:
			 */
			if (pthread_h->slice_usec == -1) {
				/* Reset the accumulated time slice period: */
				pthread_h->slice_usec = 0;
			}

			/*
			 * If we had a context switch, run any
			 * installed switch hooks.
			 */
			if ((_sched_switch_hook != NULL) &&
			    (_last_user_thread != pthread_h)) {
				thread_run_switch_hook(_last_user_thread,
				    pthread_h);
			}
			/*
			 * Continue the thread at its current frame:
			 */
			_last_user_thread = pthread_h;
			DBG_MSG("switch in\n");
			_thread_switch(&pthread_h->mailbox,
			    &_thread_kern_kse_mailbox.km_curthread);
			DBG_MSG("switch out\n");
		} else {
			/*
			 * There is nothing for us to do. Either
			 * yield, or idle until something wakes up.
			 */
			DBG_MSG("No runnable threads, idling.\n");
			if (_kern_idle_running) {
				DBG_MSG("kse_release\n");
				kse_release();
			}
			_kern_idle_running = 1;
			if ((pthread == NULL) ||
			    (pthread->wakeup_time.tv_sec == -1))
				/*
				 * Nothing is waiting on a timeout, so
				 * idling gains us nothing; spin.
				 */
				continue;
			TIMESPEC_TO_TIMEVAL(&_kern_idle_timeout,
			    &pthread->wakeup_time);
			_thread_switch(&_idle_thr_mailbox,
			    &_thread_kern_kse_mailbox.km_curthread);
		}
		DBG_MSG("looping\n");
	}
	/* There are no threads; exit. */
	DBG_MSG("No threads, exiting.\n");
	exit(0);
}

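/*
 * Place the current thread in the given state and hand off to the
 * scheduler.  The file name and line number of the caller are
 * recorded in the thread structure as a debugging aid.
 */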
void
_thread_kern_sched_state(enum pthread_state state, char *fname, int lineno)
{
	struct pthread	*curthread = _get_curthread();

	/*
	 * Flag the pthread kernel as executing scheduler code
	 * to avoid an upcall from interrupting this execution
	 * and calling the scheduler again.
	 */
	_thread_kern_kse_mailbox.km_curthread = NULL;

	/* Change the state of the current thread: */
	curthread->state = state;
	curthread->fname = fname;
	curthread->lineno = lineno;

	/* Schedule the next thread that is ready: */
	_thread_kern_sched();
}

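/*
 * As above, but also release the given spinlock once scheduler code
 * can no longer be interrupted by an upcall.  This lets a caller
 * block on a condition protected by the lock without racing against
 * a wakeup.
 */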
void
_thread_kern_sched_state_unlock(enum pthread_state state,
    spinlock_t *lock, char *fname, int lineno)
{
	struct pthread	*curthread = _get_curthread();

	/*
	 * Flag the pthread kernel as executing scheduler code
	 * to avoid an upcall from interrupting this execution
	 * and calling the scheduler again.
	 */
	_thread_kern_kse_mailbox.km_curthread = NULL;

	/* Change the state of the current thread: */
	curthread->state = state;
	curthread->fname = fname;
	curthread->lineno = lineno;

	_SPINUNLOCK(lock);

	/* Schedule the next thread that is ready: */
	_thread_kern_sched();
}

/*
 * The idle thread's body: sleep until the next scheduler timeout.
 * The scheduler switches into this context when no threads are
 * runnable but one is waiting on a timeout; blocking in nanosleep
 * generates an upcall back into the scheduler when the sleep ends.
 */
void
_thread_kern_idle(void)
{
	struct timespec ts;
	struct timeval timeout;

	for (;;) {
		timersub(&_kern_idle_timeout, &_sched_tod, &timeout);
		TIMEVAL_TO_TIMESPEC(&timeout, &ts);
		__sys_nanosleep(&ts, NULL);
	}
}

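/*
 * Convert a relative timeout into an absolute wakeup time for the
 * current thread.  A NULL timeout means wait forever (recorded as
 * tv_sec = -1) and a zero timeout means wake immediately.
 */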
void
_thread_kern_set_timeout(const struct timespec * timeout)
{
	struct pthread	*curthread = _get_curthread();
	struct timespec current_time;
	struct timeval  tv;

	/* Reset the timeout flag for the running thread: */
	curthread->timeout = 0;

	/* Check if the thread is to wait forever: */
	if (timeout == NULL) {
		/*
		 * Set the wakeup time to something that can be recognised as
		 * different to an actual time of day:
		 */
		curthread->wakeup_time.tv_sec = -1;
		curthread->wakeup_time.tv_nsec = -1;
	}
	/* Check if no waiting is required: */
	else if (timeout->tv_sec == 0 && timeout->tv_nsec == 0) {
		/* Set the wake up time to 'immediately': */
		curthread->wakeup_time.tv_sec = 0;
		curthread->wakeup_time.tv_nsec = 0;
	} else {
		/* Get the current time: */
		GET_CURRENT_TOD(tv);
		TIMEVAL_TO_TIMESPEC(&tv, &current_time);

		/* Calculate the time for the current thread to wake up: */
		curthread->wakeup_time.tv_sec = current_time.tv_sec + timeout->tv_sec;
		curthread->wakeup_time.tv_nsec = current_time.tv_nsec + timeout->tv_nsec;

		/* Check if the nanosecond field needs to wrap: */
		if (curthread->wakeup_time.tv_nsec >= 1000000000) {
			/* Wrap the nanosecond field: */
			curthread->wakeup_time.tv_sec += 1;
			curthread->wakeup_time.tv_nsec -= 1000000000;
		}
	}
}

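/*
 * Defer signal delivery to the current thread.  Deferral nests; each
 * call must be balanced by a call to _thread_kern_sig_undefer().
 */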
void
_thread_kern_sig_defer(void)
{
	struct pthread	*curthread = _get_curthread();

	/* Allow signal deferral to be recursive. */
	curthread->sig_defer_count++;
}

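/*
 * Undo one level of signal deferral.  Dropping the last level also
 * checks for a pending asynchronous cancellation request.
 */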
void
_thread_kern_sig_undefer(void)
{
	struct pthread	*curthread = _get_curthread();

	/*
	 * Only perform the checks below when this call removes
	 * the last level of deferral.
	 */
	if (curthread->sig_defer_count > 1) {
		/* Decrement the signal deferral count. */
		curthread->sig_defer_count--;
	}
	else if (curthread->sig_defer_count == 1) {
		/* Reenable signals: */
		curthread->sig_defer_count = 0;

		/*
		 * Check for asynchronous cancellation before delivering any
		 * pending signals:
		 */
		if (((curthread->cancelflags & PTHREAD_AT_CANCEL_POINT) == 0) &&
		    ((curthread->cancelflags & PTHREAD_CANCEL_ASYNCHRONOUS) != 0))
			pthread_testcancel();
	}
}

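/*
 * Run the user-installed context switch hook, hiding library-private
 * threads from it by substituting NULL.
 */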
static inline void
thread_run_switch_hook(pthread_t thread_out, pthread_t thread_in)
{
	pthread_t tid_out = thread_out;
	pthread_t tid_in = thread_in;

	if ((tid_out != NULL) &&
	    (tid_out->flags & PTHREAD_FLAGS_PRIVATE) != 0)
		tid_out = NULL;
	if ((tid_in != NULL) &&
	    (tid_in->flags & PTHREAD_FLAGS_PRIVATE) != 0)
		tid_in = NULL;

	if ((_sched_switch_hook != NULL) && (tid_out != tid_in)) {
		/* Run the scheduler switch hook: */
		_sched_switch_hook(tid_out, tid_in);
	}
}

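/*
 * Return the currently running thread, initializing the library on
 * first use.
 */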
struct pthread *
_get_curthread(void)
{
	if (_thread_initial == NULL)
		_thread_init();

	return (_thread_run);
}

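/* Set the currently running thread. */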
void
_set_curthread(struct pthread *newthread)
{
	_thread_run = newthread;
}