thr_kern.c revision 71581
1/*
2 * Copyright (c) 1995-1998 John Birrell <jb@cimlogic.com.au>
3 * All rights reserved.
4 *
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions
7 * are met:
8 * 1. Redistributions of source code must retain the above copyright
9 *    notice, this list of conditions and the following disclaimer.
10 * 2. Redistributions in binary form must reproduce the above copyright
11 *    notice, this list of conditions and the following disclaimer in the
12 *    documentation and/or other materials provided with the distribution.
13 * 3. All advertising materials mentioning features or use of this software
14 *    must display the following acknowledgement:
15 *	This product includes software developed by John Birrell.
16 * 4. Neither the name of the author nor the names of any co-contributors
17 *    may be used to endorse or promote products derived from this software
18 *    without specific prior written permission.
19 *
20 * THIS SOFTWARE IS PROVIDED BY JOHN BIRRELL AND CONTRIBUTORS ``AS IS'' AND
21 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
22 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
23 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
24 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
25 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
26 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
27 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
28 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
29 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
30 * SUCH DAMAGE.
31 *
32 * $FreeBSD: head/lib/libkse/thread/thr_kern.c 71581 2001-01-24 13:03:38Z deischen $
33 *
34 */
35#include <errno.h>
36#include <poll.h>
37#include <stdlib.h>
38#include <stdarg.h>
39#include <string.h>
40#include <unistd.h>
41#include <setjmp.h>
42#include <sys/param.h>
43#include <sys/types.h>
44#include <sys/signalvar.h>
45#include <sys/stat.h>
46#include <sys/time.h>
47#include <sys/socket.h>
48#include <sys/uio.h>
49#include <sys/syscall.h>
50#include <fcntl.h>
51#include <pthread.h>
52#include "pthread_private.h"
53
54/* #define DEBUG_THREAD_KERN */
55#ifdef DEBUG_THREAD_KERN
56#define DBG_MSG		stdout_debug
57#else
58#define DBG_MSG(x...)
59#endif
60
61/* Static function prototype definitions: */
62static void
63thread_kern_poll(int wait_reqd);
64
65static void
66dequeue_signals(void);
67
68static inline void
69thread_run_switch_hook(pthread_t thread_out, pthread_t thread_in);
70
71/* Static variables: */
72static int	last_tick = 0;
73static int	called_from_handler = 0;
74
75/*
76 * This is called when a signal handler finishes and wants to
77 * return to a previous frame.
78 */
79void
80_thread_kern_sched_frame(struct pthread_signal_frame *psf)
81{
82	struct pthread	*curthread = _get_curthread();
83
84	/*
85	 * Flag the pthread kernel as executing scheduler code
86	 * to prevent a signal from interrupting this execution and
87	 * corrupting the (soon-to-be) current frame.
88	 */
89	_thread_kern_in_sched = 1;
90
91	/* Restore the signal frame: */
92	_thread_sigframe_restore(curthread, psf);
93
94	/* The signal mask was restored; check for any pending signals: */
95	curthread->check_pending = 1;
96
97	/* Switch to the thread scheduler: */
98	___longjmp(_thread_kern_sched_jb, 1);
99}
100
101
102void
103_thread_kern_sched(ucontext_t *scp)
104{
105	struct pthread	*curthread = _get_curthread();
106
107	/*
108	 * Flag the pthread kernel as executing scheduler code
109	 * to prevent a scheduler signal from interrupting this
110	 * execution and calling the scheduler again.
111	 */
112	_thread_kern_in_sched = 1;
113
114	/* Check if this function was called from the signal handler: */
115	if (scp != NULL) {
116		called_from_handler = 1;
117		DBG_MSG("Entering scheduler due to signal\n");
118	} else {
119		/* Save the state of the current thread: */
120		if (_setjmp(curthread->ctx.jb) == 0) {
121			/* Flag the jump buffer was the last state saved: */
122			curthread->ctxtype = CTX_JB_NOSIG;
123			curthread->longjmp_val = 1;
124		} else {
125			DBG_MSG("Returned from ___longjmp, thread %p\n",
126			    curthread);
127			/*
128			 * This point is reached when a longjmp() is called
129			 * to restore the state of a thread.
130			 *
131			 * This is the normal way out of the scheduler.
132			 */
133			_thread_kern_in_sched = 0;
134
135			if (curthread->sig_defer_count == 0) {
136				if (((curthread->cancelflags &
137				    PTHREAD_AT_CANCEL_POINT) == 0) &&
138				    ((curthread->cancelflags &
139				    PTHREAD_CANCEL_ASYNCHRONOUS) != 0))
140					/*
141					 * Cancellations override signals.
142					 *
143					 * Stick a cancellation point at the
144					 * start of each async-cancellable
145					 * thread's resumption.
146					 *
147					 * We allow threads woken at cancel
148					 * points to do their own checks.
149					 */
150					pthread_testcancel();
151			}
152
153			if (_sched_switch_hook != NULL) {
154				/* Run the installed switch hook: */
155				thread_run_switch_hook(_last_user_thread,
156				    curthread);
157			}
158			return;
159		}
160	}
161	/* Switch to the thread scheduler: */
162	___longjmp(_thread_kern_sched_jb, 1);
163}
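
/*
 * Editor's note on the context convention used above: when a thread is
 * saved with _setjmp() its context is tagged CTX_JB_NOSIG and
 * longjmp_val is set to 1; the scheduler later resumes it with
 * ___longjmp(ctx.jb, longjmp_val), so the _setjmp() return value
 * distinguishes "state just saved" (0) from "state restored" (non-zero).
 */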
164
165void
166_thread_kern_sched_sig(void)
167{
168	struct pthread	*curthread = _get_curthread();
169
170	curthread->check_pending = 1;
171	_thread_kern_sched(NULL);
172}
173
174
175void
176_thread_kern_scheduler(void)
177{
178	struct timespec	ts;
179	struct timeval	tv;
180	struct pthread	*curthread = _get_curthread();
181	pthread_t	pthread, pthread_h;
182	unsigned int	current_tick;
183	int		add_to_prioq;
184
185	/* If the currently running thread is a user thread, save it: */
186	if ((curthread->flags & PTHREAD_FLAGS_PRIVATE) == 0)
187		_last_user_thread = curthread;
188
189	if (called_from_handler != 0) {
190		called_from_handler = 0;
191
192		/*
193		 * The signal handler should have saved the state of
194		 * the current thread.  Restore the process signal
195		 * mask.
196		 */
197		if (__sys_sigprocmask(SIG_SETMASK,
198		    &_process_sigmask, NULL) != 0)
199			PANIC("Unable to restore process mask after signal");
200
201		/*
202		 * Since the signal handler didn't return normally, we
203		 * have to tell the kernel to reuse the signal stack.
204		 */
205		if (__sys_sigaltstack(&_thread_sigstack, NULL) != 0)
206			PANIC("Unable to restore alternate signal stack");
207	}
208
209	/* Are there pending signals for this thread? */
210	if (curthread->check_pending != 0) {
211		curthread->check_pending = 0;
212		_thread_sig_check_pending(curthread);
213	}
214
215	/*
216	 * Enter a scheduling loop that finds the next thread that is
217	 * ready to run. This loop completes when there are no more threads
218	 * in the global list or when a thread has its state restored by
219	 * either a sigreturn (if the state was saved as a sigcontext) or a
220	 * longjmp (if the state was saved by a setjmp).
221	 */
222	while (!(TAILQ_EMPTY(&_thread_list))) {
223		/* Get the current time of day: */
224		GET_CURRENT_TOD(tv);
225		TIMEVAL_TO_TIMESPEC(&tv, &ts);
226		current_tick = _sched_ticks;
227
228		/*
229		 * Protect the scheduling queues from access by the signal
230		 * handler.
231		 */
232		_queue_signals = 1;
233		add_to_prioq = 0;
234
235		if (curthread != &_thread_kern_thread) {
236			/*
237			 * This thread no longer needs to yield the CPU.
238			 */
239			curthread->yield_on_sig_undefer = 0;
240
241			if (curthread->state != PS_RUNNING) {
242				/*
243				 * Save the current time as the time that the
244				 * thread became inactive:
245				 */
246				curthread->last_inactive = (long)current_tick;
247				if (curthread->last_inactive <
248				    curthread->last_active) {
249					/* Account for a rollover: */
250					curthread->last_inactive +=
251					    UINT_MAX + 1;
252				}
253			}
254
255			/*
256			 * Place the currently running thread into the
257			 * appropriate queue(s).
258			 */
259			switch (curthread->state) {
260			case PS_DEAD:
261			case PS_STATE_MAX: /* to silence -Wall */
262			case PS_SUSPENDED:
263				/*
264				 * Dead and suspended threads are not placed
265				 * in any queue:
266				 */
267				break;
268
269			case PS_RUNNING:
270				/*
271				 * Runnable threads can't be placed in the
272				 * priority queue until after waiting threads
273				 * are polled (to preserve round-robin
274				 * scheduling).
275				 */
276				add_to_prioq = 1;
277				break;
278
279			/*
280			 * States which do not depend on file descriptor I/O
281			 * operations or timeouts:
282			 */
283			case PS_DEADLOCK:
284			case PS_FDLR_WAIT:
285			case PS_FDLW_WAIT:
286			case PS_FILE_WAIT:
287			case PS_JOIN:
288			case PS_MUTEX_WAIT:
289			case PS_SIGSUSPEND:
290			case PS_SIGTHREAD:
291			case PS_SIGWAIT:
292			case PS_WAIT_WAIT:
293				/* No timeouts for these states: */
294				curthread->wakeup_time.tv_sec = -1;
295				curthread->wakeup_time.tv_nsec = -1;
296
297				/* Restart the time slice: */
298				curthread->slice_usec = -1;
299
300				/* Insert into the waiting queue: */
301				PTHREAD_WAITQ_INSERT(curthread);
302				break;
303
304			/* States which can timeout: */
305			case PS_COND_WAIT:
306			case PS_SLEEP_WAIT:
307				/* Restart the time slice: */
308				curthread->slice_usec = -1;
309
310				/* Insert into the waiting queue: */
311				PTHREAD_WAITQ_INSERT(curthread);
312				break;
313
314			/* States that require periodic work: */
315			case PS_SPINBLOCK:
316				/* No timeouts for this state: */
317				curthread->wakeup_time.tv_sec = -1;
318				curthread->wakeup_time.tv_nsec = -1;
319
320				/* Increment spinblock count: */
321				_spinblock_count++;
322
323				/* FALLTHROUGH */
324			case PS_FDR_WAIT:
325			case PS_FDW_WAIT:
326			case PS_POLL_WAIT:
327			case PS_SELECT_WAIT:
328				/* Restart the time slice: */
329				curthread->slice_usec = -1;
330
331				/* Insert into the waiting queue: */
332				PTHREAD_WAITQ_INSERT(curthread);
333
334				/* Insert into the work queue: */
335				PTHREAD_WORKQ_INSERT(curthread);
336				break;
337			}
338		}
339
340		/*
341		 * Avoid polling file descriptors if there are none
342		 * waiting:
343		 */
344		if (TAILQ_EMPTY(&_workq) != 0) {
345		}
346		/*
347		 * Poll file descriptors only if a new scheduling signal
348		 * has occurred or if we have no more runnable threads.
349		 */
350		else if (((current_tick = _sched_ticks) != last_tick) ||
351		    ((curthread->state != PS_RUNNING) &&
352		    (PTHREAD_PRIOQ_FIRST() == NULL))) {
353			/* Unprotect the scheduling queues: */
354			_queue_signals = 0;
355
356			/*
357			 * Poll file descriptors to update the state of threads
358			 * waiting on file I/O where data may be available:
359			 */
360			thread_kern_poll(0);
361
362			/* Protect the scheduling queues: */
363			_queue_signals = 1;
364		}
365		last_tick = current_tick;
366
367		/*
368		 * Wake up threads that have timed out.  This has to be
369		 * done after polling in case a thread does a poll or
370		 * select with zero time.
371		 */
372		PTHREAD_WAITQ_SETACTIVE();
373		while (((pthread = TAILQ_FIRST(&_waitingq)) != NULL) &&
374		    (pthread->wakeup_time.tv_sec != -1) &&
375		    (((pthread->wakeup_time.tv_sec == 0) &&
376		    (pthread->wakeup_time.tv_nsec == 0)) ||
377		    (pthread->wakeup_time.tv_sec < ts.tv_sec) ||
378		    ((pthread->wakeup_time.tv_sec == ts.tv_sec) &&
379		    (pthread->wakeup_time.tv_nsec <= ts.tv_nsec)))) {
380			switch (pthread->state) {
381			case PS_POLL_WAIT:
382			case PS_SELECT_WAIT:
383				/* Return zero file descriptors ready: */
384				pthread->data.poll_data->nfds = 0;
385				/* fall through */
386			default:
387				/*
388				 * Remove this thread from the waiting queue
389				 * (and work queue if necessary) and place it
390				 * in the ready queue.
391				 */
392				PTHREAD_WAITQ_CLEARACTIVE();
393				if (pthread->flags & PTHREAD_FLAGS_IN_WORKQ)
394					PTHREAD_WORKQ_REMOVE(pthread);
395				PTHREAD_NEW_STATE(pthread, PS_RUNNING);
396				PTHREAD_WAITQ_SETACTIVE();
397				break;
398			}
399			/*
400			 * Flag the timeout in the thread structure:
401			 */
402			pthread->timeout = 1;
403		}
404		PTHREAD_WAITQ_CLEARACTIVE();
405
406		/*
407		 * Check to see if the current thread needs to be added
408		 * to the priority queue:
409		 */
410		if (add_to_prioq != 0) {
411			/*
412			 * Save the current time as the time that the
413			 * thread became inactive:
414			 */
415			current_tick = _sched_ticks;
416			curthread->last_inactive = (long)current_tick;
417			if (curthread->last_inactive <
418			    curthread->last_active) {
419				/* Account for a rollover: */
420				curthread->last_inactive += UINT_MAX + 1;
421			}
422
423			if ((curthread->slice_usec != -1) &&
424		 	   (curthread->attr.sched_policy != SCHED_FIFO)) {
425				/*
426				 * Accumulate the number of microseconds for
427				 * which the current thread has run:
428				 */
429				curthread->slice_usec +=
430				    (curthread->last_inactive -
431				    curthread->last_active) *
432				    (long)_clock_res_usec;
433				/* Check for time quantum exceeded: */
434				if (curthread->slice_usec > TIMESLICE_USEC)
435					curthread->slice_usec = -1;
436			}
437
438			if (curthread->slice_usec == -1) {
439				/*
440				 * The thread exceeded its time
441				 * quantum or it yielded the CPU;
442				 * place it at the tail of the
443				 * queue for its priority.
444				 */
445				PTHREAD_PRIOQ_INSERT_TAIL(curthread);
446			} else {
447				/*
448				 * The thread hasn't exceeded its
449				 * interval.  Place it at the head
450				 * of the queue for its priority.
451				 */
452				PTHREAD_PRIOQ_INSERT_HEAD(curthread);
453			}
454		}
455
456		/*
457		 * Get the highest priority thread in the ready queue.
458		 */
459		pthread_h = PTHREAD_PRIOQ_FIRST();
460
461		/* Check if there are no threads ready to run: */
462		if (pthread_h == NULL) {
463			/*
464			 * Lock the pthread kernel by changing the pointer to
465			 * the running thread to point to the global kernel
466			 * thread structure:
467			 */
468			_set_curthread(&_thread_kern_thread);
469			curthread = &_thread_kern_thread;
470
471			DBG_MSG("No runnable threads, using kernel thread %p\n",
472			    curthread);
473
474			/* Unprotect the scheduling queues: */
475			_queue_signals = 0;
476
477			/*
478			 * There are no threads ready to run, so wait until
479			 * something happens that changes this condition:
480			 */
481			thread_kern_poll(1);
482
483			/*
484			 * This process' usage will likely be very small
485			 * while waiting in a poll.  Since the scheduling
486			 * clock is based on the profiling timer, it is
487			 * unlikely that the profiling timer will fire
488			 * and update the time of day.  To account for this,
489			 * get the time of day after polling with a timeout.
490			 */
491			gettimeofday((struct timeval *) &_sched_tod, NULL);
492
493			/* Check once more for a runnable thread: */
494			_queue_signals = 1;
495			pthread_h = PTHREAD_PRIOQ_FIRST();
496			_queue_signals = 0;
497		}
498
499		if (pthread_h != NULL) {
500			/* Remove the thread from the ready queue: */
501			PTHREAD_PRIOQ_REMOVE(pthread_h);
502
503			/* Unprotect the scheduling queues: */
504			_queue_signals = 0;
505
506			/*
507			 * Check for signals queued while the scheduling
508			 * queues were protected:
509			 */
510			while (_sigq_check_reqd != 0) {
511				/* Clear before handling queued signals: */
512				_sigq_check_reqd = 0;
513
514				/* Protect the scheduling queues again: */
515				_queue_signals = 1;
516
517				dequeue_signals();
518
519				/*
520				 * Check for a higher priority thread that
521				 * became runnable due to signal handling.
522				 */
523				if (((pthread = PTHREAD_PRIOQ_FIRST()) != NULL) &&
524				    (pthread->active_priority > pthread_h->active_priority)) {
525					/* Remove the thread from the ready queue: */
526					PTHREAD_PRIOQ_REMOVE(pthread);
527
528					/*
529					 * Insert the lower priority thread
530					 * at the head of its priority list:
531					 */
532					PTHREAD_PRIOQ_INSERT_HEAD(pthread_h);
533
534					/* There's a new thread in town: */
535					pthread_h = pthread;
536				}
537
538				/* Unprotect the scheduling queues: */
539				_queue_signals = 0;
540			}
541
542			/* Make the selected thread the current thread: */
543			_set_curthread(pthread_h);
544			curthread = pthread_h;
545
546			/*
547			 * Save the current time as the time that the thread
548			 * became active:
549			 */
550			current_tick = _sched_ticks;
551			curthread->last_active = (long) current_tick;
552
553			/*
554			 * Check if this thread is running for the first time
555			 * or running again after using its full time slice
556			 * allocation:
557			 */
558			if (curthread->slice_usec == -1) {
559				/* Reset the accumulated time slice period: */
560				curthread->slice_usec = 0;
561			}
562
563			/*
564			 * If we had a context switch, run any
565			 * installed switch hooks.
566			 */
567			if ((_sched_switch_hook != NULL) &&
568			    (_last_user_thread != curthread)) {
569				thread_run_switch_hook(_last_user_thread,
570				    curthread);
571			}
572			/*
573			 * Continue the thread at its current frame:
574			 */
575			switch(curthread->ctxtype) {
576			case CTX_JB_NOSIG:
577				___longjmp(curthread->ctx.jb,
578				    curthread->longjmp_val);
579				break;
580			case CTX_JB:
581				__longjmp(curthread->ctx.jb,
582				    curthread->longjmp_val);
583				break;
584			case CTX_SJB:
585				__siglongjmp(curthread->ctx.sigjb,
586				    curthread->longjmp_val);
587				break;
588			case CTX_UC:
589				/* XXX - Restore FP registers? */
590				FP_RESTORE_UC(&curthread->ctx.uc);
591
592				/*
593				 * Do a sigreturn to restart the thread that
594				 * was interrupted by a signal:
595				 */
596				_thread_kern_in_sched = 0;
597
598#if NOT_YET
599				_setcontext(&curthread->ctx.uc);
600#else
601				/*
602				 * Ensure the process signal mask is set
603				 * correctly:
604				 */
605				curthread->ctx.uc.uc_sigmask =
606				    _process_sigmask;
607				__sys_sigreturn(&curthread->ctx.uc);
608#endif
609				break;
610			}
611			/* This point should not be reached. */
612			PANIC("Thread has returned from sigreturn or longjmp");
613		}
614	}
615
616	/* There are no more threads, so exit this process: */
617	exit(0);
618}
619
620void
621_thread_kern_sched_state(enum pthread_state state, char *fname, int lineno)
622{
623	struct pthread	*curthread = _get_curthread();
624
625	/*
626	 * Flag the pthread kernel as executing scheduler code
627	 * to prevent a scheduler signal from interrupting this
628	 * execution and calling the scheduler again.
629	 */
630	_thread_kern_in_sched = 1;
631
632	/*
633	 * Prevent the signal handler from fiddling with this thread
634	 * before its state is set and it is placed into the proper queue.
635	 */
636	_queue_signals = 1;
637
638	/* Change the state of the current thread: */
639	curthread->state = state;
640	curthread->fname = fname;
641	curthread->lineno = lineno;
642
643	/* Schedule the next thread that is ready: */
644	_thread_kern_sched(NULL);
645}
646
647void
648_thread_kern_sched_state_unlock(enum pthread_state state,
649    spinlock_t *lock, char *fname, int lineno)
650{
651	struct pthread	*curthread = _get_curthread();
652
653	/*
654	 * Flag the pthread kernel as executing scheduler code
655	 * to prevent a scheduler signal from interrupting this
656	 * execution and calling the scheduler again.
657	 */
658	_thread_kern_in_sched = 1;
659
660	/*
661	 * Prevent the signal handler from fiddling with this thread
662	 * before its state is set and it is placed into the proper
663	 * queue(s).
664	 */
665	_queue_signals = 1;
666
667	/* Change the state of the current thread: */
668	curthread->state = state;
669	curthread->fname = fname;
670	curthread->lineno = lineno;
671
672	_SPINUNLOCK(lock);
673
674	/* Schedule the next thread that is ready: */
675	_thread_kern_sched(NULL);
676}
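
/*
 * Illustrative call pattern (editor's sketch, not code from this file):
 * a synchronization primitive typically acquires its spinlock, queues
 * the current thread, and then lets this routine release the lock and
 * block in one step, e.g. for a hypothetical condition variable "cv":
 *
 *	_SPINLOCK(&cv->lock);
 *	... enqueue curthread on the condition variable ...
 *	_thread_kern_sched_state_unlock(PS_COND_WAIT, &cv->lock,
 *	    __FILE__, __LINE__);
 *
 * Releasing the lock only after _queue_signals is set closes the window
 * in which the signal handler could see a half-updated queue.
 */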
677
678static void
679thread_kern_poll(int wait_reqd)
680{
681	int             count = 0;
682	int             i, found;
683	int		kern_pipe_added = 0;
684	int             nfds = 0;
685	int		timeout_ms = 0;
686	struct pthread	*pthread;
687	struct timespec ts;
688	struct timeval  tv;
689
690	/* Check if the caller wants to wait: */
691	if (wait_reqd == 0) {
692		timeout_ms = 0;
693	}
694	else {
695		/* Get the current time of day: */
696		GET_CURRENT_TOD(tv);
697		TIMEVAL_TO_TIMESPEC(&tv, &ts);
698
699		_queue_signals = 1;
700		pthread = TAILQ_FIRST(&_waitingq);
701		_queue_signals = 0;
702
703		if ((pthread == NULL) || (pthread->wakeup_time.tv_sec == -1)) {
704			/*
705			 * Either there are no threads in the waiting queue,
706			 * or there are no threads that can timeout.
707			 */
708			timeout_ms = INFTIM;
709		}
710		else {
711			/*
712			 * Calculate the time left for the next thread to
713			 * timeout:
714			 */
715			timeout_ms = ((pthread->wakeup_time.tv_sec - ts.tv_sec) *
716			    1000) + ((pthread->wakeup_time.tv_nsec - ts.tv_nsec) /
717			    1000000);
718			/*
719			 * Don't allow negative timeouts:
720			 */
721			if (timeout_ms < 0)
722				timeout_ms = 0;
723		}
724	}
725
726	/* Protect the scheduling queues: */
727	_queue_signals = 1;
728
729	/*
730	 * Check to see if the signal queue needs to be walked to look
731	 * for threads awoken by a signal while in the scheduler.
732	 */
733	if (_sigq_check_reqd != 0) {
734		/* Reset flag before handling queued signals: */
735		_sigq_check_reqd = 0;
736
737		dequeue_signals();
738	}
739
740	/*
741	 * Check for a thread that became runnable due to a signal:
742	 */
743	if (PTHREAD_PRIOQ_FIRST() != NULL) {
744		/*
745		 * Since there is at least one runnable thread,
746		 * disable the wait.
747		 */
748		timeout_ms = 0;
749	}
750
751	/*
752	 * Form the poll table:
753	 */
754	nfds = 0;
755	if (timeout_ms != 0) {
756		/* Add the kernel pipe to the poll table: */
757		_thread_pfd_table[nfds].fd = _thread_kern_pipe[0];
758		_thread_pfd_table[nfds].events = POLLRDNORM;
759		_thread_pfd_table[nfds].revents = 0;
760		nfds++;
761		kern_pipe_added = 1;
762	}
763
764	PTHREAD_WAITQ_SETACTIVE();
765	TAILQ_FOREACH(pthread, &_workq, qe) {
766		switch (pthread->state) {
767		case PS_SPINBLOCK:
768			/*
769			 * If the lock is available, let the thread run.
770			 */
771			if (pthread->data.spinlock->access_lock == 0) {
772				PTHREAD_WAITQ_CLEARACTIVE();
773				PTHREAD_WORKQ_REMOVE(pthread);
774				PTHREAD_NEW_STATE(pthread,PS_RUNNING);
775				PTHREAD_WAITQ_SETACTIVE();
776				/* One less thread in a spinblock state: */
777				_spinblock_count--;
778				/*
779				 * Since there is at least one runnable
780				 * thread, disable the wait.
781				 */
782				timeout_ms = 0;
783			}
784			break;
785
786		/* File descriptor read wait: */
787		case PS_FDR_WAIT:
788			/* Limit number of polled files to table size: */
789			if (nfds < _thread_dtablesize) {
790				_thread_pfd_table[nfds].events = POLLRDNORM;
791				_thread_pfd_table[nfds].fd = pthread->data.fd.fd;
792				nfds++;
793			}
794			break;
795
796		/* File descriptor write wait: */
797		case PS_FDW_WAIT:
798			/* Limit number of polled files to table size: */
799			if (nfds < _thread_dtablesize) {
800				_thread_pfd_table[nfds].events = POLLWRNORM;
801				_thread_pfd_table[nfds].fd = pthread->data.fd.fd;
802				nfds++;
803			}
804			break;
805
806		/* File descriptor poll or select wait: */
807		case PS_POLL_WAIT:
808		case PS_SELECT_WAIT:
809			/* Limit number of polled files to table size: */
810			if (pthread->data.poll_data->nfds + nfds <
811			    _thread_dtablesize) {
812				for (i = 0; i < pthread->data.poll_data->nfds; i++) {
813					_thread_pfd_table[nfds + i].fd =
814					    pthread->data.poll_data->fds[i].fd;
815					_thread_pfd_table[nfds + i].events =
816					    pthread->data.poll_data->fds[i].events;
817				}
818				nfds += pthread->data.poll_data->nfds;
819			}
820			break;
821
822		/* Other states do not depend on file I/O. */
823		default:
824			break;
825		}
826	}
827	PTHREAD_WAITQ_CLEARACTIVE();
828
829	/*
830	 * Wait for a file descriptor to be ready for read, write, or
831	 * an exception, or a timeout to occur:
832	 */
833	count = __sys_poll(_thread_pfd_table, nfds, timeout_ms);
834
835	if (kern_pipe_added != 0)
836		/*
837		 * Skip over the pthread kernel pipe entry when
838		 * scanning the pollfd table below:
839		 */
840		nfds = 1;
841	else
842		nfds = 0;
843
844	/*
845	 * Check if it is possible that there are bytes in the kernel
846	 * read pipe waiting to be read:
847	 */
848	if (count < 0 || ((kern_pipe_added != 0) &&
849	    (_thread_pfd_table[0].revents & POLLRDNORM))) {
850		/*
851		 * If the kernel read pipe was included in the
852		 * count:
853		 */
854		if (count > 0) {
855			/* Decrement the count of file descriptors: */
856			count--;
857		}
858
859		if (_sigq_check_reqd != 0) {
860			/* Reset flag before handling signals: */
861			_sigq_check_reqd = 0;
862
863			dequeue_signals();
864		}
865	}
866
867	/*
868	 * Check if any file descriptors are ready:
869	 */
870	if (count > 0) {
871		/*
872		 * Enter a loop to look for threads waiting on file
873		 * descriptors that are flagged as available by the
874		 * _poll syscall:
875		 */
876		PTHREAD_WAITQ_SETACTIVE();
877		TAILQ_FOREACH(pthread, &_workq, qe) {
878			switch (pthread->state) {
879			case PS_SPINBLOCK:
880				/*
881				 * If the lock is available, let the thread run.
882				 */
883				if (pthread->data.spinlock->access_lock == 0) {
884					PTHREAD_WAITQ_CLEARACTIVE();
885					PTHREAD_WORKQ_REMOVE(pthread);
886					PTHREAD_NEW_STATE(pthread,PS_RUNNING);
887					PTHREAD_WAITQ_SETACTIVE();
888
889					/*
890					 * One less thread in a spinblock state:
891					 */
892					_spinblock_count--;
893				}
894				break;
895
896			/* File descriptor read wait: */
897			case PS_FDR_WAIT:
898				if ((nfds < _thread_dtablesize) &&
899				    (_thread_pfd_table[nfds].revents & POLLRDNORM)) {
900					PTHREAD_WAITQ_CLEARACTIVE();
901					PTHREAD_WORKQ_REMOVE(pthread);
902					PTHREAD_NEW_STATE(pthread,PS_RUNNING);
903					PTHREAD_WAITQ_SETACTIVE();
904				}
905				nfds++;
906				break;
907
908			/* File descriptor write wait: */
909			case PS_FDW_WAIT:
910				if ((nfds < _thread_dtablesize) &&
911				    (_thread_pfd_table[nfds].revents & POLLWRNORM)) {
912					PTHREAD_WAITQ_CLEARACTIVE();
913					PTHREAD_WORKQ_REMOVE(pthread);
914					PTHREAD_NEW_STATE(pthread,PS_RUNNING);
915					PTHREAD_WAITQ_SETACTIVE();
916				}
917				nfds++;
918				break;
919
920			/* File descriptor poll or select wait: */
921			case PS_POLL_WAIT:
922			case PS_SELECT_WAIT:
923				if (pthread->data.poll_data->nfds + nfds <
924				    _thread_dtablesize) {
925					/*
926					 * Enter a loop looking for I/O
927					 * readiness:
928					 */
929					found = 0;
930					for (i = 0; i < pthread->data.poll_data->nfds; i++) {
931						if (_thread_pfd_table[nfds + i].revents != 0) {
932							pthread->data.poll_data->fds[i].revents =
933							    _thread_pfd_table[nfds + i].revents;
934							found++;
935						}
936					}
937
938					/* Increment nfds before poll_data->nfds is overwritten below: */
939					nfds += pthread->data.poll_data->nfds;
940
941					if (found != 0) {
942						pthread->data.poll_data->nfds = found;
943						PTHREAD_WAITQ_CLEARACTIVE();
944						PTHREAD_WORKQ_REMOVE(pthread);
945						PTHREAD_NEW_STATE(pthread,PS_RUNNING);
946						PTHREAD_WAITQ_SETACTIVE();
947					}
948				}
949				else
950					nfds += pthread->data.poll_data->nfds;
951				break;
952
953			/* Other states do not depend on file I/O. */
954			default:
955				break;
956			}
957		}
958		PTHREAD_WAITQ_CLEARACTIVE();
959	}
960	else if (_spinblock_count != 0) {
961		/*
962		 * Enter a loop to look for threads waiting on a spinlock
963		 * that is now available.
964		 */
965		PTHREAD_WAITQ_SETACTIVE();
966		TAILQ_FOREACH(pthread, &_workq, qe) {
967			if (pthread->state == PS_SPINBLOCK) {
968				/*
969				 * If the lock is available, let the thread run.
970				 */
971				if (pthread->data.spinlock->access_lock == 0) {
972					PTHREAD_WAITQ_CLEARACTIVE();
973					PTHREAD_WORKQ_REMOVE(pthread);
974					PTHREAD_NEW_STATE(pthread,PS_RUNNING);
975					PTHREAD_WAITQ_SETACTIVE();
976
977					/*
978					 * One less thread in a spinblock state:
979					 */
980					_spinblock_count--;
981				}
982			}
983		}
984		PTHREAD_WAITQ_CLEARACTIVE();
985	}
986
987	/* Unprotect the scheduling queues: */
988	_queue_signals = 0;
989
990	while (_sigq_check_reqd != 0) {
991		/* Handle queued signals: */
992		_sigq_check_reqd = 0;
993
994		/* Protect the scheduling queues: */
995		_queue_signals = 1;
996
997		dequeue_signals();
998
999		/* Unprotect the scheduling queues: */
1000		_queue_signals = 0;
1001	}
1002}
1003
1004void
1005_thread_kern_set_timeout(const struct timespec * timeout)
1006{
1007	struct pthread	*curthread = _get_curthread();
1008	struct timespec current_time;
1009	struct timeval  tv;
1010
1011	/* Reset the timeout flag for the running thread: */
1012	curthread->timeout = 0;
1013
1014	/* Check if the thread is to wait forever: */
1015	if (timeout == NULL) {
1016		/*
1017		 * Set the wakeup time to something that can be recognised as
1018		 * different to an actual time of day:
1019		 */
1020		curthread->wakeup_time.tv_sec = -1;
1021		curthread->wakeup_time.tv_nsec = -1;
1022	}
1023	/* Check if no waiting is required: */
1024	else if (timeout->tv_sec == 0 && timeout->tv_nsec == 0) {
1025		/* Set the wake up time to 'immediately': */
1026		curthread->wakeup_time.tv_sec = 0;
1027		curthread->wakeup_time.tv_nsec = 0;
1028	} else {
1029		/* Get the current time: */
1030		GET_CURRENT_TOD(tv);
1031		TIMEVAL_TO_TIMESPEC(&tv, &current_time);
1032
1033		/* Calculate the time for the current thread to wake up: */
1034		curthread->wakeup_time.tv_sec = current_time.tv_sec + timeout->tv_sec;
1035		curthread->wakeup_time.tv_nsec = current_time.tv_nsec + timeout->tv_nsec;
1036
1037		/* Check if the nanosecond field needs to wrap: */
1038		if (curthread->wakeup_time.tv_nsec >= 1000000000) {
1039			/* Wrap the nanosecond field: */
1040			curthread->wakeup_time.tv_sec += 1;
1041			curthread->wakeup_time.tv_nsec -= 1000000000;
1042		}
1043	}
1044}
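
/*
 * Worked example (editor's note): with a current time of
 * { tv_sec = 100, tv_nsec = 800000000 } and a relative timeout of
 * { tv_sec = 1, tv_nsec = 300000000 }, the additions above give
 * { 101, 1100000000 }, which the wrap check normalizes to
 * { 102, 100000000 }.
 */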
1045
1046void
1047_thread_kern_sig_defer(void)
1048{
1049	struct pthread	*curthread = _get_curthread();
1050
1051	/* Allow signal deferral to be recursive. */
1052	curthread->sig_defer_count++;
1053}
1054
1055void
1056_thread_kern_sig_undefer(void)
1057{
1058	struct pthread	*curthread = _get_curthread();
1059
1060	/*
1061	 * Perform checks to yield only if we are about to undefer
1062	 * signals.
1063	 */
1064	if (curthread->sig_defer_count > 1) {
1065		/* Decrement the signal deferral count. */
1066		curthread->sig_defer_count--;
1067	}
1068	else if (curthread->sig_defer_count == 1) {
1069		/* Reenable signals: */
1070		curthread->sig_defer_count = 0;
1071
1072		/*
1073		 * Check if there are queued signals:
1074		 */
1075		if (_sigq_check_reqd != 0)
1076			_thread_kern_sched(NULL);
1077
1078		/*
1079		 * Check for asynchronous cancellation before delivering any
1080		 * pending signals:
1081		 */
1082		if (((curthread->cancelflags & PTHREAD_AT_CANCEL_POINT) == 0) &&
1083		    ((curthread->cancelflags & PTHREAD_CANCEL_ASYNCHRONOUS) != 0))
1084			pthread_testcancel();
1085
1086		/*
1087		 * If there are pending signals or this thread has
1088		 * to yield the CPU, call the kernel scheduler:
1089		 *
1090		 * XXX - Come back and revisit the pending signal problem
1091		 */
1092		if ((curthread->yield_on_sig_undefer != 0) ||
1093		    SIGNOTEMPTY(curthread->sigpend)) {
1094			curthread->yield_on_sig_undefer = 0;
1095			_thread_kern_sched(NULL);
1096		}
1097	}
1098}
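
/*
 * Typical use of the deferral pair (editor's sketch): callers bracket
 * scheduling-queue manipulation so that the scheduling signal handler
 * cannot run in between, e.g.
 *
 *	_thread_kern_sig_defer();
 *	PTHREAD_PRIOQ_INSERT_TAIL(pthread);	(or any other queue change)
 *	_thread_kern_sig_undefer();
 *
 * _thread_kern_sig_undefer() then performs any reschedule that was
 * deferred while the count was non-zero.
 */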
1099
1100static void
1101dequeue_signals(void)
1102{
1103	char	bufr[128];
1104	int	num;
1105
1106	/*
1107	 * Enter a loop to clear the pthread kernel pipe:
1108	 */
1109	while (((num = __sys_read(_thread_kern_pipe[0], bufr,
1110	    sizeof(bufr))) > 0) || (num == -1 && errno == EINTR)) {
1111	}
1112	if ((num < 0) && (errno != EAGAIN)) {
1113		/*
1114		 * The only error we should expect is if there is
1115		 * no data to read.
1116		 */
1117		PANIC("Unable to read from thread kernel pipe");
1118	}
1119	/* Handle any pending signals: */
1120	_thread_sig_handle_pending();
1121}
1122
1123static inline void
1124thread_run_switch_hook(pthread_t thread_out, pthread_t thread_in)
1125{
1126	pthread_t tid_out = thread_out;
1127	pthread_t tid_in = thread_in;
1128
1129	if ((tid_out != NULL) &&
1130	    (tid_out->flags & PTHREAD_FLAGS_PRIVATE) != 0)
1131		tid_out = NULL;
1132	if ((tid_in != NULL) &&
1133	    (tid_in->flags & PTHREAD_FLAGS_PRIVATE) != 0)
1134		tid_in = NULL;
1135
1136	if ((_sched_switch_hook != NULL) && (tid_out != tid_in)) {
1137		/* Run the scheduler switch hook: */
1138		_sched_switch_hook(tid_out, tid_in);
1139	}
1140}
1141
1142struct pthread *
1143_get_curthread(void)
1144{
1145	if (_thread_initial == NULL)
1146		_thread_init();
1147
1148	return (_thread_run);
1149}
1150
1151void
1152_set_curthread(struct pthread *newthread)
1153{
1154	_thread_run = newthread;
1155}
1156