thr_kern.c revision 70231
/*
 * Copyright (c) 1995-1998 John Birrell <jb@cimlogic.com.au>
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. All advertising materials mentioning features or use of this software
 *    must display the following acknowledgement:
 *	This product includes software developed by John Birrell.
 * 4. Neither the name of the author nor the names of any co-contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY JOHN BIRRELL AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 * $FreeBSD: head/lib/libkse/thread/thr_kern.c 70231 2000-12-20 16:55:57Z deischen $
 *
 */
#include <errno.h>
#include <poll.h>
#include <stdlib.h>
#include <stdarg.h>
#include <string.h>
#include <unistd.h>
#include <setjmp.h>
#include <sys/param.h>
#include <sys/types.h>
#include <sys/signalvar.h>
#include <sys/stat.h>
#include <sys/time.h>
#include <sys/socket.h>
#include <sys/uio.h>
#include <sys/syscall.h>
#include <fcntl.h>
#ifdef _THREAD_SAFE
#include <pthread.h>
#include "pthread_private.h"

/* #define DEBUG_THREAD_KERN */
#ifdef DEBUG_THREAD_KERN
#define DBG_MSG		stdout_debug
#else
#define DBG_MSG(x...)
#endif

/* Static function prototype definitions: */
static void
thread_kern_poll(int wait_reqd);

static void
dequeue_signals(void);

static inline void
thread_run_switch_hook(pthread_t thread_out, pthread_t thread_in);

/* Static variables: */
static int	last_tick = 0;		/* tick count seen on the previous scheduler pass */
static int	called_from_handler = 0; /* nonzero when entered from the signal handler */

/*
 * This is called when a signal handler finishes and wants to
 * return to a previous frame.
 */
void
_thread_kern_sched_frame(struct pthread_signal_frame *psf)
{
	/*
	 * Flag the pthread kernel as executing scheduler code
	 * to avoid a signal from interrupting this execution and
	 * corrupting the (soon-to-be) current frame.
	 */
	_thread_kern_in_sched = 1;

	/* Restore the signal frame: */
	_thread_sigframe_restore(_thread_run, psf);

	/* The signal mask was restored; check for any pending signals: */
	_thread_run->check_pending = 1;

	/* Switch to the thread scheduler: */
	___longjmp(_thread_kern_sched_jb, 1);
}


void
_thread_kern_sched(ucontext_t *scp)
{
	/*
	 * Flag the pthread kernel as executing scheduler code
	 * to avoid a scheduler signal from interrupting this
	 * execution and calling the scheduler again.
	 */
	_thread_kern_in_sched = 1;

	/* Check if this function was called from the signal handler: */
	if (scp != NULL) {
		called_from_handler = 1;
		DBG_MSG("Entering scheduler due to signal\n");
	} else {
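		/*
		 * _setjmp() returns zero on this initial call, after saving
		 * the context; it returns nonzero later, when the scheduler
		 * resumes this thread by jumping back to ctx.jb.
		 */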
		/* Save the state of the current thread: */
		if (_setjmp(_thread_run->ctx.jb) == 0) {
			/* Flag the jump buffer was the last state saved: */
			_thread_run->ctxtype = CTX_JB_NOSIG;
			_thread_run->longjmp_val = 1;
		} else {
			DBG_MSG("Returned from ___longjmp, thread %p\n",
			    _thread_run);
			/*
			 * This point is reached when a longjmp() is called
			 * to restore the state of a thread.
			 *
			 * This is the normal way out of the scheduler.
			 */
			_thread_kern_in_sched = 0;

			if (_thread_run->sig_defer_count == 0) {
				if (((_thread_run->cancelflags &
				    PTHREAD_AT_CANCEL_POINT) == 0) &&
				    ((_thread_run->cancelflags &
				    PTHREAD_CANCEL_ASYNCHRONOUS) != 0))
					/*
					 * Cancellations override signals.
					 *
					 * Stick a cancellation point at the
					 * start of each async-cancellable
					 * thread's resumption.
					 *
					 * We allow threads woken at cancel
					 * points to do their own checks.
					 */
					pthread_testcancel();
			}

			if (_sched_switch_hook != NULL) {
				/* Run the installed switch hook: */
				thread_run_switch_hook(_last_user_thread,
				    _thread_run);
			}
			return;
		}
	}
	/* Switch to the thread scheduler: */
	___longjmp(_thread_kern_sched_jb, 1);
}

void
_thread_kern_sched_sig(void)
{
	_thread_run->check_pending = 1;
	_thread_kern_sched(NULL);
}


void
_thread_kern_scheduler(void)
{
	struct timespec	ts;
	struct timeval	tv;
	pthread_t	pthread, pthread_h;
	unsigned int	current_tick;
	int		add_to_prioq;

	/* If the currently running thread is a user thread, save it: */
	if ((_thread_run->flags & PTHREAD_FLAGS_PRIVATE) == 0)
		_last_user_thread = _thread_run;

	if (called_from_handler != 0) {
		called_from_handler = 0;

		/*
		 * The signal handler should have saved the state of
		 * the current thread.  Restore the process signal
		 * mask.
		 */
		if (_thread_sys_sigprocmask(SIG_SETMASK,
		    &_process_sigmask, NULL) != 0)
			PANIC("Unable to restore process mask after signal");

		/*
		 * Since the signal handler didn't return normally, we
		 * have to tell the kernel to reuse the signal stack.
		 */
		if (_thread_sys_sigaltstack(&_thread_sigstack, NULL) != 0)
			PANIC("Unable to restore alternate signal stack");
	}

	/* Are there pending signals for this thread? */
	if (_thread_run->check_pending != 0) {
		_thread_run->check_pending = 0;
		_thread_sig_check_pending(_thread_run);
	}

	/*
	 * Enter a scheduling loop that finds the next thread that is
	 * ready to run. This loop completes when there are no more threads
	 * in the global list or when a thread has its state restored by
	 * either a sigreturn (if the state was saved as a sigcontext) or a
	 * longjmp (if the state was saved by a setjmp).
	 */
	while (!(TAILQ_EMPTY(&_thread_list))) {
		/* Get the current time of day: */
		GET_CURRENT_TOD(tv);
		TIMEVAL_TO_TIMESPEC(&tv, &ts);
		current_tick = _sched_ticks;

		/*
		 * Protect the scheduling queues from access by the signal
		 * handler.
		 */
		_queue_signals = 1;
		add_to_prioq = 0;

		if (_thread_run != &_thread_kern_thread) {
			/*
			 * This thread no longer needs to yield the CPU.
			 */
			_thread_run->yield_on_sig_undefer = 0;

			if (_thread_run->state != PS_RUNNING) {
				/*
				 * Save the current time as the time that the
				 * thread became inactive:
				 */
				_thread_run->last_inactive = (long)current_tick;
				if (_thread_run->last_inactive <
				    _thread_run->last_active) {
					/* Account for a rollover: */
					_thread_run->last_inactive +=
					    UINT_MAX + 1;
				}
			}

			/*
			 * Place the currently running thread into the
			 * appropriate queue(s).
			 */
			switch (_thread_run->state) {
			case PS_DEAD:
			case PS_STATE_MAX: /* to silence -Wall */
			case PS_SUSPENDED:
				/*
				 * Dead and suspended threads are not placed
				 * in any queue:
				 */
				break;

			case PS_RUNNING:
				/*
				 * Runnable threads can't be placed in the
				 * priority queue until after waiting threads
				 * are polled (to preserve round-robin
				 * scheduling).
				 */
				add_to_prioq = 1;
				break;

			/*
			 * States which do not depend on file descriptor I/O
			 * operations or timeouts:
			 */
			case PS_DEADLOCK:
			case PS_FDLR_WAIT:
			case PS_FDLW_WAIT:
			case PS_FILE_WAIT:
			case PS_JOIN:
			case PS_MUTEX_WAIT:
			case PS_SIGSUSPEND:
			case PS_SIGTHREAD:
			case PS_SIGWAIT:
			case PS_WAIT_WAIT:
				/* No timeouts for these states: */
				_thread_run->wakeup_time.tv_sec = -1;
				_thread_run->wakeup_time.tv_nsec = -1;

				/* Restart the time slice: */
				_thread_run->slice_usec = -1;

				/* Insert into the waiting queue: */
				PTHREAD_WAITQ_INSERT(_thread_run);
				break;

			/* States which can timeout: */
			case PS_COND_WAIT:
			case PS_SLEEP_WAIT:
				/* Restart the time slice: */
				_thread_run->slice_usec = -1;

				/* Insert into the waiting queue: */
				PTHREAD_WAITQ_INSERT(_thread_run);
				break;

			/* States that require periodic work: */
			case PS_SPINBLOCK:
				/* No timeouts for this state: */
				_thread_run->wakeup_time.tv_sec = -1;
				_thread_run->wakeup_time.tv_nsec = -1;

				/* Increment spinblock count: */
				_spinblock_count++;

				/* FALLTHROUGH */
			case PS_FDR_WAIT:
			case PS_FDW_WAIT:
			case PS_POLL_WAIT:
			case PS_SELECT_WAIT:
				/* Restart the time slice: */
				_thread_run->slice_usec = -1;

				/* Insert into the waiting queue: */
				PTHREAD_WAITQ_INSERT(_thread_run);

				/* Insert into the work queue: */
				PTHREAD_WORKQ_INSERT(_thread_run);
				break;
			}
		}

		/*
		 * Avoid polling file descriptors if there are none
		 * waiting:
		 */
		if (TAILQ_EMPTY(&_workq) != 0) {
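			/* Nothing is waiting on file I/O; skip the poll. */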
		}
		/*
		 * Poll file descriptors only if a new scheduling signal
		 * has occurred or if we have no more runnable threads.
		 */
		else if (((current_tick = _sched_ticks) != last_tick) ||
		    ((_thread_run->state != PS_RUNNING) &&
		    (PTHREAD_PRIOQ_FIRST() == NULL))) {
			/* Unprotect the scheduling queues: */
			_queue_signals = 0;

			/*
			 * Poll file descriptors to update the state of threads
			 * waiting on file I/O where data may be available:
			 */
			thread_kern_poll(0);

			/* Protect the scheduling queues: */
			_queue_signals = 1;
		}
		last_tick = current_tick;

		/*
		 * Wake up threads that have timed out.  This has to be
		 * done after polling in case a thread does a poll or
		 * select with zero time.
		 */
		PTHREAD_WAITQ_SETACTIVE();
		while (((pthread = TAILQ_FIRST(&_waitingq)) != NULL) &&
		    (pthread->wakeup_time.tv_sec != -1) &&
		    (((pthread->wakeup_time.tv_sec == 0) &&
		    (pthread->wakeup_time.tv_nsec == 0)) ||
		    (pthread->wakeup_time.tv_sec < ts.tv_sec) ||
		    ((pthread->wakeup_time.tv_sec == ts.tv_sec) &&
		    (pthread->wakeup_time.tv_nsec <= ts.tv_nsec)))) {
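			/*
			 * This thread's wakeup time is either "immediate"
			 * (zero) or has already passed, so wake it up.
			 */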
			switch (pthread->state) {
			case PS_POLL_WAIT:
			case PS_SELECT_WAIT:
				/* Return zero file descriptors ready: */
				pthread->data.poll_data->nfds = 0;
				/* fall through */
			default:
				/*
				 * Remove this thread from the waiting queue
				 * (and work queue if necessary) and place it
				 * in the ready queue.
				 */
				PTHREAD_WAITQ_CLEARACTIVE();
				if (pthread->flags & PTHREAD_FLAGS_IN_WORKQ)
					PTHREAD_WORKQ_REMOVE(pthread);
				PTHREAD_NEW_STATE(pthread, PS_RUNNING);
				PTHREAD_WAITQ_SETACTIVE();
				break;
			}
			/*
			 * Flag the timeout in the thread structure:
			 */
			pthread->timeout = 1;
		}
		PTHREAD_WAITQ_CLEARACTIVE();

		/*
		 * Check to see if the current thread needs to be added
		 * to the priority queue:
		 */
		if (add_to_prioq != 0) {
			/*
			 * Save the current time as the time that the
			 * thread became inactive:
			 */
			current_tick = _sched_ticks;
			_thread_run->last_inactive = (long)current_tick;
			if (_thread_run->last_inactive <
			    _thread_run->last_active) {
				/* Account for a rollover: */
				_thread_run->last_inactive += UINT_MAX + 1;
			}

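			/* SCHED_FIFO threads are not time sliced: */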
			if ((_thread_run->slice_usec != -1) &&
			    (_thread_run->attr.sched_policy != SCHED_FIFO)) {
				/*
				 * Accumulate the number of microseconds for
				 * which the current thread has run:
				 */
				_thread_run->slice_usec +=
				    (_thread_run->last_inactive -
				    _thread_run->last_active) *
				    (long)_clock_res_usec;
				/* Check for time quantum exceeded: */
				if (_thread_run->slice_usec > TIMESLICE_USEC)
					_thread_run->slice_usec = -1;
			}

			if (_thread_run->slice_usec == -1) {
				/*
				 * The thread exceeded its time
				 * quantum or it yielded the CPU;
				 * place it at the tail of the
				 * queue for its priority.
				 */
				PTHREAD_PRIOQ_INSERT_TAIL(_thread_run);
			} else {
				/*
				 * The thread hasn't exceeded its
				 * interval.  Place it at the head
				 * of the queue for its priority.
				 */
				PTHREAD_PRIOQ_INSERT_HEAD(_thread_run);
			}
		}

		/*
		 * Get the highest priority thread in the ready queue.
		 */
		pthread_h = PTHREAD_PRIOQ_FIRST();

		/* Check if there are no threads ready to run: */
		if (pthread_h == NULL) {
			/*
			 * Lock the pthread kernel by changing the pointer to
			 * the running thread to point to the global kernel
			 * thread structure:
			 */
			_thread_run = &_thread_kern_thread;
			DBG_MSG("No runnable threads, using kernel thread %p\n",
			    _thread_run);

			/* Unprotect the scheduling queues: */
			_queue_signals = 0;

			/*
			 * There are no threads ready to run, so wait until
			 * something happens that changes this condition:
			 */
			thread_kern_poll(1);

			/*
			 * This process' usage will likely be very small
			 * while waiting in a poll.  Since the scheduling
			 * clock is based on the profiling timer, it is
			 * unlikely that the profiling timer will fire
			 * and update the time of day.  To account for this,
			 * get the time of day after polling with a timeout.
			 */
			gettimeofday((struct timeval *) &_sched_tod, NULL);

			/* Check once more for a runnable thread: */
			_queue_signals = 1;
			pthread_h = PTHREAD_PRIOQ_FIRST();
			_queue_signals = 0;
		}

		if (pthread_h != NULL) {
			/* Remove the thread from the ready queue: */
			PTHREAD_PRIOQ_REMOVE(pthread_h);

			/* Unprotect the scheduling queues: */
			_queue_signals = 0;

			/*
			 * Check for signals queued while the scheduling
			 * queues were protected:
			 */
			while (_sigq_check_reqd != 0) {
				/* Clear before handling queued signals: */
				_sigq_check_reqd = 0;

				/* Protect the scheduling queues again: */
				_queue_signals = 1;

				dequeue_signals();

				/*
				 * Check for a higher priority thread that
				 * became runnable due to signal handling.
				 */
				if (((pthread = PTHREAD_PRIOQ_FIRST()) != NULL) &&
				    (pthread->active_priority > pthread_h->active_priority)) {
					/* Remove the thread from the ready queue: */
					PTHREAD_PRIOQ_REMOVE(pthread);

					/*
					 * Insert the lower priority thread
					 * at the head of its priority list:
					 */
					PTHREAD_PRIOQ_INSERT_HEAD(pthread_h);

					/* There's a new thread in town: */
					pthread_h = pthread;
				}

				/* Unprotect the scheduling queues: */
				_queue_signals = 0;
			}

			/* Make the selected thread the current thread: */
			_thread_run = pthread_h;

			/*
			 * Save the current time as the time that the thread
			 * became active:
			 */
			current_tick = _sched_ticks;
			_thread_run->last_active = (long) current_tick;

			/*
			 * Check if this thread is running for the first time
			 * or running again after using its full time slice
			 * allocation:
			 */
			if (_thread_run->slice_usec == -1) {
				/* Reset the accumulated time slice period: */
				_thread_run->slice_usec = 0;
			}

			/*
			 * If we had a context switch, run any
			 * installed switch hooks.
			 */
			if ((_sched_switch_hook != NULL) &&
			    (_last_user_thread != _thread_run)) {
				thread_run_switch_hook(_last_user_thread,
				    _thread_run);
			}
			/*
			 * Continue the thread at its current frame:
			 */
			switch(_thread_run->ctxtype) {
			case CTX_JB_NOSIG:
				___longjmp(_thread_run->ctx.jb,
				    _thread_run->longjmp_val);
				break;
			case CTX_JB:
				__longjmp(_thread_run->ctx.jb,
				    _thread_run->longjmp_val);
				break;
			case CTX_SJB:
				__siglongjmp(_thread_run->ctx.sigjb,
				    _thread_run->longjmp_val);
				break;
			case CTX_UC:
				/* XXX - Restore FP registers? */
				FP_RESTORE_UC(&_thread_run->ctx.uc);

				/*
				 * Do a sigreturn to restart the thread that
				 * was interrupted by a signal:
				 */
				_thread_kern_in_sched = 0;

#if NOT_YET
				_setcontext(&_thread_run->ctx.uc);
#else
				/*
				 * Ensure the process signal mask is set
				 * correctly:
				 */
				_thread_run->ctx.uc.uc_sigmask =
				    _process_sigmask;
				_thread_sys_sigreturn(&_thread_run->ctx.uc);
#endif
				break;
			}
			/* This point should not be reached. */
			PANIC("Thread has returned from sigreturn or longjmp");
		}
	}

	/* There are no more threads, so exit this process: */
	exit(0);
}

void
_thread_kern_sched_state(enum pthread_state state, char *fname, int lineno)
{
	/*
	 * Flag the pthread kernel as executing scheduler code
	 * to avoid a scheduler signal from interrupting this
	 * execution and calling the scheduler again.
	 */
	_thread_kern_in_sched = 1;

	/*
	 * Prevent the signal handler from fiddling with this thread
	 * before its state is set and is placed into the proper queue.
	 */
	_queue_signals = 1;

	/* Change the state of the current thread: */
	_thread_run->state = state;
	_thread_run->fname = fname;
	_thread_run->lineno = lineno;

	/* Schedule the next thread that is ready: */
	_thread_kern_sched(NULL);
}

void
_thread_kern_sched_state_unlock(enum pthread_state state,
    spinlock_t *lock, char *fname, int lineno)
{
	/*
	 * Flag the pthread kernel as executing scheduler code
	 * to avoid a scheduler signal from interrupting this
	 * execution and calling the scheduler again.
	 */
	_thread_kern_in_sched = 1;

	/*
	 * Prevent the signal handler from fiddling with this thread
	 * before its state is set and it is placed into the proper
	 * queue(s).
	 */
	_queue_signals = 1;

	/* Change the state of the current thread: */
	_thread_run->state = state;
	_thread_run->fname = fname;
	_thread_run->lineno = lineno;

	_SPINUNLOCK(lock);

	/* Schedule the next thread that is ready: */
	_thread_kern_sched(NULL);
}

static void
thread_kern_poll(int wait_reqd)
{
	int             count = 0;
	int             i, found;
	int		kern_pipe_added = 0;
	int             nfds = 0;
	int		timeout_ms = 0;
	struct pthread	*pthread;
	struct timespec ts;
	struct timeval  tv;

	/* Check if the caller wants to wait: */
	if (wait_reqd == 0) {
		timeout_ms = 0;
	}
	else {
		/* Get the current time of day: */
		GET_CURRENT_TOD(tv);
		TIMEVAL_TO_TIMESPEC(&tv, &ts);

		_queue_signals = 1;
		pthread = TAILQ_FIRST(&_waitingq);
		_queue_signals = 0;

		if ((pthread == NULL) || (pthread->wakeup_time.tv_sec == -1)) {
			/*
			 * Either there are no threads in the waiting queue,
			 * or there are no threads that can time out.
			 */
			timeout_ms = INFTIM;
		}
		else {
			/*
			 * Calculate the time left for the next thread to
			 * timeout:
			 */
			timeout_ms = ((pthread->wakeup_time.tv_sec - ts.tv_sec) *
			    1000) + ((pthread->wakeup_time.tv_nsec - ts.tv_nsec) /
			    1000000);
			/*
			 * Don't allow negative timeouts:
			 */
			if (timeout_ms < 0)
				timeout_ms = 0;
		}
	}

	/* Protect the scheduling queues: */
	_queue_signals = 1;

	/*
	 * Check to see if the signal queue needs to be walked to look
	 * for threads awoken by a signal while in the scheduler.
	 */
	if (_sigq_check_reqd != 0) {
		/* Reset flag before handling queued signals: */
		_sigq_check_reqd = 0;

		dequeue_signals();
	}

	/*
	 * Check for a thread that became runnable due to a signal:
	 */
	if (PTHREAD_PRIOQ_FIRST() != NULL) {
		/*
		 * Since there is at least one runnable thread,
		 * disable the wait.
		 */
		timeout_ms = 0;
	}

	/*
	 * Form the poll table:
	 */
	nfds = 0;
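	/*
	 * If the poll may block, include the pthread kernel pipe so
	 * that the signal handler can wake the poll by writing to it.
	 */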
	if (timeout_ms != 0) {
		/* Add the kernel pipe to the poll table: */
		_thread_pfd_table[nfds].fd = _thread_kern_pipe[0];
		_thread_pfd_table[nfds].events = POLLRDNORM;
		_thread_pfd_table[nfds].revents = 0;
		nfds++;
		kern_pipe_added = 1;
	}

	PTHREAD_WAITQ_SETACTIVE();
	TAILQ_FOREACH(pthread, &_workq, qe) {
		switch (pthread->state) {
		case PS_SPINBLOCK:
			/*
			 * If the lock is available, let the thread run.
			 */
			if (pthread->data.spinlock->access_lock == 0) {
				PTHREAD_WAITQ_CLEARACTIVE();
				PTHREAD_WORKQ_REMOVE(pthread);
				PTHREAD_NEW_STATE(pthread,PS_RUNNING);
				PTHREAD_WAITQ_SETACTIVE();
				/* One less thread in a spinblock state: */
				_spinblock_count--;
				/*
				 * Since there is at least one runnable
				 * thread, disable the wait.
				 */
				timeout_ms = 0;
			}
			break;

		/* File descriptor read wait: */
		case PS_FDR_WAIT:
			/* Limit number of polled files to table size: */
			if (nfds < _thread_dtablesize) {
				_thread_pfd_table[nfds].events = POLLRDNORM;
				_thread_pfd_table[nfds].fd = pthread->data.fd.fd;
				nfds++;
			}
			break;

		/* File descriptor write wait: */
		case PS_FDW_WAIT:
			/* Limit number of polled files to table size: */
			if (nfds < _thread_dtablesize) {
				_thread_pfd_table[nfds].events = POLLWRNORM;
				_thread_pfd_table[nfds].fd = pthread->data.fd.fd;
				nfds++;
			}
			break;

		/* File descriptor poll or select wait: */
		case PS_POLL_WAIT:
		case PS_SELECT_WAIT:
			/* Limit number of polled files to table size: */
			if (pthread->data.poll_data->nfds + nfds <
			    _thread_dtablesize) {
				for (i = 0; i < pthread->data.poll_data->nfds; i++) {
					_thread_pfd_table[nfds + i].fd =
					    pthread->data.poll_data->fds[i].fd;
					_thread_pfd_table[nfds + i].events =
					    pthread->data.poll_data->fds[i].events;
				}
				nfds += pthread->data.poll_data->nfds;
			}
			break;

		/* Other states do not depend on file I/O. */
		default:
			break;
		}
	}
	PTHREAD_WAITQ_CLEARACTIVE();

	/*
	 * Wait for a file descriptor to be ready for read, write, or
	 * an exception, or a timeout to occur:
	 */
	count = _thread_sys_poll(_thread_pfd_table, nfds, timeout_ms);

	if (kern_pipe_added != 0)
		/*
		 * Remove the pthread kernel pipe file descriptor
		 * from the pollfd table:
		 */
		nfds = 1;
	else
		nfds = 0;

	/*
	 * Check if it is possible that there are bytes in the kernel
	 * read pipe waiting to be read:
	 */
	if (count < 0 || ((kern_pipe_added != 0) &&
	    (_thread_pfd_table[0].revents & POLLRDNORM))) {
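		/*
		 * The poll may also have failed (for example, it was
		 * interrupted by a signal); in either case, look for
		 * queued signals below.
		 */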
		/*
		 * If the kernel read pipe was included in the
		 * count:
		 */
		if (count > 0) {
			/* Decrement the count of file descriptors: */
			count--;
		}

		if (_sigq_check_reqd != 0) {
			/* Reset flag before handling signals: */
			_sigq_check_reqd = 0;

			dequeue_signals();
		}
	}

	/*
	 * Check if any file descriptors are ready:
	 */
	if (count > 0) {
		/*
		 * Enter a loop to look for threads waiting on file
		 * descriptors that are flagged as available by the
		 * _poll syscall:
		 */
		PTHREAD_WAITQ_SETACTIVE();
		TAILQ_FOREACH(pthread, &_workq, qe) {
			switch (pthread->state) {
			case PS_SPINBLOCK:
				/*
				 * If the lock is available, let the thread run.
				 */
				if (pthread->data.spinlock->access_lock == 0) {
					PTHREAD_WAITQ_CLEARACTIVE();
					PTHREAD_WORKQ_REMOVE(pthread);
					PTHREAD_NEW_STATE(pthread,PS_RUNNING);
					PTHREAD_WAITQ_SETACTIVE();

					/*
					 * One less thread in a spinblock state:
					 */
					_spinblock_count--;
				}
				break;

			/* File descriptor read wait: */
			case PS_FDR_WAIT:
				if ((nfds < _thread_dtablesize) &&
				    (_thread_pfd_table[nfds].revents & POLLRDNORM)) {
					PTHREAD_WAITQ_CLEARACTIVE();
					PTHREAD_WORKQ_REMOVE(pthread);
					PTHREAD_NEW_STATE(pthread,PS_RUNNING);
					PTHREAD_WAITQ_SETACTIVE();
				}
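				/*
				 * Always advance past this thread's slot to
				 * stay in step with the poll table built
				 * above, even if the thread wasn't woken.
				 */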
				nfds++;
				break;

			/* File descriptor write wait: */
			case PS_FDW_WAIT:
				if ((nfds < _thread_dtablesize) &&
				    (_thread_pfd_table[nfds].revents & POLLWRNORM)) {
					PTHREAD_WAITQ_CLEARACTIVE();
					PTHREAD_WORKQ_REMOVE(pthread);
					PTHREAD_NEW_STATE(pthread,PS_RUNNING);
					PTHREAD_WAITQ_SETACTIVE();
				}
				nfds++;
				break;

			/* File descriptor poll or select wait: */
			case PS_POLL_WAIT:
			case PS_SELECT_WAIT:
				if (pthread->data.poll_data->nfds + nfds <
				    _thread_dtablesize) {
					/*
					 * Enter a loop looking for I/O
					 * readiness:
					 */
					found = 0;
					for (i = 0; i < pthread->data.poll_data->nfds; i++) {
						if (_thread_pfd_table[nfds + i].revents != 0) {
							pthread->data.poll_data->fds[i].revents =
							    _thread_pfd_table[nfds + i].revents;
							found++;
						}
					}

					/* Increment before destroying: */
					nfds += pthread->data.poll_data->nfds;

					if (found != 0) {
						pthread->data.poll_data->nfds = found;
						PTHREAD_WAITQ_CLEARACTIVE();
						PTHREAD_WORKQ_REMOVE(pthread);
						PTHREAD_NEW_STATE(pthread,PS_RUNNING);
						PTHREAD_WAITQ_SETACTIVE();
					}
				}
				else
					nfds += pthread->data.poll_data->nfds;
				break;

			/* Other states do not depend on file I/O. */
			default:
				break;
			}
		}
		PTHREAD_WAITQ_CLEARACTIVE();
	}
	else if (_spinblock_count != 0) {
		/*
		 * Enter a loop to look for threads waiting on a spinlock
		 * that is now available.
		 */
		PTHREAD_WAITQ_SETACTIVE();
		TAILQ_FOREACH(pthread, &_workq, qe) {
			if (pthread->state == PS_SPINBLOCK) {
				/*
				 * If the lock is available, let the thread run.
				 */
				if (pthread->data.spinlock->access_lock == 0) {
					PTHREAD_WAITQ_CLEARACTIVE();
					PTHREAD_WORKQ_REMOVE(pthread);
					PTHREAD_NEW_STATE(pthread,PS_RUNNING);
					PTHREAD_WAITQ_SETACTIVE();

					/*
					 * One less thread in a spinblock state:
					 */
					_spinblock_count--;
				}
			}
		}
		PTHREAD_WAITQ_CLEARACTIVE();
	}

	/* Unprotect the scheduling queues: */
	_queue_signals = 0;

	while (_sigq_check_reqd != 0) {
		/* Handle queued signals: */
		_sigq_check_reqd = 0;

		/* Protect the scheduling queues: */
		_queue_signals = 1;

		dequeue_signals();

		/* Unprotect the scheduling queues: */
		_queue_signals = 0;
	}
}

void
_thread_kern_set_timeout(const struct timespec * timeout)
{
	struct timespec current_time;
	struct timeval  tv;

	/* Reset the timeout flag for the running thread: */
	_thread_run->timeout = 0;

	/* Check if the thread is to wait forever: */
	if (timeout == NULL) {
		/*
		 * Set the wakeup time to something that can be recognised as
		 * different to an actual time of day:
		 */
		_thread_run->wakeup_time.tv_sec = -1;
		_thread_run->wakeup_time.tv_nsec = -1;
	}
	/* Check if no waiting is required: */
	else if (timeout->tv_sec == 0 && timeout->tv_nsec == 0) {
		/* Set the wake up time to 'immediately': */
		_thread_run->wakeup_time.tv_sec = 0;
		_thread_run->wakeup_time.tv_nsec = 0;
	} else {
		/* Get the current time: */
		GET_CURRENT_TOD(tv);
		TIMEVAL_TO_TIMESPEC(&tv, &current_time);

		/* Calculate the time for the current thread to wake up: */
		_thread_run->wakeup_time.tv_sec = current_time.tv_sec + timeout->tv_sec;
		_thread_run->wakeup_time.tv_nsec = current_time.tv_nsec + timeout->tv_nsec;

		/* Check if the nanosecond field needs to wrap: */
		if (_thread_run->wakeup_time.tv_nsec >= 1000000000) {
			/* Wrap the nanosecond field: */
			_thread_run->wakeup_time.tv_sec += 1;
			_thread_run->wakeup_time.tv_nsec -= 1000000000;
		}
	}
}

void
_thread_kern_sig_defer(void)
{
	/* Allow signal deferral to be recursive. */
	_thread_run->sig_defer_count++;
}

void
_thread_kern_sig_undefer(void)
{
	/*
	 * Perform checks to yield only if we are about to undefer
	 * signals.
	 */
	if (_thread_run->sig_defer_count > 1) {
		/* Decrement the signal deferral count. */
		_thread_run->sig_defer_count--;
	}
	else if (_thread_run->sig_defer_count == 1) {
		/* Reenable signals: */
		_thread_run->sig_defer_count = 0;

		/*
		 * Check if there are queued signals:
		 */
		if (_sigq_check_reqd != 0)
			_thread_kern_sched(NULL);

		/*
		 * Check for asynchronous cancellation before delivering any
		 * pending signals:
		 */
		if (((_thread_run->cancelflags & PTHREAD_AT_CANCEL_POINT) == 0) &&
		    ((_thread_run->cancelflags & PTHREAD_CANCEL_ASYNCHRONOUS) != 0))
			pthread_testcancel();

		/*
		 * If there are pending signals or this thread has
		 * to yield the CPU, call the kernel scheduler:
		 *
		 * XXX - Come back and revisit the pending signal problem
		 */
		if ((_thread_run->yield_on_sig_undefer != 0) ||
		    SIGNOTEMPTY(_thread_run->sigpend)) {
			_thread_run->yield_on_sig_undefer = 0;
			_thread_kern_sched(NULL);
		}
	}
}

static void
dequeue_signals(void)
{
	char	bufr[128];
	int	num;

	/*
	 * Enter a loop to clear the pthread kernel pipe:
	 */
	while (((num = _thread_sys_read(_thread_kern_pipe[0], bufr,
	    sizeof(bufr))) > 0) || (num == -1 && errno == EINTR)) {
	}
	if ((num < 0) && (errno != EAGAIN)) {
		/*
		 * The only error we should expect is if there is
		 * no data to read.
		 */
		PANIC("Unable to read from thread kernel pipe");
	}
	/* Handle any pending signals: */
	_thread_sig_handle_pending();
}

static inline void
thread_run_switch_hook(pthread_t thread_out, pthread_t thread_in)
{
	pthread_t tid_out = thread_out;
	pthread_t tid_in = thread_in;

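	/* Hide library-private threads from the hook by passing NULL: */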
	if ((tid_out != NULL) &&
	    (tid_out->flags & PTHREAD_FLAGS_PRIVATE) != 0)
		tid_out = NULL;
	if ((tid_in != NULL) &&
	    (tid_in->flags & PTHREAD_FLAGS_PRIVATE) != 0)
		tid_in = NULL;

	if ((_sched_switch_hook != NULL) && (tid_out != tid_in)) {
		/* Run the scheduler switch hook: */
		_sched_switch_hook(tid_out, tid_in);
	}
}
#endif