thr_kern.c revision 51794
/*
 * Copyright (c) 1995-1998 John Birrell <jb@cimlogic.com.au>
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. All advertising materials mentioning features or use of this software
 *    must display the following acknowledgement:
 *	This product includes software developed by John Birrell.
 * 4. Neither the name of the author nor the names of any co-contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY JOHN BIRRELL AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 * $FreeBSD: head/lib/libkse/thread/thr_kern.c 51794 1999-09-29 15:18:46Z marcel $
 *
 */
#include <errno.h>
#include <poll.h>
#include <stdlib.h>
#include <stdarg.h>
#include <string.h>
#include <unistd.h>
#include <setjmp.h>
#include <sys/types.h>
#include <sys/stat.h>
#include <sys/time.h>
#include <sys/socket.h>
#include <sys/uio.h>
#include <sys/syscall.h>
#include <fcntl.h>
#ifdef _THREAD_SAFE
#include <pthread.h>
#include "pthread_private.h"

/* Static function prototype definitions: */
static void
_thread_kern_poll(int wait_reqd);

static void
dequeue_signals(void);

static inline void
thread_run_switch_hook(pthread_t thread_out, pthread_t thread_in);

void
_thread_kern_sched(ucontext_t *scp)
{
#ifndef __alpha__
	char	*fdata;
#endif
	pthread_t pthread, pthread_h = NULL;
	struct itimerval itimer;
	struct timespec ts, ts1;
	struct timeval tv, tv1;
	int set_timer = 0;
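
	/*
	 * This scheduler is entered in one of two ways: from the
	 * scheduling signal handler, with scp pointing at the
	 * interrupted thread's signal context, or voluntarily with
	 * scp == NULL, in which case the thread's state is saved
	 * with setjmp() below and resumed later with longjmp().
	 */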

	/*
	 * Flag the pthread kernel as executing scheduler code
	 * to avoid a scheduler signal from interrupting this
	 * execution and calling the scheduler again.
	 */
	_thread_kern_in_sched = 1;

	/* Check if this function was called from the signal handler: */
	if (scp != NULL) {
		/*
		 * Copy the signal context to the current thread's jump
		 * buffer:
		 */
		memcpy(&_thread_run->saved_sigcontext, scp,
		    sizeof(_thread_run->saved_sigcontext));

#ifndef __alpha__
		/* Point to the floating point data in the running thread: */
		fdata = _thread_run->saved_fp;

		/* Save the floating point data: */
		__asm__("fnsave %0" : "=m" (*fdata));
#endif

		/* Flag the signal context as the last state saved: */
		_thread_run->sig_saved = 1;
	}
	/* Save the state of the current thread: */
	else if (setjmp(_thread_run->saved_jmp_buf) != 0) {
		/*
		 * This point is reached when a longjmp() is called to
		 * restore the state of a thread.
		 *
		 * This is the normal way out of the scheduler.
		 */
		_thread_kern_in_sched = 0;

		if (_sched_switch_hook != NULL) {
			/* Run the installed switch hook: */
			thread_run_switch_hook(_last_user_thread, _thread_run);
		}

		return;
	} else
		/* Flag the jump buffer was the last state saved: */
		_thread_run->sig_saved = 0;

	/* If the currently running thread is a user thread, save it: */
	if ((_thread_run->flags & PTHREAD_FLAGS_PRIVATE) == 0)
		_last_user_thread = _thread_run;

	/*
	 * Enter a scheduling loop that finds the next thread that is
	 * ready to run. This loop completes when there are no more threads
	 * in the global list or when a thread has its state restored by
	 * either a sigreturn (if the state was saved as a sigcontext) or a
	 * longjmp (if the state was saved by a setjmp).
	 */
	while (!(TAILQ_EMPTY(&_thread_list))) {
		/* Get the current time of day: */
		gettimeofday(&tv, NULL);
		TIMEVAL_TO_TIMESPEC(&tv, &ts);

		/*
		 * Protect the scheduling queues from access by the signal
		 * handler.
		 */
		_queue_signals = 1;

		if (_thread_run != &_thread_kern_thread) {
			/*
			 * This thread no longer needs to yield the CPU.
			 */
			_thread_run->yield_on_sig_undefer = 0;

			/*
			 * Save the current time as the time that the thread
			 * became inactive:
			 */
			_thread_run->last_inactive.tv_sec = tv.tv_sec;
			_thread_run->last_inactive.tv_usec = tv.tv_usec;

			/*
			 * Place the currently running thread into the
			 * appropriate queue(s).
			 */
			switch (_thread_run->state) {
			case PS_DEAD:
				/*
				 * Dead threads are not placed in any queue:
				 */
				break;

			case PS_RUNNING:
				/*
				 * Runnable threads can't be placed in the
				 * priority queue until after waiting threads
				 * are polled (to preserve round-robin
				 * scheduling).
				 */
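
				/*
				 * For example: a thread that last became
				 * active at 1.900000s and is now inactive
				 * at 2.050000s accrues (2 - 1) * 1000000 +
				 * 50000 - 900000 = 150000us below.
				 */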
				if ((_thread_run->slice_usec != -1) &&
				    (_thread_run->attr.sched_policy != SCHED_FIFO)) {
					/*
					 * Accumulate the number of
					 * microseconds that this thread
					 * has run for:
					 */
					_thread_run->slice_usec +=
					    (_thread_run->last_inactive.tv_sec -
					    _thread_run->last_active.tv_sec) * 1000000 +
					    _thread_run->last_inactive.tv_usec -
					    _thread_run->last_active.tv_usec;

					/* Check for time quantum exceeded: */
					if (_thread_run->slice_usec > TIMESLICE_USEC)
						_thread_run->slice_usec = -1;
				}
				break;

			/*
			 * States which do not depend on file descriptor I/O
			 * operations or timeouts:
			 */
			case PS_DEADLOCK:
			case PS_FDLR_WAIT:
			case PS_FDLW_WAIT:
			case PS_FILE_WAIT:
			case PS_JOIN:
			case PS_MUTEX_WAIT:
			case PS_SIGSUSPEND:
			case PS_SIGTHREAD:
			case PS_SIGWAIT:
			case PS_SUSPENDED:
			case PS_WAIT_WAIT:
				/* No timeouts for these states: */
				_thread_run->wakeup_time.tv_sec = -1;
				_thread_run->wakeup_time.tv_nsec = -1;

				/* Restart the time slice: */
				_thread_run->slice_usec = -1;

				/* Insert into the waiting queue: */
				PTHREAD_WAITQ_INSERT(_thread_run);
				break;

			/* States which can timeout: */
			case PS_COND_WAIT:
			case PS_SLEEP_WAIT:
				/* Restart the time slice: */
				_thread_run->slice_usec = -1;

				/* Insert into the waiting queue: */
				PTHREAD_WAITQ_INSERT(_thread_run);
				break;

			/* States that require periodic work: */
			case PS_SPINBLOCK:
				/* No timeouts for this state: */
				_thread_run->wakeup_time.tv_sec = -1;
				_thread_run->wakeup_time.tv_nsec = -1;

				/* Increment spinblock count: */
				_spinblock_count++;

				/* fall through */
			case PS_FDR_WAIT:
			case PS_FDW_WAIT:
			case PS_POLL_WAIT:
			case PS_SELECT_WAIT:
				/* Restart the time slice: */
				_thread_run->slice_usec = -1;

				/* Insert into the waiting queue: */
				PTHREAD_WAITQ_INSERT(_thread_run);

				/* Insert into the work queue: */
				PTHREAD_WORKQ_INSERT(_thread_run);
			}
		}

		/* Unprotect the scheduling queues: */
		_queue_signals = 0;

		/*
		 * Poll file descriptors to update the state of threads
		 * waiting on file I/O where data may be available:
		 */
		_thread_kern_poll(0);

		/* Protect the scheduling queues: */
		_queue_signals = 1;

		/*
		 * Wake up threads that have timed out. This has to be
		 * done after polling in case a thread does a poll or
		 * select with zero time.
		 */
		PTHREAD_WAITQ_SETACTIVE();
		while (((pthread = TAILQ_FIRST(&_waitingq)) != NULL) &&
		    (pthread->wakeup_time.tv_sec != -1) &&
		    (((pthread->wakeup_time.tv_sec == 0) &&
		    (pthread->wakeup_time.tv_nsec == 0)) ||
		    (pthread->wakeup_time.tv_sec < ts.tv_sec) ||
		    ((pthread->wakeup_time.tv_sec == ts.tv_sec) &&
		    (pthread->wakeup_time.tv_nsec <= ts.tv_nsec)))) {
			switch (pthread->state) {
			case PS_POLL_WAIT:
			case PS_SELECT_WAIT:
				/* Return zero file descriptors ready: */
				pthread->data.poll_data->nfds = 0;
				/* fall through */
			default:
				/*
				 * Remove this thread from the waiting queue
				 * (and work queue if necessary) and place it
				 * in the ready queue.
				 */
				PTHREAD_WAITQ_CLEARACTIVE();
				if (pthread->flags & PTHREAD_FLAGS_IN_WORKQ)
					PTHREAD_WORKQ_REMOVE(pthread);
				PTHREAD_NEW_STATE(pthread, PS_RUNNING);
				PTHREAD_WAITQ_SETACTIVE();
				break;
			}
			/*
			 * Flag the timeout in the thread structure:
			 */
			pthread->timeout = 1;
		}
		PTHREAD_WAITQ_CLEARACTIVE();
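
		/*
		 * Note that the walk above is bracketed by
		 * PTHREAD_WAITQ_SETACTIVE()/PTHREAD_WAITQ_CLEARACTIVE(),
		 * and the active flag is dropped around each requeueing:
		 * PTHREAD_NEW_STATE() moves a thread between queues, which
		 * is not safe while the waiting queue is marked active.
		 */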

		/*
		 * Check if there is a current runnable thread that isn't
		 * already in the ready queue:
		 */
		if ((_thread_run != &_thread_kern_thread) &&
		    (_thread_run->state == PS_RUNNING) &&
		    ((_thread_run->flags & PTHREAD_FLAGS_IN_PRIOQ) == 0)) {
			if (_thread_run->slice_usec == -1) {
				/*
				 * The thread exceeded its time
				 * quantum or it yielded the CPU;
				 * place it at the tail of the
				 * queue for its priority.
				 */
				PTHREAD_PRIOQ_INSERT_TAIL(_thread_run);
			} else {
				/*
				 * The thread hasn't exceeded its
				 * interval. Place it at the head
				 * of the queue for its priority.
				 */
				PTHREAD_PRIOQ_INSERT_HEAD(_thread_run);
			}
		}

		/*
		 * Get the highest priority thread in the ready queue.
		 */
		pthread_h = PTHREAD_PRIOQ_FIRST();

		/* Check if there are no threads ready to run: */
		if (pthread_h == NULL) {
			/*
			 * Lock the pthread kernel by changing the pointer to
			 * the running thread to point to the global kernel
			 * thread structure:
			 */
			_thread_run = &_thread_kern_thread;

			/* Unprotect the scheduling queues: */
			_queue_signals = 0;

			/*
			 * There are no threads ready to run, so wait until
			 * something happens that changes this condition:
			 */
			_thread_kern_poll(1);
		}
		else {
			/* Remove the thread from the ready queue: */
			PTHREAD_PRIOQ_REMOVE(pthread_h);

			/* Get first thread on the waiting list: */
			pthread = TAILQ_FIRST(&_waitingq);

			/* Check to see if there is more than one thread: */
			if (pthread_h != TAILQ_FIRST(&_thread_list) ||
			    TAILQ_NEXT(pthread_h, tle) != NULL)
				set_timer = 1;
			else
				set_timer = 0;

			/* Unprotect the scheduling queues: */
			_queue_signals = 0;

			/*
			 * Check for signals queued while the scheduling
			 * queues were protected:
			 */
			while (_sigq_check_reqd != 0) {
				/* Clear before handling queued signals: */
				_sigq_check_reqd = 0;

				/* Protect the scheduling queues again: */
				_queue_signals = 1;

				dequeue_signals();

				/*
				 * Check for a higher priority thread that
				 * became runnable due to signal handling.
				 */
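
				/*
				 * If one is found, the previously chosen
				 * thread goes back on the head (not the
				 * tail) of its priority list: it never
				 * actually ran, so it keeps its place in
				 * the round-robin order.
				 */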
				if (((pthread = PTHREAD_PRIOQ_FIRST()) != NULL) &&
				    (pthread->active_priority > pthread_h->active_priority)) {
					/*
					 * Insert the lower priority thread
					 * at the head of its priority list:
					 */
					PTHREAD_PRIOQ_INSERT_HEAD(pthread_h);

					/* Remove the thread from the ready queue: */
					PTHREAD_PRIOQ_REMOVE(pthread);

					/* There's a new thread in town: */
					pthread_h = pthread;
				}

				/* Get first thread on the waiting list: */
				pthread = TAILQ_FIRST(&_waitingq);

				/*
				 * Check to see if there is more than one
				 * thread:
				 */
				if (pthread_h != TAILQ_FIRST(&_thread_list) ||
				    TAILQ_NEXT(pthread_h, tle) != NULL)
					set_timer = 1;
				else
					set_timer = 0;

				/* Unprotect the scheduling queues: */
				_queue_signals = 0;
			}

			/* Make the selected thread the current thread: */
			_thread_run = pthread_h;

			/*
			 * Save the current time as the time that the thread
			 * became active:
			 */
			_thread_run->last_active.tv_sec = tv.tv_sec;
			_thread_run->last_active.tv_usec = tv.tv_usec;

			/*
			 * Define the maximum time before a scheduling signal
			 * is required:
			 */
			itimer.it_value.tv_sec = 0;
			itimer.it_value.tv_usec = TIMESLICE_USEC;

			/*
			 * The interval timer is not reloaded when it
			 * times out. The interval time needs to be
			 * calculated every time.
			 */
			itimer.it_interval.tv_sec = 0;
			itimer.it_interval.tv_usec = 0;

			/*
			 * Check if the first waiting thread has a real
			 * wakeup time:
			 */
			if ((pthread != NULL) &&
			    (pthread->wakeup_time.tv_sec != -1)) {
				/*
				 * Calculate the time until this thread
				 * is ready, allowing for the clock
				 * resolution:
				 */
				ts1.tv_sec = pthread->wakeup_time.tv_sec
				    - ts.tv_sec;
				ts1.tv_nsec = pthread->wakeup_time.tv_nsec
				    - ts.tv_nsec + _clock_res_nsec;

				/*
				 * Check for underflow of the nanosecond field:
				 */
				while (ts1.tv_nsec < 0) {
					/*
					 * Allow for the underflow of the
					 * nanosecond field:
					 */
					ts1.tv_sec--;
					ts1.tv_nsec += 1000000000;
				}
				/*
				 * Check for overflow of the nanosecond field:
				 */
				while (ts1.tv_nsec >= 1000000000) {
					/*
					 * Allow for the overflow of the
					 * nanosecond field:
					 */
					ts1.tv_sec++;
					ts1.tv_nsec -= 1000000000;
				}
				/*
				 * Convert the timespec structure to a
				 * timeval structure:
				 */
				TIMESPEC_TO_TIMEVAL(&tv1, &ts1);

				/*
				 * Check if the thread will be ready
				 * sooner than the earliest ones found
				 * so far:
				 */
				if (timercmp(&tv1, &itimer.it_value, <)) {
					/*
					 * Update the time value:
					 */
					itimer.it_value.tv_sec = tv1.tv_sec;
					itimer.it_value.tv_usec = tv1.tv_usec;
				}
			}

			/*
			 * Check if this thread is running for the first time
			 * or running again after using its full time slice
			 * allocation:
			 */
			if (_thread_run->slice_usec == -1) {
				/* Reset the accumulated time slice period: */
				_thread_run->slice_usec = 0;
			}

			/* Check if there is more than one thread: */
			if (set_timer != 0) {
				/*
				 * Start the interval timer for the
				 * calculated time interval:
				 */
				if (setitimer(_ITIMER_SCHED_TIMER, &itimer, NULL) != 0) {
					/*
					 * Cannot initialise the timer, so
					 * abort this process:
					 */
					PANIC("Cannot set scheduling timer");
				}
			}

			/* Check if a signal context was saved: */
			if (_thread_run->sig_saved == 1) {
#ifndef __alpha__
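				/*
				 * On i386 the FPU state was saved above with
				 * fnsave, which also reinitializes the FPU;
				 * frstor below reloads the complete saved
				 * state (control/status/tag words and the
				 * register stack) before the thread resumes.
				 */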
				/*
				 * Point to the floating point data in the
				 * running thread:
				 */
				fdata = _thread_run->saved_fp;

				/* Restore the floating point state: */
				__asm__("frstor %0" : : "m" (*fdata));
#endif
				/*
				 * Do a sigreturn to restart the thread that
				 * was interrupted by a signal:
				 */
				_thread_kern_in_sched = 0;

				/*
				 * If we had a context switch, run any
				 * installed switch hooks.
				 */
				if ((_sched_switch_hook != NULL) &&
				    (_last_user_thread != _thread_run)) {
					thread_run_switch_hook(_last_user_thread,
					    _thread_run);
				}
				_thread_sys_sigreturn(&_thread_run->saved_sigcontext);
			} else {
				/*
				 * Do a longjmp to restart the thread that
				 * was context switched out (by a longjmp to
				 * a different thread):
				 */
				longjmp(_thread_run->saved_jmp_buf, 1);
			}

			/* This point should not be reached. */
			PANIC("Thread has returned from sigreturn or longjmp");
		}
	}

	/* There are no more threads, so exit this process: */
	exit(0);
}

void
_thread_kern_sched_state(enum pthread_state state, char *fname, int lineno)
{
	/*
	 * Flag the pthread kernel as executing scheduler code
	 * to avoid a scheduler signal from interrupting this
	 * execution and calling the scheduler again.
	 */
	_thread_kern_in_sched = 1;

	/*
	 * Prevent the signal handler from fiddling with this thread
	 * before its state is set and it is placed into the proper queue.
	 */
	_queue_signals = 1;

	/* Change the state of the current thread: */
	_thread_run->state = state;
	_thread_run->fname = fname;
	_thread_run->lineno = lineno;

	/* Schedule the next thread that is ready: */
	_thread_kern_sched(NULL);
	return;
}

void
_thread_kern_sched_state_unlock(enum pthread_state state,
    spinlock_t *lock, char *fname, int lineno)
{
	/*
	 * Flag the pthread kernel as executing scheduler code
	 * to avoid a scheduler signal from interrupting this
	 * execution and calling the scheduler again.
	 */
	_thread_kern_in_sched = 1;

	/*
	 * Prevent the signal handler from fiddling with this thread
	 * before its state is set and it is placed into the proper
	 * queue(s).
	 */
	_queue_signals = 1;

	/* Change the state of the current thread: */
	_thread_run->state = state;
	_thread_run->fname = fname;
	_thread_run->lineno = lineno;

	_SPINUNLOCK(lock);

	/* Schedule the next thread that is ready: */
	_thread_kern_sched(NULL);
	return;
}

static void
_thread_kern_poll(int wait_reqd)
{
	int count = 0;
	int i, found;
	int kern_pipe_added = 0;
	int nfds = 0;
	int timeout_ms = 0;
	struct pthread *pthread;
	struct timespec ts;
	struct timeval tv;

	/* Check if the caller wants to wait: */
	if (wait_reqd == 0) {
		timeout_ms = 0;
	}
	else {
		/* Get the current time of day: */
		gettimeofday(&tv, NULL);
		TIMEVAL_TO_TIMESPEC(&tv, &ts);

		_queue_signals = 1;
		pthread = TAILQ_FIRST(&_waitingq);
		_queue_signals = 0;

		if ((pthread == NULL) || (pthread->wakeup_time.tv_sec == -1)) {
			/*
			 * Either there are no threads in the waiting queue,
			 * or there are no threads that can time out.
			 */
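			/*
			 * INFTIM (-1, from <poll.h>) makes poll() block
			 * until a descriptor is ready or a signal arrives.
			 */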
			timeout_ms = INFTIM;
		}
		else {
			/*
			 * Calculate the time left for the next thread to
			 * time out, allowing for the clock resolution:
			 */
			timeout_ms = ((pthread->wakeup_time.tv_sec - ts.tv_sec) *
			    1000) + ((pthread->wakeup_time.tv_nsec - ts.tv_nsec +
			    _clock_res_nsec) / 1000000);
			/*
			 * Don't allow negative timeouts:
			 */
			if (timeout_ms < 0)
				timeout_ms = 0;
		}
	}

	/* Protect the scheduling queues: */
	_queue_signals = 1;

	/*
	 * Check to see if the signal queue needs to be walked to look
	 * for threads awoken by a signal while in the scheduler.
	 */
	if (_sigq_check_reqd != 0) {
		/* Reset flag before handling queued signals: */
		_sigq_check_reqd = 0;

		dequeue_signals();
	}

	/*
	 * Check for a thread that became runnable due to a signal:
	 */
	if (PTHREAD_PRIOQ_FIRST() != NULL) {
		/*
		 * Since there is at least one runnable thread,
		 * disable the wait.
		 */
		timeout_ms = 0;
	}

	/*
	 * Form the poll table:
	 */
	nfds = 0;
	if (timeout_ms != 0) {
		/* Add the kernel pipe to the poll table: */
		_thread_pfd_table[nfds].fd = _thread_kern_pipe[0];
		_thread_pfd_table[nfds].events = POLLRDNORM;
		_thread_pfd_table[nfds].revents = 0;
		nfds++;
		kern_pipe_added = 1;
	}

	PTHREAD_WAITQ_SETACTIVE();
	TAILQ_FOREACH(pthread, &_workq, qe) {
		switch (pthread->state) {
		case PS_SPINBLOCK:
			/*
			 * If the lock is available, let the thread run.
			 */
			if (pthread->data.spinlock->access_lock == 0) {
				PTHREAD_WAITQ_CLEARACTIVE();
				PTHREAD_WORKQ_REMOVE(pthread);
				PTHREAD_NEW_STATE(pthread, PS_RUNNING);
				PTHREAD_WAITQ_SETACTIVE();

				/* One less thread in a spinblock state: */
				_spinblock_count--;

				/*
				 * Since there is at least one runnable
				 * thread, disable the wait.
				 */
				timeout_ms = 0;
			}
			break;

		/* File descriptor read wait: */
		case PS_FDR_WAIT:
			/* Limit number of polled files to table size: */
			if (nfds < _thread_dtablesize) {
				_thread_pfd_table[nfds].events = POLLRDNORM;
				_thread_pfd_table[nfds].fd = pthread->data.fd.fd;
				nfds++;
			}
			break;

		/* File descriptor write wait: */
		case PS_FDW_WAIT:
			/* Limit number of polled files to table size: */
			if (nfds < _thread_dtablesize) {
				_thread_pfd_table[nfds].events = POLLWRNORM;
				_thread_pfd_table[nfds].fd = pthread->data.fd.fd;
				nfds++;
			}
			break;

		/* File descriptor poll or select wait: */
		case PS_POLL_WAIT:
		case PS_SELECT_WAIT:
			/* Limit number of polled files to table size: */
			if (pthread->data.poll_data->nfds + nfds <
			    _thread_dtablesize) {
				for (i = 0; i < pthread->data.poll_data->nfds; i++) {
					_thread_pfd_table[nfds + i].fd =
					    pthread->data.poll_data->fds[i].fd;
					_thread_pfd_table[nfds + i].events =
					    pthread->data.poll_data->fds[i].events;
				}
				nfds += pthread->data.poll_data->nfds;
			}
			break;

		/* Other states do not depend on file I/O. */
		default:
			break;
		}
	}
	PTHREAD_WAITQ_CLEARACTIVE();
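
	/*
	 * The poll table is rebuilt in work queue order on every pass.
	 * After poll() returns, the work queue is walked again in the
	 * same order, advancing nfds through the table in lockstep, so
	 * each waiting thread finds its own revents entries.
	 */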

	/*
	 * Wait for a file descriptor to be ready for read, write, or
	 * an exception, or a timeout to occur:
	 */
	count = _thread_sys_poll(_thread_pfd_table, nfds, timeout_ms);

	if (kern_pipe_added != 0)
		/*
		 * Start past the kernel pipe's entry when rescanning
		 * the pollfd table:
		 */
		nfds = 1;
	else
		nfds = 0;

	/*
	 * Check if it is possible that there are bytes in the kernel
	 * read pipe waiting to be read:
	 */
	if (count < 0 || ((kern_pipe_added != 0) &&
	    (_thread_pfd_table[0].revents & POLLRDNORM))) {
		/*
		 * If the kernel read pipe was included in the
		 * count:
		 */
		if (count > 0) {
			/* Decrement the count of file descriptors: */
			count--;
		}

		if (_sigq_check_reqd != 0) {
			/* Reset flag before handling signals: */
			_sigq_check_reqd = 0;

			dequeue_signals();
		}
	}

	/*
	 * Check if any file descriptors are ready:
	 */
	if (count > 0) {
		/*
		 * Enter a loop to look for threads waiting on file
		 * descriptors that are flagged as available by the
		 * _poll syscall:
		 */
		PTHREAD_WAITQ_SETACTIVE();
		TAILQ_FOREACH(pthread, &_workq, qe) {
			switch (pthread->state) {
			case PS_SPINBLOCK:
				/*
				 * If the lock is available, let the thread run.
				 */
				if (pthread->data.spinlock->access_lock == 0) {
					PTHREAD_WAITQ_CLEARACTIVE();
					PTHREAD_WORKQ_REMOVE(pthread);
					PTHREAD_NEW_STATE(pthread, PS_RUNNING);
					PTHREAD_WAITQ_SETACTIVE();

					/*
					 * One less thread in a spinblock state:
					 */
					_spinblock_count--;
				}
				break;

			/* File descriptor read wait: */
			case PS_FDR_WAIT:
				if ((nfds < _thread_dtablesize) &&
				    (_thread_pfd_table[nfds].revents & POLLRDNORM)) {
					PTHREAD_WAITQ_CLEARACTIVE();
					PTHREAD_WORKQ_REMOVE(pthread);
					PTHREAD_NEW_STATE(pthread, PS_RUNNING);
					PTHREAD_WAITQ_SETACTIVE();
				}
				nfds++;
				break;

			/* File descriptor write wait: */
			case PS_FDW_WAIT:
				if ((nfds < _thread_dtablesize) &&
				    (_thread_pfd_table[nfds].revents & POLLWRNORM)) {
					PTHREAD_WAITQ_CLEARACTIVE();
					PTHREAD_WORKQ_REMOVE(pthread);
					PTHREAD_NEW_STATE(pthread, PS_RUNNING);
					PTHREAD_WAITQ_SETACTIVE();
				}
				nfds++;
				break;

			/* File descriptor poll or select wait: */
			case PS_POLL_WAIT:
			case PS_SELECT_WAIT:
				if (pthread->data.poll_data->nfds + nfds <
				    _thread_dtablesize) {
					/*
					 * Enter a loop looking for I/O
					 * readiness:
					 */
					found = 0;
					for (i = 0; i < pthread->data.poll_data->nfds; i++) {
						if (_thread_pfd_table[nfds + i].revents != 0) {
							pthread->data.poll_data->fds[i].revents =
							    _thread_pfd_table[nfds + i].revents;
							found++;
						}
					}

					/* Increment before destroying: */
					nfds += pthread->data.poll_data->nfds;

					if (found != 0) {
						pthread->data.poll_data->nfds = found;
						PTHREAD_WAITQ_CLEARACTIVE();
						PTHREAD_WORKQ_REMOVE(pthread);
						PTHREAD_NEW_STATE(pthread, PS_RUNNING);
						PTHREAD_WAITQ_SETACTIVE();
					}
				}
				else
					nfds += pthread->data.poll_data->nfds;
				break;

			/* Other states do not depend on file I/O. */
			default:
				break;
			}
		}
		PTHREAD_WAITQ_CLEARACTIVE();
	}
	else if (_spinblock_count != 0) {
		/*
		 * Enter a loop to look for threads waiting on a spinlock
		 * that is now available.
		 */
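		/*
		 * Spinlocks are owned by user threads, so a holder may
		 * itself have been switched out; blocked spinners are
		 * therefore revisited on every scheduler pass rather
		 * than waiting on any file descriptor.
		 */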
		PTHREAD_WAITQ_SETACTIVE();
		TAILQ_FOREACH(pthread, &_workq, qe) {
			if (pthread->state == PS_SPINBLOCK) {
				/*
				 * If the lock is available, let the thread run.
				 */
				if (pthread->data.spinlock->access_lock == 0) {
					PTHREAD_WAITQ_CLEARACTIVE();
					PTHREAD_WORKQ_REMOVE(pthread);
					PTHREAD_NEW_STATE(pthread, PS_RUNNING);
					PTHREAD_WAITQ_SETACTIVE();

					/*
					 * One less thread in a spinblock state:
					 */
					_spinblock_count--;
				}
			}
		}
		PTHREAD_WAITQ_CLEARACTIVE();
	}

	/* Unprotect the scheduling queues: */
	_queue_signals = 0;

	while (_sigq_check_reqd != 0) {
		/* Handle queued signals: */
		_sigq_check_reqd = 0;

		/* Protect the scheduling queues: */
		_queue_signals = 1;

		dequeue_signals();

		/* Unprotect the scheduling queues: */
		_queue_signals = 0;
	}

	/* Nothing to return. */
	return;
}

void
_thread_kern_set_timeout(struct timespec *timeout)
{
	struct timespec current_time;
	struct timeval tv;

	/* Reset the timeout flag for the running thread: */
	_thread_run->timeout = 0;

	/* Check if the thread is to wait forever: */
	if (timeout == NULL) {
		/*
		 * Set the wakeup time to something that can be recognised as
		 * different to an actual time of day:
		 */
		_thread_run->wakeup_time.tv_sec = -1;
		_thread_run->wakeup_time.tv_nsec = -1;
	}
	/* Check if no waiting is required: */
	else if (timeout->tv_sec == 0 && timeout->tv_nsec == 0) {
		/* Set the wake up time to 'immediately': */
		_thread_run->wakeup_time.tv_sec = 0;
		_thread_run->wakeup_time.tv_nsec = 0;
	} else {
		/* Get the current time: */
		gettimeofday(&tv, NULL);
		TIMEVAL_TO_TIMESPEC(&tv, &current_time);

		/* Calculate the time for the current thread to wake up: */
		_thread_run->wakeup_time.tv_sec = current_time.tv_sec +
		    timeout->tv_sec;
		_thread_run->wakeup_time.tv_nsec = current_time.tv_nsec +
		    timeout->tv_nsec;

		/* Check if the nanosecond field needs to wrap: */
		if (_thread_run->wakeup_time.tv_nsec >= 1000000000) {
			/* Wrap the nanosecond field: */
			_thread_run->wakeup_time.tv_sec += 1;
			_thread_run->wakeup_time.tv_nsec -= 1000000000;
		}
	}
	return;
}

void
_thread_kern_sig_defer(void)
{
	/* Allow signal deferral to be recursive. */
	_thread_run->sig_defer_count++;
}

void
_thread_kern_sig_undefer(void)
{
	pthread_t pthread;
	int need_resched = 0;

	/*
	 * Perform checks to yield only if we are about to undefer
	 * signals.
	 */
	if (_thread_run->sig_defer_count > 1) {
		/* Decrement the signal deferral count. */
		_thread_run->sig_defer_count--;
	}
	else if (_thread_run->sig_defer_count == 1) {
		/* Reenable signals: */
		_thread_run->sig_defer_count = 0;

		/*
		 * Check if there are queued signals:
		 */
		while (_sigq_check_reqd != 0) {
			/* Defer scheduling while we process queued signals: */
			_thread_run->sig_defer_count = 1;

			/* Clear the flag before checking the signal queue: */
			_sigq_check_reqd = 0;

			/* Dequeue and handle signals: */
			dequeue_signals();

			/*
			 * Check whether the signal handling just done
			 * made a higher priority thread ready, so that
			 * an unnecessary reschedule can be avoided:
			 */
			if ((need_resched == 0) &&
			    (((pthread = PTHREAD_PRIOQ_FIRST()) != NULL) &&
			    (pthread->active_priority > _thread_run->active_priority))) {
				need_resched = 1;
			}

			/* Reenable signals: */
			_thread_run->sig_defer_count = 0;
		}

		/* Yield the CPU if necessary: */
		if (need_resched || _thread_run->yield_on_sig_undefer != 0) {
			_thread_run->yield_on_sig_undefer = 0;
			_thread_kern_sched(NULL);
		}
	}
}
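
/*
 * The signal handler queues work for the scheduler by writing one
 * byte per signal (the signal number) to _thread_kern_pipe;
 * dequeue_signals() drains the read end, which is non-blocking,
 * until read() fails with EAGAIN.
 */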

static void
dequeue_signals(void)
{
	char	bufr[128];
	int	i, num;

	/*
	 * Enter a loop to read and handle queued signals from the
	 * pthread kernel pipe:
	 */
	while (((num = _thread_sys_read(_thread_kern_pipe[0], bufr,
	    sizeof(bufr))) > 0) || (num == -1 && errno == EINTR)) {
		/*
		 * The buffer read contains one byte per signal and
		 * each byte is the signal number.
		 */
		for (i = 0; i < num; i++) {
			if ((int) bufr[i] == _SCHED_SIGNAL) {
				/*
				 * Scheduling signals shouldn't ever be
				 * queued; just ignore it for now.
				 */
			}
			else {
				/* Handle this signal: */
				_thread_sig_handle((int) bufr[i], NULL);
			}
		}
	}
	if ((num < 0) && (errno != EAGAIN)) {
		/*
		 * The only error we should expect is if there is
		 * no data to read.
		 */
		PANIC("Unable to read from thread kernel pipe");
	}
}

static inline void
thread_run_switch_hook(pthread_t thread_out, pthread_t thread_in)
{
	pthread_t tid_out = thread_out;
	pthread_t tid_in = thread_in;

	if ((tid_out != NULL) &&
	    ((tid_out->flags & PTHREAD_FLAGS_PRIVATE) != 0))
		tid_out = NULL;
	if ((tid_in != NULL) &&
	    ((tid_in->flags & PTHREAD_FLAGS_PRIVATE) != 0))
		tid_in = NULL;

	if ((_sched_switch_hook != NULL) && (tid_out != tid_in)) {
		/* Run the scheduler switch hook: */
		_sched_switch_hook(tid_out, tid_in);
	}
}
#endif