kern_thread.c revision 108640
/*
 * Copyright (C) 2001 Julian Elischer <julian@freebsd.org>.
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice(s), this list of conditions and the following disclaimer as
 *    the first lines of this file unmodified other than the possible
 *    addition of one or more copyright notices.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice(s), this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDER(S) ``AS IS'' AND ANY
 * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
 * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
 * DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT HOLDER(S) BE LIABLE FOR ANY
 * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
 * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
 * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
 * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH
 * DAMAGE.
 *
 * $FreeBSD: head/sys/kern/kern_thread.c 108640 2003-01-04 05:59:25Z davidxu $
 */

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/kernel.h>
#include <sys/lock.h>
#include <sys/malloc.h>
#include <sys/mutex.h>
#include <sys/proc.h>
#include <sys/smp.h>
#include <sys/sysctl.h>
#include <sys/sysproto.h>
#include <sys/filedesc.h>
#include <sys/sched.h>
#include <sys/signalvar.h>
#include <sys/sx.h>
#include <sys/tty.h>
#include <sys/user.h>
#include <sys/jail.h>
#include <sys/kse.h>
#include <sys/ktr.h>
#include <sys/ucontext.h>

#include <vm/vm.h>
#include <vm/vm_object.h>
#include <vm/pmap.h>
#include <vm/uma.h>
#include <vm/vm_map.h>

#include <machine/frame.h>

/*
 * KSEGRP related storage.
 */
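/*
 * Reader's note (a rough sketch, not authoritative): the object
 * hierarchy used throughout this file is, in outline,
 *
 *	struct proc		one per process
 *	  struct ksegrp		scheduling group(s) within the process
 *	    struct kse		schedulable entities ("virtual CPUs")
 *	    struct thread	threads; each is run by at most one KSE
 *
 * The zones below provide type-stable storage for these structures.
 */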
static uma_zone_t ksegrp_zone;
static uma_zone_t kse_zone;
static uma_zone_t thread_zone;

/* DEBUG ONLY */
SYSCTL_NODE(_kern, OID_AUTO, threads, CTLFLAG_RW, 0, "thread allocation");
static int thread_debug = 0;
SYSCTL_INT(_kern_threads, OID_AUTO, debug, CTLFLAG_RW,
	&thread_debug, 0, "thread debug");

static int max_threads_per_proc = 30;
SYSCTL_INT(_kern_threads, OID_AUTO, max_threads_per_proc, CTLFLAG_RW,
	&max_threads_per_proc, 0, "Limit on threads per proc");

static int max_groups_per_proc = 5;
SYSCTL_INT(_kern_threads, OID_AUTO, max_groups_per_proc, CTLFLAG_RW,
	&max_groups_per_proc, 0, "Limit on thread groups per proc");

#define RANGEOF(type, start, end) (offsetof(type, end) - offsetof(type, start))

struct threadqueue zombie_threads = TAILQ_HEAD_INITIALIZER(zombie_threads);
TAILQ_HEAD(, kse) zombie_kses = TAILQ_HEAD_INITIALIZER(zombie_kses);
TAILQ_HEAD(, ksegrp) zombie_ksegrps = TAILQ_HEAD_INITIALIZER(zombie_ksegrps);
struct mtx zombie_thread_lock;
MTX_SYSINIT(zombie_thread_lock, &zombie_thread_lock,
    "zombie_thread_lock", MTX_SPIN);

static void kse_purge(struct proc *p, struct thread *td);

/*
 * Prepare a thread for use.
 */
static void
thread_ctor(void *mem, int size, void *arg)
{
	struct thread *td;

	td = (struct thread *)mem;
	td->td_state = TDS_INACTIVE;
	td->td_flags |= TDF_UNBOUND;
}

/*
 * Reclaim a thread after use.
 */
static void
thread_dtor(void *mem, int size, void *arg)
{
	struct thread *td;

	td = (struct thread *)mem;

#ifdef INVARIANTS
	/* Verify that this thread is in a safe state to free. */
	switch (td->td_state) {
	case TDS_INHIBITED:
	case TDS_RUNNING:
	case TDS_CAN_RUN:
	case TDS_RUNQ:
		/*
		 * We must never unlink a thread that is in one of
		 * these states, because it is currently active.
		 */
		panic("bad state for thread unlinking");
		/* NOTREACHED */
	case TDS_INACTIVE:
		break;
	default:
		panic("bad thread state");
		/* NOTREACHED */
	}
#endif
}

/*
 * Initialize type-stable parts of a thread (when newly created).
 */
static void
thread_init(void *mem, int size)
{
	struct thread *td;

	td = (struct thread *)mem;
	mtx_lock(&Giant);
	pmap_new_thread(td, 0);
	mtx_unlock(&Giant);
	cpu_thread_setup(td);
	td->td_sched = (struct td_sched *)&td[1];
}
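/*
 * Reader's note on the UMA hooks above (a simplified summary): the
 * ctor/dtor pair runs on every uma_zalloc()/uma_zfree(), while
 * init/fini run only when an item is first created in, or finally
 * released from, the zone's cache.  Expensive setup such as
 * pmap_new_thread() therefore happens once per cached item, not on
 * every allocation; see threadinit() below for where the hooks are
 * registered with uma_zcreate().
 */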
/*
 * Tear down type-stable parts of a thread (just before being discarded).
 */
static void
thread_fini(void *mem, int size)
{
	struct thread *td;

	td = (struct thread *)mem;
	pmap_dispose_thread(td);
}

/*
 * Initialize type-stable parts of a kse (when newly created).
 */
static void
kse_init(void *mem, int size)
{
	struct kse *ke;

	ke = (struct kse *)mem;
	ke->ke_sched = (struct ke_sched *)&ke[1];
}

/*
 * Initialize type-stable parts of a ksegrp (when newly created).
 */
static void
ksegrp_init(void *mem, int size)
{
	struct ksegrp *kg;

	kg = (struct ksegrp *)mem;
	kg->kg_sched = (struct kg_sched *)&kg[1];
}

/*
 * KSE is linked onto the idle queue.
 */
void
kse_link(struct kse *ke, struct ksegrp *kg)
{
	struct proc *p = kg->kg_proc;

	TAILQ_INSERT_HEAD(&kg->kg_kseq, ke, ke_kglist);
	kg->kg_kses++;
	ke->ke_state = KES_UNQUEUED;
	ke->ke_proc = p;
	ke->ke_ksegrp = kg;
	ke->ke_owner = NULL;
	ke->ke_thread = NULL;
	ke->ke_oncpu = NOCPU;
}

void
kse_unlink(struct kse *ke)
{
	struct ksegrp *kg;

	mtx_assert(&sched_lock, MA_OWNED);
	kg = ke->ke_ksegrp;

	TAILQ_REMOVE(&kg->kg_kseq, ke, ke_kglist);
	if (--kg->kg_kses == 0) {
		ksegrp_unlink(kg);
	}
	/*
	 * Aggregate stats from the KSE
	 */
	kse_stash(ke);
}

void
ksegrp_link(struct ksegrp *kg, struct proc *p)
{

	TAILQ_INIT(&kg->kg_threads);
	TAILQ_INIT(&kg->kg_runq);	/* links with td_runq */
	TAILQ_INIT(&kg->kg_slpq);	/* links with td_runq */
	TAILQ_INIT(&kg->kg_kseq);	/* all kses in ksegrp */
	TAILQ_INIT(&kg->kg_lq);		/* loan kses in ksegrp */
	kg->kg_proc = p;
/* the following counters are in the -zero- section and may not need clearing */
	kg->kg_numthreads = 0;
	kg->kg_runnable = 0;
	kg->kg_kses = 0;
	kg->kg_loan_kses = 0;
	kg->kg_runq_kses = 0; /* XXXKSE change name */
/* link it in now that it's consistent */
	p->p_numksegrps++;
	TAILQ_INSERT_HEAD(&p->p_ksegrps, kg, kg_ksegrp);
}

void
ksegrp_unlink(struct ksegrp *kg)
{
	struct proc *p;

	mtx_assert(&sched_lock, MA_OWNED);
	p = kg->kg_proc;
	KASSERT(((kg->kg_numthreads == 0) && (kg->kg_kses == 0)),
	    ("ksegrp_unlink: residual threads or KSEs"));
	TAILQ_REMOVE(&p->p_ksegrps, kg, kg_ksegrp);
	p->p_numksegrps--;
	/*
	 * Aggregate stats from the KSE
	 */
	ksegrp_stash(kg);
}
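/*
 * Reader's note (informal invariants, inferred from the code above):
 * after any link/unlink pair completes, p_numksegrps matches the
 * length of p_ksegrps and kg_kses matches the length of kg_kseq, and
 * a ksegrp left with no KSEs and no threads is always stashed for
 * reaping.  The unlink paths assert sched_lock because the counters
 * double as scheduler state.
 */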
/*
 * For a newly created process,
 * link up all the structures and its initial threads etc.
 */
void
proc_linkup(struct proc *p, struct ksegrp *kg,
	    struct kse *ke, struct thread *td)
{

	TAILQ_INIT(&p->p_ksegrps);	/* all ksegrps in proc */
	TAILQ_INIT(&p->p_threads);	/* all threads in proc */
	TAILQ_INIT(&p->p_suspended);	/* Threads suspended */
	p->p_numksegrps = 0;
	p->p_numthreads = 0;

	ksegrp_link(kg, p);
	kse_link(ke, kg);
	thread_link(td, kg);
}

int
kse_thr_interrupt(struct thread *td, struct kse_thr_interrupt_args *uap)
{
	struct proc *p;
	struct thread *td2;

	p = td->td_proc;
	/* KSE-enabled processes only, please. */
	if (!(p->p_flag & P_KSES))
		return (EINVAL);
	if (uap->tmbx == NULL)
		return (EINVAL);
	mtx_lock_spin(&sched_lock);
	FOREACH_THREAD_IN_PROC(p, td2) {
		if (td2->td_mailbox == uap->tmbx) {
			td2->td_flags |= TDF_INTERRUPT;
			if (TD_ON_SLEEPQ(td2) && (td2->td_flags & TDF_SINTR)) {
				if (td2->td_flags & TDF_CVWAITQ)
					cv_abort(td2);
				else
					abortsleep(td2);
			}
			mtx_unlock_spin(&sched_lock);
			return (0);
		}
	}
	mtx_unlock_spin(&sched_lock);
	return (ESRCH);
}

int
kse_exit(struct thread *td, struct kse_exit_args *uap)
{
	struct proc *p;
	struct ksegrp *kg;
	struct kse *ke;

	p = td->td_proc;
	/* Only UTS can do the syscall */
	if (!(p->p_flag & P_KSES) || (td->td_mailbox != NULL))
		return (EINVAL);
	kg = td->td_ksegrp;
	/* serialize killing kse */
	PROC_LOCK(p);
	mtx_lock_spin(&sched_lock);
	if ((kg->kg_kses == 1) && (kg->kg_numthreads > 1)) {
		mtx_unlock_spin(&sched_lock);
		PROC_UNLOCK(p);
		return (EDEADLK);
	}
	ke = td->td_kse;
	if (p->p_numthreads == 1) {
		ke->ke_flags &= ~KEF_DOUPCALL;
		ke->ke_mailbox = NULL;
		p->p_flag &= ~P_KSES;
		mtx_unlock_spin(&sched_lock);
		PROC_UNLOCK(p);
	} else {
		ke->ke_flags |= KEF_EXIT;
		thread_exit();
		/* NOTREACHED */
	}
	return (0);
}

/*
 * Either becomes an upcall or waits for an awakening event and
 * THEN becomes an upcall.  Only error cases return.
 */
int
kse_release(struct thread *td, struct kse_release_args *uap)
{
	struct proc *p;
	struct ksegrp *kg;

	p = td->td_proc;
	kg = td->td_ksegrp;
	/*
	 * kse must have a mailbox ready for upcall, and only UTS can
	 * do the syscall.
	 */
	if (!(p->p_flag & P_KSES) ||
	    (td->td_mailbox != NULL) ||
	    (td->td_kse->ke_mailbox == NULL))
		return (EINVAL);

	PROC_LOCK(p);
	mtx_lock_spin(&sched_lock);
	/* Change OURSELF to become an upcall. */
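	/*
	 * Informal summary: two outcomes appear possible below.  If there
	 * is nothing to report (no completed contexts and no pending
	 * upcall request) we park as IDLE and switch away, waking only
	 * when kse_wakeup() or new work arrives; otherwise we return at
	 * once and the upcall is delivered on the way out through
	 * thread_userret().
	 */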
	td->td_flags = TDF_UPCALLING;	/* BOUND */
	if (!(td->td_kse->ke_flags & KEF_DOUPCALL) &&
	    (kg->kg_completed == NULL)) {
		/* XXXKSE also look for waiting signals etc. */
		/*
		 * The KSE will however be lendable.
		 */
		TD_SET_IDLE(td);
		PROC_UNLOCK(p);
		p->p_stats->p_ru.ru_nvcsw++;
		mi_switch();
		mtx_unlock_spin(&sched_lock);
	} else {
		mtx_unlock_spin(&sched_lock);
		PROC_UNLOCK(p);
	}
	return (0);
}

/* struct kse_wakeup_args {
	struct kse_mailbox *mbx;
}; */
int
kse_wakeup(struct thread *td, struct kse_wakeup_args *uap)
{
	struct proc *p;
	struct kse *ke;
	struct ksegrp *kg;
	struct thread *td2;

	p = td->td_proc;
	td2 = NULL;
	/* KSE-enabled processes only, please. */
	if (!(p->p_flag & P_KSES))
		return (EINVAL);

	mtx_lock_spin(&sched_lock);
	if (uap->mbx) {
		FOREACH_KSEGRP_IN_PROC(p, kg) {
			FOREACH_KSE_IN_GROUP(kg, ke) {
				if (ke->ke_mailbox != uap->mbx)
					continue;
				td2 = ke->ke_owner;
				KASSERT((td2 != NULL), ("KSE with no owner"));
				break;
			}
			if (td2) {
				break;
			}
		}
	} else {
		/*
		 * Look for any idle KSE to resurrect.
		 */
		kg = td->td_ksegrp;
		FOREACH_KSE_IN_GROUP(kg, ke) {
			td2 = ke->ke_owner;
			KASSERT((td2 != NULL), ("KSE with no owner2"));
			if (TD_IS_IDLE(td2))
				break;
		}
		KASSERT((td2 != NULL), ("no thread(s)"));
	}
	if (td2) {
		if (TD_IS_IDLE(td2)) {
			TD_CLR_IDLE(td2);
			setrunnable(td2);
		} else if (td != td2) {
			/* guarantee we do an upcall ASAP */
			td2->td_kse->ke_flags |= KEF_DOUPCALL;
		}
		mtx_unlock_spin(&sched_lock);
		return (0);
	}
	mtx_unlock_spin(&sched_lock);
	return (ESRCH);
}

/*
 * No new KSEG: first call: use current KSE, don't schedule an upcall
 * All other situations, do allocate a new KSE and schedule an upcall on it.
 */
/* struct kse_create_args {
	struct kse_mailbox *mbx;
	int newgroup;
}; */
int
kse_create(struct thread *td, struct kse_create_args *uap)
{
	struct kse *newke;
	struct kse *ke;
	struct ksegrp *newkg;
	struct ksegrp *kg;
	struct proc *p;
	struct kse_mailbox mbx;
	int err;

	p = td->td_proc;
	if ((err = copyin(uap->mbx, &mbx, sizeof(mbx))))
		return (err);

	p->p_flag |= P_KSES; /* easier to just set it than to test and set */
	kg = td->td_ksegrp;
	if (uap->newgroup) {
		if (p->p_numksegrps >= max_groups_per_proc)
			return (EPROCLIM);
		/*
		 * If we want a new KSEGRP it doesn't matter whether
		 * we have already fired up KSE mode before or not.
		 * We put the process in KSE mode and create a new KSEGRP
		 * and KSE.  If our KSE has not got a mailbox yet then
		 * that doesn't matter, just leave it that way.  It will
		 * ensure that this thread stays BOUND.  It's possible
		 * that the call came from a threaded library and the main
		 * program knows nothing of threads.
		 */
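		/*
		 * Informal note: the bzero/bcopy pair below relies on the
		 * RANGEOF() macro defined near the top of this file, which
		 * measures the byte span between two members of a struct.
		 * Members in kg_startzero..kg_endzero are cleared for a
		 * fresh group, while kg_startcopy..kg_endcopy are inherited
		 * from the caller's group, much as fork() treats the
		 * corresponding ranges of struct proc.
		 */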
		newkg = ksegrp_alloc();
		bzero(&newkg->kg_startzero, RANGEOF(struct ksegrp,
		      kg_startzero, kg_endzero));
		bcopy(&kg->kg_startcopy, &newkg->kg_startcopy,
		      RANGEOF(struct ksegrp, kg_startcopy, kg_endcopy));
		newke = kse_alloc();
	} else {
		/*
		 * Otherwise, if we have already set this KSE
		 * to have a mailbox, we want to make another KSE here,
		 * but only if there are not already the limit, which
		 * is 1 per CPU max.
		 *
		 * If the current KSE doesn't have a mailbox we just use it
		 * and give it one.
		 *
		 * Because we don't like to access
		 * the KSE outside of schedlock if we are UNBOUND,
		 * (because it can change if we are preempted by an interrupt)
		 * we can deduce it as having a mailbox if we are UNBOUND,
		 * and only need to actually look at it if we are BOUND,
		 * which is safe.
		 */
		if ((td->td_flags & TDF_UNBOUND) || td->td_kse->ke_mailbox) {
			if (thread_debug == 0) { /* if debugging, allow more */
#ifdef SMP
			if (kg->kg_kses > mp_ncpus)
#endif
				return (EPROCLIM);
			}
			newke = kse_alloc();
		} else {
			newke = NULL;
		}
		newkg = NULL;
	}
	if (newke) {
		bzero(&newke->ke_startzero, RANGEOF(struct kse,
		      ke_startzero, ke_endzero));
#if 0
		bcopy(&ke->ke_startcopy, &newke->ke_startcopy,
		      RANGEOF(struct kse, ke_startcopy, ke_endcopy));
#endif
		/* For the first call this may not have been set */
		if (td->td_standin == NULL) {
			td->td_standin = thread_alloc();
		}
		mtx_lock_spin(&sched_lock);
		if (newkg) {
			if (p->p_numksegrps >= max_groups_per_proc) {
				mtx_unlock_spin(&sched_lock);
				ksegrp_free(newkg);
				kse_free(newke);
				return (EPROCLIM);
			}
			ksegrp_link(newkg, p);
		} else
			newkg = kg;
		kse_link(newke, newkg);
		if (p->p_sflag & PS_NEEDSIGCHK)
			newke->ke_flags |= KEF_ASTPENDING;
		newke->ke_mailbox = uap->mbx;
		newke->ke_upcall = mbx.km_func;
		bcopy(&mbx.km_stack, &newke->ke_stack, sizeof(stack_t));
		thread_schedule_upcall(td, newke);
		mtx_unlock_spin(&sched_lock);
	} else {
		/*
		 * If we didn't allocate a new KSE then we are using
		 * the existing (BOUND) kse.
		 */
		ke = td->td_kse;
		ke->ke_mailbox = uap->mbx;
		ke->ke_upcall = mbx.km_func;
		bcopy(&mbx.km_stack, &ke->ke_stack, sizeof(stack_t));
	}
	/*
	 * Fill out the KSE-mode specific fields of the new kse.
	 */
	return (0);
}

/*
 * Fill a ucontext_t with a thread's context information.
 *
 * This is an analogue to getcontext(3).
 */
void
thread_getcontext(struct thread *td, ucontext_t *uc)
{

/*
 * XXX this is declared in a MD include file, i386/include/ucontext.h but
 * is used in MI code.
 */
#ifdef __i386__
	get_mcontext(td, &uc->uc_mcontext);
#endif
	uc->uc_sigmask = td->td_proc->p_sigmask;
}

/*
 * Set a thread's context from a ucontext_t.
 *
 * This is an analogue to setcontext(3).
 */
int
thread_setcontext(struct thread *td, ucontext_t *uc)
{
	int ret;

/*
 * XXX this is declared in a MD include file, i386/include/ucontext.h but
 * is used in MI code.
 */
#ifdef __i386__
	ret = set_mcontext(td, &uc->uc_mcontext);
#else
	ret = ENOSYS;
#endif
	if (ret == 0) {
		SIG_CANTMASK(uc->uc_sigmask);
		PROC_LOCK(td->td_proc);
		td->td_proc->p_sigmask = uc->uc_sigmask;
		PROC_UNLOCK(td->td_proc);
	}
	return (ret);
}

/*
 * Initialize global thread allocation resources.
 */
void
threadinit(void)
{

#ifndef __ia64__
	thread_zone = uma_zcreate("THREAD", sched_sizeof_thread(),
	    thread_ctor, thread_dtor, thread_init, thread_fini,
	    UMA_ALIGN_CACHE, 0);
#else
	/*
	 * XXX the ia64 kstack allocator is really lame and is at the mercy
	 * of contigmalloc().  This hackery is to pre-construct a whole
	 * pile of thread structures with associated kernel stacks early
	 * in the system startup while contigmalloc() still works.  Once we
	 * have them, keep them.  Sigh.
	 */
	thread_zone = uma_zcreate("THREAD", sched_sizeof_thread(),
	    thread_ctor, thread_dtor, thread_init, thread_fini,
	    UMA_ALIGN_CACHE, UMA_ZONE_NOFREE);
	uma_prealloc(thread_zone, 512);	/* XXX arbitrary */
#endif
	ksegrp_zone = uma_zcreate("KSEGRP", sched_sizeof_ksegrp(),
	    NULL, NULL, ksegrp_init, NULL,
	    UMA_ALIGN_CACHE, 0);
	kse_zone = uma_zcreate("KSE", sched_sizeof_kse(),
	    NULL, NULL, kse_init, NULL,
	    UMA_ALIGN_CACHE, 0);
}

/*
 * Stash an embarrassingly extra thread into the zombie thread queue.
 */
void
thread_stash(struct thread *td)
{
	mtx_lock_spin(&zombie_thread_lock);
	TAILQ_INSERT_HEAD(&zombie_threads, td, td_runq);
	mtx_unlock_spin(&zombie_thread_lock);
}

/*
 * Stash an embarrassingly extra kse into the zombie kse queue.
 */
void
kse_stash(struct kse *ke)
{
	mtx_lock_spin(&zombie_thread_lock);
	TAILQ_INSERT_HEAD(&zombie_kses, ke, ke_procq);
	mtx_unlock_spin(&zombie_thread_lock);
}

/*
 * Stash an embarrassingly extra ksegrp into the zombie ksegrp queue.
 */
void
ksegrp_stash(struct ksegrp *kg)
{
	mtx_lock_spin(&zombie_thread_lock);
	TAILQ_INSERT_HEAD(&zombie_ksegrps, kg, kg_ksegrp);
	mtx_unlock_spin(&zombie_thread_lock);
}

/*
 * Reap zombie threads.
 */
void
thread_reap(void)
{
	struct thread *td_first, *td_next;
	struct kse *ke_first, *ke_next;
	struct ksegrp *kg_first, *kg_next;

	/*
	 * Don't even bother to lock if none at this instant;
	 * we really don't care about the next instant.
	 */
	if ((!TAILQ_EMPTY(&zombie_threads))
	    || (!TAILQ_EMPTY(&zombie_kses))
	    || (!TAILQ_EMPTY(&zombie_ksegrps))) {
		mtx_lock_spin(&zombie_thread_lock);
		td_first = TAILQ_FIRST(&zombie_threads);
		ke_first = TAILQ_FIRST(&zombie_kses);
		kg_first = TAILQ_FIRST(&zombie_ksegrps);
		if (td_first)
			TAILQ_INIT(&zombie_threads);
		if (ke_first)
			TAILQ_INIT(&zombie_kses);
		if (kg_first)
			TAILQ_INIT(&zombie_ksegrps);
		mtx_unlock_spin(&zombie_thread_lock);
		while (td_first) {
			td_next = TAILQ_NEXT(td_first, td_runq);
			thread_free(td_first);
			td_first = td_next;
		}
		while (ke_first) {
			ke_next = TAILQ_NEXT(ke_first, ke_procq);
			kse_free(ke_first);
			ke_first = ke_next;
		}
		while (kg_first) {
			kg_next = TAILQ_NEXT(kg_first, kg_ksegrp);
			ksegrp_free(kg_first);
			kg_first = kg_next;
		}
	}
}

/*
 * Allocate a ksegrp.
 */
struct ksegrp *
ksegrp_alloc(void)
{
	return (uma_zalloc(ksegrp_zone, M_WAITOK));
}
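/*
 * Reader's note (summary of the pattern above and below): allocation
 * always goes through the UMA zones, but freeing is two-stage.  A
 * structure that may still be in use by the scheduler is first
 * "stashed" on a zombie list under a spin lock, and only later torn
 * down by thread_reap(), typically from thread_alloc() in a context
 * where sleeping is safe.
 */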
/*
 * Allocate a kse.
 */
struct kse *
kse_alloc(void)
{
	return (uma_zalloc(kse_zone, M_WAITOK));
}

/*
 * Allocate a thread.
 */
struct thread *
thread_alloc(void)
{
	thread_reap(); /* check if any zombies to get */
	return (uma_zalloc(thread_zone, M_WAITOK));
}

/*
 * Deallocate a ksegrp.
 */
void
ksegrp_free(struct ksegrp *kg)
{
	uma_zfree(ksegrp_zone, kg);
}

/*
 * Deallocate a kse.
 */
void
kse_free(struct kse *ke)
{
	uma_zfree(kse_zone, ke);
}

/*
 * Deallocate a thread.
 */
void
thread_free(struct thread *td)
{

	cpu_thread_clean(td);
	uma_zfree(thread_zone, td);
}

/*
 * Store the thread context in the UTS's mailbox,
 * then add the mailbox at the head of a list we are building in user space.
 * The list is anchored in the ksegrp structure.
 */
int
thread_export_context(struct thread *td)
{
	struct proc *p;
	struct ksegrp *kg;
	uintptr_t mbx;
	void *addr;
	int error;
	ucontext_t uc;
	uint temp;

	p = td->td_proc;
	kg = td->td_ksegrp;

	/* Export the user/machine context. */
#if 0
	addr = (caddr_t)td->td_mailbox +
	    offsetof(struct kse_thr_mailbox, tm_context);
#else /* if user pointer arithmetic is valid in the kernel */
	addr = (void *)(&td->td_mailbox->tm_context);
#endif
	error = copyin(addr, &uc, sizeof(ucontext_t));
	if (error)
		goto bad;

	thread_getcontext(td, &uc);
	error = copyout(&uc, addr, sizeof(ucontext_t));
	if (error)
		goto bad;

	/* get address in latest mbox of list pointer */
#if 0
	addr = (caddr_t)td->td_mailbox
	    + offsetof(struct kse_thr_mailbox, tm_next);
#else /* if user pointer arithmetic is valid in the kernel */
	addr = (void *)(&td->td_mailbox->tm_next);
#endif
	/*
	 * Put the saved address of the previous first
	 * entry into this one
	 */
	for (;;) {
		mbx = (uintptr_t)kg->kg_completed;
		if (suword(addr, mbx)) {
			error = EFAULT;
			goto bad;
		}
		PROC_LOCK(p);
		if (mbx == (uintptr_t)kg->kg_completed) {
			kg->kg_completed = td->td_mailbox;
			PROC_UNLOCK(p);
			break;
		}
		PROC_UNLOCK(p);
	}
	addr = (caddr_t)td->td_mailbox
	    + offsetof(struct kse_thr_mailbox, tm_sticks);
	temp = fuword(addr) + td->td_usticks;
	if (suword(addr, temp)) {
		error = EFAULT;	/* otherwise "bad" would return 0 */
		goto bad;
	}
	return (0);

bad:
	PROC_LOCK(p);
	psignal(p, SIGSEGV);
	PROC_UNLOCK(p);
	return (error);
}
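/*
 * Reader's note: the loop above (and the one in thread_link_mboxes()
 * below) performs an optimistic update of a user-visible list head.
 * In outline (simplified; error handling omitted):
 *
 *	for (;;) {
 *		old = kg->kg_completed;
 *		suword(&mailbox->tm_next, old);	   may fault/sleep, unlocked
 *		PROC_LOCK(p);
 *		if (kg->kg_completed == old) {	   nobody raced us
 *			kg->kg_completed = mailbox;
 *			PROC_UNLOCK(p);
 *			break;
 *		}
 *		PROC_UNLOCK(p);			   lost the race; retry
 *	}
 *
 * suword() can fault and sleep, so it cannot be done under the proc
 * lock; the re-check under the lock makes the publish atomic with
 * respect to other threads doing the same thing.
 */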
/*
 * Take the list of completed mailboxes for this KSEGRP and put them on this
 * KSE's mailbox as it's the next one going up.
 */
static int
thread_link_mboxes(struct ksegrp *kg, struct kse *ke)
{
	struct proc *p = kg->kg_proc;
	void *addr;
	uintptr_t mbx;

#if 0
	addr = (caddr_t)ke->ke_mailbox
	    + offsetof(struct kse_mailbox, km_completed);
#else /* if user pointer arithmetic is valid in the kernel */
	addr = (void *)(&ke->ke_mailbox->km_completed);
#endif
	for (;;) {
		mbx = (uintptr_t)kg->kg_completed;
		if (suword(addr, mbx)) {
			PROC_LOCK(p);
			psignal(p, SIGSEGV);
			PROC_UNLOCK(p);
			return (EFAULT);
		}
		/* XXXKSE could use atomic CMPXCH here */
		PROC_LOCK(p);
		if (mbx == (uintptr_t)kg->kg_completed) {
			kg->kg_completed = NULL;
			PROC_UNLOCK(p);
			break;
		}
		PROC_UNLOCK(p);
	}
	return (0);
}

/*
 * This function should be called at statclock interrupt time.
 */
int
thread_add_ticks_intr(int user, uint ticks)
{
	struct thread *td = curthread;
	struct kse *ke = td->td_kse;

	if (ke->ke_mailbox == NULL)
		return (-1);
	if (user) {
		/* Current always do via ast() */
		ke->ke_flags |= KEF_ASTPENDING;
		ke->ke_uuticks += ticks;
	} else {
		if (td->td_mailbox != NULL)
			td->td_usticks += ticks;
		else
			ke->ke_usticks += ticks;
	}
	return (0);
}

static int
thread_update_uticks(void)
{
	struct thread *td = curthread;
	struct proc *p = td->td_proc;
	struct kse *ke = td->td_kse;
	struct kse_thr_mailbox *tmbx;
	caddr_t addr;
	uint uticks, sticks;

	if (ke->ke_mailbox == NULL)
		return (0);

	uticks = ke->ke_uuticks;
	ke->ke_uuticks = 0;
	sticks = ke->ke_usticks;
	ke->ke_usticks = 0;
#if 0
	tmbx = (void *)fuword((caddr_t)ke->ke_mailbox
	    + offsetof(struct kse_mailbox, km_curthread));
#else /* if user pointer arithmetic is ok in the kernel */
	tmbx = (void *)fuword((void *)&ke->ke_mailbox->km_curthread);
#endif
	if ((tmbx == NULL) || (tmbx == (void *)-1))
		return (0);
	if (uticks) {
		addr = (caddr_t)tmbx + offsetof(struct kse_thr_mailbox,
		    tm_uticks);
		uticks += fuword(addr);
		if (suword(addr, uticks))
			goto bad;
	}
	if (sticks) {
		addr = (caddr_t)tmbx + offsetof(struct kse_thr_mailbox,
		    tm_sticks);
		sticks += fuword(addr);
		if (suword(addr, sticks))
			goto bad;
	}
	return (0);
bad:
	PROC_LOCK(p);
	psignal(p, SIGSEGV);
	PROC_UNLOCK(p);
	return (-1);
}

/*
 * Discard the current thread and exit from its context.
 *
 * Because we can't free a thread while we're operating under its context,
 * push the current thread into our CPU's deadthread holder. This means
 * we needn't worry about someone else grabbing our context before we
 * do a cpu_throw().
 */
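/*
 * Reader's note (an assumption about the surrounding machinery): the
 * per-CPU "deadthread" set at the bottom of thread_exit() is expected
 * to be collected by a later thread, e.g. via thread_stash() and
 * thread_reap(), once this CPU has switched away; an exiting thread
 * can never free its own stack.
 */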
void
thread_exit(void)
{
	struct thread *td;
	struct kse *ke;
	struct proc *p;
	struct ksegrp *kg;

	td = curthread;
	kg = td->td_ksegrp;
	p = td->td_proc;
	ke = td->td_kse;

	mtx_assert(&sched_lock, MA_OWNED);
	KASSERT(p != NULL, ("thread exiting without a process"));
	KASSERT(ke != NULL, ("thread exiting without a kse"));
	KASSERT(kg != NULL, ("thread exiting without a kse group"));
	PROC_LOCK_ASSERT(p, MA_OWNED);
	CTR1(KTR_PROC, "thread_exit: thread %p", td);
	KASSERT(!mtx_owned(&Giant), ("dying thread owns giant"));

	if (td->td_standin != NULL) {
		thread_stash(td->td_standin);
		td->td_standin = NULL;
	}

	cpu_thread_exit(td);	/* XXXSMP */

	/*
	 * The last thread is left attached to the process
	 * so that the whole bundle gets recycled. Skip
	 * all this stuff.
	 */
	if (p->p_numthreads > 1) {
		/*
		 * Unlink this thread from its proc and the kseg.
		 * In keeping with the other structs we probably should
		 * have a thread_unlink() that does some of this but it
		 * would only be called from here (I think) so it would
		 * be a waste. (might be useful for proc_fini() as well.)
		 */
		TAILQ_REMOVE(&p->p_threads, td, td_plist);
		p->p_numthreads--;
		TAILQ_REMOVE(&kg->kg_threads, td, td_kglist);
		kg->kg_numthreads--;
		/*
		 * The test below is NOT true if we are the
		 * sole exiting thread. P_STOPPED_SINGLE is unset
		 * in exit1() after it is the only survivor.
		 */
		if (P_SHOULDSTOP(p) == P_STOPPED_SINGLE) {
			if (p->p_numthreads == p->p_suspcount) {
				thread_unsuspend_one(p->p_singlethread);
			}
		}

		/* Reassign this thread's KSE. */
		ke->ke_state = KES_UNQUEUED;

		/*
		 * Decide what to do with the KSE attached to this thread.
		 * XXX Possibly kse_reassign should do both cases as it already
		 * does some of this.
		 */
		if (ke->ke_flags & KEF_EXIT) {
			KASSERT((ke->ke_owner == td),
			    ("thread_exit: KSE exiting with non-owner thread"));
			ke->ke_thread = NULL;
			td->td_kse = NULL;
			kse_unlink(ke);
		} else {
			TD_SET_EXITING(td);	/* definitely not runnable */
			kse_reassign(ke);
		}
		PROC_UNLOCK(p);
		td->td_state = TDS_INACTIVE;
		td->td_proc = NULL;
		td->td_ksegrp = NULL;
		td->td_last_kse = NULL;
		PCPU_SET(deadthread, td);
	} else {
		PROC_UNLOCK(p);
	}
	cpu_throw();
	/* NOTREACHED */
}

/*
 * Do any thread specific cleanups that may be needed in wait();
 * called with Giant held, proc and schedlock not held.
 */
void
thread_wait(struct proc *p)
{
	struct thread *td;

	KASSERT((p->p_numthreads == 1), ("Multiple threads in wait1()"));
	KASSERT((p->p_numksegrps == 1), ("Multiple ksegrps in wait1()"));
	FOREACH_THREAD_IN_PROC(p, td) {
		if (td->td_standin != NULL) {
			thread_free(td->td_standin);
			td->td_standin = NULL;
		}
		cpu_thread_clean(td);
	}
	thread_reap();	/* check for zombie threads etc. */
}

/*
 * Link a thread to a process.
 * Set up anything that needs to be initialized for it to
 * be used by the process.
 *
 * Note that we do not link to the proc's ucred here.
 * The thread is linked as if running but no KSE assigned.
 */
void
thread_link(struct thread *td, struct ksegrp *kg)
{
	struct proc *p;

	p = kg->kg_proc;
	td->td_state = TDS_INACTIVE;
	td->td_proc = p;
	td->td_ksegrp = kg;
	td->td_last_kse = NULL;

	LIST_INIT(&td->td_contested);
	callout_init(&td->td_slpcallout, 1);
	TAILQ_INSERT_HEAD(&p->p_threads, td, td_plist);
	TAILQ_INSERT_HEAD(&kg->kg_threads, td, td_kglist);
	p->p_numthreads++;
	kg->kg_numthreads++;
	td->td_kse = NULL;
}

void
kse_purge(struct proc *p, struct thread *td)
{
	/* XXXKSE think about this..
	   may need to wake up threads on loan queue. */
	struct ksegrp *kg;

	KASSERT(p->p_numthreads == 1, ("bad thread number"));
	mtx_lock_spin(&sched_lock);
	while ((kg = TAILQ_FIRST(&p->p_ksegrps)) != NULL) {
		TAILQ_REMOVE(&p->p_ksegrps, kg, kg_ksegrp);
		p->p_numksegrps--;
		KASSERT(((kg->kg_kses == 0) && (kg != td->td_ksegrp)) ||
		    ((kg->kg_kses == 1) && (kg == td->td_ksegrp)),
		    ("wrong kg_kses"));
		if (kg != td->td_ksegrp) {
			ksegrp_stash(kg);
		}
	}
	TAILQ_INSERT_HEAD(&p->p_ksegrps, td->td_ksegrp, kg_ksegrp);
	p->p_numksegrps++;
	mtx_unlock_spin(&sched_lock);
}


/*
 * Create a thread and schedule it for upcall on the KSE given.
 * Use our thread's standin so that we don't have to allocate one.
 */
struct thread *
thread_schedule_upcall(struct thread *td, struct kse *ke)
{
	struct thread *td2;
	int newkse;

	mtx_assert(&sched_lock, MA_OWNED);
	newkse = (ke != td->td_kse);

	/*
	 * If the owner and kse are BOUND then that thread is planning to
	 * go to userland and upcalls are not expected.  So don't make one.
	 * If it is not bound then make it so with the spare thread
	 * and then borrow back the KSE to allow us to complete some
	 * in-kernel work.  When we complete, the bound thread will have
	 * the chance to complete.  This thread will sleep as planned.
	 * Hopefully there will eventually be an unbound thread that can
	 * be converted to an upcall to report the completion of this thread.
	 */

	if ((td2 = td->td_standin) != NULL) {
		td->td_standin = NULL;
	} else {
		if (newkse)
			panic("no reserve thread when called with a new kse");
		/*
		 * If called from (e.g.) sleep and we do not have
		 * a reserve thread, then we've used it, so do not
		 * create an upcall.
		 */
		return (NULL);
	}
	CTR3(KTR_PROC, "thread_schedule_upcall: thread %p (pid %d, %s)",
	    td2, td->td_proc->p_pid, td->td_proc->p_comm);
	bzero(&td2->td_startzero,
	    (unsigned)RANGEOF(struct thread, td_startzero, td_endzero));
	bcopy(&td->td_startcopy, &td2->td_startcopy,
	    (unsigned)RANGEOF(struct thread, td_startcopy, td_endcopy));
	thread_link(td2, ke->ke_ksegrp);
	cpu_set_upcall(td2, td->td_pcb);

	/*
	 * XXXKSE do we really need this? (default values for the
	 * frame).
	 */
	bcopy(td->td_frame, td2->td_frame, sizeof(struct trapframe));

	/*
	 * Bind the new thread to the KSE,
	 * and if it's our KSE, lend it back to ourself
	 * so we can continue running.
	 */
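	/*
	 * Informal summary of the two cases below: with a borrowed
	 * (current) KSE the new upcall thread is only marked as having
	 * loaned the KSE out and is not yet runnable; with a freshly
	 * created KSE the pair is complete, so the upcall can go straight
	 * onto the run queue.
	 */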
	td2->td_ucred = crhold(td->td_ucred);
	td2->td_flags = TDF_UPCALLING;	/* note: BOUND */
	td2->td_kse = ke;
	td2->td_state = TDS_CAN_RUN;
	td2->td_inhibitors = 0;
	ke->ke_owner = td2;
	/*
	 * If called from kse_reassign(), we are working on the current
	 * KSE so fake that we borrowed it.  If called from
	 * kse_create(), don't, as we have a new kse too.
	 */
	if (!newkse) {
		/*
		 * This thread will be scheduled when the current thread
		 * blocks, exits or tries to enter userspace (whichever
		 * happens first).  When that happens the KSE will "revert"
		 * to this thread in a BOUND manner.  Since we are called
		 * from msleep() this is going to be "very soon" in nearly
		 * all cases.
		 */
		TD_SET_LOAN(td2);
	} else {
		ke->ke_thread = td2;
		ke->ke_state = KES_THREAD;
		setrunqueue(td2);
	}
	return (td2);	/* bogus.. should be a void function */
}

/*
 * Schedule an upcall to notify a KSE process received signals.
 *
 * XXX - Modifying a sigset_t like this is totally bogus.
 */
struct thread *
signal_upcall(struct proc *p, int sig)
{
	struct thread *td, *td2;
	struct kse *ke;
	sigset_t ss;
	int error;

	PROC_LOCK_ASSERT(p, MA_OWNED);
return (NULL);	/* XXX: disabled; the code below is not reached. */

	td = FIRST_THREAD_IN_PROC(p);
	ke = td->td_kse;
	PROC_UNLOCK(p);
	error = copyin(&ke->ke_mailbox->km_sigscaught, &ss, sizeof(sigset_t));
	PROC_LOCK(p);
	if (error)
		return (NULL);
	SIGADDSET(ss, sig);
	PROC_UNLOCK(p);
	error = copyout(&ss, &ke->ke_mailbox->km_sigscaught, sizeof(sigset_t));
	PROC_LOCK(p);
	if (error)
		return (NULL);
	if (td->td_standin == NULL)
		td->td_standin = thread_alloc();
	mtx_lock_spin(&sched_lock);
	td2 = thread_schedule_upcall(td, ke);	/* Bogus JRE */
	mtx_unlock_spin(&sched_lock);
	return (td2);
}

/*
 * Setup done on the thread when it enters the kernel.
 * XXXKSE Presently only for syscalls but eventually all kernel entries.
 */
void
thread_user_enter(struct proc *p, struct thread *td)
{
	struct kse *ke;

	/*
	 * First check that we shouldn't just abort.
	 * But check if we are the single thread first!
	 * XXX p_singlethread not locked, but should be safe.
	 */
	if ((p->p_flag & P_WEXIT) && (p->p_singlethread != td)) {
		PROC_LOCK(p);
		mtx_lock_spin(&sched_lock);
		thread_exit();
		/* NOTREACHED */
	}

	/*
	 * If we are doing a syscall in a KSE environment,
	 * note where our mailbox is. There is always the
	 * possibility that we could do this lazily (in kse_reassign()),
	 * but for now do it every time.
	 */
	ke = td->td_kse;
	td->td_flags &= ~TDF_UNBOUND;
	if (ke->ke_mailbox != NULL) {
#if 0
		td->td_mailbox = (void *)fuword((caddr_t)ke->ke_mailbox
		    + offsetof(struct kse_mailbox, km_curthread));
#else /* if user pointer arithmetic is ok in the kernel */
		td->td_mailbox =
		    (void *)fuword((void *)&ke->ke_mailbox->km_curthread);
#endif
		if ((td->td_mailbox == NULL) ||
		    (td->td_mailbox == (void *)-1)) {
			td->td_mailbox = NULL;	/* single thread it.. */
			mtx_lock_spin(&sched_lock);
			td->td_flags &= ~(TDF_UNBOUND|TDF_CAN_UNBIND);
			mtx_unlock_spin(&sched_lock);
		} else {
			/*
			 * When the thread limit is reached, act as if the
			 * thread has already done an upcall.
			 */
			if (p->p_numthreads > max_threads_per_proc) {
				if (td->td_standin != NULL) {
					thread_stash(td->td_standin);
					td->td_standin = NULL;
				}
			} else {
				if (td->td_standin == NULL)
					td->td_standin = thread_alloc();
			}
			mtx_lock_spin(&sched_lock);
			td->td_flags |= TDF_CAN_UNBIND;
			mtx_unlock_spin(&sched_lock);
			KASSERT((ke->ke_owner == td),
			    ("thread_user_enter: No starting owner"));
			ke->ke_owner = td;
			td->td_usticks = 0;
		}
	}
}

/*
 * The extra work we go through if we are a threaded process when we
 * return to userland.
 *
 * If we are a KSE process and returning to user mode, check for
 * extra work to do before we return (e.g. for more syscalls
 * to complete first).  If we were in a critical section, we should
 * just return to let it finish.  Same if we were in the UTS (in
 * which case the mailbox's context's busy indicator will be set).
 * The only traps we support will have set the mailbox.
 * We will clear it here.
 */
int
thread_userret(struct thread *td, struct trapframe *frame)
{
	int error;
	int unbound;
	struct kse *ke;
	struct ksegrp *kg;
	struct thread *worktodo;
	struct proc *p;
	struct timespec ts;

	KASSERT((td->td_kse && td->td_kse->ke_thread && td->td_kse->ke_owner),
	    ("thread_userret: bad thread/kse pointers"));
	KASSERT((td == curthread),
	    ("thread_userret: bad thread argument"));

	kg = td->td_ksegrp;
	p = td->td_proc;
	error = 0;
	unbound = TD_IS_UNBOUND(td);

	mtx_lock_spin(&sched_lock);
	if ((worktodo = kg->kg_last_assigned))
		worktodo = TAILQ_NEXT(worktodo, td_runq);
	else
		worktodo = TAILQ_FIRST(&kg->kg_runq);

	/*
	 * Permanently bound threads never upcall but they may
	 * loan out their KSE at this point.
	 * Upcalls imply bound.. They also may want to do some philanthropy.
	 * Temporarily bound threads on the other hand either yield
	 * to other work and transform into an upcall, or proceed back to
	 * userland.
	 */

	if (TD_CAN_UNBIND(td)) {
		td->td_flags &= ~(TDF_UNBOUND|TDF_CAN_UNBIND);
		if (!worktodo && (kg->kg_completed == NULL) &&
		    !(td->td_kse->ke_flags & KEF_DOUPCALL)) {
			/*
			 * This thread has not started any upcall.
			 * If there is no work to report other than
			 * ourself, then it can return direct to userland.
			 */
justreturn:
			mtx_unlock_spin(&sched_lock);
			thread_update_uticks();
			td->td_mailbox = NULL;
			return (0);
		}
		mtx_unlock_spin(&sched_lock);
		error = thread_export_context(td);
		td->td_usticks = 0;
		if (error) {
			/*
			 * As we are not running on a borrowed KSE,
			 * failing to do the KSE operation just defaults
			 * back to synchronous operation, so just return from
			 * the syscall.
			 */
			goto justreturn;
		}
		mtx_lock_spin(&sched_lock);
		/*
		 * Turn ourself into a bound upcall.
		 * We will rely on kse_reassign()
		 * to make us run at a later time.
		 */
		td->td_flags |= TDF_UPCALLING;

		/* there may be more work since we re-locked schedlock */
		if ((worktodo = kg->kg_last_assigned))
			worktodo = TAILQ_NEXT(worktodo, td_runq);
		else
			worktodo = TAILQ_FIRST(&kg->kg_runq);
	} else if (unbound) {
		/*
		 * We are an unbound thread, looking to
		 * return to user space. There must be another owner
		 * of this KSE.
		 * We are using a borrowed KSE: save state and exit.
		 * kse_reassign() will recycle the kse as needed.
		 */
		mtx_unlock_spin(&sched_lock);
		error = thread_export_context(td);
		td->td_usticks = 0;
		if (error) {
			/*
			 * There is nothing we can do.
			 * We just lose that context. We
			 * probably should note this somewhere and send
			 * the process a signal.
			 */
			PROC_LOCK(td->td_proc);
			psignal(td->td_proc, SIGSEGV);
			mtx_lock_spin(&sched_lock);
			ke = td->td_kse;
			/* possibly upcall with error? */
		} else {
			/*
			 * Don't make an upcall, just exit so that the owner
			 * can get its KSE if it wants it.
			 * Our context is already safely stored for later
			 * use by the UTS.
			 */
			PROC_LOCK(p);
			mtx_lock_spin(&sched_lock);
			ke = td->td_kse;
		}
		/*
		 * If the owner is idling, we now have something for it
		 * to report, so make it runnable.
		 * If the owner is not an upcall, make an attempt to
		 * ensure that at least one of any IDLED upcalls can
		 * wake up.
		 */
		if (ke->ke_owner->td_flags & TDF_UPCALLING) {
			TD_CLR_IDLE(ke->ke_owner);
		} else {
			FOREACH_KSE_IN_GROUP(kg, ke) {
				if (TD_IS_IDLE(ke->ke_owner)) {
					TD_CLR_IDLE(ke->ke_owner);
				}
			}
		}
		thread_exit();
	}
	/*
	 * We ARE going back to userland with this KSE.
	 * We are permanently bound. We may be an upcall.
	 * If an upcall, check for threads that need to borrow the KSE.
	 * Any other thread that comes ready after this missed the boat.
	 */
	ke = td->td_kse;

	/*
	 * If not upcalling, go back to userspace.
	 * If we are, get the upcall set up.
	 */
	if (td->td_flags & TDF_UPCALLING) {
		if (worktodo) {
			/*
			 * Force a switch to more urgent 'in kernel'
			 * work. Control will return to this thread
			 * when there is no more work to do.
			 * kse_reassign() will do that for us.
			 */
			TD_SET_LOAN(td);
			p->p_stats->p_ru.ru_nvcsw++;
			mi_switch(); /* kse_reassign() will (re)find worktodo */
		}
		td->td_flags &= ~TDF_UPCALLING;
		if (ke->ke_flags & KEF_DOUPCALL)
			ke->ke_flags &= ~KEF_DOUPCALL;
		mtx_unlock_spin(&sched_lock);

		/*
		 * There is no more work to do and we are going to ride
		 * this thread/KSE up to userland as an upcall.
		 * Do the last parts of the setup needed for the upcall.
		 */
		CTR3(KTR_PROC, "userret: upcall thread %p (pid %d, %s)",
		    td, td->td_proc->p_pid, td->td_proc->p_comm);

		/*
		 * Set user context to the UTS.
		 * Will use Giant in cpu_thread_clean() because it uses
		 * kmem_free(kernel_map, ...)
		 */
		cpu_set_upcall_kse(td, ke);

		/*
		 * Unhook the list of completed threads.
		 * Anything that completes after this gets to
		 * come in next time.
		 * Put the list of completed thread mailboxes on
		 * this KSE's mailbox.
		 */
		error = thread_link_mboxes(kg, ke);
		if (error)
			goto bad;

		/*
		 * Set state and clear the thread mailbox pointer.
		 * From now on we are just a bound outgoing process.
		 * **Problem** userret is often called several times.
		 * It would be nice if this all happened only on the first
		 * time through (the scan for extra work etc.).
		 */
#if 0
		error = suword((caddr_t)ke->ke_mailbox +
		    offsetof(struct kse_mailbox, km_curthread), 0);
#else	/* if user pointer arithmetic is ok in the kernel */
		error = suword((caddr_t)&ke->ke_mailbox->km_curthread, 0);
#endif
		ke->ke_uuticks = ke->ke_usticks = 0;
		if (error)
			goto bad;
		nanotime(&ts);
		if (copyout(&ts,
		    (caddr_t)&ke->ke_mailbox->km_timeofday, sizeof(ts))) {
			goto bad;
		}
	} else {
		mtx_unlock_spin(&sched_lock);
	}
	/*
	 * Optimisation:
	 * Ensure that we have a spare thread available,
	 * for when we re-enter the kernel.
	 */
	if (td->td_standin == NULL) {
		td->td_standin = thread_alloc();
	}

	thread_update_uticks();
	td->td_mailbox = NULL;
	return (0);

bad:
	/*
	 * Things are going to be so screwed we should just kill the process.
	 * How do we do that?
	 */
	PROC_LOCK(td->td_proc);
	psignal(td->td_proc, SIGSEGV);
	PROC_UNLOCK(td->td_proc);
	td->td_mailbox = NULL;
	return (error);	/* go sync */
}

/*
 * Enforce single-threading.
 *
 * Returns 1 if the caller must abort (another thread is waiting to
 * exit the process or similar). Process is locked!
 * Returns 0 when you are successfully the only thread running.
 * A process has successfully single threaded in the suspend mode when
 * there are no threads in user mode. Threads in the kernel must be
 * allowed to continue until they get to the user boundary. They may even
 * copy out their return values and data before suspending. They may however be
 * accelerated in reaching the user boundary as we will wake up
 * any sleeping threads that are interruptable. (PCATCH).
 */
int
thread_single(int force_exit)
{
	struct thread *td;
	struct thread *td2;
	struct proc *p;

	td = curthread;
	p = td->td_proc;
	mtx_assert(&Giant, MA_OWNED);
	PROC_LOCK_ASSERT(p, MA_OWNED);
	KASSERT((td != NULL), ("curthread is NULL"));

	if ((p->p_flag & P_KSES) == 0)
		return (0);

	/* Is someone already single threading? */
	if (p->p_singlethread)
		return (1);

	if (force_exit == SINGLE_EXIT) {
		p->p_flag |= P_SINGLE_EXIT;
		td->td_flags &= ~TDF_UNBOUND;
	} else
		p->p_flag &= ~P_SINGLE_EXIT;
	p->p_flag |= P_STOPPED_SINGLE;
	p->p_singlethread = td;
	/* XXXKSE Which lock protects the below values? */
	while ((p->p_numthreads - p->p_suspcount) != 1) {
		mtx_lock_spin(&sched_lock);
		FOREACH_THREAD_IN_PROC(p, td2) {
			if (td2 == td)
				continue;
			if (TD_IS_INHIBITED(td2)) {
				if (force_exit == SINGLE_EXIT) {
					if (TD_IS_SUSPENDED(td2)) {
						thread_unsuspend_one(td2);
					}
					if (TD_ON_SLEEPQ(td2) &&
					    (td2->td_flags & TDF_SINTR)) {
						if (td2->td_flags & TDF_CVWAITQ)
							cv_abort(td2);
						else
							abortsleep(td2);
					}
					if (TD_IS_IDLE(td2)) {
						TD_CLR_IDLE(td2);
					}
				} else {
					if (TD_IS_SUSPENDED(td2))
						continue;
					/* maybe other inhibited states too? */
					if (td2->td_inhibitors &
					    (TDI_SLEEPING | TDI_SWAPPED |
					    TDI_LOAN | TDI_IDLE |
					    TDI_EXITING))
						thread_suspend_one(td2);
				}
			}
		}
		/*
		 * Maybe we suspended some threads.. was it enough?
		 */
		if ((p->p_numthreads - p->p_suspcount) == 1) {
			mtx_unlock_spin(&sched_lock);
			break;
		}

		/*
		 * Wake us up when everyone else has suspended.
		 * In the mean time we suspend as well.
		 */
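		/*
		 * Informal note: mi_switch() below returns only after
		 * someone (the last suspending thread, via
		 * thread_suspend_check() or thread_exit()) has made us
		 * runnable again.  Giant and the proc lock are dropped
		 * around the switch and retaken before the while test
		 * re-evaluates the thread counts from scratch.
		 */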
		thread_suspend_one(td);
		mtx_unlock(&Giant);
		PROC_UNLOCK(p);
		p->p_stats->p_ru.ru_nvcsw++;
		mi_switch();
		mtx_unlock_spin(&sched_lock);
		mtx_lock(&Giant);
		PROC_LOCK(p);
	}
	if (force_exit == SINGLE_EXIT)
		kse_purge(p, td);
	return (0);
}

/*
 * Called in from locations that can safely check to see
 * whether we have to suspend or at least throttle for a
 * single-thread event (e.g. fork).
 *
 * Such locations include userret().
 * If the "return_instead" argument is non zero, the thread must be able to
 * accept 0 (caller may continue), or 1 (caller must abort) as a result.
 *
 * The 'return_instead' argument tells the function if it may do a
 * thread_exit() or suspend, or whether the caller must abort and back
 * out instead.
 *
 * If the thread that set the single_threading request has set the
 * P_SINGLE_EXIT bit in the process flags then this call will never return
 * if 'return_instead' is false, but will exit.
 *
 * P_SINGLE_EXIT | return_instead == 0| return_instead != 0
 *---------------+--------------------+---------------------
 *       0       | returns 0          | returns 0 or 1
 *               | when ST ends       | immediately
 *---------------+--------------------+---------------------
 *       1       | thread exits       | returns 1
 *               |                    | immediately
 * 0 = thread_exit() or suspension ok,
 * other = return error instead of stopping the thread.
 *
 * While a full suspension is under effect, even a single threading
 * thread would be suspended if it made this call (but it shouldn't).
 * This call should only be made from places where
 * thread_exit() would be safe as that may be the outcome unless
 * return_instead is set.
 */
int
thread_suspend_check(int return_instead)
{
	struct thread *td;
	struct proc *p;
	struct kse *ke;
	struct ksegrp *kg;

	td = curthread;
	p = td->td_proc;
	kg = td->td_ksegrp;
	PROC_LOCK_ASSERT(p, MA_OWNED);
	while (P_SHOULDSTOP(p)) {
		if (P_SHOULDSTOP(p) == P_STOPPED_SINGLE) {
			KASSERT(p->p_singlethread != NULL,
			    ("singlethread not set"));
			/*
			 * The only suspension in action is a
			 * single-threading. Single threader need not stop.
			 * XXX Should be safe to access unlocked
			 * as it can only be set to be true by us.
			 */
			if (p->p_singlethread == td)
				return (0);	/* Exempt from stopping. */
		}
		if (return_instead)
			return (1);

		/*
		 * If the process is waiting for us to exit,
		 * this thread should just suicide.
		 * Assumes that P_SINGLE_EXIT implies P_STOPPED_SINGLE.
		 */
		if ((p->p_flag & P_SINGLE_EXIT) && (p->p_singlethread != td)) {
			mtx_lock_spin(&sched_lock);
			while (mtx_owned(&Giant))
				mtx_unlock(&Giant);
			/*
			 * All threads should be exiting
			 * unless they are the active "singlethread".
			 * Destroy un-needed KSEs as we go..
			 * KSEGRPS may implode too as #kses -> 0.
			 */
			ke = td->td_kse;
			if (ke->ke_owner == td &&
			    (kg->kg_kses >= kg->kg_numthreads))
				ke->ke_flags |= KEF_EXIT;
			thread_exit();
		}

		/*
		 * When a thread suspends, it just
		 * moves to the process's suspend queue
		 * and stays there.
		 *
		 * XXXKSE if TDF_BOUND is true
		 * it will not release its KSE which might
		 * lead to deadlock if there are not enough KSEs
		 * to complete all waiting threads.
		 * Maybe be able to 'lend' it out again.
		 * (lent kse's can not go back to userland?)
		 * and can only be lent in STOPPED state.
		 */
		mtx_lock_spin(&sched_lock);
		if ((p->p_flag & P_STOPPED_SIG) &&
		    (p->p_suspcount+1 == p->p_numthreads)) {
			mtx_unlock_spin(&sched_lock);
			PROC_LOCK(p->p_pptr);
			if ((p->p_pptr->p_procsig->ps_flag &
			    PS_NOCLDSTOP) == 0) {
				psignal(p->p_pptr, SIGCHLD);
			}
			PROC_UNLOCK(p->p_pptr);
			mtx_lock_spin(&sched_lock);
		}
		mtx_assert(&Giant, MA_NOTOWNED);
		thread_suspend_one(td);
		PROC_UNLOCK(p);
		if (P_SHOULDSTOP(p) == P_STOPPED_SINGLE) {
			if (p->p_numthreads == p->p_suspcount) {
				thread_unsuspend_one(p->p_singlethread);
			}
		}
		p->p_stats->p_ru.ru_nivcsw++;
		mi_switch();
		mtx_unlock_spin(&sched_lock);
		PROC_LOCK(p);
	}
	return (0);
}

void
thread_suspend_one(struct thread *td)
{
	struct proc *p = td->td_proc;

	mtx_assert(&sched_lock, MA_OWNED);
	p->p_suspcount++;
	TD_SET_SUSPENDED(td);
	TAILQ_INSERT_TAIL(&p->p_suspended, td, td_runq);
	/*
	 * Hack: If we are suspending but are on the sleep queue
	 * then we are in msleep or the cv equivalent. We
	 * want to look like we have two Inhibitors.
	 * May already be set.. doesn't matter.
	 */
	if (TD_ON_SLEEPQ(td))
		TD_SET_SLEEPING(td);
}

void
thread_unsuspend_one(struct thread *td)
{
	struct proc *p = td->td_proc;

	mtx_assert(&sched_lock, MA_OWNED);
	TAILQ_REMOVE(&p->p_suspended, td, td_runq);
	TD_CLR_SUSPENDED(td);
	p->p_suspcount--;
	setrunnable(td);
}

/*
 * Allow all threads blocked by single threading to continue running.
 */
void
thread_unsuspend(struct proc *p)
{
	struct thread *td;

	mtx_assert(&sched_lock, MA_OWNED);
	PROC_LOCK_ASSERT(p, MA_OWNED);
	if (!P_SHOULDSTOP(p)) {
		while ((td = TAILQ_FIRST(&p->p_suspended))) {
			thread_unsuspend_one(td);
		}
	} else if ((P_SHOULDSTOP(p) == P_STOPPED_SINGLE) &&
	    (p->p_numthreads == p->p_suspcount)) {
		/*
		 * Stopping everything also did the job for the single
		 * threading request. Now we've downgraded to single-threaded,
		 * let it continue.
		 */
		thread_unsuspend_one(p->p_singlethread);
	}
}

void
thread_single_end(void)
{
	struct thread *td;
	struct proc *p;

	td = curthread;
	p = td->td_proc;
	PROC_LOCK_ASSERT(p, MA_OWNED);
	p->p_flag &= ~P_STOPPED_SINGLE;
	p->p_singlethread = NULL;
	/*
	 * If there are other threads they may now run,
	 * unless of course there is a blanket 'stop order'
	 * on the process. The single threader must be allowed
	 * to continue however as this is a bad place to stop.
	 */
	if ((p->p_numthreads != 1) && (!P_SHOULDSTOP(p))) {
		mtx_lock_spin(&sched_lock);
		while ((td = TAILQ_FIRST(&p->p_suspended))) {
			thread_unsuspend_one(td);
		}
		mtx_unlock_spin(&sched_lock);
	}
}