1/* $NetBSD: sys_lwp.c,v 1.89 2023/10/15 10:29:24 riastradh Exp $ */ 2 3/*- 4 * Copyright (c) 2001, 2006, 2007, 2008, 2019, 2020, 2023 5 * The NetBSD Foundation, Inc. 6 * All rights reserved. 7 * 8 * This code is derived from software contributed to The NetBSD Foundation 9 * by Nathan J. Williams, and Andrew Doran. 10 * 11 * Redistribution and use in source and binary forms, with or without 12 * modification, are permitted provided that the following conditions 13 * are met: 14 * 1. Redistributions of source code must retain the above copyright 15 * notice, this list of conditions and the following disclaimer. 16 * 2. Redistributions in binary form must reproduce the above copyright 17 * notice, this list of conditions and the following disclaimer in the 18 * documentation and/or other materials provided with the distribution. 19 * 20 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS 21 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED 22 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 23 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS 24 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 25 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 26 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 27 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 28 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 29 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 30 * POSSIBILITY OF SUCH DAMAGE. 31 */ 32 33/* 34 * Lightweight process (LWP) system calls. See kern_lwp.c for a description 35 * of LWPs. 
 */

#include <sys/cdefs.h>
__KERNEL_RCSID(0, "$NetBSD: sys_lwp.c,v 1.89 2023/10/15 10:29:24 riastradh Exp $");

#include <sys/param.h>

#include <sys/cpu.h>
#include <sys/kauth.h>
#include <sys/kmem.h>
#include <sys/lwpctl.h>
#include <sys/pool.h>
#include <sys/proc.h>
#include <sys/pserialize.h>
#include <sys/ptrace.h>
#include <sys/sleepq.h>
#include <sys/syncobj.h>
#include <sys/syscallargs.h>
#include <sys/systm.h>
#include <sys/types.h>

#include <uvm/uvm_extern.h>

/* Maximum number of targets accepted by one _lwp_unpark_all() call. */
#define	LWP_UNPARK_MAX		1024

/* Initial (disabled) alternate signal stack given to every new LWP. */
static const stack_t lwp_ss_init = SS_INIT;

/*
 * Parked LWPs get no priority boost on awakening as they blocked on
 * user space objects.  Maybe revisit?
 */
syncobj_t lwp_park_syncobj = {
	.sobj_name	= "lwp_park",
	.sobj_flag	= SOBJ_SLEEPQ_NULL,
	.sobj_boostpri	= PRI_USER,
	.sobj_unsleep	= sleepq_unsleep,
	.sobj_changepri	= sleepq_changepri,
	.sobj_lendpri	= sleepq_lendpri,
	.sobj_owner	= syncobj_noowner,
};

/*
 * Machine-independent entry point for a newly created LWP: hand off to
 * the emulation-specific start routine, then report the creation to an
 * attached debugger if LWP-create tracing is enabled for the process.
 */
static void
mi_startlwp(void *arg)
{
	struct lwp *l = curlwp;
	struct proc *p = l->l_proc;

	(p->p_emul->e_startlwp)(arg);

	/* If the process is traced, report lwp creation to a debugger */
	if ((p->p_slflag & (PSL_TRACED|PSL_TRACELWP_CREATE)) ==
	    (PSL_TRACED|PSL_TRACELWP_CREATE)) {
		/* Paranoid check: flags may have changed before proc_lock */
		mutex_enter(&proc_lock);
		if ((p->p_slflag & (PSL_TRACED|PSL_TRACELWP_CREATE)) !=
		    (PSL_TRACED|PSL_TRACELWP_CREATE)) {
			mutex_exit(&proc_lock);
			return;
		}

		mutex_enter(p->p_lock);
		/*
		 * NOTE(review): there is no matching mutex_exit() for
		 * proc_lock or p_lock on this path -- eventswitch()
		 * presumably releases both; confirm against its definition.
		 */
		eventswitch(TRAP_LWP, PTRACE_LWP_CREATE, l->l_lid);
	}
}

/*
 * Common back end for LWP creation: allocate a u-area and create the LWP
 * with mi_startlwp() as its entry point.  On success the new LWP is
 * returned via *l2 (not yet started).
 *
 * NOTE(review): the sigstk parameter is not used in this body; lwp_create()
 * is always handed &lwp_ss_init.  Confirm whether callers expect sigstk to
 * take effect.
 */
int
do_lwp_create(lwp_t *l, void *arg, u_long flags, lwp_t **l2,
    const sigset_t *sigmask, const stack_t *sigstk)
{
	struct proc *p = l->l_proc;
	vaddr_t uaddr;
	int error;

	/* XXX check against resource limits */

	uaddr = uvm_uarea_alloc();
	if (__predict_false(uaddr == 0))
		return ENOMEM;

	error = lwp_create(l, p, uaddr, flags & LWP_DETACHED, NULL, 0,
	    mi_startlwp, arg, l2, l->l_class, sigmask, &lwp_ss_init);
	if (__predict_false(error)) {
		/* Creation failed: give the u-area back. */
		uvm_uarea_free(uaddr);
		return error;
	}

	return 0;
}

/*
 * _lwp_create(2): create a new LWP from a user-supplied ucontext and,
 * once its id has been copied out, start it running.
 */
int
sys__lwp_create(struct lwp *l, const struct sys__lwp_create_args *uap,
    register_t *retval)
{
	/* {
		syscallarg(const ucontext_t *) ucp;
		syscallarg(u_long) flags;
		syscallarg(lwpid_t *) new_lwp;
	} */
	struct proc *p = l->l_proc;
	ucontext_t *newuc;
	lwp_t *l2;
	int error;

	newuc = kmem_alloc(sizeof(ucontext_t), KM_SLEEP);
	error = copyin(SCARG(uap, ucp), newuc, p->p_emul->e_ucsize);
	if (error)
		goto fail;

	/* validate the ucontext */
	if ((newuc->uc_flags & _UC_CPU) == 0) {
		error = EINVAL;
		goto fail;
	}
	error = cpu_mcontext_validate(l, &newuc->uc_mcontext);
	if (error)
		goto fail;

	/* Use the context's signal mask only if the caller supplied one. */
	const sigset_t *sigmask = newuc->uc_flags & _UC_SIGMASK ?
	    &newuc->uc_sigmask : &l->l_sigmask;
	error = do_lwp_create(l, newuc, SCARG(uap, flags), &l2, sigmask,
	    &SS_INIT);
	if (error)
		goto fail;

	error = copyout(&l2->l_lid, SCARG(uap, new_lwp), sizeof(l2->l_lid));
	if (error == 0) {
		lwp_start(l2, SCARG(uap, flags));
		return 0;
	}
	/* Could not report the new id to the caller: undo the creation. */
	lwp_exit(l2);
 fail:
	kmem_free(newuc, sizeof(ucontext_t));
	return error;
}

/* _lwp_exit(2): terminate the calling LWP. */
int
sys__lwp_exit(struct lwp *l, const void *v, register_t *retval)
{

	lwp_exit(l);
	return 0;
}

/* _lwp_self(2): return the calling LWP's id. */
int
sys__lwp_self(struct lwp *l, const void *v, register_t *retval)
{

	*retval = l->l_lid;
	return 0;
}

/* _lwp_getprivate(2): return the calling LWP's private pointer. */
int
sys__lwp_getprivate(struct lwp *l, const void *v, register_t *retval)
{

	*retval = (uintptr_t)l->l_private;
	return 0;
}

/* _lwp_setprivate(2): set the calling LWP's private pointer. */
int
sys__lwp_setprivate(struct lwp *l, const struct sys__lwp_setprivate_args *uap,
    register_t *retval)
{
	/* {
		syscallarg(void *) ptr;
	} */

	return lwp_setprivate(l, SCARG(uap, ptr));
}

/*
 * _lwp_suspend(2): suspend a target LWP in this process, then wait until
 * the suspension has actually taken effect (or the target goes away).
 */
int
sys__lwp_suspend(struct lwp *l, const struct sys__lwp_suspend_args *uap,
    register_t *retval)
{
	/* {
		syscallarg(lwpid_t) target;
	} */
	struct proc *p = l->l_proc;
	struct lwp *t;
	int error;

	mutex_enter(p->p_lock);
	if ((t = lwp_find(p, SCARG(uap, target))) == NULL) {
		mutex_exit(p->p_lock);
		return ESRCH;
	}

	/*
	 * Check for deadlock, which is only possible when we're suspending
	 * ourself.  XXX There is a short race here, as p_nrlwps is only
	 * incremented when an LWP suspends itself on the kernel/user
	 * boundary.  It's still possible to kill -9 the process so we
	 * don't bother checking further.
	 */
	lwp_lock(t);
	if ((t == l && p->p_nrlwps == 1) ||
	    (l->l_flag & (LW_WCORE | LW_WEXIT)) != 0) {
		lwp_unlock(t);
		mutex_exit(p->p_lock);
		return EDEADLK;
	}

	/*
	 * Suspend the LWP.  XXX If it's on a different CPU, we should wait
	 * for it to be preempted, where it will put itself to sleep.
	 *
	 * Suspension of the current LWP will happen on return to userspace.
	 */
	error = lwp_suspend(l, t);
	if (error) {
		mutex_exit(p->p_lock);
		return error;
	}

	/*
	 * Wait for:
	 *  o process exiting
	 *  o target LWP suspended
	 *  o target LWP not suspended and L_WSUSPEND clear
	 *  o target LWP exited
	 */
	for (;;) {
		error = cv_wait_sig(&p->p_lwpcv, p->p_lock);
		if (error) {
			error = ERESTART;
			break;
		}
		if (lwp_find(p, SCARG(uap, target)) == NULL) {
			error = ESRCH;
			break;
		}
		if ((l->l_flag | t->l_flag) & (LW_WCORE | LW_WEXIT)) {
			error = ERESTART;
			break;
		}
		if (t->l_stat == LSSUSPENDED ||
		    (t->l_flag & LW_WSUSPEND) == 0)
			break;
	}
	mutex_exit(p->p_lock);

	return error;
}

/* _lwp_continue(2): resume a suspended LWP in this process. */
int
sys__lwp_continue(struct lwp *l, const struct sys__lwp_continue_args *uap,
    register_t *retval)
{
	/* {
		syscallarg(lwpid_t) target;
	} */
	int error;
	struct proc *p = l->l_proc;
	struct lwp *t;

	error = 0;

	mutex_enter(p->p_lock);
	if ((t = lwp_find(p, SCARG(uap, target))) == NULL) {
		mutex_exit(p->p_lock);
		return ESRCH;
	}

	/* lwp_continue() is handed the target locked; it consumes the lock. */
	lwp_lock(t);
	lwp_continue(t);
	mutex_exit(p->p_lock);

	return error;
}

/*
 * _lwp_wakeup(2): make a target LWP's interruptible sleep return early
 * (with LW_CANCELLED set, so the sleep fails rather than restarts).
 */
int
sys__lwp_wakeup(struct lwp *l, const struct sys__lwp_wakeup_args *uap,
    register_t *retval)
{
	/* {
		syscallarg(lwpid_t) target;
	} */
	struct lwp *t;
	struct proc *p;
	int error;

	p = l->l_proc;
	mutex_enter(p->p_lock);

	if ((t = lwp_find(p, SCARG(uap, target))) == NULL) {
		mutex_exit(p->p_lock);
		return ESRCH;
	}

	lwp_lock(t);
	t->l_flag |= (LW_CANCELLED | LW_UNPARKED);

	if (t->l_stat != LSSLEEP) {
		lwp_unlock(t);
		error = ENODEV;
	} else if ((t->l_flag & LW_SINTR) == 0) {
		/* Sleeping, but not interruptibly: cannot be woken here. */
		lwp_unlock(t);
		error = EBUSY;
	} else {
		/* Wake it up.  lwp_unsleep() will release the LWP lock. */
		lwp_unsleep(t, true);
		error = 0;
	}

	mutex_exit(p->p_lock);

	return error;
}

/*
 * _lwp_wait(2): wait for a (or any) LWP in this process to exit, and
 * optionally report the id of the departed LWP to the caller.
 */
int
sys__lwp_wait(struct lwp *l, const struct sys__lwp_wait_args *uap,
    register_t *retval)
{
	/* {
		syscallarg(lwpid_t)		wait_for;
		syscallarg(lwpid_t *)		departed;
	} */
	struct proc *p = l->l_proc;
	int error;
	lwpid_t dep;

	mutex_enter(p->p_lock);
	error = lwp_wait(l, SCARG(uap, wait_for), &dep, false);
	mutex_exit(p->p_lock);

	if (!error && SCARG(uap, departed)) {
		error = copyout(&dep, SCARG(uap, departed), sizeof(dep));
	}

	return error;
}

/*
 * _lwp_kill(2): post a signal directed at a specific LWP in this process.
 * A signo of 0 only checks that the target exists.
 */
int
sys__lwp_kill(struct lwp *l, const struct sys__lwp_kill_args *uap,
    register_t *retval)
{
	/* {
		syscallarg(lwpid_t)	target;
		syscallarg(int)		signo;
	} */
	struct proc *p = l->l_proc;
	struct lwp *t;
	ksiginfo_t ksi;
	int signo = SCARG(uap, signo);
	int error = 0;

	if ((u_int)signo >= NSIG)
		return EINVAL;

	KSI_INIT(&ksi);
	ksi.ksi_signo = signo;
	ksi.ksi_code = SI_LWP;
	ksi.ksi_pid = p->p_pid;
	ksi.ksi_uid = kauth_cred_geteuid(l->l_cred);
	ksi.ksi_lid = SCARG(uap, target);

	mutex_enter(&proc_lock);
	mutex_enter(p->p_lock);
	if ((t = lwp_find(p, ksi.ksi_lid)) == NULL)
		error = ESRCH;
	else if (signo != 0)
		kpsignal2(p, &ksi);
	mutex_exit(p->p_lock);
	mutex_exit(&proc_lock);

	return error;
}

/*
 * _lwp_detach(2): detach a target LWP so that no _lwp_wait() is needed
 * to reap it; if it is already a zombie, reap it here.
 */
int
sys__lwp_detach(struct lwp *l, const struct sys__lwp_detach_args *uap,
    register_t *retval)
{
	/* {
		syscallarg(lwpid_t)	target;
	} */
	struct proc *p;
	struct lwp *t;
	lwpid_t target;
	int error;

	target = SCARG(uap, target);
	p = l->l_proc;

	mutex_enter(p->p_lock);

	if (l->l_lid == target)
		t = l;
	else {
		/*
		 * We can't use lwp_find() here because the target might
		 * be a zombie.
		 */
		t = proc_find_lwp(p, target);
		KASSERT(t == NULL || t->l_lid == target);
	}

	/*
	 * If the LWP is already detached, there's nothing to do.
	 * If it's a zombie, we need to clean up after it.  LSZOMB
	 * is visible with the proc mutex held.
	 *
	 * After we have detached or released the LWP, kick any
	 * other LWPs that may be sitting in _lwp_wait(), waiting
	 * for the target LWP to exit.
	 */
	if (t != NULL && t->l_stat != LSIDL) {
		if ((t->l_prflag & LPR_DETACHED) == 0) {
			p->p_ndlwps++;
			t->l_prflag |= LPR_DETACHED;
			if (t->l_stat == LSZOMB) {
				/* Releases proc mutex. */
				lwp_free(t, false, false);
				return 0;
			}
			error = 0;

			/*
			 * Have any LWPs sleeping in lwp_wait() recheck
			 * for deadlock.
			 */
			cv_broadcast(&p->p_lwpcv);
		} else
			error = EINVAL;
	} else
		error = ESRCH;

	mutex_exit(p->p_lock);

	return error;
}

/*
 * Unpark a list of LWPs in the current process: any that are parked in
 * lwp_park() are woken; any not yet parked get LW_UNPARKED set so their
 * next park attempt returns early.  Returns ESRCH if any target was not
 * found (but still processes the rest of the list).
 */
int
lwp_unpark(const lwpid_t *tp, const u_int ntargets)
{
	u_int target;
	kmutex_t *mp;
	int error, s;
	proc_t *p;
	lwp_t *t;

	p = curproc;
	error = 0;

	/* Lockless lookup: LWPs are protected by a pserialize read section. */
	s = pserialize_read_enter();
	for (target = 0; target < ntargets; target++) {
		t = proc_find_lwp_unlocked(p, tp[target]);
		if (__predict_false(t == NULL)) {
			error = ESRCH;
			continue;
		}

		KASSERT(lwp_locked(t, NULL));

		if (__predict_true(t->l_syncobj == &lwp_park_syncobj)) {
			/* As expected it's parked, so wake it up. */
			mp = t->l_mutex;
			sleepq_remove(NULL, t, true);
			mutex_spin_exit(mp);
		} else if (__predict_false(t->l_stat == LSZOMB)) {
			lwp_unlock(t);
			error = ESRCH;
		} else {
			/*
			 * It hasn't parked yet because the wakeup side won
			 * the race, or something else has happened to make
			 * the thread not park.  Why doesn't really matter.
			 * Set the operation pending, so that the next call
			 * to _lwp_park() in the LWP returns early.  If it
			 * turns out to be a spurious wakeup, no harm done.
			 */
			t->l_flag |= LW_UNPARKED;
			lwp_unlock(t);
		}
	}
	pserialize_read_exit(s);

	return error;
}

/*
 * Block the calling LWP on the park sync object until it is unparked,
 * cancelled, interrupted, or the (optional) timeout expires.  For a
 * relative timeout, the remaining time is written back through ts.
 */
int
lwp_park(clockid_t clock_id, int flags, struct timespec *ts)
{
	int timo, error;
	struct timespec start;
	lwp_t *l;
	/* Only a relative timeout has "time remaining" to report back. */
	bool timeremain = !(flags & TIMER_ABSTIME) && ts;

	if (ts != NULL) {
		if ((error = ts2timo(clock_id, flags, ts, &timo,
		    timeremain ? &start : NULL)) != 0)
			return error;
		KASSERT(timo != 0);
	} else {
		timo = 0;
	}

	/*
	 * Before going the full route and blocking, check to see if an
	 * unpark op is pending.
	 */
	l = curlwp;
	lwp_lock(l);
	if ((l->l_flag & (LW_CANCELLED | LW_UNPARKED)) != 0) {
		l->l_flag &= ~(LW_CANCELLED | LW_UNPARKED);
		lwp_unlock(l);
		return EALREADY;
	}
	sleepq_enqueue(NULL, l, "parked", &lwp_park_syncobj, true);
	error = sleepq_block(timo, true, &lwp_park_syncobj, 0);
	switch (error) {
	case EWOULDBLOCK:
		/* Timed out: report ETIMEDOUT with no time remaining. */
		error = ETIMEDOUT;
		if (timeremain)
			memset(ts, 0, sizeof(*ts));
		break;
	case ERESTART:
		error = EINTR;
		/*FALLTHROUGH*/
	default:
		if (timeremain)
			clock_timeleft(clock_id, ts, &start);
		break;
	}
	return error;
}

/*
 * 'park' an LWP waiting on a user-level synchronisation object.  The LWP
 * will remain parked until another LWP in the same process calls in and
 * requests that it be unparked.
 */
int
sys____lwp_park60(struct lwp *l, const struct sys____lwp_park60_args *uap,
    register_t *retval)
{
	/* {
		syscallarg(clockid_t)		clock_id;
		syscallarg(int)			flags;
		syscallarg(struct timespec *)	ts;
		syscallarg(lwpid_t)		unpark;
		syscallarg(const void *)	hint;
		syscallarg(const void *)	unparkhint;
	} */
	struct timespec ts, *tsp;
	int error;

	if (SCARG(uap, ts) == NULL)
		tsp = NULL;
	else {
		error = copyin(SCARG(uap, ts), &ts, sizeof(ts));
		if (error != 0)
			return error;
		tsp = &ts;
	}

	/* Optionally unpark one other LWP before parking ourselves. */
	if (SCARG(uap, unpark) != 0) {
		error = lwp_unpark(&SCARG(uap, unpark), 1);
		if (error != 0)
			return error;
	}

	error = lwp_park(SCARG(uap, clock_id), SCARG(uap, flags), tsp);
	/*
	 * For a relative timeout, write the remaining time back to the
	 * caller.  Best effort only: a copyout failure is ignored here.
	 */
	if (SCARG(uap, ts) != NULL && (SCARG(uap, flags) & TIMER_ABSTIME) == 0)
		(void)copyout(tsp, SCARG(uap, ts), sizeof(*tsp));
	return error;
}

/* _lwp_unpark(2): unpark a single target LWP.  The hint is unused here. */
int
sys__lwp_unpark(struct lwp *l, const struct sys__lwp_unpark_args *uap,
    register_t *retval)
{
	/* {
		syscallarg(lwpid_t)		target;
		syscallarg(const void *)	hint;
	} */

	return lwp_unpark(&SCARG(uap, target), 1);
}

/*
 * _lwp_unpark_all(2): unpark a caller-supplied list of LWPs.  Called with
 * targets == NULL, it just reports the per-call limit (LWP_UNPARK_MAX).
 */
int
sys__lwp_unpark_all(struct lwp *l, const struct sys__lwp_unpark_all_args *uap,
    register_t *retval)
{
	/* {
		syscallarg(const lwpid_t *)	targets;
		syscallarg(size_t)		ntargets;
		syscallarg(const void *)	hint;
	} */
	lwpid_t targets[32], *tp;
	int error;
	u_int ntargets;
	size_t sz;

	ntargets = SCARG(uap, ntargets);
	if (SCARG(uap, targets) == NULL) {
		/*
		 * Let the caller know how much we are willing to do, and
		 * let it unpark the LWPs in blocks.
		 */
		*retval = LWP_UNPARK_MAX;
		return 0;
	}
	if (ntargets > LWP_UNPARK_MAX || ntargets == 0)
		return EINVAL;

	/*
	 * Copy in the target array.  If it's a small number of LWPs, then
	 * place the numbers on the stack.
	 */
	sz = sizeof(lwpid_t) * ntargets;
	if (sz <= sizeof(targets))
		tp = targets;
	else
		tp = kmem_alloc(sz, KM_SLEEP);
	error = copyin(SCARG(uap, targets), tp, sz);
	if (error != 0) {
		if (tp != targets) {
			kmem_free(tp, sz);
		}
		return error;
	}
	error = lwp_unpark(tp, ntargets);
	if (tp != targets)
		kmem_free(tp, sz);
	return error;
}

/*
 * _lwp_setname(2): set the name of a target LWP (0 means the caller).
 * Names longer than MAXCOMLEN-1 are silently truncated.
 */
int
sys__lwp_setname(struct lwp *l, const struct sys__lwp_setname_args *uap,
    register_t *retval)
{
	/* {
		syscallarg(lwpid_t)		target;
		syscallarg(const char *)	name;
	} */
	char *name, *oname;
	lwpid_t target;
	proc_t *p;
	lwp_t *t;
	int error;

	if ((target = SCARG(uap, target)) == 0)
		target = l->l_lid;

	name = kmem_alloc(MAXCOMLEN, KM_SLEEP);
	error = copyinstr(SCARG(uap, name), name, MAXCOMLEN, NULL);
	switch (error) {
	case ENAMETOOLONG:
	case 0:
		/* Truncation is acceptable; just make sure it's terminated. */
		name[MAXCOMLEN - 1] = '\0';
		break;
	default:
		kmem_free(name, MAXCOMLEN);
		return error;
	}

	p = curproc;
	mutex_enter(p->p_lock);
	if ((t = lwp_find(p, target)) == NULL) {
		mutex_exit(p->p_lock);
		kmem_free(name, MAXCOMLEN);
		return ESRCH;
	}
	lwp_lock(t);
	/* Swap in the new name under the LWP lock; free the old one after. */
	oname = t->l_name;
	t->l_name = name;
	lwp_unlock(t);
	mutex_exit(p->p_lock);

	/* l_name buffers are always MAXCOMLEN bytes (see allocation above). */
	if (oname != NULL)
		kmem_free(oname, MAXCOMLEN);

	return 0;
}

/*
 * _lwp_getname(2): copy out the name of a target LWP (0 means the
 * caller).  An unnamed LWP yields the empty string.
 */
int
sys__lwp_getname(struct lwp *l, const struct sys__lwp_getname_args *uap,
    register_t *retval)
{
	/* {
		syscallarg(lwpid_t)		target;
		syscallarg(char *)		name;
		syscallarg(size_t)		len;
	} */
	char name[MAXCOMLEN];
	lwpid_t target;
	size_t len;
	proc_t *p;
	lwp_t *t;

	if ((target = SCARG(uap, target)) == 0)
		target = l->l_lid;

	p = curproc;
	mutex_enter(p->p_lock);
	if ((t = lwp_find(p, target)) == NULL) {
		mutex_exit(p->p_lock);
		return ESRCH;
	}
	lwp_lock(t);
	/* Snapshot the name under the locks; copyout may sleep. */
	if (t->l_name == NULL)
		name[0] = '\0';
	else
		strlcpy(name, t->l_name, sizeof(name));
	lwp_unlock(t);
	mutex_exit(p->p_lock);

	len = uimin(SCARG(uap, len), sizeof(name));

	return copyoutstr(name, SCARG(uap, name), len, NULL);
}

/*
 * _lwp_ctl(2): map the shared lwpctl communication area into the process
 * and return its user-space address to the caller.
 */
int
sys__lwp_ctl(struct lwp *l, const struct sys__lwp_ctl_args *uap,
    register_t *retval)
{
	/* {
		syscallarg(int)			features;
		syscallarg(struct lwpctl **)	address;
	} */
	int error, features;
	vaddr_t vaddr;

	features = SCARG(uap, features);
	/* Reject any feature bits beyond the two we implement. */
	features &= ~(LWPCTL_FEATURE_CURCPU | LWPCTL_FEATURE_PCTR);
	if (features != 0)
		return ENODEV;
	if ((error = lwp_ctl_alloc(&vaddr)) != 0)
		return error;
	return copyout(&vaddr, SCARG(uap, address), sizeof(void *));
}