118 119SYSCTL_PROC(_kern, OID_AUTO, stackprot, CTLTYPE_INT|CTLFLAG_RD, 120 NULL, 0, sysctl_kern_stackprot, "I", ""); 121 122u_long ps_arg_cache_limit = PAGE_SIZE / 16; 123SYSCTL_ULONG(_kern, OID_AUTO, ps_arg_cache_limit, CTLFLAG_RW, 124 &ps_arg_cache_limit, 0, ""); 125 126static int map_at_zero = 0; 127TUNABLE_INT("security.bsd.map_at_zero", &map_at_zero); 128SYSCTL_INT(_security_bsd, OID_AUTO, map_at_zero, CTLFLAG_RW, &map_at_zero, 0, 129 "Permit processes to map an object at virtual address 0."); 130 131static int 132sysctl_kern_ps_strings(SYSCTL_HANDLER_ARGS) 133{ 134 struct proc *p; 135 int error; 136 137 p = curproc; 138#ifdef SCTL_MASK32 139 if (req->flags & SCTL_MASK32) { 140 unsigned int val; 141 val = (unsigned int)p->p_sysent->sv_psstrings; 142 error = SYSCTL_OUT(req, &val, sizeof(val)); 143 } else 144#endif 145 error = SYSCTL_OUT(req, &p->p_sysent->sv_psstrings, 146 sizeof(p->p_sysent->sv_psstrings)); 147 return error; 148} 149 150static int 151sysctl_kern_usrstack(SYSCTL_HANDLER_ARGS) 152{ 153 struct proc *p; 154 int error; 155 156 p = curproc; 157#ifdef SCTL_MASK32 158 if (req->flags & SCTL_MASK32) { 159 unsigned int val; 160 val = (unsigned int)p->p_sysent->sv_usrstack; 161 error = SYSCTL_OUT(req, &val, sizeof(val)); 162 } else 163#endif 164 error = SYSCTL_OUT(req, &p->p_sysent->sv_usrstack, 165 sizeof(p->p_sysent->sv_usrstack)); 166 return error; 167} 168 169static int 170sysctl_kern_stackprot(SYSCTL_HANDLER_ARGS) 171{ 172 struct proc *p; 173 174 p = curproc; 175 return (SYSCTL_OUT(req, &p->p_sysent->sv_stackprot, 176 sizeof(p->p_sysent->sv_stackprot))); 177} 178 179/* 180 * Each of the items is a pointer to a `const struct execsw', hence the 181 * double pointer here. 182 */ 183static const struct execsw **execsw; 184 185#ifndef _SYS_SYSPROTO_H_ 186struct execve_args { 187 char *fname; 188 char **argv; 189 char **envv; 190}; 191#endif 192 193int 194execve(td, uap) 195 struct thread *td; 196 struct execve_args /* { 197 char *fname; 198 char **argv; 199 char **envv; 200 } */ *uap; 201{ 202 int error; 203 struct image_args args; 204 205 error = exec_copyin_args(&args, uap->fname, UIO_USERSPACE, 206 uap->argv, uap->envv); 207 if (error == 0) 208 error = kern_execve(td, &args, NULL); 209 return (error); 210} 211 212#ifndef _SYS_SYSPROTO_H_ 213struct fexecve_args { 214 int fd; 215 char **argv; 216 char **envv; 217} 218#endif 219int 220fexecve(struct thread *td, struct fexecve_args *uap) 221{ 222 int error; 223 struct image_args args; 224 225 error = exec_copyin_args(&args, NULL, UIO_SYSSPACE, 226 uap->argv, uap->envv); 227 if (error == 0) { 228 args.fd = uap->fd; 229 error = kern_execve(td, &args, NULL); 230 } 231 return (error); 232} 233 234#ifndef _SYS_SYSPROTO_H_ 235struct __mac_execve_args { 236 char *fname; 237 char **argv; 238 char **envv; 239 struct mac *mac_p; 240}; 241#endif 242 243int 244__mac_execve(td, uap) 245 struct thread *td; 246 struct __mac_execve_args /* { 247 char *fname; 248 char **argv; 249 char **envv; 250 struct mac *mac_p; 251 } */ *uap; 252{ 253#ifdef MAC 254 int error; 255 struct image_args args; 256 257 error = exec_copyin_args(&args, uap->fname, UIO_USERSPACE, 258 uap->argv, uap->envv); 259 if (error == 0) 260 error = kern_execve(td, &args, uap->mac_p); 261 return (error); 262#else 263 return (ENOSYS); 264#endif 265} 266 267/* 268 * XXX: kern_execve has the astonishing property of not always returning to 269 * the caller. If sufficiently bad things happen during the call to 270 * do_execve(), it can end up calling exit1(); as a result, callers must 271 * avoid doing anything which they might need to undo (e.g., allocating 272 * memory). 273 */ 274int 275kern_execve(td, args, mac_p) 276 struct thread *td; 277 struct image_args *args; 278 struct mac *mac_p; 279{ 280 struct proc *p = td->td_proc; 281 int error; 282 283 AUDIT_ARG_ARGV(args->begin_argv, args->argc, 284 args->begin_envv - args->begin_argv); 285 AUDIT_ARG_ENVV(args->begin_envv, args->envc, 286 args->endp - args->begin_envv); 287 if (p->p_flag & P_HADTHREADS) { 288 PROC_LOCK(p); 289 if (thread_single(SINGLE_BOUNDARY)) { 290 PROC_UNLOCK(p); 291 exec_free_args(args); 292 return (ERESTART); /* Try again later. */ 293 } 294 PROC_UNLOCK(p); 295 } 296 297 error = do_execve(td, args, mac_p); 298 299 if (p->p_flag & P_HADTHREADS) { 300 PROC_LOCK(p); 301 /* 302 * If success, we upgrade to SINGLE_EXIT state to 303 * force other threads to suicide. 304 */ 305 if (error == 0) 306 thread_single(SINGLE_EXIT); 307 else 308 thread_single_end(); 309 PROC_UNLOCK(p); 310 } 311 312 return (error); 313} 314 315/* 316 * In-kernel implementation of execve(). All arguments are assumed to be 317 * userspace pointers from the passed thread. 318 */ 319static int 320do_execve(td, args, mac_p) 321 struct thread *td; 322 struct image_args *args; 323 struct mac *mac_p; 324{ 325 struct proc *p = td->td_proc; 326 struct nameidata nd; 327 struct ucred *newcred = NULL, *oldcred; 328 struct uidinfo *euip; 329 register_t *stack_base; 330 int error, i; 331 struct image_params image_params, *imgp; 332 struct vattr attr; 333 int (*img_first)(struct image_params *); 334 struct pargs *oldargs = NULL, *newargs = NULL; 335 struct sigacts *oldsigacts, *newsigacts; 336#ifdef KTRACE 337 struct vnode *tracevp = NULL; 338 struct ucred *tracecred = NULL; 339#endif 340 struct vnode *textvp = NULL, *binvp = NULL; 341 int credential_changing; 342 int vfslocked; 343 int textset; 344#ifdef MAC 345 struct label *interpvplabel = NULL; 346 int will_transition; 347#endif 348#ifdef HWPMC_HOOKS 349 struct pmckern_procexec pe; 350#endif 351 static const char fexecv_proc_title[] = "(fexecv)"; 352 353 vfslocked = 0; 354 imgp = &image_params; 355 356 /* 357 * Lock the process and set the P_INEXEC flag to indicate that 358 * it should be left alone until we're done here. This is 359 * necessary to avoid race conditions - e.g. in ptrace() - 360 * that might allow a local user to illicitly obtain elevated 361 * privileges. 362 */ 363 PROC_LOCK(p); 364 KASSERT((p->p_flag & P_INEXEC) == 0, 365 ("%s(): process already has P_INEXEC flag", __func__)); 366 p->p_flag |= P_INEXEC; 367 PROC_UNLOCK(p); 368 369 /* 370 * Initialize part of the common data 371 */ 372 imgp->proc = p; 373 imgp->execlabel = NULL; 374 imgp->attr = &attr; 375 imgp->entry_addr = 0; 376 imgp->reloc_base = 0; 377 imgp->vmspace_destroyed = 0; 378 imgp->interpreted = 0; 379 imgp->opened = 0; 380 imgp->interpreter_name = NULL; 381 imgp->auxargs = NULL; 382 imgp->vp = NULL; 383 imgp->object = NULL; 384 imgp->firstpage = NULL; 385 imgp->ps_strings = 0; 386 imgp->auxarg_size = 0; 387 imgp->args = args; 388 imgp->execpath = imgp->freepath = NULL; 389 imgp->execpathp = 0; 390 imgp->canary = 0; 391 imgp->canarylen = 0; 392 imgp->pagesizes = 0; 393 imgp->pagesizeslen = 0; 394 imgp->stack_prot = 0; 395 396#ifdef MAC 397 error = mac_execve_enter(imgp, mac_p); 398 if (error) 399 goto exec_fail; 400#endif 401 402 imgp->image_header = NULL; 403 404 /* 405 * Translate the file name. namei() returns a vnode pointer 406 * in ni_vp amoung other things. 407 * 408 * XXXAUDIT: It would be desirable to also audit the name of the 409 * interpreter if this is an interpreted binary. 410 */ 411 if (args->fname != NULL) { 412 NDINIT(&nd, LOOKUP, ISOPEN | LOCKLEAF | FOLLOW | SAVENAME 413 | MPSAFE | AUDITVNODE1, UIO_SYSSPACE, args->fname, td); 414 } 415 416 SDT_PROBE(proc, kernel, , exec, args->fname, 0, 0, 0, 0 ); 417 418interpret: 419 if (args->fname != NULL) { 420#ifdef CAPABILITY_MODE 421 /* 422 * While capability mode can't reach this point via direct 423 * path arguments to execve(), we also don't allow 424 * interpreters to be used in capability mode (for now). 425 * Catch indirect lookups and return a permissions error. 426 */ 427 if (IN_CAPABILITY_MODE(td)) { 428 error = ECAPMODE; 429 goto exec_fail; 430 } 431#endif 432 error = namei(&nd); 433 if (error) 434 goto exec_fail; 435 436 vfslocked = NDHASGIANT(&nd); 437 binvp = nd.ni_vp; 438 imgp->vp = binvp; 439 } else { 440 AUDIT_ARG_FD(args->fd); 441 error = fgetvp(td, args->fd, &binvp); 442 if (error) 443 goto exec_fail; 444 vfslocked = VFS_LOCK_GIANT(binvp->v_mount); 445 vn_lock(binvp, LK_EXCLUSIVE | LK_RETRY); 446 AUDIT_ARG_VNODE1(binvp); 447 imgp->vp = binvp; 448 } 449 450 /* 451 * Check file permissions (also 'opens' file) 452 */ 453 error = exec_check_permissions(imgp); 454 if (error) 455 goto exec_fail_dealloc; 456 457 imgp->object = imgp->vp->v_object; 458 if (imgp->object != NULL) 459 vm_object_reference(imgp->object); 460 461 /* 462 * Set VV_TEXT now so no one can write to the executable while we're 463 * activating it. 464 * 465 * Remember if this was set before and unset it in case this is not 466 * actually an executable image. 467 */ 468 textset = imgp->vp->v_vflag & VV_TEXT; 469 imgp->vp->v_vflag |= VV_TEXT; 470 471 error = exec_map_first_page(imgp); 472 if (error) 473 goto exec_fail_dealloc; 474 475 imgp->proc->p_osrel = 0; 476 /* 477 * If the current process has a special image activator it 478 * wants to try first, call it. For example, emulating shell 479 * scripts differently. 480 */ 481 error = -1; 482 if ((img_first = imgp->proc->p_sysent->sv_imgact_try) != NULL) 483 error = img_first(imgp); 484 485 /* 486 * Loop through the list of image activators, calling each one. 487 * An activator returns -1 if there is no match, 0 on success, 488 * and an error otherwise. 489 */ 490 for (i = 0; error == -1 && execsw[i]; ++i) { 491 if (execsw[i]->ex_imgact == NULL || 492 execsw[i]->ex_imgact == img_first) { 493 continue; 494 } 495 error = (*execsw[i]->ex_imgact)(imgp); 496 } 497 498 if (error) { 499 if (error == -1) { 500 if (textset == 0) 501 imgp->vp->v_vflag &= ~VV_TEXT; 502 error = ENOEXEC; 503 } 504 goto exec_fail_dealloc; 505 } 506 507 /* 508 * Special interpreter operation, cleanup and loop up to try to 509 * activate the interpreter. 510 */ 511 if (imgp->interpreted) { 512 exec_unmap_first_page(imgp); 513 /* 514 * VV_TEXT needs to be unset for scripts. There is a short 515 * period before we determine that something is a script where 516 * VV_TEXT will be set. The vnode lock is held over this 517 * entire period so nothing should illegitimately be blocked. 518 */ 519 imgp->vp->v_vflag &= ~VV_TEXT; 520 /* free name buffer and old vnode */ 521 if (args->fname != NULL) 522 NDFREE(&nd, NDF_ONLY_PNBUF); 523#ifdef MAC 524 mac_execve_interpreter_enter(binvp, &interpvplabel); 525#endif 526 if (imgp->opened) { 527 VOP_CLOSE(binvp, FREAD, td->td_ucred, td); 528 imgp->opened = 0; 529 } 530 vput(binvp); 531 vm_object_deallocate(imgp->object); 532 imgp->object = NULL; 533 VFS_UNLOCK_GIANT(vfslocked); 534 vfslocked = 0; 535 /* set new name to that of the interpreter */ 536 NDINIT(&nd, LOOKUP, LOCKLEAF | FOLLOW | SAVENAME | MPSAFE, 537 UIO_SYSSPACE, imgp->interpreter_name, td); 538 args->fname = imgp->interpreter_name; 539 goto interpret; 540 } 541 542 /* 543 * NB: We unlock the vnode here because it is believed that none 544 * of the sv_copyout_strings/sv_fixup operations require the vnode. 545 */ 546 VOP_UNLOCK(imgp->vp, 0); 547 548 /* 549 * Do the best to calculate the full path to the image file. 550 */ 551 if (imgp->auxargs != NULL && 552 ((args->fname != NULL && args->fname[0] == '/') || 553 vn_fullpath(td, imgp->vp, &imgp->execpath, &imgp->freepath) != 0)) 554 imgp->execpath = args->fname; 555 556 /* 557 * Copy out strings (args and env) and initialize stack base 558 */ 559 if (p->p_sysent->sv_copyout_strings) 560 stack_base = (*p->p_sysent->sv_copyout_strings)(imgp); 561 else 562 stack_base = exec_copyout_strings(imgp); 563 564 /* 565 * If custom stack fixup routine present for this process 566 * let it do the stack setup. 567 * Else stuff argument count as first item on stack 568 */ 569 if (p->p_sysent->sv_fixup != NULL) 570 (*p->p_sysent->sv_fixup)(&stack_base, imgp); 571 else 572 suword(--stack_base, imgp->args->argc); 573 574 /* 575 * For security and other reasons, the file descriptor table cannot 576 * be shared after an exec. 577 */ 578 fdunshare(p, td); 579 580 /* 581 * Malloc things before we need locks. 582 */ 583 newcred = crget(); 584 euip = uifind(attr.va_uid); 585 i = imgp->args->begin_envv - imgp->args->begin_argv; 586 /* Cache arguments if they fit inside our allowance */ 587 if (ps_arg_cache_limit >= i + sizeof(struct pargs)) { 588 newargs = pargs_alloc(i); 589 bcopy(imgp->args->begin_argv, newargs->ar_args, i); 590 } 591 592 /* close files on exec */ 593 fdcloseexec(td); 594 vn_lock(imgp->vp, LK_EXCLUSIVE | LK_RETRY); 595 596 /* Get a reference to the vnode prior to locking the proc */ 597 VREF(binvp); 598 599 /* 600 * For security and other reasons, signal handlers cannot 601 * be shared after an exec. The new process gets a copy of the old 602 * handlers. In execsigs(), the new process will have its signals 603 * reset. 604 */ 605 PROC_LOCK(p); 606 oldcred = crcopysafe(p, newcred); 607 if (sigacts_shared(p->p_sigacts)) { 608 oldsigacts = p->p_sigacts; 609 PROC_UNLOCK(p); 610 newsigacts = sigacts_alloc(); 611 sigacts_copy(newsigacts, oldsigacts); 612 PROC_LOCK(p); 613 p->p_sigacts = newsigacts; 614 } else 615 oldsigacts = NULL; 616 617 /* Stop profiling */ 618 stopprofclock(p); 619 620 /* reset caught signals */ 621 execsigs(p); 622 623 /* name this process - nameiexec(p, ndp) */ 624 bzero(p->p_comm, sizeof(p->p_comm)); 625 if (args->fname) 626 bcopy(nd.ni_cnd.cn_nameptr, p->p_comm, 627 min(nd.ni_cnd.cn_namelen, MAXCOMLEN)); 628 else if (vn_commname(binvp, p->p_comm, sizeof(p->p_comm)) != 0) 629 bcopy(fexecv_proc_title, p->p_comm, sizeof(fexecv_proc_title)); 630 bcopy(p->p_comm, td->td_name, sizeof(td->td_name)); 631 632 /* 633 * mark as execed, wakeup the process that vforked (if any) and tell 634 * it that it now has its own resources back 635 */ 636 p->p_flag |= P_EXEC; 637 if (p->p_pptr && (p->p_flag & P_PPWAIT)) { 638 p->p_flag &= ~P_PPWAIT; 639 cv_broadcast(&p->p_pwait); 640 } 641 642 /* 643 * Implement image setuid/setgid. 644 * 645 * Don't honor setuid/setgid if the filesystem prohibits it or if 646 * the process is being traced. 647 * 648 * We disable setuid/setgid/etc in compatibility mode on the basis 649 * that most setugid applications are not written with that 650 * environment in mind, and will therefore almost certainly operate 651 * incorrectly. In principle there's no reason that setugid 652 * applications might not be useful in capability mode, so we may want 653 * to reconsider this conservative design choice in the future. 654 * 655 * XXXMAC: For the time being, use NOSUID to also prohibit 656 * transitions on the file system. 657 */ 658 credential_changing = 0; 659 credential_changing |= (attr.va_mode & S_ISUID) && oldcred->cr_uid != 660 attr.va_uid; 661 credential_changing |= (attr.va_mode & S_ISGID) && oldcred->cr_gid != 662 attr.va_gid; 663#ifdef MAC 664 will_transition = mac_vnode_execve_will_transition(oldcred, imgp->vp, 665 interpvplabel, imgp); 666 credential_changing |= will_transition; 667#endif 668 669 if (credential_changing && 670#ifdef CAPABILITY_MODE 671 ((oldcred->cr_flags & CRED_FLAG_CAPMODE) == 0) && 672#endif 673 (imgp->vp->v_mount->mnt_flag & MNT_NOSUID) == 0 && 674 (p->p_flag & P_TRACED) == 0) { 675 /* 676 * Turn off syscall tracing for set-id programs, except for 677 * root. Record any set-id flags first to make sure that 678 * we do not regain any tracing during a possible block. 679 */ 680 setsugid(p); 681 682#ifdef KTRACE 683 if (priv_check_cred(oldcred, PRIV_DEBUG_DIFFCRED, 0)) 684 ktrprocexec(p, &tracecred, &tracevp); 685#endif 686 /* 687 * Close any file descriptors 0..2 that reference procfs, 688 * then make sure file descriptors 0..2 are in use. 689 * 690 * setugidsafety() may call closef() and then pfind() 691 * which may grab the process lock. 692 * fdcheckstd() may call falloc() which may block to 693 * allocate memory, so temporarily drop the process lock. 694 */ 695 PROC_UNLOCK(p); 696 VOP_UNLOCK(imgp->vp, 0); 697 setugidsafety(td); 698 error = fdcheckstd(td); 699 vn_lock(imgp->vp, LK_EXCLUSIVE | LK_RETRY); 700 if (error != 0) 701 goto done1; 702 PROC_LOCK(p); 703 /* 704 * Set the new credentials. 705 */ 706 if (attr.va_mode & S_ISUID) 707 change_euid(newcred, euip); 708 if (attr.va_mode & S_ISGID) 709 change_egid(newcred, attr.va_gid); 710#ifdef MAC 711 if (will_transition) { 712 mac_vnode_execve_transition(oldcred, newcred, imgp->vp, 713 interpvplabel, imgp); 714 } 715#endif 716 /* 717 * Implement correct POSIX saved-id behavior. 718 * 719 * XXXMAC: Note that the current logic will save the 720 * uid and gid if a MAC domain transition occurs, even 721 * though maybe it shouldn't. 722 */ 723 change_svuid(newcred, newcred->cr_uid); 724 change_svgid(newcred, newcred->cr_gid); 725 p->p_ucred = newcred; 726 newcred = NULL; 727 } else { 728 if (oldcred->cr_uid == oldcred->cr_ruid && 729 oldcred->cr_gid == oldcred->cr_rgid) 730 p->p_flag &= ~P_SUGID; 731 /* 732 * Implement correct POSIX saved-id behavior. 733 * 734 * XXX: It's not clear that the existing behavior is 735 * POSIX-compliant. A number of sources indicate that the 736 * saved uid/gid should only be updated if the new ruid is 737 * not equal to the old ruid, or the new euid is not equal 738 * to the old euid and the new euid is not equal to the old 739 * ruid. The FreeBSD code always updates the saved uid/gid. 740 * Also, this code uses the new (replaced) euid and egid as 741 * the source, which may or may not be the right ones to use. 742 */ 743 if (oldcred->cr_svuid != oldcred->cr_uid || 744 oldcred->cr_svgid != oldcred->cr_gid) { 745 change_svuid(newcred, newcred->cr_uid); 746 change_svgid(newcred, newcred->cr_gid); 747 p->p_ucred = newcred; 748 newcred = NULL; 749 } 750 } 751 752 /* 753 * Store the vp for use in procfs. This vnode was referenced prior 754 * to locking the proc lock. 755 */ 756 textvp = p->p_textvp; 757 p->p_textvp = binvp; 758 759#ifdef KDTRACE_HOOKS 760 /* 761 * Tell the DTrace fasttrap provider about the exec if it 762 * has declared an interest. 763 */ 764 if (dtrace_fasttrap_exec) 765 dtrace_fasttrap_exec(p); 766#endif 767 768 /* 769 * Notify others that we exec'd, and clear the P_INEXEC flag 770 * as we're now a bona fide freshly-execed process. 771 */ 772 KNOTE_LOCKED(&p->p_klist, NOTE_EXEC); 773 p->p_flag &= ~P_INEXEC; 774 775 /* 776 * If tracing the process, trap to the debugger so that 777 * breakpoints can be set before the program executes. We 778 * have to use tdsignal() to deliver the signal to the current 779 * thread since any other threads in this process will exit if 780 * execve() succeeds. 781 */ 782 if (p->p_flag & P_TRACED) 783 tdsignal(td, SIGTRAP); 784 785 /* clear "fork but no exec" flag, as we _are_ execing */ 786 p->p_acflag &= ~AFORK; 787 788 /* 789 * Free any previous argument cache and replace it with 790 * the new argument cache, if any. 791 */ 792 oldargs = p->p_args; 793 p->p_args = newargs; 794 newargs = NULL; 795 796#ifdef HWPMC_HOOKS 797 /* 798 * Check if system-wide sampling is in effect or if the 799 * current process is using PMCs. If so, do exec() time 800 * processing. This processing needs to happen AFTER the 801 * P_INEXEC flag is cleared. 802 * 803 * The proc lock needs to be released before taking the PMC 804 * SX. 805 */ 806 if (PMC_SYSTEM_SAMPLING_ACTIVE() || PMC_PROC_IS_USING_PMCS(p)) { 807 PROC_UNLOCK(p); 808 VOP_UNLOCK(imgp->vp, 0); 809 pe.pm_credentialschanged = credential_changing; 810 pe.pm_entryaddr = imgp->entry_addr; 811 812 PMC_CALL_HOOK_X(td, PMC_FN_PROCESS_EXEC, (void *) &pe); 813 vn_lock(imgp->vp, LK_EXCLUSIVE | LK_RETRY); 814 } else 815 PROC_UNLOCK(p); 816#else /* !HWPMC_HOOKS */ 817 PROC_UNLOCK(p); 818#endif 819 820 /* Set values passed into the program in registers. */ 821 if (p->p_sysent->sv_setregs) 822 (*p->p_sysent->sv_setregs)(td, imgp, 823 (u_long)(uintptr_t)stack_base); 824 else 825 exec_setregs(td, imgp, (u_long)(uintptr_t)stack_base); 826 827 vfs_mark_atime(imgp->vp, td->td_ucred); 828 829 SDT_PROBE(proc, kernel, , exec_success, args->fname, 0, 0, 0, 0); 830 831done1: 832 /* 833 * Free any resources malloc'd earlier that we didn't use. 834 */ 835 uifree(euip); 836 if (newcred == NULL) 837 crfree(oldcred); 838 else 839 crfree(newcred); 840 VOP_UNLOCK(imgp->vp, 0); 841 842 /* 843 * Handle deferred decrement of ref counts. 844 */ 845 if (textvp != NULL) { 846 int tvfslocked; 847 848 tvfslocked = VFS_LOCK_GIANT(textvp->v_mount); 849 vrele(textvp); 850 VFS_UNLOCK_GIANT(tvfslocked); 851 } 852 if (binvp && error != 0) 853 vrele(binvp); 854#ifdef KTRACE 855 if (tracevp != NULL) { 856 int tvfslocked; 857 858 tvfslocked = VFS_LOCK_GIANT(tracevp->v_mount); 859 vrele(tracevp); 860 VFS_UNLOCK_GIANT(tvfslocked); 861 } 862 if (tracecred != NULL) 863 crfree(tracecred); 864#endif 865 vn_lock(imgp->vp, LK_EXCLUSIVE | LK_RETRY); 866 pargs_drop(oldargs); 867 pargs_drop(newargs); 868 if (oldsigacts != NULL) 869 sigacts_free(oldsigacts); 870 871exec_fail_dealloc: 872 873 /* 874 * free various allocated resources 875 */ 876 if (imgp->firstpage != NULL) 877 exec_unmap_first_page(imgp); 878 879 if (imgp->vp != NULL) { 880 if (args->fname) 881 NDFREE(&nd, NDF_ONLY_PNBUF); 882 if (imgp->opened) 883 VOP_CLOSE(imgp->vp, FREAD, td->td_ucred, td); 884 vput(imgp->vp); 885 } 886 887 if (imgp->object != NULL) 888 vm_object_deallocate(imgp->object); 889 890 free(imgp->freepath, M_TEMP); 891 892 if (error == 0) { 893 PROC_LOCK(p); 894 td->td_dbgflags |= TDB_EXEC; 895 PROC_UNLOCK(p); 896 897 /* 898 * Stop the process here if its stop event mask has 899 * the S_EXEC bit set. 900 */ 901 STOPEVENT(p, S_EXEC, 0); 902 goto done2; 903 } 904 905exec_fail: 906 /* we're done here, clear P_INEXEC */ 907 PROC_LOCK(p); 908 p->p_flag &= ~P_INEXEC; 909 PROC_UNLOCK(p); 910 911 SDT_PROBE(proc, kernel, , exec_failure, error, 0, 0, 0, 0); 912 913done2: 914#ifdef MAC 915 mac_execve_exit(imgp); 916 mac_execve_interpreter_exit(interpvplabel); 917#endif 918 VFS_UNLOCK_GIANT(vfslocked); 919 exec_free_args(args); 920 921 if (error && imgp->vmspace_destroyed) { 922 /* sorry, no more process anymore. exit gracefully */ 923 exit1(td, W_EXITCODE(0, SIGABRT)); 924 /* NOT REACHED */ 925 } 926 927#ifdef KTRACE 928 if (error == 0) 929 ktrprocctor(p); 930#endif 931 932 return (error); 933} 934 935int 936exec_map_first_page(imgp) 937 struct image_params *imgp; 938{ 939 int rv, i; 940 int initial_pagein; 941 vm_page_t ma[VM_INITIAL_PAGEIN]; 942 vm_object_t object; 943 944 if (imgp->firstpage != NULL) 945 exec_unmap_first_page(imgp); 946 947 object = imgp->vp->v_object; 948 if (object == NULL) 949 return (EACCES); 950 VM_OBJECT_LOCK(object); 951#if VM_NRESERVLEVEL > 0 952 if ((object->flags & OBJ_COLORED) == 0) { 953 object->flags |= OBJ_COLORED; 954 object->pg_color = 0; 955 } 956#endif 957 ma[0] = vm_page_grab(object, 0, VM_ALLOC_NORMAL | VM_ALLOC_RETRY); 958 if (ma[0]->valid != VM_PAGE_BITS_ALL) { 959 initial_pagein = VM_INITIAL_PAGEIN; 960 if (initial_pagein > object->size) 961 initial_pagein = object->size; 962 for (i = 1; i < initial_pagein; i++) { 963 if ((ma[i] = vm_page_next(ma[i - 1])) != NULL) { 964 if (ma[i]->valid) 965 break; 966 if ((ma[i]->oflags & VPO_BUSY) || ma[i]->busy) 967 break; 968 vm_page_busy(ma[i]); 969 } else { 970 ma[i] = vm_page_alloc(object, i, 971 VM_ALLOC_NORMAL | VM_ALLOC_IFNOTCACHED); 972 if (ma[i] == NULL) 973 break; 974 } 975 } 976 initial_pagein = i; 977 rv = vm_pager_get_pages(object, ma, initial_pagein, 0); 978 ma[0] = vm_page_lookup(object, 0); 979 if ((rv != VM_PAGER_OK) || (ma[0] == NULL)) { 980 if (ma[0] != NULL) { 981 vm_page_lock(ma[0]); 982 vm_page_free(ma[0]); 983 vm_page_unlock(ma[0]); 984 } 985 VM_OBJECT_UNLOCK(object); 986 return (EIO); 987 } 988 } 989 vm_page_lock(ma[0]); 990 vm_page_hold(ma[0]); 991 vm_page_unlock(ma[0]); 992 vm_page_wakeup(ma[0]); 993 VM_OBJECT_UNLOCK(object); 994 995 imgp->firstpage = sf_buf_alloc(ma[0], 0); 996 imgp->image_header = (char *)sf_buf_kva(imgp->firstpage); 997 998 return (0); 999} 1000 1001void 1002exec_unmap_first_page(imgp) 1003 struct image_params *imgp; 1004{ 1005 vm_page_t m; 1006 1007 if (imgp->firstpage != NULL) { 1008 m = sf_buf_page(imgp->firstpage); 1009 sf_buf_free(imgp->firstpage); 1010 imgp->firstpage = NULL; 1011 vm_page_lock(m); 1012 vm_page_unhold(m); 1013 vm_page_unlock(m); 1014 } 1015} 1016 1017/* 1018 * Destroy old address space, and allocate a new stack 1019 * The new stack is only SGROWSIZ large because it is grown 1020 * automatically in trap.c. 1021 */ 1022int 1023exec_new_vmspace(imgp, sv) 1024 struct image_params *imgp; 1025 struct sysentvec *sv; 1026{ 1027 int error; 1028 struct proc *p = imgp->proc; 1029 struct vmspace *vmspace = p->p_vmspace; 1030 vm_object_t obj; 1031 vm_offset_t sv_minuser, stack_addr; 1032 vm_map_t map; 1033 u_long ssiz; 1034 1035 imgp->vmspace_destroyed = 1; 1036 imgp->sysent = sv; 1037 1038 /* May be called with Giant held */ 1039 EVENTHANDLER_INVOKE(process_exec, p, imgp); 1040 1041 /* 1042 * Blow away entire process VM, if address space not shared, 1043 * otherwise, create a new VM space so that other threads are 1044 * not disrupted 1045 */ 1046 map = &vmspace->vm_map; 1047 if (map_at_zero) 1048 sv_minuser = sv->sv_minuser; 1049 else 1050 sv_minuser = MAX(sv->sv_minuser, PAGE_SIZE); 1051 if (vmspace->vm_refcnt == 1 && vm_map_min(map) == sv_minuser && 1052 vm_map_max(map) == sv->sv_maxuser) { 1053 shmexit(vmspace); 1054 pmap_remove_pages(vmspace_pmap(vmspace)); 1055 vm_map_remove(map, vm_map_min(map), vm_map_max(map)); 1056 } else { 1057 error = vmspace_exec(p, sv_minuser, sv->sv_maxuser); 1058 if (error) 1059 return (error); 1060 vmspace = p->p_vmspace; 1061 map = &vmspace->vm_map; 1062 } 1063 1064 /* Map a shared page */ 1065 obj = sv->sv_shared_page_obj; 1066 if (obj != NULL) { 1067 vm_object_reference(obj); 1068 error = vm_map_fixed(map, obj, 0, 1069 sv->sv_shared_page_base, sv->sv_shared_page_len, 1070 VM_PROT_READ | VM_PROT_EXECUTE, VM_PROT_ALL, 1071 MAP_COPY_ON_WRITE | MAP_ACC_NO_CHARGE); 1072 if (error) { 1073 vm_object_deallocate(obj); 1074 return (error); 1075 } 1076 } 1077 1078 /* Allocate a new stack */ 1079 if (sv->sv_maxssiz != NULL) 1080 ssiz = *sv->sv_maxssiz; 1081 else 1082 ssiz = maxssiz; 1083 stack_addr = sv->sv_usrstack - ssiz; 1084 error = vm_map_stack(map, stack_addr, (vm_size_t)ssiz, 1085 obj != NULL && imgp->stack_prot != 0 ? imgp->stack_prot : 1086 sv->sv_stackprot, 1087 VM_PROT_ALL, MAP_STACK_GROWS_DOWN); 1088 if (error) 1089 return (error); 1090 1091#ifdef __ia64__ 1092 /* Allocate a new register stack */ 1093 stack_addr = IA64_BACKINGSTORE; 1094 error = vm_map_stack(map, stack_addr, (vm_size_t)ssiz, 1095 sv->sv_stackprot, VM_PROT_ALL, MAP_STACK_GROWS_UP); 1096 if (error) 1097 return (error); 1098#endif 1099 1100 /* vm_ssize and vm_maxsaddr are somewhat antiquated concepts in the 1101 * VM_STACK case, but they are still used to monitor the size of the 1102 * process stack so we can check the stack rlimit. 1103 */ 1104 vmspace->vm_ssize = sgrowsiz >> PAGE_SHIFT; 1105 vmspace->vm_maxsaddr = (char *)sv->sv_usrstack - ssiz; 1106 1107 return (0); 1108} 1109 1110/* 1111 * Copy out argument and environment strings from the old process address 1112 * space into the temporary string buffer. 1113 */ 1114int 1115exec_copyin_args(struct image_args *args, char *fname, 1116 enum uio_seg segflg, char **argv, char **envv) 1117{ 1118 char *argp, *envp; 1119 int error; 1120 size_t length; 1121 1122 bzero(args, sizeof(*args)); 1123 if (argv == NULL) 1124 return (EFAULT); 1125 1126 /* 1127 * Allocate demand-paged memory for the file name, argument, and 1128 * environment strings. 1129 */ 1130 error = exec_alloc_args(args); 1131 if (error != 0) 1132 return (error); 1133 1134 /* 1135 * Copy the file name. 1136 */ 1137 if (fname != NULL) { 1138 args->fname = args->buf; 1139 error = (segflg == UIO_SYSSPACE) ? 1140 copystr(fname, args->fname, PATH_MAX, &length) : 1141 copyinstr(fname, args->fname, PATH_MAX, &length); 1142 if (error != 0) 1143 goto err_exit; 1144 } else 1145 length = 0; 1146 1147 args->begin_argv = args->buf + length; 1148 args->endp = args->begin_argv; 1149 args->stringspace = ARG_MAX; 1150 1151 /* 1152 * extract arguments first 1153 */ 1154 while ((argp = (caddr_t) (intptr_t) fuword(argv++))) { 1155 if (argp == (caddr_t) -1) { 1156 error = EFAULT; 1157 goto err_exit; 1158 } 1159 if ((error = copyinstr(argp, args->endp, 1160 args->stringspace, &length))) { 1161 if (error == ENAMETOOLONG) 1162 error = E2BIG; 1163 goto err_exit; 1164 } 1165 args->stringspace -= length; 1166 args->endp += length; 1167 args->argc++; 1168 } 1169 1170 args->begin_envv = args->endp; 1171 1172 /* 1173 * extract environment strings 1174 */ 1175 if (envv) { 1176 while ((envp = (caddr_t)(intptr_t)fuword(envv++))) { 1177 if (envp == (caddr_t)-1) { 1178 error = EFAULT; 1179 goto err_exit; 1180 } 1181 if ((error = copyinstr(envp, args->endp, 1182 args->stringspace, &length))) { 1183 if (error == ENAMETOOLONG) 1184 error = E2BIG; 1185 goto err_exit; 1186 } 1187 args->stringspace -= length; 1188 args->endp += length; 1189 args->envc++; 1190 } 1191 } 1192 1193 return (0); 1194 1195err_exit: 1196 exec_free_args(args); 1197 return (error); 1198} 1199 1200/* 1201 * Allocate temporary demand-paged, zero-filled memory for the file name, 1202 * argument, and environment strings. Returns zero if the allocation succeeds 1203 * and ENOMEM otherwise. 1204 */ 1205int 1206exec_alloc_args(struct image_args *args) 1207{ 1208 1209 args->buf = (char *)kmem_alloc_wait(exec_map, PATH_MAX + ARG_MAX); 1210 return (args->buf != NULL ? 0 : ENOMEM); 1211} 1212 1213void 1214exec_free_args(struct image_args *args) 1215{ 1216 1217 if (args->buf != NULL) { 1218 kmem_free_wakeup(exec_map, (vm_offset_t)args->buf, 1219 PATH_MAX + ARG_MAX); 1220 args->buf = NULL; 1221 } 1222 if (args->fname_buf != NULL) { 1223 free(args->fname_buf, M_TEMP); 1224 args->fname_buf = NULL; 1225 } 1226} 1227 1228/* 1229 * Copy strings out to the new process address space, constructing new arg 1230 * and env vector tables. Return a pointer to the base so that it can be used 1231 * as the initial stack pointer. 1232 */ 1233register_t * 1234exec_copyout_strings(imgp) 1235 struct image_params *imgp; 1236{ 1237 int argc, envc; 1238 char **vectp; 1239 char *stringp, *destp; 1240 register_t *stack_base; 1241 struct ps_strings *arginfo; 1242 struct proc *p; 1243 size_t execpath_len; 1244 int szsigcode, szps; 1245 char canary[sizeof(long) * 8]; 1246 1247 szps = sizeof(pagesizes[0]) * MAXPAGESIZES; 1248 /* 1249 * Calculate string base and vector table pointers. 1250 * Also deal with signal trampoline code for this exec type. 1251 */ 1252 if (imgp->execpath != NULL && imgp->auxargs != NULL) 1253 execpath_len = strlen(imgp->execpath) + 1; 1254 else 1255 execpath_len = 0; 1256 p = imgp->proc; 1257 szsigcode = 0; 1258 arginfo = (struct ps_strings *)p->p_sysent->sv_psstrings; 1259 if (p->p_sysent->sv_sigcode_base == 0) { 1260 if (p->p_sysent->sv_szsigcode != NULL) 1261 szsigcode = *(p->p_sysent->sv_szsigcode); 1262 } 1263 destp = (caddr_t)arginfo - szsigcode - SPARE_USRSPACE - 1264 roundup(execpath_len, sizeof(char *)) - 1265 roundup(sizeof(canary), sizeof(char *)) - 1266 roundup(szps, sizeof(char *)) - 1267 roundup((ARG_MAX - imgp->args->stringspace), sizeof(char *)); 1268 1269 /* 1270 * install sigcode 1271 */ 1272 if (szsigcode != 0) 1273 copyout(p->p_sysent->sv_sigcode, ((caddr_t)arginfo - 1274 szsigcode), szsigcode); 1275 1276 /* 1277 * Copy the image path for the rtld. 1278 */ 1279 if (execpath_len != 0) { 1280 imgp->execpathp = (uintptr_t)arginfo - szsigcode - execpath_len; 1281 copyout(imgp->execpath, (void *)imgp->execpathp, 1282 execpath_len); 1283 } 1284 1285 /* 1286 * Prepare the canary for SSP. 1287 */ 1288 arc4rand(canary, sizeof(canary), 0); 1289 imgp->canary = (uintptr_t)arginfo - szsigcode - execpath_len - 1290 sizeof(canary); 1291 copyout(canary, (void *)imgp->canary, sizeof(canary)); 1292 imgp->canarylen = sizeof(canary); 1293 1294 /* 1295 * Prepare the pagesizes array. 1296 */ 1297 imgp->pagesizes = (uintptr_t)arginfo - szsigcode - execpath_len - 1298 roundup(sizeof(canary), sizeof(char *)) - szps; 1299 copyout(pagesizes, (void *)imgp->pagesizes, szps); 1300 imgp->pagesizeslen = szps; 1301 1302 /* 1303 * If we have a valid auxargs ptr, prepare some room 1304 * on the stack. 1305 */ 1306 if (imgp->auxargs) { 1307 /* 1308 * 'AT_COUNT*2' is size for the ELF Auxargs data. This is for 1309 * lower compatibility. 1310 */ 1311 imgp->auxarg_size = (imgp->auxarg_size) ? imgp->auxarg_size : 1312 (AT_COUNT * 2); 1313 /* 1314 * The '+ 2' is for the null pointers at the end of each of 1315 * the arg and env vector sets,and imgp->auxarg_size is room 1316 * for argument of Runtime loader. 1317 */ 1318 vectp = (char **)(destp - (imgp->args->argc + 1319 imgp->args->envc + 2 + imgp->auxarg_size) 1320 * sizeof(char *)); 1321 } else { 1322 /* 1323 * The '+ 2' is for the null pointers at the end of each of 1324 * the arg and env vector sets 1325 */ 1326 vectp = (char **)(destp - (imgp->args->argc + imgp->args->envc + 2) * 1327 sizeof(char *)); 1328 } 1329 1330 /* 1331 * vectp also becomes our initial stack base 1332 */ 1333 stack_base = (register_t *)vectp; 1334 1335 stringp = imgp->args->begin_argv; 1336 argc = imgp->args->argc; 1337 envc = imgp->args->envc; 1338 1339 /* 1340 * Copy out strings - arguments and environment. 1341 */ 1342 copyout(stringp, destp, ARG_MAX - imgp->args->stringspace); 1343 1344 /* 1345 * Fill in "ps_strings" struct for ps, w, etc. 1346 */ 1347 suword(&arginfo->ps_argvstr, (long)(intptr_t)vectp); 1348 suword32(&arginfo->ps_nargvstr, argc); 1349 1350 /* 1351 * Fill in argument portion of vector table. 1352 */ 1353 for (; argc > 0; --argc) { 1354 suword(vectp++, (long)(intptr_t)destp); 1355 while (*stringp++ != 0) 1356 destp++; 1357 destp++; 1358 } 1359 1360 /* a null vector table pointer separates the argp's from the envp's */ 1361 suword(vectp++, 0); 1362 1363 suword(&arginfo->ps_envstr, (long)(intptr_t)vectp); 1364 suword32(&arginfo->ps_nenvstr, envc); 1365 1366 /* 1367 * Fill in environment portion of vector table. 1368 */ 1369 for (; envc > 0; --envc) { 1370 suword(vectp++, (long)(intptr_t)destp); 1371 while (*stringp++ != 0) 1372 destp++; 1373 destp++; 1374 } 1375 1376 /* end of vector table is a null pointer */ 1377 suword(vectp, 0); 1378 1379 return (stack_base); 1380} 1381 1382/* 1383 * Check permissions of file to execute. 1384 * Called with imgp->vp locked. 1385 * Return 0 for success or error code on failure. 1386 */ 1387int 1388exec_check_permissions(imgp) 1389 struct image_params *imgp; 1390{ 1391 struct vnode *vp = imgp->vp; 1392 struct vattr *attr = imgp->attr; 1393 struct thread *td; 1394 int error; 1395 1396 td = curthread; 1397 1398 /* Get file attributes */ 1399 error = VOP_GETATTR(vp, attr, td->td_ucred); 1400 if (error) 1401 return (error); 1402 1403#ifdef MAC 1404 error = mac_vnode_check_exec(td->td_ucred, imgp->vp, imgp); 1405 if (error) 1406 return (error); 1407#endif 1408 1409 /* 1410 * 1) Check if file execution is disabled for the filesystem that 1411 * this file resides on. 1412 * 2) Ensure that at least one execute bit is on. Otherwise, a 1413 * privileged user will always succeed, and we don't want this 1414 * to happen unless the file really is executable. 1415 * 3) Ensure that the file is a regular file. 1416 */ 1417 if ((vp->v_mount->mnt_flag & MNT_NOEXEC) || 1418 (attr->va_mode & (S_IXUSR | S_IXGRP | S_IXOTH)) == 0 || 1419 (attr->va_type != VREG)) 1420 return (EACCES); 1421 1422 /* 1423 * Zero length files can't be exec'd 1424 */ 1425 if (attr->va_size == 0) 1426 return (ENOEXEC); 1427 1428 /* 1429 * Check for execute permission to file based on current credentials. 1430 */ 1431 error = VOP_ACCESS(vp, VEXEC, td->td_ucred, td); 1432 if (error) 1433 return (error); 1434 1435 /* 1436 * Check number of open-for-writes on the file and deny execution 1437 * if there are any. 1438 */ 1439 if (vp->v_writecount) 1440 return (ETXTBSY); 1441 1442 /* 1443 * Call filesystem specific open routine (which does nothing in the 1444 * general case). 1445 */ 1446 error = VOP_OPEN(vp, FREAD, td->td_ucred, td, NULL); 1447 if (error == 0) 1448 imgp->opened = 1; 1449 return (error); 1450} 1451 1452/* 1453 * Exec handler registration 1454 */ 1455int 1456exec_register(execsw_arg) 1457 const struct execsw *execsw_arg; 1458{ 1459 const struct execsw **es, **xs, **newexecsw; 1460 int count = 2; /* New slot and trailing NULL */ 1461 1462 if (execsw) 1463 for (es = execsw; *es; es++) 1464 count++; 1465 newexecsw = malloc(count * sizeof(*es), M_TEMP, M_WAITOK); 1466 if (newexecsw == NULL) 1467 return (ENOMEM); 1468 xs = newexecsw; 1469 if (execsw) 1470 for (es = execsw; *es; es++) 1471 *xs++ = *es; 1472 *xs++ = execsw_arg; 1473 *xs = NULL; 1474 if (execsw) 1475 free(execsw, M_TEMP); 1476 execsw = newexecsw; 1477 return (0); 1478} 1479 1480int 1481exec_unregister(execsw_arg) 1482 const struct execsw *execsw_arg; 1483{ 1484 const struct execsw **es, **xs, **newexecsw; 1485 int count = 1; 1486 1487 if (execsw == NULL) 1488 panic("unregister with no handlers left?\n"); 1489 1490 for (es = execsw; *es; es++) { 1491 if (*es == execsw_arg) 1492 break; 1493 } 1494 if (*es == NULL) 1495 return (ENOENT); 1496 for (es = execsw; *es; es++) 1497 if (*es != execsw_arg) 1498 count++; 1499 newexecsw = malloc(count * sizeof(*es), M_TEMP, M_WAITOK); 1500 if (newexecsw == NULL) 1501 return (ENOMEM); 1502 xs = newexecsw; 1503 for (es = execsw; *es; es++) 1504 if (*es != execsw_arg) 1505 *xs++ = *es; 1506 *xs = NULL; 1507 if (execsw) 1508 free(execsw, M_TEMP); 1509 execsw = newexecsw; 1510 return (0); 1511} 1512 1513static vm_object_t shared_page_obj; 1514static int shared_page_free; 1515 1516int 1517shared_page_fill(int size, int align, const char *data) 1518{ 1519 vm_page_t m; 1520 struct sf_buf *s; 1521 vm_offset_t sk; 1522 int res; 1523 1524 VM_OBJECT_LOCK(shared_page_obj); 1525 m = vm_page_grab(shared_page_obj, 0, VM_ALLOC_RETRY); 1526 res = roundup(shared_page_free, align); 1527 if (res + size >= IDX_TO_OFF(shared_page_obj->size)) 1528 res = -1; 1529 else { 1530 VM_OBJECT_UNLOCK(shared_page_obj); 1531 s = sf_buf_alloc(m, SFB_DEFAULT); 1532 sk = sf_buf_kva(s); 1533 bcopy(data, (void *)(sk + res), size); 1534 shared_page_free = res + size; 1535 sf_buf_free(s); 1536 VM_OBJECT_LOCK(shared_page_obj); 1537 } 1538 vm_page_wakeup(m); 1539 VM_OBJECT_UNLOCK(shared_page_obj); 1540 return (res); 1541} 1542 1543static void 1544shared_page_init(void *dummy __unused) 1545{ 1546 vm_page_t m; 1547 1548 shared_page_obj = vm_pager_allocate(OBJT_PHYS, 0, PAGE_SIZE, 1549 VM_PROT_DEFAULT, 0, NULL); 1550 VM_OBJECT_LOCK(shared_page_obj); 1551 m = vm_page_grab(shared_page_obj, 0, VM_ALLOC_RETRY | VM_ALLOC_NOBUSY | 1552 VM_ALLOC_ZERO); 1553 m->valid = VM_PAGE_BITS_ALL; 1554 VM_OBJECT_UNLOCK(shared_page_obj); 1555} 1556 1557SYSINIT(shp, SI_SUB_EXEC, SI_ORDER_FIRST, (sysinit_cfunc_t)shared_page_init, 1558 NULL); 1559 1560void 1561exec_sysvec_init(void *param) 1562{ 1563 struct sysentvec *sv; 1564 1565 sv = (struct sysentvec *)param; 1566 1567 if ((sv->sv_flags & SV_SHP) == 0) 1568 return; 1569 sv->sv_shared_page_obj = shared_page_obj; 1570 sv->sv_sigcode_base = sv->sv_shared_page_base + 1571 shared_page_fill(*(sv->sv_szsigcode), 16, sv->sv_sigcode); 1572}
| 119 120SYSCTL_PROC(_kern, OID_AUTO, stackprot, CTLTYPE_INT|CTLFLAG_RD, 121 NULL, 0, sysctl_kern_stackprot, "I", ""); 122 123u_long ps_arg_cache_limit = PAGE_SIZE / 16; 124SYSCTL_ULONG(_kern, OID_AUTO, ps_arg_cache_limit, CTLFLAG_RW, 125 &ps_arg_cache_limit, 0, ""); 126 127static int map_at_zero = 0; 128TUNABLE_INT("security.bsd.map_at_zero", &map_at_zero); 129SYSCTL_INT(_security_bsd, OID_AUTO, map_at_zero, CTLFLAG_RW, &map_at_zero, 0, 130 "Permit processes to map an object at virtual address 0."); 131 132static int 133sysctl_kern_ps_strings(SYSCTL_HANDLER_ARGS) 134{ 135 struct proc *p; 136 int error; 137 138 p = curproc; 139#ifdef SCTL_MASK32 140 if (req->flags & SCTL_MASK32) { 141 unsigned int val; 142 val = (unsigned int)p->p_sysent->sv_psstrings; 143 error = SYSCTL_OUT(req, &val, sizeof(val)); 144 } else 145#endif 146 error = SYSCTL_OUT(req, &p->p_sysent->sv_psstrings, 147 sizeof(p->p_sysent->sv_psstrings)); 148 return error; 149} 150 151static int 152sysctl_kern_usrstack(SYSCTL_HANDLER_ARGS) 153{ 154 struct proc *p; 155 int error; 156 157 p = curproc; 158#ifdef SCTL_MASK32 159 if (req->flags & SCTL_MASK32) { 160 unsigned int val; 161 val = (unsigned int)p->p_sysent->sv_usrstack; 162 error = SYSCTL_OUT(req, &val, sizeof(val)); 163 } else 164#endif 165 error = SYSCTL_OUT(req, &p->p_sysent->sv_usrstack, 166 sizeof(p->p_sysent->sv_usrstack)); 167 return error; 168} 169 170static int 171sysctl_kern_stackprot(SYSCTL_HANDLER_ARGS) 172{ 173 struct proc *p; 174 175 p = curproc; 176 return (SYSCTL_OUT(req, &p->p_sysent->sv_stackprot, 177 sizeof(p->p_sysent->sv_stackprot))); 178} 179 180/* 181 * Each of the items is a pointer to a `const struct execsw', hence the 182 * double pointer here. 183 */ 184static const struct execsw **execsw; 185 186#ifndef _SYS_SYSPROTO_H_ 187struct execve_args { 188 char *fname; 189 char **argv; 190 char **envv; 191}; 192#endif 193 194int 195execve(td, uap) 196 struct thread *td; 197 struct execve_args /* { 198 char *fname; 199 char **argv; 200 char **envv; 201 } */ *uap; 202{ 203 int error; 204 struct image_args args; 205 206 error = exec_copyin_args(&args, uap->fname, UIO_USERSPACE, 207 uap->argv, uap->envv); 208 if (error == 0) 209 error = kern_execve(td, &args, NULL); 210 return (error); 211} 212 213#ifndef _SYS_SYSPROTO_H_ 214struct fexecve_args { 215 int fd; 216 char **argv; 217 char **envv; 218} 219#endif 220int 221fexecve(struct thread *td, struct fexecve_args *uap) 222{ 223 int error; 224 struct image_args args; 225 226 error = exec_copyin_args(&args, NULL, UIO_SYSSPACE, 227 uap->argv, uap->envv); 228 if (error == 0) { 229 args.fd = uap->fd; 230 error = kern_execve(td, &args, NULL); 231 } 232 return (error); 233} 234 235#ifndef _SYS_SYSPROTO_H_ 236struct __mac_execve_args { 237 char *fname; 238 char **argv; 239 char **envv; 240 struct mac *mac_p; 241}; 242#endif 243 244int 245__mac_execve(td, uap) 246 struct thread *td; 247 struct __mac_execve_args /* { 248 char *fname; 249 char **argv; 250 char **envv; 251 struct mac *mac_p; 252 } */ *uap; 253{ 254#ifdef MAC 255 int error; 256 struct image_args args; 257 258 error = exec_copyin_args(&args, uap->fname, UIO_USERSPACE, 259 uap->argv, uap->envv); 260 if (error == 0) 261 error = kern_execve(td, &args, uap->mac_p); 262 return (error); 263#else 264 return (ENOSYS); 265#endif 266} 267 268/* 269 * XXX: kern_execve has the astonishing property of not always returning to 270 * the caller. If sufficiently bad things happen during the call to 271 * do_execve(), it can end up calling exit1(); as a result, callers must 272 * avoid doing anything which they might need to undo (e.g., allocating 273 * memory). 274 */ 275int 276kern_execve(td, args, mac_p) 277 struct thread *td; 278 struct image_args *args; 279 struct mac *mac_p; 280{ 281 struct proc *p = td->td_proc; 282 int error; 283 284 AUDIT_ARG_ARGV(args->begin_argv, args->argc, 285 args->begin_envv - args->begin_argv); 286 AUDIT_ARG_ENVV(args->begin_envv, args->envc, 287 args->endp - args->begin_envv); 288 if (p->p_flag & P_HADTHREADS) { 289 PROC_LOCK(p); 290 if (thread_single(SINGLE_BOUNDARY)) { 291 PROC_UNLOCK(p); 292 exec_free_args(args); 293 return (ERESTART); /* Try again later. */ 294 } 295 PROC_UNLOCK(p); 296 } 297 298 error = do_execve(td, args, mac_p); 299 300 if (p->p_flag & P_HADTHREADS) { 301 PROC_LOCK(p); 302 /* 303 * If success, we upgrade to SINGLE_EXIT state to 304 * force other threads to suicide. 305 */ 306 if (error == 0) 307 thread_single(SINGLE_EXIT); 308 else 309 thread_single_end(); 310 PROC_UNLOCK(p); 311 } 312 313 return (error); 314} 315 316/* 317 * In-kernel implementation of execve(). All arguments are assumed to be 318 * userspace pointers from the passed thread. 319 */ 320static int 321do_execve(td, args, mac_p) 322 struct thread *td; 323 struct image_args *args; 324 struct mac *mac_p; 325{ 326 struct proc *p = td->td_proc; 327 struct nameidata nd; 328 struct ucred *newcred = NULL, *oldcred; 329 struct uidinfo *euip; 330 register_t *stack_base; 331 int error, i; 332 struct image_params image_params, *imgp; 333 struct vattr attr; 334 int (*img_first)(struct image_params *); 335 struct pargs *oldargs = NULL, *newargs = NULL; 336 struct sigacts *oldsigacts, *newsigacts; 337#ifdef KTRACE 338 struct vnode *tracevp = NULL; 339 struct ucred *tracecred = NULL; 340#endif 341 struct vnode *textvp = NULL, *binvp = NULL; 342 int credential_changing; 343 int vfslocked; 344 int textset; 345#ifdef MAC 346 struct label *interpvplabel = NULL; 347 int will_transition; 348#endif 349#ifdef HWPMC_HOOKS 350 struct pmckern_procexec pe; 351#endif 352 static const char fexecv_proc_title[] = "(fexecv)"; 353 354 vfslocked = 0; 355 imgp = &image_params; 356 357 /* 358 * Lock the process and set the P_INEXEC flag to indicate that 359 * it should be left alone until we're done here. This is 360 * necessary to avoid race conditions - e.g. in ptrace() - 361 * that might allow a local user to illicitly obtain elevated 362 * privileges. 363 */ 364 PROC_LOCK(p); 365 KASSERT((p->p_flag & P_INEXEC) == 0, 366 ("%s(): process already has P_INEXEC flag", __func__)); 367 p->p_flag |= P_INEXEC; 368 PROC_UNLOCK(p); 369 370 /* 371 * Initialize part of the common data 372 */ 373 imgp->proc = p; 374 imgp->execlabel = NULL; 375 imgp->attr = &attr; 376 imgp->entry_addr = 0; 377 imgp->reloc_base = 0; 378 imgp->vmspace_destroyed = 0; 379 imgp->interpreted = 0; 380 imgp->opened = 0; 381 imgp->interpreter_name = NULL; 382 imgp->auxargs = NULL; 383 imgp->vp = NULL; 384 imgp->object = NULL; 385 imgp->firstpage = NULL; 386 imgp->ps_strings = 0; 387 imgp->auxarg_size = 0; 388 imgp->args = args; 389 imgp->execpath = imgp->freepath = NULL; 390 imgp->execpathp = 0; 391 imgp->canary = 0; 392 imgp->canarylen = 0; 393 imgp->pagesizes = 0; 394 imgp->pagesizeslen = 0; 395 imgp->stack_prot = 0; 396 397#ifdef MAC 398 error = mac_execve_enter(imgp, mac_p); 399 if (error) 400 goto exec_fail; 401#endif 402 403 imgp->image_header = NULL; 404 405 /* 406 * Translate the file name. namei() returns a vnode pointer 407 * in ni_vp amoung other things. 408 * 409 * XXXAUDIT: It would be desirable to also audit the name of the 410 * interpreter if this is an interpreted binary. 411 */ 412 if (args->fname != NULL) { 413 NDINIT(&nd, LOOKUP, ISOPEN | LOCKLEAF | FOLLOW | SAVENAME 414 | MPSAFE | AUDITVNODE1, UIO_SYSSPACE, args->fname, td); 415 } 416 417 SDT_PROBE(proc, kernel, , exec, args->fname, 0, 0, 0, 0 ); 418 419interpret: 420 if (args->fname != NULL) { 421#ifdef CAPABILITY_MODE 422 /* 423 * While capability mode can't reach this point via direct 424 * path arguments to execve(), we also don't allow 425 * interpreters to be used in capability mode (for now). 426 * Catch indirect lookups and return a permissions error. 427 */ 428 if (IN_CAPABILITY_MODE(td)) { 429 error = ECAPMODE; 430 goto exec_fail; 431 } 432#endif 433 error = namei(&nd); 434 if (error) 435 goto exec_fail; 436 437 vfslocked = NDHASGIANT(&nd); 438 binvp = nd.ni_vp; 439 imgp->vp = binvp; 440 } else { 441 AUDIT_ARG_FD(args->fd); 442 error = fgetvp(td, args->fd, &binvp); 443 if (error) 444 goto exec_fail; 445 vfslocked = VFS_LOCK_GIANT(binvp->v_mount); 446 vn_lock(binvp, LK_EXCLUSIVE | LK_RETRY); 447 AUDIT_ARG_VNODE1(binvp); 448 imgp->vp = binvp; 449 } 450 451 /* 452 * Check file permissions (also 'opens' file) 453 */ 454 error = exec_check_permissions(imgp); 455 if (error) 456 goto exec_fail_dealloc; 457 458 imgp->object = imgp->vp->v_object; 459 if (imgp->object != NULL) 460 vm_object_reference(imgp->object); 461 462 /* 463 * Set VV_TEXT now so no one can write to the executable while we're 464 * activating it. 465 * 466 * Remember if this was set before and unset it in case this is not 467 * actually an executable image. 468 */ 469 textset = imgp->vp->v_vflag & VV_TEXT; 470 imgp->vp->v_vflag |= VV_TEXT; 471 472 error = exec_map_first_page(imgp); 473 if (error) 474 goto exec_fail_dealloc; 475 476 imgp->proc->p_osrel = 0; 477 /* 478 * If the current process has a special image activator it 479 * wants to try first, call it. For example, emulating shell 480 * scripts differently. 481 */ 482 error = -1; 483 if ((img_first = imgp->proc->p_sysent->sv_imgact_try) != NULL) 484 error = img_first(imgp); 485 486 /* 487 * Loop through the list of image activators, calling each one. 488 * An activator returns -1 if there is no match, 0 on success, 489 * and an error otherwise. 490 */ 491 for (i = 0; error == -1 && execsw[i]; ++i) { 492 if (execsw[i]->ex_imgact == NULL || 493 execsw[i]->ex_imgact == img_first) { 494 continue; 495 } 496 error = (*execsw[i]->ex_imgact)(imgp); 497 } 498 499 if (error) { 500 if (error == -1) { 501 if (textset == 0) 502 imgp->vp->v_vflag &= ~VV_TEXT; 503 error = ENOEXEC; 504 } 505 goto exec_fail_dealloc; 506 } 507 508 /* 509 * Special interpreter operation, cleanup and loop up to try to 510 * activate the interpreter. 511 */ 512 if (imgp->interpreted) { 513 exec_unmap_first_page(imgp); 514 /* 515 * VV_TEXT needs to be unset for scripts. There is a short 516 * period before we determine that something is a script where 517 * VV_TEXT will be set. The vnode lock is held over this 518 * entire period so nothing should illegitimately be blocked. 519 */ 520 imgp->vp->v_vflag &= ~VV_TEXT; 521 /* free name buffer and old vnode */ 522 if (args->fname != NULL) 523 NDFREE(&nd, NDF_ONLY_PNBUF); 524#ifdef MAC 525 mac_execve_interpreter_enter(binvp, &interpvplabel); 526#endif 527 if (imgp->opened) { 528 VOP_CLOSE(binvp, FREAD, td->td_ucred, td); 529 imgp->opened = 0; 530 } 531 vput(binvp); 532 vm_object_deallocate(imgp->object); 533 imgp->object = NULL; 534 VFS_UNLOCK_GIANT(vfslocked); 535 vfslocked = 0; 536 /* set new name to that of the interpreter */ 537 NDINIT(&nd, LOOKUP, LOCKLEAF | FOLLOW | SAVENAME | MPSAFE, 538 UIO_SYSSPACE, imgp->interpreter_name, td); 539 args->fname = imgp->interpreter_name; 540 goto interpret; 541 } 542 543 /* 544 * NB: We unlock the vnode here because it is believed that none 545 * of the sv_copyout_strings/sv_fixup operations require the vnode. 546 */ 547 VOP_UNLOCK(imgp->vp, 0); 548 549 /* 550 * Do the best to calculate the full path to the image file. 551 */ 552 if (imgp->auxargs != NULL && 553 ((args->fname != NULL && args->fname[0] == '/') || 554 vn_fullpath(td, imgp->vp, &imgp->execpath, &imgp->freepath) != 0)) 555 imgp->execpath = args->fname; 556 557 /* 558 * Copy out strings (args and env) and initialize stack base 559 */ 560 if (p->p_sysent->sv_copyout_strings) 561 stack_base = (*p->p_sysent->sv_copyout_strings)(imgp); 562 else 563 stack_base = exec_copyout_strings(imgp); 564 565 /* 566 * If custom stack fixup routine present for this process 567 * let it do the stack setup. 568 * Else stuff argument count as first item on stack 569 */ 570 if (p->p_sysent->sv_fixup != NULL) 571 (*p->p_sysent->sv_fixup)(&stack_base, imgp); 572 else 573 suword(--stack_base, imgp->args->argc); 574 575 /* 576 * For security and other reasons, the file descriptor table cannot 577 * be shared after an exec. 578 */ 579 fdunshare(p, td); 580 581 /* 582 * Malloc things before we need locks. 583 */ 584 newcred = crget(); 585 euip = uifind(attr.va_uid); 586 i = imgp->args->begin_envv - imgp->args->begin_argv; 587 /* Cache arguments if they fit inside our allowance */ 588 if (ps_arg_cache_limit >= i + sizeof(struct pargs)) { 589 newargs = pargs_alloc(i); 590 bcopy(imgp->args->begin_argv, newargs->ar_args, i); 591 } 592 593 /* close files on exec */ 594 fdcloseexec(td); 595 vn_lock(imgp->vp, LK_EXCLUSIVE | LK_RETRY); 596 597 /* Get a reference to the vnode prior to locking the proc */ 598 VREF(binvp); 599 600 /* 601 * For security and other reasons, signal handlers cannot 602 * be shared after an exec. The new process gets a copy of the old 603 * handlers. In execsigs(), the new process will have its signals 604 * reset. 605 */ 606 PROC_LOCK(p); 607 oldcred = crcopysafe(p, newcred); 608 if (sigacts_shared(p->p_sigacts)) { 609 oldsigacts = p->p_sigacts; 610 PROC_UNLOCK(p); 611 newsigacts = sigacts_alloc(); 612 sigacts_copy(newsigacts, oldsigacts); 613 PROC_LOCK(p); 614 p->p_sigacts = newsigacts; 615 } else 616 oldsigacts = NULL; 617 618 /* Stop profiling */ 619 stopprofclock(p); 620 621 /* reset caught signals */ 622 execsigs(p); 623 624 /* name this process - nameiexec(p, ndp) */ 625 bzero(p->p_comm, sizeof(p->p_comm)); 626 if (args->fname) 627 bcopy(nd.ni_cnd.cn_nameptr, p->p_comm, 628 min(nd.ni_cnd.cn_namelen, MAXCOMLEN)); 629 else if (vn_commname(binvp, p->p_comm, sizeof(p->p_comm)) != 0) 630 bcopy(fexecv_proc_title, p->p_comm, sizeof(fexecv_proc_title)); 631 bcopy(p->p_comm, td->td_name, sizeof(td->td_name)); 632 633 /* 634 * mark as execed, wakeup the process that vforked (if any) and tell 635 * it that it now has its own resources back 636 */ 637 p->p_flag |= P_EXEC; 638 if (p->p_pptr && (p->p_flag & P_PPWAIT)) { 639 p->p_flag &= ~P_PPWAIT; 640 cv_broadcast(&p->p_pwait); 641 } 642 643 /* 644 * Implement image setuid/setgid. 645 * 646 * Don't honor setuid/setgid if the filesystem prohibits it or if 647 * the process is being traced. 648 * 649 * We disable setuid/setgid/etc in compatibility mode on the basis 650 * that most setugid applications are not written with that 651 * environment in mind, and will therefore almost certainly operate 652 * incorrectly. In principle there's no reason that setugid 653 * applications might not be useful in capability mode, so we may want 654 * to reconsider this conservative design choice in the future. 655 * 656 * XXXMAC: For the time being, use NOSUID to also prohibit 657 * transitions on the file system. 658 */ 659 credential_changing = 0; 660 credential_changing |= (attr.va_mode & S_ISUID) && oldcred->cr_uid != 661 attr.va_uid; 662 credential_changing |= (attr.va_mode & S_ISGID) && oldcred->cr_gid != 663 attr.va_gid; 664#ifdef MAC 665 will_transition = mac_vnode_execve_will_transition(oldcred, imgp->vp, 666 interpvplabel, imgp); 667 credential_changing |= will_transition; 668#endif 669 670 if (credential_changing && 671#ifdef CAPABILITY_MODE 672 ((oldcred->cr_flags & CRED_FLAG_CAPMODE) == 0) && 673#endif 674 (imgp->vp->v_mount->mnt_flag & MNT_NOSUID) == 0 && 675 (p->p_flag & P_TRACED) == 0) { 676 /* 677 * Turn off syscall tracing for set-id programs, except for 678 * root. Record any set-id flags first to make sure that 679 * we do not regain any tracing during a possible block. 680 */ 681 setsugid(p); 682 683#ifdef KTRACE 684 if (priv_check_cred(oldcred, PRIV_DEBUG_DIFFCRED, 0)) 685 ktrprocexec(p, &tracecred, &tracevp); 686#endif 687 /* 688 * Close any file descriptors 0..2 that reference procfs, 689 * then make sure file descriptors 0..2 are in use. 690 * 691 * setugidsafety() may call closef() and then pfind() 692 * which may grab the process lock. 693 * fdcheckstd() may call falloc() which may block to 694 * allocate memory, so temporarily drop the process lock. 695 */ 696 PROC_UNLOCK(p); 697 VOP_UNLOCK(imgp->vp, 0); 698 setugidsafety(td); 699 error = fdcheckstd(td); 700 vn_lock(imgp->vp, LK_EXCLUSIVE | LK_RETRY); 701 if (error != 0) 702 goto done1; 703 PROC_LOCK(p); 704 /* 705 * Set the new credentials. 706 */ 707 if (attr.va_mode & S_ISUID) 708 change_euid(newcred, euip); 709 if (attr.va_mode & S_ISGID) 710 change_egid(newcred, attr.va_gid); 711#ifdef MAC 712 if (will_transition) { 713 mac_vnode_execve_transition(oldcred, newcred, imgp->vp, 714 interpvplabel, imgp); 715 } 716#endif 717 /* 718 * Implement correct POSIX saved-id behavior. 719 * 720 * XXXMAC: Note that the current logic will save the 721 * uid and gid if a MAC domain transition occurs, even 722 * though maybe it shouldn't. 723 */ 724 change_svuid(newcred, newcred->cr_uid); 725 change_svgid(newcred, newcred->cr_gid); 726 p->p_ucred = newcred; 727 newcred = NULL; 728 } else { 729 if (oldcred->cr_uid == oldcred->cr_ruid && 730 oldcred->cr_gid == oldcred->cr_rgid) 731 p->p_flag &= ~P_SUGID; 732 /* 733 * Implement correct POSIX saved-id behavior. 734 * 735 * XXX: It's not clear that the existing behavior is 736 * POSIX-compliant. A number of sources indicate that the 737 * saved uid/gid should only be updated if the new ruid is 738 * not equal to the old ruid, or the new euid is not equal 739 * to the old euid and the new euid is not equal to the old 740 * ruid. The FreeBSD code always updates the saved uid/gid. 741 * Also, this code uses the new (replaced) euid and egid as 742 * the source, which may or may not be the right ones to use. 743 */ 744 if (oldcred->cr_svuid != oldcred->cr_uid || 745 oldcred->cr_svgid != oldcred->cr_gid) { 746 change_svuid(newcred, newcred->cr_uid); 747 change_svgid(newcred, newcred->cr_gid); 748 p->p_ucred = newcred; 749 newcred = NULL; 750 } 751 } 752 753 /* 754 * Store the vp for use in procfs. This vnode was referenced prior 755 * to locking the proc lock. 756 */ 757 textvp = p->p_textvp; 758 p->p_textvp = binvp; 759 760#ifdef KDTRACE_HOOKS 761 /* 762 * Tell the DTrace fasttrap provider about the exec if it 763 * has declared an interest. 764 */ 765 if (dtrace_fasttrap_exec) 766 dtrace_fasttrap_exec(p); 767#endif 768 769 /* 770 * Notify others that we exec'd, and clear the P_INEXEC flag 771 * as we're now a bona fide freshly-execed process. 772 */ 773 KNOTE_LOCKED(&p->p_klist, NOTE_EXEC); 774 p->p_flag &= ~P_INEXEC; 775 776 /* 777 * If tracing the process, trap to the debugger so that 778 * breakpoints can be set before the program executes. We 779 * have to use tdsignal() to deliver the signal to the current 780 * thread since any other threads in this process will exit if 781 * execve() succeeds. 782 */ 783 if (p->p_flag & P_TRACED) 784 tdsignal(td, SIGTRAP); 785 786 /* clear "fork but no exec" flag, as we _are_ execing */ 787 p->p_acflag &= ~AFORK; 788 789 /* 790 * Free any previous argument cache and replace it with 791 * the new argument cache, if any. 792 */ 793 oldargs = p->p_args; 794 p->p_args = newargs; 795 newargs = NULL; 796 797#ifdef HWPMC_HOOKS 798 /* 799 * Check if system-wide sampling is in effect or if the 800 * current process is using PMCs. If so, do exec() time 801 * processing. This processing needs to happen AFTER the 802 * P_INEXEC flag is cleared. 803 * 804 * The proc lock needs to be released before taking the PMC 805 * SX. 806 */ 807 if (PMC_SYSTEM_SAMPLING_ACTIVE() || PMC_PROC_IS_USING_PMCS(p)) { 808 PROC_UNLOCK(p); 809 VOP_UNLOCK(imgp->vp, 0); 810 pe.pm_credentialschanged = credential_changing; 811 pe.pm_entryaddr = imgp->entry_addr; 812 813 PMC_CALL_HOOK_X(td, PMC_FN_PROCESS_EXEC, (void *) &pe); 814 vn_lock(imgp->vp, LK_EXCLUSIVE | LK_RETRY); 815 } else 816 PROC_UNLOCK(p); 817#else /* !HWPMC_HOOKS */ 818 PROC_UNLOCK(p); 819#endif 820 821 /* Set values passed into the program in registers. */ 822 if (p->p_sysent->sv_setregs) 823 (*p->p_sysent->sv_setregs)(td, imgp, 824 (u_long)(uintptr_t)stack_base); 825 else 826 exec_setregs(td, imgp, (u_long)(uintptr_t)stack_base); 827 828 vfs_mark_atime(imgp->vp, td->td_ucred); 829 830 SDT_PROBE(proc, kernel, , exec_success, args->fname, 0, 0, 0, 0); 831 832done1: 833 /* 834 * Free any resources malloc'd earlier that we didn't use. 835 */ 836 uifree(euip); 837 if (newcred == NULL) 838 crfree(oldcred); 839 else 840 crfree(newcred); 841 VOP_UNLOCK(imgp->vp, 0); 842 843 /* 844 * Handle deferred decrement of ref counts. 845 */ 846 if (textvp != NULL) { 847 int tvfslocked; 848 849 tvfslocked = VFS_LOCK_GIANT(textvp->v_mount); 850 vrele(textvp); 851 VFS_UNLOCK_GIANT(tvfslocked); 852 } 853 if (binvp && error != 0) 854 vrele(binvp); 855#ifdef KTRACE 856 if (tracevp != NULL) { 857 int tvfslocked; 858 859 tvfslocked = VFS_LOCK_GIANT(tracevp->v_mount); 860 vrele(tracevp); 861 VFS_UNLOCK_GIANT(tvfslocked); 862 } 863 if (tracecred != NULL) 864 crfree(tracecred); 865#endif 866 vn_lock(imgp->vp, LK_EXCLUSIVE | LK_RETRY); 867 pargs_drop(oldargs); 868 pargs_drop(newargs); 869 if (oldsigacts != NULL) 870 sigacts_free(oldsigacts); 871 872exec_fail_dealloc: 873 874 /* 875 * free various allocated resources 876 */ 877 if (imgp->firstpage != NULL) 878 exec_unmap_first_page(imgp); 879 880 if (imgp->vp != NULL) { 881 if (args->fname) 882 NDFREE(&nd, NDF_ONLY_PNBUF); 883 if (imgp->opened) 884 VOP_CLOSE(imgp->vp, FREAD, td->td_ucred, td); 885 vput(imgp->vp); 886 } 887 888 if (imgp->object != NULL) 889 vm_object_deallocate(imgp->object); 890 891 free(imgp->freepath, M_TEMP); 892 893 if (error == 0) { 894 PROC_LOCK(p); 895 td->td_dbgflags |= TDB_EXEC; 896 PROC_UNLOCK(p); 897 898 /* 899 * Stop the process here if its stop event mask has 900 * the S_EXEC bit set. 901 */ 902 STOPEVENT(p, S_EXEC, 0); 903 goto done2; 904 } 905 906exec_fail: 907 /* we're done here, clear P_INEXEC */ 908 PROC_LOCK(p); 909 p->p_flag &= ~P_INEXEC; 910 PROC_UNLOCK(p); 911 912 SDT_PROBE(proc, kernel, , exec_failure, error, 0, 0, 0, 0); 913 914done2: 915#ifdef MAC 916 mac_execve_exit(imgp); 917 mac_execve_interpreter_exit(interpvplabel); 918#endif 919 VFS_UNLOCK_GIANT(vfslocked); 920 exec_free_args(args); 921 922 if (error && imgp->vmspace_destroyed) { 923 /* sorry, no more process anymore. exit gracefully */ 924 exit1(td, W_EXITCODE(0, SIGABRT)); 925 /* NOT REACHED */ 926 } 927 928#ifdef KTRACE 929 if (error == 0) 930 ktrprocctor(p); 931#endif 932 933 return (error); 934} 935 936int 937exec_map_first_page(imgp) 938 struct image_params *imgp; 939{ 940 int rv, i; 941 int initial_pagein; 942 vm_page_t ma[VM_INITIAL_PAGEIN]; 943 vm_object_t object; 944 945 if (imgp->firstpage != NULL) 946 exec_unmap_first_page(imgp); 947 948 object = imgp->vp->v_object; 949 if (object == NULL) 950 return (EACCES); 951 VM_OBJECT_LOCK(object); 952#if VM_NRESERVLEVEL > 0 953 if ((object->flags & OBJ_COLORED) == 0) { 954 object->flags |= OBJ_COLORED; 955 object->pg_color = 0; 956 } 957#endif 958 ma[0] = vm_page_grab(object, 0, VM_ALLOC_NORMAL | VM_ALLOC_RETRY); 959 if (ma[0]->valid != VM_PAGE_BITS_ALL) { 960 initial_pagein = VM_INITIAL_PAGEIN; 961 if (initial_pagein > object->size) 962 initial_pagein = object->size; 963 for (i = 1; i < initial_pagein; i++) { 964 if ((ma[i] = vm_page_next(ma[i - 1])) != NULL) { 965 if (ma[i]->valid) 966 break; 967 if ((ma[i]->oflags & VPO_BUSY) || ma[i]->busy) 968 break; 969 vm_page_busy(ma[i]); 970 } else { 971 ma[i] = vm_page_alloc(object, i, 972 VM_ALLOC_NORMAL | VM_ALLOC_IFNOTCACHED); 973 if (ma[i] == NULL) 974 break; 975 } 976 } 977 initial_pagein = i; 978 rv = vm_pager_get_pages(object, ma, initial_pagein, 0); 979 ma[0] = vm_page_lookup(object, 0); 980 if ((rv != VM_PAGER_OK) || (ma[0] == NULL)) { 981 if (ma[0] != NULL) { 982 vm_page_lock(ma[0]); 983 vm_page_free(ma[0]); 984 vm_page_unlock(ma[0]); 985 } 986 VM_OBJECT_UNLOCK(object); 987 return (EIO); 988 } 989 } 990 vm_page_lock(ma[0]); 991 vm_page_hold(ma[0]); 992 vm_page_unlock(ma[0]); 993 vm_page_wakeup(ma[0]); 994 VM_OBJECT_UNLOCK(object); 995 996 imgp->firstpage = sf_buf_alloc(ma[0], 0); 997 imgp->image_header = (char *)sf_buf_kva(imgp->firstpage); 998 999 return (0); 1000} 1001 1002void 1003exec_unmap_first_page(imgp) 1004 struct image_params *imgp; 1005{ 1006 vm_page_t m; 1007 1008 if (imgp->firstpage != NULL) { 1009 m = sf_buf_page(imgp->firstpage); 1010 sf_buf_free(imgp->firstpage); 1011 imgp->firstpage = NULL; 1012 vm_page_lock(m); 1013 vm_page_unhold(m); 1014 vm_page_unlock(m); 1015 } 1016} 1017 1018/* 1019 * Destroy old address space, and allocate a new stack 1020 * The new stack is only SGROWSIZ large because it is grown 1021 * automatically in trap.c. 1022 */ 1023int 1024exec_new_vmspace(imgp, sv) 1025 struct image_params *imgp; 1026 struct sysentvec *sv; 1027{ 1028 int error; 1029 struct proc *p = imgp->proc; 1030 struct vmspace *vmspace = p->p_vmspace; 1031 vm_object_t obj; 1032 vm_offset_t sv_minuser, stack_addr; 1033 vm_map_t map; 1034 u_long ssiz; 1035 1036 imgp->vmspace_destroyed = 1; 1037 imgp->sysent = sv; 1038 1039 /* May be called with Giant held */ 1040 EVENTHANDLER_INVOKE(process_exec, p, imgp); 1041 1042 /* 1043 * Blow away entire process VM, if address space not shared, 1044 * otherwise, create a new VM space so that other threads are 1045 * not disrupted 1046 */ 1047 map = &vmspace->vm_map; 1048 if (map_at_zero) 1049 sv_minuser = sv->sv_minuser; 1050 else 1051 sv_minuser = MAX(sv->sv_minuser, PAGE_SIZE); 1052 if (vmspace->vm_refcnt == 1 && vm_map_min(map) == sv_minuser && 1053 vm_map_max(map) == sv->sv_maxuser) { 1054 shmexit(vmspace); 1055 pmap_remove_pages(vmspace_pmap(vmspace)); 1056 vm_map_remove(map, vm_map_min(map), vm_map_max(map)); 1057 } else { 1058 error = vmspace_exec(p, sv_minuser, sv->sv_maxuser); 1059 if (error) 1060 return (error); 1061 vmspace = p->p_vmspace; 1062 map = &vmspace->vm_map; 1063 } 1064 1065 /* Map a shared page */ 1066 obj = sv->sv_shared_page_obj; 1067 if (obj != NULL) { 1068 vm_object_reference(obj); 1069 error = vm_map_fixed(map, obj, 0, 1070 sv->sv_shared_page_base, sv->sv_shared_page_len, 1071 VM_PROT_READ | VM_PROT_EXECUTE, VM_PROT_ALL, 1072 MAP_COPY_ON_WRITE | MAP_ACC_NO_CHARGE); 1073 if (error) { 1074 vm_object_deallocate(obj); 1075 return (error); 1076 } 1077 } 1078 1079 /* Allocate a new stack */ 1080 if (sv->sv_maxssiz != NULL) 1081 ssiz = *sv->sv_maxssiz; 1082 else 1083 ssiz = maxssiz; 1084 stack_addr = sv->sv_usrstack - ssiz; 1085 error = vm_map_stack(map, stack_addr, (vm_size_t)ssiz, 1086 obj != NULL && imgp->stack_prot != 0 ? imgp->stack_prot : 1087 sv->sv_stackprot, 1088 VM_PROT_ALL, MAP_STACK_GROWS_DOWN); 1089 if (error) 1090 return (error); 1091 1092#ifdef __ia64__ 1093 /* Allocate a new register stack */ 1094 stack_addr = IA64_BACKINGSTORE; 1095 error = vm_map_stack(map, stack_addr, (vm_size_t)ssiz, 1096 sv->sv_stackprot, VM_PROT_ALL, MAP_STACK_GROWS_UP); 1097 if (error) 1098 return (error); 1099#endif 1100 1101 /* vm_ssize and vm_maxsaddr are somewhat antiquated concepts in the 1102 * VM_STACK case, but they are still used to monitor the size of the 1103 * process stack so we can check the stack rlimit. 1104 */ 1105 vmspace->vm_ssize = sgrowsiz >> PAGE_SHIFT; 1106 vmspace->vm_maxsaddr = (char *)sv->sv_usrstack - ssiz; 1107 1108 return (0); 1109} 1110 1111/* 1112 * Copy out argument and environment strings from the old process address 1113 * space into the temporary string buffer. 1114 */ 1115int 1116exec_copyin_args(struct image_args *args, char *fname, 1117 enum uio_seg segflg, char **argv, char **envv) 1118{ 1119 char *argp, *envp; 1120 int error; 1121 size_t length; 1122 1123 bzero(args, sizeof(*args)); 1124 if (argv == NULL) 1125 return (EFAULT); 1126 1127 /* 1128 * Allocate demand-paged memory for the file name, argument, and 1129 * environment strings. 1130 */ 1131 error = exec_alloc_args(args); 1132 if (error != 0) 1133 return (error); 1134 1135 /* 1136 * Copy the file name. 1137 */ 1138 if (fname != NULL) { 1139 args->fname = args->buf; 1140 error = (segflg == UIO_SYSSPACE) ? 1141 copystr(fname, args->fname, PATH_MAX, &length) : 1142 copyinstr(fname, args->fname, PATH_MAX, &length); 1143 if (error != 0) 1144 goto err_exit; 1145 } else 1146 length = 0; 1147 1148 args->begin_argv = args->buf + length; 1149 args->endp = args->begin_argv; 1150 args->stringspace = ARG_MAX; 1151 1152 /* 1153 * extract arguments first 1154 */ 1155 while ((argp = (caddr_t) (intptr_t) fuword(argv++))) { 1156 if (argp == (caddr_t) -1) { 1157 error = EFAULT; 1158 goto err_exit; 1159 } 1160 if ((error = copyinstr(argp, args->endp, 1161 args->stringspace, &length))) { 1162 if (error == ENAMETOOLONG) 1163 error = E2BIG; 1164 goto err_exit; 1165 } 1166 args->stringspace -= length; 1167 args->endp += length; 1168 args->argc++; 1169 } 1170 1171 args->begin_envv = args->endp; 1172 1173 /* 1174 * extract environment strings 1175 */ 1176 if (envv) { 1177 while ((envp = (caddr_t)(intptr_t)fuword(envv++))) { 1178 if (envp == (caddr_t)-1) { 1179 error = EFAULT; 1180 goto err_exit; 1181 } 1182 if ((error = copyinstr(envp, args->endp, 1183 args->stringspace, &length))) { 1184 if (error == ENAMETOOLONG) 1185 error = E2BIG; 1186 goto err_exit; 1187 } 1188 args->stringspace -= length; 1189 args->endp += length; 1190 args->envc++; 1191 } 1192 } 1193 1194 return (0); 1195 1196err_exit: 1197 exec_free_args(args); 1198 return (error); 1199} 1200 1201/* 1202 * Allocate temporary demand-paged, zero-filled memory for the file name, 1203 * argument, and environment strings. Returns zero if the allocation succeeds 1204 * and ENOMEM otherwise. 1205 */ 1206int 1207exec_alloc_args(struct image_args *args) 1208{ 1209 1210 args->buf = (char *)kmem_alloc_wait(exec_map, PATH_MAX + ARG_MAX); 1211 return (args->buf != NULL ? 0 : ENOMEM); 1212} 1213 1214void 1215exec_free_args(struct image_args *args) 1216{ 1217 1218 if (args->buf != NULL) { 1219 kmem_free_wakeup(exec_map, (vm_offset_t)args->buf, 1220 PATH_MAX + ARG_MAX); 1221 args->buf = NULL; 1222 } 1223 if (args->fname_buf != NULL) { 1224 free(args->fname_buf, M_TEMP); 1225 args->fname_buf = NULL; 1226 } 1227} 1228 1229/* 1230 * Copy strings out to the new process address space, constructing new arg 1231 * and env vector tables. Return a pointer to the base so that it can be used 1232 * as the initial stack pointer. 1233 */ 1234register_t * 1235exec_copyout_strings(imgp) 1236 struct image_params *imgp; 1237{ 1238 int argc, envc; 1239 char **vectp; 1240 char *stringp, *destp; 1241 register_t *stack_base; 1242 struct ps_strings *arginfo; 1243 struct proc *p; 1244 size_t execpath_len; 1245 int szsigcode, szps; 1246 char canary[sizeof(long) * 8]; 1247 1248 szps = sizeof(pagesizes[0]) * MAXPAGESIZES; 1249 /* 1250 * Calculate string base and vector table pointers. 1251 * Also deal with signal trampoline code for this exec type. 1252 */ 1253 if (imgp->execpath != NULL && imgp->auxargs != NULL) 1254 execpath_len = strlen(imgp->execpath) + 1; 1255 else 1256 execpath_len = 0; 1257 p = imgp->proc; 1258 szsigcode = 0; 1259 arginfo = (struct ps_strings *)p->p_sysent->sv_psstrings; 1260 if (p->p_sysent->sv_sigcode_base == 0) { 1261 if (p->p_sysent->sv_szsigcode != NULL) 1262 szsigcode = *(p->p_sysent->sv_szsigcode); 1263 } 1264 destp = (caddr_t)arginfo - szsigcode - SPARE_USRSPACE - 1265 roundup(execpath_len, sizeof(char *)) - 1266 roundup(sizeof(canary), sizeof(char *)) - 1267 roundup(szps, sizeof(char *)) - 1268 roundup((ARG_MAX - imgp->args->stringspace), sizeof(char *)); 1269 1270 /* 1271 * install sigcode 1272 */ 1273 if (szsigcode != 0) 1274 copyout(p->p_sysent->sv_sigcode, ((caddr_t)arginfo - 1275 szsigcode), szsigcode); 1276 1277 /* 1278 * Copy the image path for the rtld. 1279 */ 1280 if (execpath_len != 0) { 1281 imgp->execpathp = (uintptr_t)arginfo - szsigcode - execpath_len; 1282 copyout(imgp->execpath, (void *)imgp->execpathp, 1283 execpath_len); 1284 } 1285 1286 /* 1287 * Prepare the canary for SSP. 1288 */ 1289 arc4rand(canary, sizeof(canary), 0); 1290 imgp->canary = (uintptr_t)arginfo - szsigcode - execpath_len - 1291 sizeof(canary); 1292 copyout(canary, (void *)imgp->canary, sizeof(canary)); 1293 imgp->canarylen = sizeof(canary); 1294 1295 /* 1296 * Prepare the pagesizes array. 1297 */ 1298 imgp->pagesizes = (uintptr_t)arginfo - szsigcode - execpath_len - 1299 roundup(sizeof(canary), sizeof(char *)) - szps; 1300 copyout(pagesizes, (void *)imgp->pagesizes, szps); 1301 imgp->pagesizeslen = szps; 1302 1303 /* 1304 * If we have a valid auxargs ptr, prepare some room 1305 * on the stack. 1306 */ 1307 if (imgp->auxargs) { 1308 /* 1309 * 'AT_COUNT*2' is size for the ELF Auxargs data. This is for 1310 * lower compatibility. 1311 */ 1312 imgp->auxarg_size = (imgp->auxarg_size) ? imgp->auxarg_size : 1313 (AT_COUNT * 2); 1314 /* 1315 * The '+ 2' is for the null pointers at the end of each of 1316 * the arg and env vector sets,and imgp->auxarg_size is room 1317 * for argument of Runtime loader. 1318 */ 1319 vectp = (char **)(destp - (imgp->args->argc + 1320 imgp->args->envc + 2 + imgp->auxarg_size) 1321 * sizeof(char *)); 1322 } else { 1323 /* 1324 * The '+ 2' is for the null pointers at the end of each of 1325 * the arg and env vector sets 1326 */ 1327 vectp = (char **)(destp - (imgp->args->argc + imgp->args->envc + 2) * 1328 sizeof(char *)); 1329 } 1330 1331 /* 1332 * vectp also becomes our initial stack base 1333 */ 1334 stack_base = (register_t *)vectp; 1335 1336 stringp = imgp->args->begin_argv; 1337 argc = imgp->args->argc; 1338 envc = imgp->args->envc; 1339 1340 /* 1341 * Copy out strings - arguments and environment. 1342 */ 1343 copyout(stringp, destp, ARG_MAX - imgp->args->stringspace); 1344 1345 /* 1346 * Fill in "ps_strings" struct for ps, w, etc. 1347 */ 1348 suword(&arginfo->ps_argvstr, (long)(intptr_t)vectp); 1349 suword32(&arginfo->ps_nargvstr, argc); 1350 1351 /* 1352 * Fill in argument portion of vector table. 1353 */ 1354 for (; argc > 0; --argc) { 1355 suword(vectp++, (long)(intptr_t)destp); 1356 while (*stringp++ != 0) 1357 destp++; 1358 destp++; 1359 } 1360 1361 /* a null vector table pointer separates the argp's from the envp's */ 1362 suword(vectp++, 0); 1363 1364 suword(&arginfo->ps_envstr, (long)(intptr_t)vectp); 1365 suword32(&arginfo->ps_nenvstr, envc); 1366 1367 /* 1368 * Fill in environment portion of vector table. 1369 */ 1370 for (; envc > 0; --envc) { 1371 suword(vectp++, (long)(intptr_t)destp); 1372 while (*stringp++ != 0) 1373 destp++; 1374 destp++; 1375 } 1376 1377 /* end of vector table is a null pointer */ 1378 suword(vectp, 0); 1379 1380 return (stack_base); 1381} 1382 1383/* 1384 * Check permissions of file to execute. 1385 * Called with imgp->vp locked. 1386 * Return 0 for success or error code on failure. 1387 */ 1388int 1389exec_check_permissions(imgp) 1390 struct image_params *imgp; 1391{ 1392 struct vnode *vp = imgp->vp; 1393 struct vattr *attr = imgp->attr; 1394 struct thread *td; 1395 int error; 1396 1397 td = curthread; 1398 1399 /* Get file attributes */ 1400 error = VOP_GETATTR(vp, attr, td->td_ucred); 1401 if (error) 1402 return (error); 1403 1404#ifdef MAC 1405 error = mac_vnode_check_exec(td->td_ucred, imgp->vp, imgp); 1406 if (error) 1407 return (error); 1408#endif 1409 1410 /* 1411 * 1) Check if file execution is disabled for the filesystem that 1412 * this file resides on. 1413 * 2) Ensure that at least one execute bit is on. Otherwise, a 1414 * privileged user will always succeed, and we don't want this 1415 * to happen unless the file really is executable. 1416 * 3) Ensure that the file is a regular file. 1417 */ 1418 if ((vp->v_mount->mnt_flag & MNT_NOEXEC) || 1419 (attr->va_mode & (S_IXUSR | S_IXGRP | S_IXOTH)) == 0 || 1420 (attr->va_type != VREG)) 1421 return (EACCES); 1422 1423 /* 1424 * Zero length files can't be exec'd 1425 */ 1426 if (attr->va_size == 0) 1427 return (ENOEXEC); 1428 1429 /* 1430 * Check for execute permission to file based on current credentials. 1431 */ 1432 error = VOP_ACCESS(vp, VEXEC, td->td_ucred, td); 1433 if (error) 1434 return (error); 1435 1436 /* 1437 * Check number of open-for-writes on the file and deny execution 1438 * if there are any. 1439 */ 1440 if (vp->v_writecount) 1441 return (ETXTBSY); 1442 1443 /* 1444 * Call filesystem specific open routine (which does nothing in the 1445 * general case). 1446 */ 1447 error = VOP_OPEN(vp, FREAD, td->td_ucred, td, NULL); 1448 if (error == 0) 1449 imgp->opened = 1; 1450 return (error); 1451} 1452 1453/* 1454 * Exec handler registration 1455 */ 1456int 1457exec_register(execsw_arg) 1458 const struct execsw *execsw_arg; 1459{ 1460 const struct execsw **es, **xs, **newexecsw; 1461 int count = 2; /* New slot and trailing NULL */ 1462 1463 if (execsw) 1464 for (es = execsw; *es; es++) 1465 count++; 1466 newexecsw = malloc(count * sizeof(*es), M_TEMP, M_WAITOK); 1467 if (newexecsw == NULL) 1468 return (ENOMEM); 1469 xs = newexecsw; 1470 if (execsw) 1471 for (es = execsw; *es; es++) 1472 *xs++ = *es; 1473 *xs++ = execsw_arg; 1474 *xs = NULL; 1475 if (execsw) 1476 free(execsw, M_TEMP); 1477 execsw = newexecsw; 1478 return (0); 1479} 1480 1481int 1482exec_unregister(execsw_arg) 1483 const struct execsw *execsw_arg; 1484{ 1485 const struct execsw **es, **xs, **newexecsw; 1486 int count = 1; 1487 1488 if (execsw == NULL) 1489 panic("unregister with no handlers left?\n"); 1490 1491 for (es = execsw; *es; es++) { 1492 if (*es == execsw_arg) 1493 break; 1494 } 1495 if (*es == NULL) 1496 return (ENOENT); 1497 for (es = execsw; *es; es++) 1498 if (*es != execsw_arg) 1499 count++; 1500 newexecsw = malloc(count * sizeof(*es), M_TEMP, M_WAITOK); 1501 if (newexecsw == NULL) 1502 return (ENOMEM); 1503 xs = newexecsw; 1504 for (es = execsw; *es; es++) 1505 if (*es != execsw_arg) 1506 *xs++ = *es; 1507 *xs = NULL; 1508 if (execsw) 1509 free(execsw, M_TEMP); 1510 execsw = newexecsw; 1511 return (0); 1512} 1513 1514static vm_object_t shared_page_obj; 1515static int shared_page_free; 1516 1517int 1518shared_page_fill(int size, int align, const char *data) 1519{ 1520 vm_page_t m; 1521 struct sf_buf *s; 1522 vm_offset_t sk; 1523 int res; 1524 1525 VM_OBJECT_LOCK(shared_page_obj); 1526 m = vm_page_grab(shared_page_obj, 0, VM_ALLOC_RETRY); 1527 res = roundup(shared_page_free, align); 1528 if (res + size >= IDX_TO_OFF(shared_page_obj->size)) 1529 res = -1; 1530 else { 1531 VM_OBJECT_UNLOCK(shared_page_obj); 1532 s = sf_buf_alloc(m, SFB_DEFAULT); 1533 sk = sf_buf_kva(s); 1534 bcopy(data, (void *)(sk + res), size); 1535 shared_page_free = res + size; 1536 sf_buf_free(s); 1537 VM_OBJECT_LOCK(shared_page_obj); 1538 } 1539 vm_page_wakeup(m); 1540 VM_OBJECT_UNLOCK(shared_page_obj); 1541 return (res); 1542} 1543 1544static void 1545shared_page_init(void *dummy __unused) 1546{ 1547 vm_page_t m; 1548 1549 shared_page_obj = vm_pager_allocate(OBJT_PHYS, 0, PAGE_SIZE, 1550 VM_PROT_DEFAULT, 0, NULL); 1551 VM_OBJECT_LOCK(shared_page_obj); 1552 m = vm_page_grab(shared_page_obj, 0, VM_ALLOC_RETRY | VM_ALLOC_NOBUSY | 1553 VM_ALLOC_ZERO); 1554 m->valid = VM_PAGE_BITS_ALL; 1555 VM_OBJECT_UNLOCK(shared_page_obj); 1556} 1557 1558SYSINIT(shp, SI_SUB_EXEC, SI_ORDER_FIRST, (sysinit_cfunc_t)shared_page_init, 1559 NULL); 1560 1561void 1562exec_sysvec_init(void *param) 1563{ 1564 struct sysentvec *sv; 1565 1566 sv = (struct sysentvec *)param; 1567 1568 if ((sv->sv_flags & SV_SHP) == 0) 1569 return; 1570 sv->sv_shared_page_obj = shared_page_obj; 1571 sv->sv_sigcode_base = sv->sv_shared_page_base + 1572 shared_page_fill(*(sv->sv_szsigcode), 16, sv->sv_sigcode); 1573}
|