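/* kern_sharedpage.c revision 100384 -- NOTE: the $FreeBSD$ tag in the license header identifies this content as sys/kern/kern_exec.c. */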
1/* 2 * Copyright (c) 1993, David Greenman 3 * All rights reserved. 4 * 5 * Redistribution and use in source and binary forms, with or without 6 * modification, are permitted provided that the following conditions 7 * are met: 8 * 1. Redistributions of source code must retain the above copyright 9 * notice, this list of conditions and the following disclaimer. 10 * 2. Redistributions in binary form must reproduce the above copyright 11 * notice, this list of conditions and the following disclaimer in the 12 * documentation and/or other materials provided with the distribution. 13 * 14 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 15 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 16 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 17 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 18 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 19 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 20 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 21 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 22 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 23 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 24 * SUCH DAMAGE. 
25 * 26 * $FreeBSD: head/sys/kern/kern_exec.c 100384 2002-07-20 02:56:12Z peter $ 27 */ 28 29#include "opt_ktrace.h" 30 31#include <sys/param.h> 32#include <sys/systm.h> 33#include <sys/lock.h> 34#include <sys/mutex.h> 35#include <sys/sysproto.h> 36#include <sys/signalvar.h> 37#include <sys/kernel.h> 38#include <sys/mount.h> 39#include <sys/filedesc.h> 40#include <sys/fcntl.h> 41#include <sys/acct.h> 42#include <sys/exec.h> 43#include <sys/imgact.h> 44#include <sys/imgact_elf.h> 45#include <sys/wait.h> 46#include <sys/malloc.h> 47#include <sys/proc.h> 48#include <sys/pioctl.h> 49#include <sys/namei.h> 50#include <sys/sysent.h> 51#include <sys/shm.h> 52#include <sys/sysctl.h> 53#include <sys/user.h> 54#include <sys/vnode.h> 55#ifdef KTRACE 56#include <sys/ktrace.h> 57#endif 58 59#include <vm/vm.h> 60#include <vm/vm_param.h> 61#include <vm/pmap.h> 62#include <vm/vm_page.h> 63#include <vm/vm_map.h> 64#include <vm/vm_kern.h> 65#include <vm/vm_extern.h> 66#include <vm/vm_object.h> 67#include <vm/vm_pager.h> 68 69#include <machine/reg.h> 70 71MALLOC_DEFINE(M_PARGS, "proc-args", "Process arguments"); 72 73static MALLOC_DEFINE(M_ATEXEC, "atexec", "atexec callback"); 74 75/* 76 * callout list for things to do at exec time 77 */ 78struct execlist { 79 execlist_fn function; 80 TAILQ_ENTRY(execlist) next; 81}; 82 83TAILQ_HEAD(exec_list_head, execlist); 84static struct exec_list_head exec_list = TAILQ_HEAD_INITIALIZER(exec_list); 85 86static register_t *exec_copyout_strings(struct image_params *); 87 88/* XXX This should be vm_size_t. */ 89static u_long ps_strings = PS_STRINGS; 90SYSCTL_ULONG(_kern, KERN_PS_STRINGS, ps_strings, CTLFLAG_RD, &ps_strings, 0, ""); 91 92/* XXX This should be vm_size_t. 
*/ 93static u_long usrstack = USRSTACK; 94SYSCTL_ULONG(_kern, KERN_USRSTACK, usrstack, CTLFLAG_RD, &usrstack, 0, ""); 95 96u_long ps_arg_cache_limit = PAGE_SIZE / 16; 97SYSCTL_ULONG(_kern, OID_AUTO, ps_arg_cache_limit, CTLFLAG_RW, 98 &ps_arg_cache_limit, 0, ""); 99 100int ps_argsopen = 1; 101SYSCTL_INT(_kern, OID_AUTO, ps_argsopen, CTLFLAG_RW, &ps_argsopen, 0, ""); 102 103#ifdef __ia64__ 104/* XXX HACK */ 105static int regstkpages = 256; 106SYSCTL_INT(_machdep, OID_AUTO, regstkpages, CTLFLAG_RW, ®stkpages, 0, ""); 107#endif 108 109/* 110 * Each of the items is a pointer to a `const struct execsw', hence the 111 * double pointer here. 112 */ 113static const struct execsw **execsw; 114 115#ifndef _SYS_SYSPROTO_H_ 116struct execve_args { 117 char *fname; 118 char **argv; 119 char **envv; 120}; 121#endif 122 123/* 124 * execve() system call. 125 * 126 * MPSAFE 127 */ 128int 129execve(td, uap) 130 struct thread *td; 131 register struct execve_args *uap; 132{ 133 struct proc *p = td->td_proc; 134 struct nameidata nd, *ndp; 135 struct ucred *newcred = NULL, *oldcred; 136 struct uidinfo *euip; 137 register_t *stack_base; 138 int error, len, i; 139 struct image_params image_params, *imgp; 140 struct vattr attr; 141 int (*img_first)(struct image_params *); 142 struct pargs *oldargs = NULL, *newargs = NULL; 143 struct procsig *oldprocsig, *newprocsig; 144#ifdef KTRACE 145 struct vnode *tracevp = NULL; 146#endif 147 struct vnode *textvp = NULL; 148 149 imgp = &image_params; 150 151 /* 152 * Lock the process and set the P_INEXEC flag to indicate that 153 * it should be left alone until we're done here. This is 154 * necessary to avoid race conditions - e.g. in ptrace() - 155 * that might allow a local user to illicitly obtain elevated 156 * privileges. 
157 */ 158 PROC_LOCK(p); 159 KASSERT((p->p_flag & P_INEXEC) == 0, 160 ("%s(): process already has P_INEXEC flag", __func__)); 161 if ((p->p_flag & P_KSES) && thread_single(SNGLE_EXIT)) { 162 PROC_UNLOCK(p); 163 return (ERESTART); /* Try again later. */ 164 } 165 /* If we get here all other threads are dead. */ 166 p->p_flag |= P_INEXEC; 167 PROC_UNLOCK(p); 168 169 /* 170 * Initialize part of the common data 171 */ 172 imgp->proc = p; 173 imgp->uap = uap; 174 imgp->attr = &attr; 175 imgp->argc = imgp->envc = 0; 176 imgp->argv0 = NULL; 177 imgp->entry_addr = 0; 178 imgp->vmspace_destroyed = 0; 179 imgp->interpreted = 0; 180 imgp->interpreter_name[0] = '\0'; 181 imgp->auxargs = NULL; 182 imgp->vp = NULL; 183 imgp->object = NULL; 184 imgp->firstpage = NULL; 185 imgp->ps_strings = 0; 186 imgp->auxarg_size = 0; 187 188 /* 189 * Allocate temporary demand zeroed space for argument and 190 * environment strings 191 */ 192 imgp->stringbase = (char *)kmem_alloc_wait(exec_map, ARG_MAX + PAGE_SIZE); 193 if (imgp->stringbase == NULL) { 194 error = ENOMEM; 195 mtx_lock(&Giant); 196 goto exec_fail; 197 } 198 imgp->stringp = imgp->stringbase; 199 imgp->stringspace = ARG_MAX; 200 imgp->image_header = imgp->stringbase + ARG_MAX; 201 202 /* 203 * Translate the file name. namei() returns a vnode pointer 204 * in ni_vp amoung other things. 
205 */ 206 ndp = &nd; 207 NDINIT(ndp, LOOKUP, LOCKLEAF | FOLLOW | SAVENAME, 208 UIO_USERSPACE, uap->fname, td); 209 210 mtx_lock(&Giant); 211interpret: 212 213 error = namei(ndp); 214 if (error) { 215 kmem_free_wakeup(exec_map, (vm_offset_t)imgp->stringbase, 216 ARG_MAX + PAGE_SIZE); 217 goto exec_fail; 218 } 219 220 imgp->vp = ndp->ni_vp; 221 imgp->fname = uap->fname; 222 223 /* 224 * Check file permissions (also 'opens' file) 225 */ 226 error = exec_check_permissions(imgp); 227 if (error) { 228 VOP_UNLOCK(imgp->vp, 0, td); 229 goto exec_fail_dealloc; 230 } 231 VOP_GETVOBJECT(imgp->vp, &imgp->object); 232 vm_object_reference(imgp->object); 233 234 error = exec_map_first_page(imgp); 235 VOP_UNLOCK(imgp->vp, 0, td); 236 if (error) 237 goto exec_fail_dealloc; 238 239 /* 240 * If the current process has a special image activator it 241 * wants to try first, call it. For example, emulating shell 242 * scripts differently. 243 */ 244 error = -1; 245 if ((img_first = imgp->proc->p_sysent->sv_imgact_try) != NULL) 246 error = img_first(imgp); 247 248 /* 249 * Loop through the list of image activators, calling each one. 250 * An activator returns -1 if there is no match, 0 on success, 251 * and an error otherwise. 252 */ 253 for (i = 0; error == -1 && execsw[i]; ++i) { 254 if (execsw[i]->ex_imgact == NULL || 255 execsw[i]->ex_imgact == img_first) { 256 continue; 257 } 258 error = (*execsw[i]->ex_imgact)(imgp); 259 } 260 261 if (error) { 262 if (error == -1) 263 error = ENOEXEC; 264 goto exec_fail_dealloc; 265 } 266 267 /* 268 * Special interpreter operation, cleanup and loop up to try to 269 * activate the interpreter. 
270 */ 271 if (imgp->interpreted) { 272 exec_unmap_first_page(imgp); 273 /* free name buffer and old vnode */ 274 NDFREE(ndp, NDF_ONLY_PNBUF); 275 vrele(ndp->ni_vp); 276 vm_object_deallocate(imgp->object); 277 imgp->object = NULL; 278 /* set new name to that of the interpreter */ 279 NDINIT(ndp, LOOKUP, LOCKLEAF | FOLLOW | SAVENAME, 280 UIO_SYSSPACE, imgp->interpreter_name, td); 281 goto interpret; 282 } 283 284 /* 285 * Copy out strings (args and env) and initialize stack base 286 */ 287 if (p->p_sysent->sv_copyout_strings) 288 stack_base = (*p->p_sysent->sv_copyout_strings)(imgp); 289 else 290 stack_base = exec_copyout_strings(imgp); 291 292 /* 293 * If custom stack fixup routine present for this process 294 * let it do the stack setup. 295 * Else stuff argument count as first item on stack 296 */ 297 if (p->p_sysent->sv_fixup) 298 (*p->p_sysent->sv_fixup)(&stack_base, imgp); 299 else 300 suword(--stack_base, imgp->argc); 301 302 /* 303 * For security and other reasons, the file descriptor table cannot 304 * be shared after an exec. 305 */ 306 FILEDESC_LOCK(p->p_fd); 307 if (p->p_fd->fd_refcnt > 1) { 308 struct filedesc *tmp; 309 310 tmp = fdcopy(td); 311 FILEDESC_UNLOCK(p->p_fd); 312 fdfree(td); 313 p->p_fd = tmp; 314 } else 315 FILEDESC_UNLOCK(p->p_fd); 316 317 /* 318 * Malloc things before we need locks. 319 */ 320 newcred = crget(); 321 euip = uifind(attr.va_uid); 322 i = imgp->endargs - imgp->stringbase; 323 if (ps_arg_cache_limit >= i + sizeof(struct pargs)) 324 newargs = pargs_alloc(i); 325 326 /* close files on exec */ 327 fdcloseexec(td); 328 329 /* 330 * For security and other reasons, signal handlers cannot 331 * be shared after an exec. The new process gets a copy of the old 332 * handlers. In execsigs(), the new process will have its signals 333 * reset. 
334 */ 335 PROC_LOCK(p); 336 mp_fixme("procsig needs a lock"); 337 if (p->p_procsig->ps_refcnt > 1) { 338 oldprocsig = p->p_procsig; 339 PROC_UNLOCK(p); 340 MALLOC(newprocsig, struct procsig *, sizeof(struct procsig), 341 M_SUBPROC, M_WAITOK); 342 bcopy(oldprocsig, newprocsig, sizeof(*newprocsig)); 343 newprocsig->ps_refcnt = 1; 344 oldprocsig->ps_refcnt--; 345 PROC_LOCK(p); 346 p->p_procsig = newprocsig; 347 if (p->p_sigacts == &p->p_uarea->u_sigacts) 348 panic("shared procsig but private sigacts?"); 349 350 p->p_uarea->u_sigacts = *p->p_sigacts; 351 p->p_sigacts = &p->p_uarea->u_sigacts; 352 } 353 /* Stop profiling */ 354 stopprofclock(p); 355 356 /* reset caught signals */ 357 execsigs(p); 358 359 /* name this process - nameiexec(p, ndp) */ 360 len = min(ndp->ni_cnd.cn_namelen,MAXCOMLEN); 361 bcopy(ndp->ni_cnd.cn_nameptr, p->p_comm, len); 362 p->p_comm[len] = 0; 363 364 /* 365 * mark as execed, wakeup the process that vforked (if any) and tell 366 * it that it now has its own resources back 367 */ 368 p->p_flag |= P_EXEC; 369 if (p->p_pptr && (p->p_flag & P_PPWAIT)) { 370 p->p_flag &= ~P_PPWAIT; 371 wakeup(p->p_pptr); 372 } 373 374 /* 375 * Implement image setuid/setgid. 376 * 377 * Don't honor setuid/setgid if the filesystem prohibits it or if 378 * the process is being traced. 379 */ 380 oldcred = p->p_ucred; 381 if ((((attr.va_mode & VSUID) && oldcred->cr_uid != attr.va_uid) || 382 ((attr.va_mode & VSGID) && oldcred->cr_gid != attr.va_gid)) && 383 (imgp->vp->v_mount->mnt_flag & MNT_NOSUID) == 0 && 384 (p->p_flag & P_TRACED) == 0) { 385 /* 386 * Turn off syscall tracing for set-id programs, except for 387 * root. Record any set-id flags first to make sure that 388 * we do not regain any tracing during a possible block. 
389 */ 390 setsugid(p); 391#ifdef KTRACE 392 if (p->p_tracep && suser_cred(oldcred, PRISON_ROOT)) { 393 mtx_lock(&ktrace_mtx); 394 p->p_traceflag = 0; 395 tracevp = p->p_tracep; 396 p->p_tracep = NULL; 397 mtx_unlock(&ktrace_mtx); 398 } 399#endif 400 /* Make sure file descriptors 0..2 are in use. */ 401 error = fdcheckstd(td); 402 if (error != 0) 403 goto done1; 404 /* 405 * Set the new credentials. 406 */ 407 crcopy(newcred, oldcred); 408 if (attr.va_mode & VSUID) 409 change_euid(newcred, euip); 410 if (attr.va_mode & VSGID) 411 change_egid(newcred, attr.va_gid); 412 setugidsafety(td); 413 /* 414 * Implement correct POSIX saved-id behavior. 415 */ 416 change_svuid(newcred, newcred->cr_uid); 417 change_svgid(newcred, newcred->cr_gid); 418 p->p_ucred = newcred; 419 newcred = NULL; 420 } else { 421 if (oldcred->cr_uid == oldcred->cr_ruid && 422 oldcred->cr_gid == oldcred->cr_rgid) 423 p->p_flag &= ~P_SUGID; 424 /* 425 * Implement correct POSIX saved-id behavior. 426 * 427 * XXX: It's not clear that the existing behavior is 428 * POSIX-compliant. A number of sources indicate that the 429 * saved uid/gid should only be updated if the new ruid is 430 * not equal to the old ruid, or the new euid is not equal 431 * to the old euid and the new euid is not equal to the old 432 * ruid. The FreeBSD code always updates the saved uid/gid. 433 * Also, this code uses the new (replaced) euid and egid as 434 * the source, which may or may not be the right ones to use. 
435 */ 436 if (oldcred->cr_svuid != oldcred->cr_uid || 437 oldcred->cr_svgid != oldcred->cr_gid) { 438 crcopy(newcred, oldcred); 439 change_svuid(newcred, newcred->cr_uid); 440 change_svgid(newcred, newcred->cr_gid); 441 p->p_ucred = newcred; 442 newcred = NULL; 443 } 444 } 445 446 /* 447 * Store the vp for use in procfs 448 */ 449 textvp = p->p_textvp; 450 VREF(ndp->ni_vp); 451 p->p_textvp = ndp->ni_vp; 452 453 /* 454 * Notify others that we exec'd, and clear the P_INEXEC flag 455 * as we're now a bona fide freshly-execed process. 456 */ 457 KNOTE(&p->p_klist, NOTE_EXEC); 458 p->p_flag &= ~P_INEXEC; 459 460 /* 461 * If tracing the process, trap to debugger so breakpoints 462 * can be set before the program executes. 463 */ 464 _STOPEVENT(p, S_EXEC, 0); 465 466 if (p->p_flag & P_TRACED) 467 psignal(p, SIGTRAP); 468 469 /* clear "fork but no exec" flag, as we _are_ execing */ 470 p->p_acflag &= ~AFORK; 471 472 /* Free any previous argument cache */ 473 oldargs = p->p_args; 474 p->p_args = NULL; 475 476 /* Set values passed into the program in registers. */ 477 if (p->p_sysent->sv_setregs) 478 (*p->p_sysent->sv_setregs)(td, imgp->entry_addr, 479 (u_long)(uintptr_t)stack_base, imgp->ps_strings); 480 else 481 setregs(td, imgp->entry_addr, (u_long)(uintptr_t)stack_base, 482 imgp->ps_strings); 483 484 /* Cache arguments if they fit inside our allowance */ 485 if (ps_arg_cache_limit >= i + sizeof(struct pargs)) { 486 bcopy(imgp->stringbase, newargs->ar_args, i); 487 p->p_args = newargs; 488 newargs = NULL; 489 } 490done1: 491 PROC_UNLOCK(p); 492 493 /* 494 * Free any resources malloc'd earlier that we didn't use. 495 */ 496 uifree(euip); 497 if (newcred == NULL) 498 crfree(oldcred); 499 else 500 crfree(newcred); 501 /* 502 * Handle deferred decrement of ref counts. 
503 */ 504 if (textvp != NULL) 505 vrele(textvp); 506#ifdef KTRACE 507 if (tracevp != NULL) 508 vrele(tracevp); 509#endif 510 if (oldargs != NULL) 511 pargs_drop(oldargs); 512 if (newargs != NULL) 513 pargs_drop(newargs); 514 515exec_fail_dealloc: 516 517 /* 518 * free various allocated resources 519 */ 520 if (imgp->firstpage) 521 exec_unmap_first_page(imgp); 522 523 if (imgp->stringbase != NULL) 524 kmem_free_wakeup(exec_map, (vm_offset_t)imgp->stringbase, 525 ARG_MAX + PAGE_SIZE); 526 527 if (imgp->vp) { 528 NDFREE(ndp, NDF_ONLY_PNBUF); 529 vrele(imgp->vp); 530 } 531 532 if (imgp->object) 533 vm_object_deallocate(imgp->object); 534 535 if (error == 0) 536 goto done2; 537 538exec_fail: 539 /* we're done here, clear P_INEXEC */ 540 PROC_LOCK(p); 541 p->p_flag &= ~P_INEXEC; 542 PROC_UNLOCK(p); 543 544 if (imgp->vmspace_destroyed) { 545 /* sorry, no more process anymore. exit gracefully */ 546 exit1(td, W_EXITCODE(0, SIGABRT)); 547 /* NOT REACHED */ 548 error = 0; 549 } 550done2: 551 mtx_unlock(&Giant); 552 return (error); 553} 554 555int 556exec_map_first_page(imgp) 557 struct image_params *imgp; 558{ 559 int rv, i; 560 int initial_pagein; 561 vm_page_t ma[VM_INITIAL_PAGEIN]; 562 vm_object_t object; 563 564 GIANT_REQUIRED; 565 566 if (imgp->firstpage) { 567 exec_unmap_first_page(imgp); 568 } 569 570 VOP_GETVOBJECT(imgp->vp, &object); 571 572 ma[0] = vm_page_grab(object, 0, VM_ALLOC_NORMAL | VM_ALLOC_RETRY); 573 574 if ((ma[0]->valid & VM_PAGE_BITS_ALL) != VM_PAGE_BITS_ALL) { 575 initial_pagein = VM_INITIAL_PAGEIN; 576 if (initial_pagein > object->size) 577 initial_pagein = object->size; 578 for (i = 1; i < initial_pagein; i++) { 579 if ((ma[i] = vm_page_lookup(object, i)) != NULL) { 580 if ((ma[i]->flags & PG_BUSY) || ma[i]->busy) 581 break; 582 if (ma[i]->valid) 583 break; 584 vm_page_busy(ma[i]); 585 } else { 586 ma[i] = vm_page_alloc(object, i, VM_ALLOC_NORMAL); 587 if (ma[i] == NULL) 588 break; 589 } 590 } 591 initial_pagein = i; 592 593 rv = 
vm_pager_get_pages(object, ma, initial_pagein, 0); 594 ma[0] = vm_page_lookup(object, 0); 595 596 if ((rv != VM_PAGER_OK) || (ma[0] == NULL) || (ma[0]->valid == 0)) { 597 if (ma[0]) { 598 vm_page_lock_queues(); 599 vm_page_protect(ma[0], VM_PROT_NONE); 600 vm_page_free(ma[0]); 601 vm_page_unlock_queues(); 602 } 603 return EIO; 604 } 605 } 606 vm_page_lock_queues(); 607 vm_page_wire(ma[0]); 608 vm_page_wakeup(ma[0]); 609 vm_page_unlock_queues(); 610 611 pmap_qenter((vm_offset_t)imgp->image_header, ma, 1); 612 imgp->firstpage = ma[0]; 613 614 return 0; 615} 616 617void 618exec_unmap_first_page(imgp) 619 struct image_params *imgp; 620{ 621 GIANT_REQUIRED; 622 623 if (imgp->firstpage) { 624 pmap_qremove((vm_offset_t)imgp->image_header, 1); 625 vm_page_lock_queues(); 626 vm_page_unwire(imgp->firstpage, 1); 627 vm_page_unlock_queues(); 628 imgp->firstpage = NULL; 629 } 630} 631 632/* 633 * Destroy old address space, and allocate a new stack 634 * The new stack is only SGROWSIZ large because it is grown 635 * automatically in trap.c. 636 */ 637int 638exec_new_vmspace(imgp, minuser, maxuser, stack_addr) 639 struct image_params *imgp; 640 vm_offset_t minuser, maxuser, stack_addr; 641{ 642 int error; 643 struct execlist *ep; 644 struct proc *p = imgp->proc; 645 struct vmspace *vmspace = p->p_vmspace; 646 647 GIANT_REQUIRED; 648 649 stack_addr = stack_addr - maxssiz; 650 651 imgp->vmspace_destroyed = 1; 652 653 /* 654 * Perform functions registered with at_exec(). 
655 */ 656 TAILQ_FOREACH(ep, &exec_list, next) 657 (*ep->function)(p); 658 659 /* 660 * Blow away entire process VM, if address space not shared, 661 * otherwise, create a new VM space so that other threads are 662 * not disrupted 663 */ 664 if (vmspace->vm_refcnt == 1 665 && vm_map_min(&vmspace->vm_map) == minuser 666 && vm_map_max(&vmspace->vm_map) == maxuser) { 667 if (vmspace->vm_shm) 668 shmexit(p); 669 pmap_remove_pages(vmspace_pmap(vmspace), minuser, maxuser); 670 vm_map_remove(&vmspace->vm_map, minuser, maxuser); 671 } else { 672 vmspace_exec(p, minuser, maxuser); 673 vmspace = p->p_vmspace; 674 } 675 676 /* Allocate a new stack */ 677 error = vm_map_stack(&vmspace->vm_map, stack_addr, (vm_size_t)maxssiz, 678 VM_PROT_ALL, VM_PROT_ALL, 0); 679 if (error) 680 return (error); 681 682#ifdef __ia64__ 683 { 684 /* 685 * Allocate backing store. We really need something 686 * similar to vm_map_stack which can allow the backing 687 * store to grow upwards. This will do for now. 688 */ 689 vm_offset_t bsaddr; 690 bsaddr = USRSTACK - 2*maxssiz; 691 error = vm_map_find(&vmspace->vm_map, 0, 0, &bsaddr, 692 regstkpages * PAGE_SIZE, 0, 693 VM_PROT_ALL, VM_PROT_ALL, 0); 694 FIRST_THREAD_IN_PROC(p)->td_md.md_bspstore = bsaddr; 695 } 696#endif 697 698 /* vm_ssize and vm_maxsaddr are somewhat antiquated concepts in the 699 * VM_STACK case, but they are still used to monitor the size of the 700 * process stack so we can check the stack rlimit. 701 */ 702 vmspace->vm_ssize = sgrowsiz >> PAGE_SHIFT; 703 vmspace->vm_maxsaddr = (char *)USRSTACK - maxssiz; 704 705 return(0); 706} 707 708/* 709 * Copy out argument and environment strings from the old process 710 * address space into the temporary string buffer. 
711 */ 712int 713exec_extract_strings(imgp) 714 struct image_params *imgp; 715{ 716 char **argv, **envv; 717 char *argp, *envp; 718 int error; 719 size_t length; 720 721 /* 722 * extract arguments first 723 */ 724 725 argv = imgp->uap->argv; 726 727 if (argv) { 728 argp = (caddr_t) (intptr_t) fuword(argv); 729 if (argp == (caddr_t) -1) 730 return (EFAULT); 731 if (argp) 732 argv++; 733 if (imgp->argv0) 734 argp = imgp->argv0; 735 if (argp) { 736 do { 737 if (argp == (caddr_t) -1) 738 return (EFAULT); 739 if ((error = copyinstr(argp, imgp->stringp, 740 imgp->stringspace, &length))) { 741 if (error == ENAMETOOLONG) 742 return(E2BIG); 743 return (error); 744 } 745 imgp->stringspace -= length; 746 imgp->stringp += length; 747 imgp->argc++; 748 } while ((argp = (caddr_t) (intptr_t) fuword(argv++))); 749 } 750 } 751 752 imgp->endargs = imgp->stringp; 753 754 /* 755 * extract environment strings 756 */ 757 758 envv = imgp->uap->envv; 759 760 if (envv) { 761 while ((envp = (caddr_t) (intptr_t) fuword(envv++))) { 762 if (envp == (caddr_t) -1) 763 return (EFAULT); 764 if ((error = copyinstr(envp, imgp->stringp, 765 imgp->stringspace, &length))) { 766 if (error == ENAMETOOLONG) 767 return(E2BIG); 768 return (error); 769 } 770 imgp->stringspace -= length; 771 imgp->stringp += length; 772 imgp->envc++; 773 } 774 } 775 776 return (0); 777} 778 779/* 780 * Copy strings out to the new process address space, constructing 781 * new arg and env vector tables. Return a pointer to the base 782 * so that it can be used as the initial stack pointer. 783 */ 784register_t * 785exec_copyout_strings(imgp) 786 struct image_params *imgp; 787{ 788 int argc, envc; 789 char **vectp; 790 char *stringp, *destp; 791 register_t *stack_base; 792 struct ps_strings *arginfo; 793 int szsigcode; 794 795 /* 796 * Calculate string base and vector table pointers. 797 * Also deal with signal trampoline code for this exec type. 
798 */ 799 arginfo = (struct ps_strings *)PS_STRINGS; 800 szsigcode = *(imgp->proc->p_sysent->sv_szsigcode); 801 destp = (caddr_t)arginfo - szsigcode - SPARE_USRSPACE - 802 roundup((ARG_MAX - imgp->stringspace), sizeof(char *)); 803 804 /* 805 * install sigcode 806 */ 807 if (szsigcode) 808 copyout(imgp->proc->p_sysent->sv_sigcode, 809 ((caddr_t)arginfo - szsigcode), szsigcode); 810 811 /* 812 * If we have a valid auxargs ptr, prepare some room 813 * on the stack. 814 */ 815 if (imgp->auxargs) { 816 /* 817 * 'AT_COUNT*2' is size for the ELF Auxargs data. This is for 818 * lower compatibility. 819 */ 820 imgp->auxarg_size = (imgp->auxarg_size) ? imgp->auxarg_size 821 : (AT_COUNT * 2); 822 /* 823 * The '+ 2' is for the null pointers at the end of each of 824 * the arg and env vector sets,and imgp->auxarg_size is room 825 * for argument of Runtime loader. 826 */ 827 vectp = (char **) (destp - (imgp->argc + imgp->envc + 2 + 828 imgp->auxarg_size) * sizeof(char *)); 829 830 } else 831 /* 832 * The '+ 2' is for the null pointers at the end of each of 833 * the arg and env vector sets 834 */ 835 vectp = (char **) 836 (destp - (imgp->argc + imgp->envc + 2) * sizeof(char *)); 837 838 /* 839 * vectp also becomes our initial stack base 840 */ 841 stack_base = (register_t *)vectp; 842 843 stringp = imgp->stringbase; 844 argc = imgp->argc; 845 envc = imgp->envc; 846 847 /* 848 * Copy out strings - arguments and environment. 849 */ 850 copyout(stringp, destp, ARG_MAX - imgp->stringspace); 851 852 /* 853 * Fill in "ps_strings" struct for ps, w, etc. 854 */ 855 suword(&arginfo->ps_argvstr, (long)(intptr_t)vectp); 856 suword(&arginfo->ps_nargvstr, argc); 857 858 /* 859 * Fill in argument portion of vector table. 
860 */ 861 for (; argc > 0; --argc) { 862 suword(vectp++, (long)(intptr_t)destp); 863 while (*stringp++ != 0) 864 destp++; 865 destp++; 866 } 867 868 /* a null vector table pointer separates the argp's from the envp's */ 869 suword(vectp++, 0); 870 871 suword(&arginfo->ps_envstr, (long)(intptr_t)vectp); 872 suword(&arginfo->ps_nenvstr, envc); 873 874 /* 875 * Fill in environment portion of vector table. 876 */ 877 for (; envc > 0; --envc) { 878 suword(vectp++, (long)(intptr_t)destp); 879 while (*stringp++ != 0) 880 destp++; 881 destp++; 882 } 883 884 /* end of vector table is a null pointer */ 885 suword(vectp, 0); 886 887 return (stack_base); 888} 889 890/* 891 * Check permissions of file to execute. 892 * Called with imgp->vp locked. 893 * Return 0 for success or error code on failure. 894 */ 895int 896exec_check_permissions(imgp) 897 struct image_params *imgp; 898{ 899 struct vnode *vp = imgp->vp; 900 struct vattr *attr = imgp->attr; 901 struct thread *td; 902 int error; 903 904 td = curthread; /* XXXKSE */ 905 /* Get file attributes */ 906 error = VOP_GETATTR(vp, attr, td->td_ucred, td); 907 if (error) 908 return (error); 909 910 /* 911 * 1) Check if file execution is disabled for the filesystem that this 912 * file resides on. 913 * 2) Insure that at least one execute bit is on - otherwise root 914 * will always succeed, and we don't want to happen unless the 915 * file really is executable. 916 * 3) Insure that the file is a regular file. 917 */ 918 if ((vp->v_mount->mnt_flag & MNT_NOEXEC) || 919 ((attr->va_mode & 0111) == 0) || 920 (attr->va_type != VREG)) 921 return (EACCES); 922 923 /* 924 * Zero length files can't be exec'd 925 */ 926 if (attr->va_size == 0) 927 return (ENOEXEC); 928 929 /* 930 * Check for execute permission to file based on current credentials. 
931 */ 932 error = VOP_ACCESS(vp, VEXEC, td->td_ucred, td); 933 if (error) 934 return (error); 935 936 /* 937 * Check number of open-for-writes on the file and deny execution 938 * if there are any. 939 */ 940 if (vp->v_writecount) 941 return (ETXTBSY); 942 943 /* 944 * Call filesystem specific open routine (which does nothing in the 945 * general case). 946 */ 947 error = VOP_OPEN(vp, FREAD, td->td_ucred, td); 948 return (error); 949} 950 951/* 952 * Exec handler registration 953 */ 954int 955exec_register(execsw_arg) 956 const struct execsw *execsw_arg; 957{ 958 const struct execsw **es, **xs, **newexecsw; 959 int count = 2; /* New slot and trailing NULL */ 960 961 if (execsw) 962 for (es = execsw; *es; es++) 963 count++; 964 newexecsw = malloc(count * sizeof(*es), M_TEMP, M_WAITOK); 965 if (newexecsw == NULL) 966 return ENOMEM; 967 xs = newexecsw; 968 if (execsw) 969 for (es = execsw; *es; es++) 970 *xs++ = *es; 971 *xs++ = execsw_arg; 972 *xs = NULL; 973 if (execsw) 974 free(execsw, M_TEMP); 975 execsw = newexecsw; 976 return 0; 977} 978 979int 980exec_unregister(execsw_arg) 981 const struct execsw *execsw_arg; 982{ 983 const struct execsw **es, **xs, **newexecsw; 984 int count = 1; 985 986 if (execsw == NULL) 987 panic("unregister with no handlers left?\n"); 988 989 for (es = execsw; *es; es++) { 990 if (*es == execsw_arg) 991 break; 992 } 993 if (*es == NULL) 994 return ENOENT; 995 for (es = execsw; *es; es++) 996 if (*es != execsw_arg) 997 count++; 998 newexecsw = malloc(count * sizeof(*es), M_TEMP, M_WAITOK); 999 if (newexecsw == NULL) 1000 return ENOMEM; 1001 xs = newexecsw; 1002 for (es = execsw; *es; es++) 1003 if (*es != execsw_arg) 1004 *xs++ = *es; 1005 *xs = NULL; 1006 if (execsw) 1007 free(execsw, M_TEMP); 1008 execsw = newexecsw; 1009 return 0; 1010} 1011 1012int 1013at_exec(function) 1014 execlist_fn function; 1015{ 1016 struct execlist *ep; 1017 1018#ifdef INVARIANTS 1019 /* Be noisy if the programmer has lost track of things */ 1020 if 
(rm_at_exec(function)) 1021 printf("WARNING: exec callout entry (%p) already present\n", 1022 function); 1023#endif 1024 ep = malloc(sizeof(*ep), M_ATEXEC, M_NOWAIT); 1025 if (ep == NULL) 1026 return (ENOMEM); 1027 ep->function = function; 1028 TAILQ_INSERT_TAIL(&exec_list, ep, next); 1029 return (0); 1030} 1031 1032/* 1033 * Scan the exec callout list for the given item and remove it. 1034 * Returns the number of items removed (0 or 1) 1035 */ 1036int 1037rm_at_exec(function) 1038 execlist_fn function; 1039{ 1040 struct execlist *ep; 1041 1042 TAILQ_FOREACH(ep, &exec_list, next) { 1043 if (ep->function == function) { 1044 TAILQ_REMOVE(&exec_list, ep, next); 1045 free(ep, M_ATEXEC); 1046 return(1); 1047 } 1048 } 1049 return (0); 1050} 1051 1052