/*	$NetBSD: exec_subr.c,v 1.69 2011/06/23 23:42:44 matt Exp $	*/

/*
 * Copyright (c) 1993, 1994, 1996 Christopher G. Demetriou
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. All advertising materials mentioning features or use of this software
 *    must display the following acknowledgement:
 *      This product includes software developed by Christopher G. Demetriou.
 * 4. The name of the author may not be used to endorse or promote products
 *    derived from this software without specific prior written permission
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */

#include <sys/cdefs.h>
__KERNEL_RCSID(0, "$NetBSD: exec_subr.c,v 1.69 2011/06/23 23:42:44 matt Exp $");

#include "opt_pax.h"

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/proc.h>
#include <sys/kmem.h>
#include <sys/vnode.h>
#include <sys/filedesc.h>
#include <sys/exec.h>
#include <sys/mman.h>
#include <sys/resourcevar.h>
#include <sys/device.h>

#if defined(PAX_ASLR) || defined(PAX_MPROTECT)
#include <sys/pax.h>
#endif /* PAX_ASLR || PAX_MPROTECT */

#include <uvm/uvm_extern.h>

#define	VMCMD_EVCNT_DECL(name)					\
static struct evcnt vmcmd_ev_##name =				\
    EVCNT_INITIALIZER(EVCNT_TYPE_MISC, NULL, "vmcmd", #name);	\
EVCNT_ATTACH_STATIC(vmcmd_ev_##name)

#define	VMCMD_EVCNT_INCR(name)					\
    vmcmd_ev_##name.ev_count++

VMCMD_EVCNT_DECL(calls);
VMCMD_EVCNT_DECL(extends);
VMCMD_EVCNT_DECL(kills);

#ifdef DEBUG_STACK
#define DPRINTF(a) uprintf a
#else
#define DPRINTF(a)
#endif
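
/*
 * A note on usage (added commentary, illustrative only): an
 * executable-format loader queues mappings with the NEW_VMCMD()/NEW_VMCMD2()
 * macros from <sys/exec.h>, which expand to new_vmcmd() below.  A
 * demand-paged text segment might be queued roughly like this, where 'epp'
 * is the exec package and 'len', 'addr', and 'offset' come from the
 * on-disk headers:
 *
 *	NEW_VMCMD(&epp->ep_vmcmds, vmcmd_map_pagedvn, len, addr,
 *	    epp->ep_vp, offset, VM_PROT_READ | VM_PROT_EXECUTE);
 *
 * Nothing is mapped at this point; execve() runs each queued ev_proc once
 * it commits to the new image, and kill_vmcmds() releases the set (and
 * its vnode references) on both the success and failure paths.
 */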

/*
 * new_vmcmd():
 *	create a new vmcmd structure and fill in its fields based
 *	on function call arguments.  make sure objects ref'd by
 *	the vmcmd are 'held'.
 */

void
new_vmcmd(struct exec_vmcmd_set *evsp,
    int (*proc)(struct lwp * l, struct exec_vmcmd *),
    vsize_t len, vaddr_t addr, struct vnode *vp, u_long offset,
    u_int prot, int flags)
{
	struct exec_vmcmd *vcp;

	VMCMD_EVCNT_INCR(calls);
	KASSERT(proc != vmcmd_map_pagedvn || (vp->v_iflag & VI_TEXT));
	KASSERT(vp == NULL || vp->v_usecount > 0);

	if (evsp->evs_used >= evsp->evs_cnt)
		vmcmdset_extend(evsp);
	vcp = &evsp->evs_cmds[evsp->evs_used++];
	vcp->ev_proc = proc;
	vcp->ev_len = len;
	vcp->ev_addr = addr;
	if ((vcp->ev_vp = vp) != NULL)
		vref(vp);
	vcp->ev_offset = offset;
	vcp->ev_prot = prot;
	vcp->ev_flags = flags;
}

void
vmcmdset_extend(struct exec_vmcmd_set *evsp)
{
	struct exec_vmcmd *nvcp;
	u_int ocnt;

#ifdef DIAGNOSTIC
	if (evsp->evs_used < evsp->evs_cnt)
		panic("vmcmdset_extend: not necessary");
#endif

	/* figure out number of entries in new set */
	if ((ocnt = evsp->evs_cnt) != 0) {
		evsp->evs_cnt += ocnt;
		VMCMD_EVCNT_INCR(extends);
	} else
		evsp->evs_cnt = EXEC_DEFAULT_VMCMD_SETSIZE;

	/* allocate it */
	nvcp = kmem_alloc(evsp->evs_cnt * sizeof(struct exec_vmcmd), KM_SLEEP);

	/* free the old struct, if there was one, and record the new one */
	if (ocnt) {
		memcpy(nvcp, evsp->evs_cmds,
		    (ocnt * sizeof(struct exec_vmcmd)));
		kmem_free(evsp->evs_cmds, ocnt * sizeof(struct exec_vmcmd));
	}
	evsp->evs_cmds = nvcp;
}

void
kill_vmcmds(struct exec_vmcmd_set *evsp)
{
	struct exec_vmcmd *vcp;
	u_int i;

	VMCMD_EVCNT_INCR(kills);

	if (evsp->evs_cnt == 0)
		return;

	for (i = 0; i < evsp->evs_used; i++) {
		vcp = &evsp->evs_cmds[i];
		if (vcp->ev_vp != NULL)
			vrele(vcp->ev_vp);
	}
	kmem_free(evsp->evs_cmds, evsp->evs_cnt * sizeof(struct exec_vmcmd));
	evsp->evs_used = evsp->evs_cnt = 0;
}

/*
 * vmcmd_map_pagedvn():
 *	handle vmcmd which specifies that a vnode should be mmap'd.
 *	appropriate for handling demand-paged text and data segments.
 */

int
vmcmd_map_pagedvn(struct lwp *l, struct exec_vmcmd *cmd)
{
	struct uvm_object *uobj;
	struct vnode *vp = cmd->ev_vp;
	struct proc *p = l->l_proc;
	int error;
	vm_prot_t prot, maxprot;

	KASSERT(vp->v_iflag & VI_TEXT);

	/*
	 * map the vnode in using uvm_map.
	 */

	if (cmd->ev_len == 0)
		return 0;
	if (cmd->ev_offset & PAGE_MASK)
		return EINVAL;
	if (cmd->ev_addr & PAGE_MASK)
		return EINVAL;
	if (cmd->ev_len & PAGE_MASK)
		return EINVAL;

	prot = cmd->ev_prot;
	maxprot = UVM_PROT_ALL;
#ifdef PAX_MPROTECT
	pax_mprotect(l, &prot, &maxprot);
#endif /* PAX_MPROTECT */

	/*
	 * check the file system's opinion about mmapping the file
	 */

	error = VOP_MMAP(vp, prot, l->l_cred);
	if (error)
		return error;

	if ((vp->v_vflag & VV_MAPPED) == 0) {
		vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
		vp->v_vflag |= VV_MAPPED;
		VOP_UNLOCK(vp);
	}

	/*
	 * do the map, reference the object for this map entry
	 */
	uobj = &vp->v_uobj;
	vref(vp);

	error = uvm_map(&p->p_vmspace->vm_map, &cmd->ev_addr, cmd->ev_len,
	    uobj, cmd->ev_offset, 0,
	    UVM_MAPFLAG(prot, maxprot, UVM_INH_COPY,
	    UVM_ADV_NORMAL, UVM_FLAG_COPYONW|UVM_FLAG_FIXED));
	if (error) {
		uobj->pgops->pgo_detach(uobj);
	}
	return error;
}
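
/*
 * Reference-counting note (added commentary): the vref() above takes a
 * reference on the vnode on behalf of the new map entry.  On success,
 * uvm_map() owns that reference and drops it when the mapping is torn
 * down; on failure we must drop it ourselves, which is what the
 * pgo_detach() call in the error path does.
 */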

/*
 * vmcmd_map_readvn():
 *	handle vmcmd which specifies that a vnode should be read from.
 *	appropriate for non-demand-paged text/data segments, i.e. impure
 *	objects (a la OMAGIC and NMAGIC).
 */
int
vmcmd_map_readvn(struct lwp *l, struct exec_vmcmd *cmd)
{
	struct proc *p = l->l_proc;
	int error;
	long diff;

	if (cmd->ev_len == 0)
		return 0;

	diff = cmd->ev_addr - trunc_page(cmd->ev_addr);
	cmd->ev_addr -= diff;			/* required by uvm_map */
	cmd->ev_offset -= diff;
	cmd->ev_len += diff;

	error = uvm_map(&p->p_vmspace->vm_map, &cmd->ev_addr,
	    round_page(cmd->ev_len), NULL, UVM_UNKNOWN_OFFSET, 0,
	    UVM_MAPFLAG(UVM_PROT_ALL, UVM_PROT_ALL, UVM_INH_COPY,
	    UVM_ADV_NORMAL,
	    UVM_FLAG_FIXED|UVM_FLAG_OVERLAY|UVM_FLAG_COPYONW));

	if (error)
		return error;

	return vmcmd_readvn(l, cmd);
}

int
vmcmd_readvn(struct lwp *l, struct exec_vmcmd *cmd)
{
	struct proc *p = l->l_proc;
	int error;
	vm_prot_t prot, maxprot;

	error = vn_rdwr(UIO_READ, cmd->ev_vp, (void *)cmd->ev_addr,
	    cmd->ev_len, cmd->ev_offset, UIO_USERSPACE, IO_UNIT,
	    l->l_cred, NULL, l);
	if (error)
		return error;

	prot = cmd->ev_prot;
	maxprot = VM_PROT_ALL;
#ifdef PAX_MPROTECT
	pax_mprotect(l, &prot, &maxprot);
#endif /* PAX_MPROTECT */

#ifdef PMAP_NEED_PROCWR
	/*
	 * we had to write the process, make sure the pages are synched
	 * with the instruction cache.
	 */
	if (prot & VM_PROT_EXECUTE)
		pmap_procwr(p, cmd->ev_addr, cmd->ev_len);
#endif

	/*
	 * we had to map in the area at PROT_ALL so that vn_rdwr()
	 * could write to it.  however, the caller seems to want
	 * it mapped read-only, so now we are going to have to call
	 * uvm_map_protect() to fix up the protection.  ICK.
	 */
	if (maxprot != VM_PROT_ALL) {
		error = uvm_map_protect(&p->p_vmspace->vm_map,
		    trunc_page(cmd->ev_addr),
		    round_page(cmd->ev_addr + cmd->ev_len),
		    maxprot, true);
		if (error)
			return error;
	}

	if (prot != maxprot) {
		error = uvm_map_protect(&p->p_vmspace->vm_map,
		    trunc_page(cmd->ev_addr),
		    round_page(cmd->ev_addr + cmd->ev_len),
		    prot, false);
		if (error)
			return error;
	}

	return 0;
}
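
/*
 * Worked example of the alignment fixup in vmcmd_map_readvn() above (and
 * vmcmd_map_zero() below), added commentary assuming 4KB pages: if
 * ev_addr is 0x20345, trunc_page() yields 0x20000, so diff is 0x345.
 * The mapping then starts at the page boundary 0x20000, ev_offset is
 * backed up by 0x345 so the same file bytes still land at 0x20345, and
 * ev_len grows by 0x345 so the end address is unchanged.
 * round_page(ev_len) covers any partial trailing page.
 */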

/*
 * vmcmd_map_zero():
 *	handle vmcmd which specifies a zero-filled address space region.  The
 *	address range must be first allocated, then protected appropriately.
 */

int
vmcmd_map_zero(struct lwp *l, struct exec_vmcmd *cmd)
{
	struct proc *p = l->l_proc;
	int error;
	long diff;
	vm_prot_t prot, maxprot;

	diff = cmd->ev_addr - trunc_page(cmd->ev_addr);
	cmd->ev_addr -= diff;			/* required by uvm_map */
	cmd->ev_len += diff;

	prot = cmd->ev_prot;
	maxprot = UVM_PROT_ALL;
#ifdef PAX_MPROTECT
	pax_mprotect(l, &prot, &maxprot);
#endif /* PAX_MPROTECT */

	error = uvm_map(&p->p_vmspace->vm_map, &cmd->ev_addr,
	    round_page(cmd->ev_len), NULL, UVM_UNKNOWN_OFFSET, 0,
	    UVM_MAPFLAG(prot, maxprot, UVM_INH_COPY,
	    UVM_ADV_NORMAL,
	    UVM_FLAG_FIXED|UVM_FLAG_COPYONW));
	if (cmd->ev_flags & VMCMD_STACK)
		curproc->p_vmspace->vm_issize += atop(round_page(cmd->ev_len));
	return error;
}

/*
 * exec_read_from():
 *
 *	Read from vnode into buffer at offset.
 */
int
exec_read_from(struct lwp *l, struct vnode *vp, u_long off, void *bf,
    size_t size)
{
	int error;
	size_t resid;

	if ((error = vn_rdwr(UIO_READ, vp, bf, size, off, UIO_SYSSPACE,
	    0, l->l_cred, &resid, NULL)) != 0)
		return error;
	/*
	 * See if we got all of it
	 */
	if (resid != 0)
		return ENOEXEC;
	return 0;
}
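
/*
 * Usage sketch for exec_read_from() (added commentary; 'some_exec_header'
 * is a hypothetical type, not a real one): an executable-format probe
 * typically pulls its on-disk header in with a single call, relying on
 * the ENOEXEC return to reject short reads:
 *
 *	struct some_exec_header hdr;
 *	int error;
 *
 *	error = exec_read_from(l, epp->ep_vp, 0, &hdr, sizeof(hdr));
 *	if (error)
 *		return error;
 */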

/*
 * exec_setup_stack(): Set up the stack segment for an elf
 * executable.
 *
 * Note that the ep_ssize parameter must be set to be the current stack
 * limit; this is adjusted in the body of execve() to yield the
 * appropriate stack segment usage once the argument length is
 * calculated.
 *
 * This function returns an int for uniformity with other (future) formats'
 * stack setup functions.  They might have errors to return.
 */

int
exec_setup_stack(struct lwp *l, struct exec_package *epp)
{
	vsize_t max_stack_size;
	vaddr_t access_linear_min;
	vsize_t access_size;
	vaddr_t noaccess_linear_min;
	vsize_t noaccess_size;

#ifndef	USRSTACK32
#define USRSTACK32	(0x00000000ffffffffL & ~PGOFSET)
#endif
#ifndef	MAXSSIZ32
#define MAXSSIZ32	(MAXSSIZ >> 2)
#endif

	if (epp->ep_flags & EXEC_32) {
		epp->ep_minsaddr = USRSTACK32;
		max_stack_size = MAXSSIZ32;
	} else {
		epp->ep_minsaddr = USRSTACK;
		max_stack_size = MAXSSIZ;
	}

	DPRINTF(("ep_minsaddr=%llx max_stack_size=%llx\n",
	    (unsigned long long)epp->ep_minsaddr,
	    (unsigned long long)max_stack_size));

	epp->ep_ssize = MIN(l->l_proc->p_rlimit[RLIMIT_STACK].rlim_cur,
	    max_stack_size);

#ifdef PAX_ASLR
	pax_aslr_stack(l, epp, &max_stack_size);
#endif /* PAX_ASLR */

	l->l_proc->p_stackbase = epp->ep_minsaddr;

	epp->ep_maxsaddr = (vaddr_t)STACK_GROW(epp->ep_minsaddr,
	    max_stack_size);

	DPRINTF(("ep_ssize=%llx ep_maxsaddr=%llx\n",
	    (unsigned long long)epp->ep_ssize,
	    (unsigned long long)epp->ep_maxsaddr));

	/*
	 * set up commands for stack.  note that this takes *two*, one to
	 * map the part of the stack which we can access, and one to map
	 * the part which we can't.
	 *
	 * arguably, it could be made into one, but that would require the
	 * addition of another mapping proc, which is unnecessary
	 */
	access_size = epp->ep_ssize;
	access_linear_min = (vaddr_t)STACK_ALLOC(epp->ep_minsaddr, access_size);
	noaccess_size = max_stack_size - access_size;
	noaccess_linear_min = (vaddr_t)STACK_ALLOC(STACK_GROW(epp->ep_minsaddr,
	    access_size), noaccess_size);

	DPRINTF(("access_size=%llx, access_linear_min=%llx, "
	    "noaccess_size=%llx, noaccess_linear_min=%llx\n",
	    (unsigned long long)access_size,
	    (unsigned long long)access_linear_min,
	    (unsigned long long)noaccess_size,
	    (unsigned long long)noaccess_linear_min));

	if (noaccess_size > 0 && noaccess_size <= MAXSSIZ) {
		NEW_VMCMD2(&epp->ep_vmcmds, vmcmd_map_zero, noaccess_size,
		    noaccess_linear_min, NULL, 0, VM_PROT_NONE, VMCMD_STACK);
	}
	KASSERT(access_size > 0 && access_size <= MAXSSIZ);
	NEW_VMCMD2(&epp->ep_vmcmds, vmcmd_map_zero, access_size,
	    access_linear_min, NULL, 0, VM_PROT_READ | VM_PROT_WRITE,
	    VMCMD_STACK);

	return 0;
}
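
/*
 * Resulting layout (added commentary; shown for a machine whose stack
 * grows down, so ep_minsaddr is the high end and ep_maxsaddr the low end):
 *
 *	ep_minsaddr        -> +-----------------------------+ (high addresses)
 *	                      | access_size == ep_ssize,    |
 *	                      | VM_PROT_READ|VM_PROT_WRITE  |
 *	access_linear_min  -> +-----------------------------+
 *	                      | noaccess_size,              |
 *	                      | VM_PROT_NONE                |
 *	noaccess_linear_min = ep_maxsaddr       (low addresses)
 *
 * The VM_PROT_NONE region reserves the rest of the potential stack so
 * nothing else gets mapped there; its protection can be opened up later
 * if the RLIMIT_STACK soft limit is raised.
 */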