/*	$NetBSD: exec_subr.c,v 1.88 2023/11/21 14:35:36 riastradh Exp $	*/

/*
 * Copyright (c) 1993, 1994, 1996 Christopher G. Demetriou
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. All advertising materials mentioning features or use of this software
 *    must display the following acknowledgement:
 *      This product includes software developed by Christopher G. Demetriou.
 * 4. The name of the author may not be used to endorse or promote products
 *    derived from this software without specific prior written permission
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */

#include <sys/cdefs.h>
__KERNEL_RCSID(0, "$NetBSD: exec_subr.c,v 1.88 2023/11/21 14:35:36 riastradh Exp $");

#include "opt_pax.h"

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/proc.h>
#include <sys/kmem.h>
#include <sys/vnode.h>
#include <sys/filedesc.h>
#include <sys/exec.h>
#include <sys/mman.h>
#include <sys/resourcevar.h>
#include <sys/device.h>
#include <sys/pax.h>

#include <uvm/uvm_extern.h>

#define	VMCMD_EVCNT_DECL(name)						\
static struct evcnt vmcmd_ev_##name =					\
    EVCNT_INITIALIZER(EVCNT_TYPE_MISC, NULL, "vmcmd", #name);		\
EVCNT_ATTACH_STATIC(vmcmd_ev_##name)

#define	VMCMD_EVCNT_INCR(name)						\
    vmcmd_ev_##name.ev_count++

VMCMD_EVCNT_DECL(calls);
VMCMD_EVCNT_DECL(extends);
VMCMD_EVCNT_DECL(kills);

#ifdef DEBUG_STACK
#define DPRINTF(a) uprintf a
#else
#define DPRINTF(a)
#endif

unsigned int user_stack_guard_size = 1024 * 1024;
unsigned int user_thread_stack_guard_size = 64 * 1024;

/*
 * new_vmcmd():
 *	create a new vmcmd structure and fill in its fields based
 *	on function call arguments.  make sure objects ref'd by
 *	the vmcmd are 'held'.
 */

void
new_vmcmd(struct exec_vmcmd_set *evsp,
    int (*proc)(struct lwp * l, struct exec_vmcmd *),
    vsize_t len, vaddr_t addr, struct vnode *vp, u_long offset,
    u_int prot, int flags)
{
	struct exec_vmcmd *vcp;

	VMCMD_EVCNT_INCR(calls);
	KASSERT(proc != vmcmd_map_pagedvn || (vp->v_iflag & VI_TEXT));
	KASSERT(vp == NULL || vrefcnt(vp) > 0);

	if (evsp->evs_used >= evsp->evs_cnt)
		vmcmdset_extend(evsp);
	vcp = &evsp->evs_cmds[evsp->evs_used++];
	vcp->ev_proc = proc;
	vcp->ev_len = len;
	vcp->ev_addr = addr;
	if ((vcp->ev_vp = vp) != NULL)
		vref(vp);
	vcp->ev_offset = offset;
	vcp->ev_prot = prot;
	vcp->ev_flags = flags;
}

void
vmcmdset_extend(struct exec_vmcmd_set *evsp)
{
	struct exec_vmcmd *nvcp;
	u_int ocnt;

#ifdef DIAGNOSTIC
	if (evsp->evs_used < evsp->evs_cnt)
		panic("vmcmdset_extend: not necessary");
#endif

	/* figure out number of entries in new set */
	if ((ocnt = evsp->evs_cnt) != 0) {
		evsp->evs_cnt += ocnt;
		VMCMD_EVCNT_INCR(extends);
	} else
		evsp->evs_cnt = EXEC_DEFAULT_VMCMD_SETSIZE;

	/* allocate it */
	nvcp = kmem_alloc(evsp->evs_cnt * sizeof(struct exec_vmcmd), KM_SLEEP);

	/* free the old struct, if there was one, and record the new one */
	if (ocnt) {
		memcpy(nvcp, evsp->evs_cmds,
		    (ocnt * sizeof(struct exec_vmcmd)));
		kmem_free(evsp->evs_cmds, ocnt * sizeof(struct exec_vmcmd));
	}
	evsp->evs_cmds = nvcp;
}

void
kill_vmcmds(struct exec_vmcmd_set *evsp)
{
	struct exec_vmcmd *vcp;
	u_int i;

	VMCMD_EVCNT_INCR(kills);

	if (evsp->evs_cnt == 0)
		return;

	for (i = 0; i < evsp->evs_used; i++) {
		vcp = &evsp->evs_cmds[i];
		if (vcp->ev_vp != NULL)
			vrele(vcp->ev_vp);
	}
	kmem_free(evsp->evs_cmds, evsp->evs_cnt * sizeof(struct exec_vmcmd));
	evsp->evs_used = evsp->evs_cnt = 0;
}

/*
 * vmcmd_map_pagedvn():
 *	handle vmcmd which specifies that a vnode should be mmap'd.
 *	appropriate for handling demand-paged text and data segments.
 */

static int
vmcmd_get_prot(struct lwp *l, const struct exec_vmcmd *cmd, vm_prot_t *prot,
    vm_prot_t *maxprot)
{
	vm_prot_t extraprot = PROT_MPROTECT_EXTRACT(cmd->ev_prot);

	*prot = cmd->ev_prot & UVM_PROT_ALL;
	*maxprot = PAX_MPROTECT_MAXPROTECT(l, *prot, extraprot, UVM_PROT_ALL);

	if ((*prot & *maxprot) != *prot)
		return EACCES;
	return PAX_MPROTECT_VALIDATE(l, *prot);
}

int
vmcmd_map_pagedvn(struct lwp *l, struct exec_vmcmd *cmd)
{
	struct uvm_object *uobj;
	struct vnode *vp = cmd->ev_vp;
	struct proc *p = l->l_proc;
	int error;
	vm_prot_t prot, maxprot;

	KASSERT(vp->v_iflag & VI_TEXT);

	/*
	 * map the vnode in using uvm_map.
	 */

	if (cmd->ev_len == 0)
		return 0;
	if (cmd->ev_offset & PAGE_MASK)
		return EINVAL;
	if (cmd->ev_addr & PAGE_MASK)
		return EINVAL;
	if (cmd->ev_len & PAGE_MASK)
		return EINVAL;

	if ((error = vmcmd_get_prot(l, cmd, &prot, &maxprot)) != 0)
		return error;

	/*
	 * check the file system's opinion about mmapping the file
	 */

	error = VOP_MMAP(vp, prot, l->l_cred);
	if (error)
		return error;

	if ((vp->v_vflag & VV_MAPPED) == 0) {
		vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
		vp->v_vflag |= VV_MAPPED;
		VOP_UNLOCK(vp);
	}

	/*
	 * do the map, reference the object for this map entry
	 */
	uobj = &vp->v_uobj;
	vref(vp);

	error = uvm_map(&p->p_vmspace->vm_map, &cmd->ev_addr, cmd->ev_len,
	    uobj, cmd->ev_offset, 0,
	    UVM_MAPFLAG(prot, maxprot, UVM_INH_COPY,
	    UVM_ADV_NORMAL, UVM_FLAG_COPYONW|UVM_FLAG_FIXED));
	if (error) {
		uobj->pgops->pgo_detach(uobj);
	}
	return error;
}

/*
 * vmcmd_map_readvn():
 *	handle vmcmd which specifies that a vnode should be read from.
 *	appropriate for non-demand-paged text/data segments, i.e. impure
 *	objects (a la OMAGIC and NMAGIC).
 */
int
vmcmd_map_readvn(struct lwp *l, struct exec_vmcmd *cmd)
{
	struct proc *p = l->l_proc;
	int error;
	long diff;

	if (cmd->ev_len == 0)
		return 0;

	diff = cmd->ev_addr - trunc_page(cmd->ev_addr);
	cmd->ev_addr -= diff;		/* required by uvm_map */
	cmd->ev_offset -= diff;
	cmd->ev_len += diff;

	error = uvm_map(&p->p_vmspace->vm_map, &cmd->ev_addr,
	    round_page(cmd->ev_len), NULL, UVM_UNKNOWN_OFFSET, 0,
	    UVM_MAPFLAG(UVM_PROT_ALL, UVM_PROT_ALL, UVM_INH_COPY,
	    UVM_ADV_NORMAL,
	    UVM_FLAG_FIXED|UVM_FLAG_OVERLAY|UVM_FLAG_COPYONW));

	if (error)
		return error;

	return vmcmd_readvn(l, cmd);
}

int
vmcmd_readvn(struct lwp *l, struct exec_vmcmd *cmd)
{
	struct proc *p = l->l_proc;
	int error;
	vm_prot_t prot, maxprot;

	error = vn_rdwr(UIO_READ, cmd->ev_vp, (void *)cmd->ev_addr,
	    cmd->ev_len, cmd->ev_offset, UIO_USERSPACE, IO_UNIT,
	    l->l_cred, NULL, l);
	if (error)
		return error;

	if ((error = vmcmd_get_prot(l, cmd, &prot, &maxprot)) != 0)
		return error;

#ifdef PMAP_NEED_PROCWR
	/*
	 * we had to write the process, make sure the pages are synched
	 * with the instruction cache.
	 */
	if (prot & VM_PROT_EXECUTE)
		pmap_procwr(p, cmd->ev_addr, cmd->ev_len);
#endif

	/*
	 * we had to map in the area at PROT_ALL so that vn_rdwr()
	 * could write to it.  however, the caller seems to want
	 * it mapped read-only, so now we are going to have to call
	 * uvm_map_protect() to fix up the protection.  ICK.
	 */
	if (maxprot != VM_PROT_ALL) {
		error = uvm_map_protect(&p->p_vmspace->vm_map,
		    trunc_page(cmd->ev_addr),
		    round_page(cmd->ev_addr + cmd->ev_len),
		    maxprot, true);
		if (error)
			return error;
	}

	if (prot != maxprot) {
		error = uvm_map_protect(&p->p_vmspace->vm_map,
		    trunc_page(cmd->ev_addr),
		    round_page(cmd->ev_addr + cmd->ev_len),
		    prot, false);
		if (error)
			return error;
	}

	return 0;
}

/*
 * vmcmd_map_zero():
 *	handle vmcmd which specifies a zero-filled address space region.  The
 *	address range must be first allocated, then protected appropriately.
 */

int
vmcmd_map_zero(struct lwp *l, struct exec_vmcmd *cmd)
{
	struct proc *p = l->l_proc;
	int error;
	long diff;
	vm_prot_t prot, maxprot;

	diff = cmd->ev_addr - trunc_page(cmd->ev_addr);
	cmd->ev_addr -= diff;		/* required by uvm_map */
	cmd->ev_len += diff;

	if ((error = vmcmd_get_prot(l, cmd, &prot, &maxprot)) != 0)
		return error;

	error = uvm_map(&p->p_vmspace->vm_map, &cmd->ev_addr,
	    round_page(cmd->ev_len), NULL, UVM_UNKNOWN_OFFSET, 0,
	    UVM_MAPFLAG(prot, maxprot, UVM_INH_COPY,
	    UVM_ADV_NORMAL,
	    UVM_FLAG_FIXED|UVM_FLAG_COPYONW));
	if (cmd->ev_flags & VMCMD_STACK)
		curproc->p_vmspace->vm_issize += atop(round_page(cmd->ev_len));
	return error;
}

/*
 * exec_read():
 *
 *	Read from vnode into buffer at offset.
 */
int
exec_read(struct lwp *l, struct vnode *vp, u_long off, void *bf, size_t size,
    int ioflg)
{
	int error;
	size_t resid;

	KASSERT((ioflg & IO_NODELOCKED) == 0 || VOP_ISLOCKED(vp) != LK_NONE);

	if ((error = vn_rdwr(UIO_READ, vp, bf, size, off, UIO_SYSSPACE,
	    ioflg, l->l_cred, &resid, NULL)) != 0)
		return error;
	/*
	 * See if we got all of it
	 */
	if (resid != 0)
		return ENOEXEC;
	return 0;
}

/*
 * exec_setup_stack(): Set up the stack segment for an elf
 * executable.
 *
 * Note that the ep_ssize parameter must be set to be the current stack
 * limit; this is adjusted in the body of execve() to yield the
 * appropriate stack segment usage once the argument length is
 * calculated.
 *
 * This function returns an int for uniformity with other (future) formats'
 * stack setup functions.  They might have errors to return.
 */

int
exec_setup_stack(struct lwp *l, struct exec_package *epp)
{
	vsize_t max_stack_size;
	vaddr_t access_linear_min;
	vsize_t access_size;
	vaddr_t noaccess_linear_min;
	vsize_t noaccess_size;

#ifndef USRSTACK32
#define USRSTACK32	(0x00000000ffffffffL&~PGOFSET)
#endif
#ifndef MAXSSIZ32
#define MAXSSIZ32	(MAXSSIZ >> 2)
#endif

	if (epp->ep_flags & EXEC_32) {
		epp->ep_minsaddr = USRSTACK32;
		max_stack_size = MAXSSIZ32;
	} else {
		epp->ep_minsaddr = USRSTACK;
		max_stack_size = MAXSSIZ;
	}

	DPRINTF(("ep_minsaddr=%#jx max_stack_size=%#jx\n",
	    (uintmax_t)epp->ep_minsaddr, (uintmax_t)max_stack_size));

	pax_aslr_stack(epp, &max_stack_size);

	DPRINTF(("[RLIMIT_STACK].lim_cur=%#jx max_stack_size=%#jx\n",
	    (uintmax_t)l->l_proc->p_rlimit[RLIMIT_STACK].rlim_cur,
	    (uintmax_t)max_stack_size));
	epp->ep_ssize = MIN(l->l_proc->p_rlimit[RLIMIT_STACK].rlim_cur,
	    max_stack_size);

	l->l_proc->p_stackbase = epp->ep_minsaddr;

	epp->ep_maxsaddr = (vaddr_t)STACK_GROW(epp->ep_minsaddr,
	    max_stack_size);

	DPRINTF(("ep_ssize=%#jx ep_minsaddr=%#jx ep_maxsaddr=%#jx\n",
	    (uintmax_t)epp->ep_ssize, (uintmax_t)epp->ep_minsaddr,
	    (uintmax_t)epp->ep_maxsaddr));

	/*
	 * set up commands for stack.  note that this takes *two*, one to
	 * map the part of the stack which we can access, and one to map
	 * the part which we can't.
	 *
	 * arguably, it could be made into one, but that would require the
	 * addition of another mapping proc, which is unnecessary
	 */
	access_size = epp->ep_ssize;
	access_linear_min = (vaddr_t)STACK_ALLOC(epp->ep_minsaddr, access_size);
	noaccess_size = max_stack_size - access_size;
	noaccess_linear_min = (vaddr_t)STACK_ALLOC(STACK_GROW(epp->ep_minsaddr,
	    access_size), noaccess_size);

	DPRINTF(("access_size=%#jx, access_linear_min=%#jx, "
	    "noaccess_size=%#jx, noaccess_linear_min=%#jx\n",
	    (uintmax_t)access_size, (uintmax_t)access_linear_min,
	    (uintmax_t)noaccess_size, (uintmax_t)noaccess_linear_min));

	if (user_stack_guard_size > 0) {
#ifdef __MACHINE_STACK_GROWS_UP
		vsize_t guard_size =
		    MIN(VM_MAXUSER_ADDRESS - epp->ep_maxsaddr,
			user_stack_guard_size);
		if (guard_size > 0)
			NEW_VMCMD(&epp->ep_vmcmds, vmcmd_map_zero, guard_size,
			    epp->ep_maxsaddr, NULL, 0, VM_PROT_NONE);
#else
		NEW_VMCMD(&epp->ep_vmcmds, vmcmd_map_zero,
		    user_stack_guard_size,
		    epp->ep_maxsaddr - user_stack_guard_size, NULL, 0,
		    VM_PROT_NONE);
#endif
	}
	if (noaccess_size > 0 && noaccess_size <= MAXSSIZ) {
		NEW_VMCMD2(&epp->ep_vmcmds, vmcmd_map_zero, noaccess_size,
		    noaccess_linear_min, NULL, 0,
		    VM_PROT_NONE | PROT_MPROTECT(VM_PROT_READ | VM_PROT_WRITE),
		    VMCMD_STACK);
	}
	KASSERT(access_size > 0);
	KASSERT(access_size <= MAXSSIZ);
	NEW_VMCMD2(&epp->ep_vmcmds, vmcmd_map_zero, access_size,
	    access_linear_min, NULL, 0, VM_PROT_READ | VM_PROT_WRITE,
	    VMCMD_STACK);

	return 0;
}
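
/*
 * Illustrative sketch (editorial addition, not part of the upstream file):
 * roughly how an exec format handler is expected to use the vmcmd
 * interface above.  The function name example_load_segments() and the
 * segment layout are hypothetical; the real users are the *_makecmds
 * routines (e.g. in exec_elf.c), which fill in the exec_package and queue
 * vmcmds with the NEW_VMCMD2() macro from <sys/exec.h>.  The queued
 * commands are only run later by execve(), in order.  Kept under #if 0 so
 * it is never compiled.
 */
#if 0
static int
example_load_segments(struct lwp *l, struct exec_package *epp,
    struct vnode *vp)
{

	/*
	 * demand-page the text segment straight from the vnode; for
	 * vmcmd_map_pagedvn() the address, length and offset must be
	 * page-aligned and the vnode must be marked VI_TEXT.
	 */
	NEW_VMCMD2(&epp->ep_vmcmds, vmcmd_map_pagedvn, epp->ep_tsize,
	    epp->ep_taddr, vp, 0, VM_PROT_READ | VM_PROT_EXECUTE, 0);

	/* zero-fill the data/bss region */
	NEW_VMCMD2(&epp->ep_vmcmds, vmcmd_map_zero, epp->ep_dsize,
	    epp->ep_daddr, NULL, 0, VM_PROT_READ | VM_PROT_WRITE, 0);

	/* queue the stack commands via the format's setup_stack hook */
	return (*epp->ep_esch->es_setup_stack)(l, epp);
}
#endif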