/*-
 * imgact_elf.c  (FreeBSD revision 153741)
 *
 * Copyright (c) 2000 David O'Brien
 * Copyright (c) 1995-1996 Søren Schmidt
 * Copyright (c) 1996 Peter Wemm
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer
 *    in this position and unchanged.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. The name of the author may not be used to endorse or promote products
 *    derived from this software without specific prior written permission
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
29116182Sobrien */ 30116182Sobrien 31116182Sobrien#include <sys/cdefs.h> 32193066Sjamie__FBSDID("$FreeBSD: head/sys/kern/imgact_elf.c 153741 2005-12-26 21:23:57Z sobomax $"); 33185435Sbz 34185435Sbz#include "opt_compat.h" 35185435Sbz 36131177Spjd#include <sys/param.h> 3746155Sphk#include <sys/exec.h> 3846155Sphk#include <sys/fcntl.h> 3946155Sphk#include <sys/imgact.h> 4046155Sphk#include <sys/imgact_elf.h> 4146155Sphk#include <sys/kernel.h> 4246155Sphk#include <sys/lock.h> 4346155Sphk#include <sys/malloc.h> 44192895Sjamie#include <sys/mount.h> 45164032Srwatson#include <sys/mutex.h> 4646155Sphk#include <sys/mman.h> 47124882Srwatson#include <sys/namei.h> 48177785Skib#include <sys/pioctl.h> 4946155Sphk#include <sys/proc.h> 5087275Srwatson#include <sys/procfs.h> 5187275Srwatson#include <sys/resourcevar.h> 52220137Strasz#include <sys/sf_buf.h> 53221362Strasz#include <sys/systm.h> 54168401Spjd#include <sys/signalvar.h> 55193066Sjamie#include <sys/stat.h> 56113275Smike#include <sys/sx.h> 57147185Spjd#include <sys/syscall.h> 58113275Smike#include <sys/sysctl.h> 5946155Sphk#include <sys/sysent.h> 60113275Smike#include <sys/vnode.h> 6157163Srwatson 62113275Smike#include <vm/vm.h> 63196019Srwatson#include <vm/vm_kern.h> 6446155Sphk#include <vm/vm_param.h> 65196019Srwatson#include <vm/pmap.h> 66196019Srwatson#include <vm/vm_map.h> 6746155Sphk#include <vm/vm_object.h> 68196019Srwatson#include <vm/vm_extern.h> 69185435Sbz 70185435Sbz#include <machine/elf.h> 71185435Sbz#include <machine/md_var.h> 72185435Sbz 73185435Sbz#if defined(COMPAT_IA32) && __ELF_WORD_SIZE == 32 74185435Sbz#include <machine/fpu.h> 7546155Sphk#include <compat/ia32/ia32_reg.h> 76163606Srwatson#endif 77163606Srwatson 78195944Sjamie#define OLD_EI_BRAND 8 79195944Sjamie 8046155Sphkstatic int __elfN(check_header)(const Elf_Ehdr *hdr); 81227293Sedstatic Elf_Brandinfo *__elfN(get_brandinfo)(const Elf_Ehdr *hdr, 8246155Sphk const char *interp); 83202468Sbzstatic int __elfN(load_file)(struct proc *p, const char 
*file, u_long *addr, 84202468Sbz u_long *entry, size_t pagesize); 85202468Sbzstatic int __elfN(load_section)(struct vmspace *vmspace, vm_object_t object, 86202468Sbz vm_offset_t offset, caddr_t vmaddr, size_t memsz, size_t filsz, 87202468Sbz vm_prot_t prot, size_t pagesize); 88202468Sbzstatic int __CONCAT(exec_, __elfN(imgact))(struct image_params *imgp); 89202468Sbz 90202468SbzSYSCTL_NODE(_kern, OID_AUTO, __CONCAT(elf, __ELF_WORD_SIZE), CTLFLAG_RW, 0, 91202468Sbz ""); 92202468Sbz 93202468Sbzint __elfN(fallback_brand) = -1; 94202468SbzSYSCTL_INT(__CONCAT(_kern_elf, __ELF_WORD_SIZE), OID_AUTO, 95202468Sbz fallback_brand, CTLFLAG_RW, &__elfN(fallback_brand), 0, 96202468Sbz __XSTRING(__CONCAT(ELF, __ELF_WORD_SIZE)) " brand of last resort"); 97202468SbzTUNABLE_INT("kern.elf" __XSTRING(__ELF_WORD_SIZE) ".fallback_brand", 98192895Sjamie &__elfN(fallback_brand)); 99192895Sjamie 100192895Sjamiestatic int elf_trace = 0; 101192895SjamieSYSCTL_INT(_debug, OID_AUTO, __elfN(trace), CTLFLAG_RW, &elf_trace, 0, ""); 102192895Sjamie 103192895Sjamiestatic int elf_legacy_coredump = 0; 104192895SjamieSYSCTL_INT(_debug, OID_AUTO, __elfN(legacy_coredump), CTLFLAG_RW, 105192895Sjamie &elf_legacy_coredump, 0, ""); 106231267Smm 107194762Sjamiestatic Elf_Brandinfo *elf_brand_list[MAX_BRANDS]; 108195944Sjamie 109201145Santoineint 110196176Sbz__elfN(insert_brand_entry)(Elf_Brandinfo *entry) 111202468Sbz{ 112196176Sbz int i; 113202468Sbz 114196176Sbz for (i = 0; i < MAX_BRANDS; i++) { 115192895Sjamie if (elf_brand_list[i] == NULL) { 116192895Sjamie elf_brand_list[i] = entry; 117192895Sjamie break; 11857163Srwatson } 119221362Strasz } 120168401Spjd if (i == MAX_BRANDS) 121191673Sjamie return (-1); 122191673Sjamie return (0); 123221362Strasz} 124179881Sdelphij 125113275Smikeint 126191673Sjamie__elfN(remove_brand_entry)(Elf_Brandinfo *entry) 127190466Sjamie{ 128191673Sjamie int i; 129192895Sjamie 130192895Sjamie for (i = 0; i < MAX_BRANDS; i++) { 131221362Strasz if (elf_brand_list[i] == entry) { 
132221362Strasz elf_brand_list[i] = NULL; 133232598Strasz break; 134221362Strasz } 135221362Strasz } 136185435Sbz if (i == MAX_BRANDS) 137190466Sjamie return (-1); 138192895Sjamie return (0); 139185435Sbz} 140185435Sbz 141190466Sjamieint 142192895Sjamie__elfN(brand_inuse)(Elf_Brandinfo *entry) 143185435Sbz{ 144113275Smike struct proc *p; 145191673Sjamie int rval = FALSE; 146191673Sjamie 147191673Sjamie sx_slock(&allproc_lock); 148191673Sjamie LIST_FOREACH(p, &allproc, p_list) { 149191673Sjamie if (p->p_sysent == entry->sysvec) { 150191673Sjamie rval = TRUE; 151113275Smike break; 152192895Sjamie } 153216861Sbz } 154216861Sbz sx_sunlock(&allproc_lock); 155216861Sbz 156192895Sjamie return (rval); 157192895Sjamie} 158192895Sjamie 159202468Sbzstatic Elf_Brandinfo * 160202468Sbz__elfN(get_brandinfo)(const Elf_Ehdr *hdr, const char *interp) 161202468Sbz{ 162202468Sbz Elf_Brandinfo *bi; 163202468Sbz int i; 164202468Sbz 165192895Sjamie /* 166216861Sbz * We support three types of branding -- (1) the ELF EI_OSABI field 167192895Sjamie * that SCO added to the ELF spec, (2) FreeBSD 3.x's traditional string 168192895Sjamie * branding w/in the ELF header, and (3) path of the `interp_path' 169192895Sjamie * field. We should also look for an ".note.ABI-tag" ELF section now 170202468Sbz * in all Linux ELF binaries, FreeBSD 4.1+, and some NetBSD ones. 171202468Sbz */ 172202468Sbz 173202468Sbz /* If the executable has a brand, search for it in the brand list. */ 174202468Sbz for (i = 0; i < MAX_BRANDS; i++) { 175202468Sbz bi = elf_brand_list[i]; 176195870Sjamie if (bi != NULL && hdr->e_machine == bi->machine && 177216861Sbz (hdr->e_ident[EI_OSABI] == bi->brand || 178195870Sjamie strncmp((const char *)&hdr->e_ident[OLD_EI_BRAND], 179195870Sjamie bi->compat_3_brand, strlen(bi->compat_3_brand)) == 0)) 180195870Sjamie return (bi); 181195870Sjamie } 182195870Sjamie 183195870Sjamie /* Lacking a known brand, search for a recognized interpreter. 
*/ 184195870Sjamie if (interp != NULL) { 185195870Sjamie for (i = 0; i < MAX_BRANDS; i++) { 186195870Sjamie bi = elf_brand_list[i]; 187195870Sjamie if (bi != NULL && hdr->e_machine == bi->machine && 188192895Sjamie strcmp(interp, bi->interp_path) == 0) 189195870Sjamie return (bi); 190192895Sjamie } 191192895Sjamie } 192195870Sjamie 193192895Sjamie /* Lacking a recognized interpreter, try the default brand */ 194192895Sjamie for (i = 0; i < MAX_BRANDS; i++) { 195216861Sbz bi = elf_brand_list[i]; 196192895Sjamie if (bi != NULL && hdr->e_machine == bi->machine && 197192895Sjamie __elfN(fallback_brand) == bi->brand) 198192895Sjamie return (bi); 199192895Sjamie } 200192895Sjamie return (NULL); 201192895Sjamie} 202192895Sjamie 203192895Sjamiestatic int 204192895Sjamie__elfN(check_header)(const Elf_Ehdr *hdr) 205232059Smm{ 206232059Smm Elf_Brandinfo *bi; 207232186Smm int i; 208232278Smm 209254741Sdelphij if (!IS_ELF(*hdr) || 210277985Sjamie hdr->e_ident[EI_CLASS] != ELF_TARG_CLASS || 211295951Saraujo hdr->e_ident[EI_DATA] != ELF_TARG_DATA || 212295951Saraujo hdr->e_ident[EI_VERSION] != EV_CURRENT || 213192895Sjamie hdr->e_phentsize != sizeof(Elf_Phdr) || 214216861Sbz hdr->e_version != ELF_TARG_VER) 215192895Sjamie return (ENOEXEC); 216192895Sjamie 217192895Sjamie /* 218192895Sjamie * Make sure we have at least one brand for this machine. 
219192895Sjamie */ 220192895Sjamie 221192895Sjamie for (i = 0; i < MAX_BRANDS; i++) { 222192895Sjamie bi = elf_brand_list[i]; 223192895Sjamie if (bi != NULL && bi->machine == hdr->e_machine) 224232059Smm break; 225232059Smm } 226232186Smm if (i == MAX_BRANDS) 227232278Smm return (ENOEXEC); 228254741Sdelphij 229277985Sjamie return (0); 230295951Saraujo} 231295951Saraujo 232192895Sjamiestatic int 233216861Sbz__elfN(map_partial)(vm_map_t map, vm_object_t object, vm_ooffset_t offset, 234192895Sjamie vm_offset_t start, vm_offset_t end, vm_prot_t prot) 235196002Sjamie{ 236196002Sjamie struct sf_buf *sf; 237232059Smm int error; 238192895Sjamie vm_offset_t off; 239196002Sjamie 240231267Smm /* 241192895Sjamie * Create the page if it doesn't exist yet. Ignore errors. 242193865Sjamie */ 243192895Sjamie vm_map_lock(map); 244192895Sjamie vm_map_insert(map, NULL, 0, trunc_page(start), round_page(end), 245280632Sian VM_PROT_ALL, VM_PROT_ALL, 0); 246280632Sian vm_map_unlock(map); 247280632Sian 248280632Sian /* 249280632Sian * Find the page from the underlying object. 
250280632Sian */ 251280632Sian if (object) { 252280632Sian sf = vm_imgact_map_page(object, offset); 253280632Sian if (sf == NULL) 254280632Sian return (KERN_FAILURE); 255280632Sian off = offset - trunc_page(offset); 256280632Sian error = copyout((caddr_t)sf_buf_kva(sf) + off, (caddr_t)start, 257280632Sian end - start); 258192895Sjamie vm_imgact_unmap_page(sf); 259185435Sbz if (error) { 260185435Sbz return (KERN_FAILURE); 261185435Sbz } 262185435Sbz } 263185435Sbz 264185435Sbz return (KERN_SUCCESS); 265185435Sbz} 266185435Sbz 267185435Sbzstatic int 268185435Sbz__elfN(map_insert)(vm_map_t map, vm_object_t object, vm_ooffset_t offset, 269185435Sbz vm_offset_t start, vm_offset_t end, vm_prot_t prot, int cow) 270185435Sbz{ 271185435Sbz struct sf_buf *sf; 272185435Sbz vm_offset_t off; 273185435Sbz vm_size_t sz; 274185435Sbz int error, rv; 275185435Sbz 276185435Sbz if (start != trunc_page(start)) { 277185435Sbz rv = __elfN(map_partial)(map, object, offset, start, 278185435Sbz round_page(start), prot); 279185435Sbz if (rv) 280185435Sbz return (rv); 281185435Sbz offset += round_page(start) - start; 282185435Sbz start = round_page(start); 283185435Sbz } 284185435Sbz if (end != round_page(end)) { 285185435Sbz rv = __elfN(map_partial)(map, object, offset + 286185435Sbz trunc_page(end) - start, trunc_page(end), end, prot); 287185435Sbz if (rv) 288185435Sbz return (rv); 289185435Sbz end = trunc_page(end); 290185435Sbz } 291185435Sbz if (end > start) { 292185435Sbz if (offset & PAGE_MASK) { 293185435Sbz /* 294185435Sbz * The mapping is not page aligned. This means we have 295185435Sbz * to copy the data. Sigh. 
296190466Sjamie */ 297185435Sbz rv = vm_map_find(map, NULL, 0, &start, end - start, 298185435Sbz FALSE, prot | VM_PROT_WRITE, VM_PROT_ALL, 0); 299185435Sbz if (rv) 300185435Sbz return (rv); 301185435Sbz if (object == NULL) 302185435Sbz return (KERN_SUCCESS); 303185435Sbz for (; start < end; start += sz) { 304185435Sbz sf = vm_imgact_map_page(object, offset); 305185435Sbz if (sf == NULL) 306191673Sjamie return (KERN_FAILURE); 307191673Sjamie off = offset - trunc_page(offset); 308191673Sjamie sz = end - start; 309191673Sjamie if (sz > PAGE_SIZE - off) 310191673Sjamie sz = PAGE_SIZE - off; 311191673Sjamie error = copyout((caddr_t)sf_buf_kva(sf) + off, 312225617Skmacy (caddr_t)start, sz); 313185435Sbz vm_imgact_unmap_page(sf); 314191673Sjamie if (error) { 315191673Sjamie return (KERN_FAILURE); 316192895Sjamie } 317185435Sbz offset += sz; 318191673Sjamie } 319191673Sjamie rv = KERN_SUCCESS; 320191673Sjamie } else { 321185435Sbz vm_map_lock(map); 322191673Sjamie rv = vm_map_insert(map, object, offset, start, end, 323191673Sjamie prot, VM_PROT_ALL, cow); 324191673Sjamie vm_map_unlock(map); 325191673Sjamie } 326185435Sbz return (rv); 327192895Sjamie } else { 328192895Sjamie return (KERN_SUCCESS); 329191673Sjamie } 330191673Sjamie} 331191673Sjamie 332192895Sjamiestatic int 333192895Sjamie__elfN(load_section)(struct vmspace *vmspace, 334192895Sjamie vm_object_t object, vm_offset_t offset, 335258929Speter caddr_t vmaddr, size_t memsz, size_t filsz, vm_prot_t prot, 336191673Sjamie size_t pagesize) 337191673Sjamie{ 338191673Sjamie struct sf_buf *sf; 339191673Sjamie size_t map_len; 340185435Sbz vm_offset_t map_addr; 341191673Sjamie int error, rv, cow; 342191673Sjamie size_t copy_len; 343185435Sbz vm_offset_t file_addr; 344191673Sjamie 345185435Sbz /* 346191673Sjamie * It's necessary to fail if the filsz + offset taken from the 347191673Sjamie * header is greater than the actual file pager object's size. 
348191673Sjamie * If we were to allow this, then the vm_map_find() below would 349191673Sjamie * walk right off the end of the file object and into the ether. 350191673Sjamie * 351192895Sjamie * While I'm here, might as well check for something else that 352192895Sjamie * is invalid: filsz cannot be greater than memsz. 353192895Sjamie */ 354192895Sjamie if ((off_t)filsz + offset > object->un_pager.vnp.vnp_size || 355192895Sjamie filsz > memsz) { 356192895Sjamie uprintf("elf_load_section: truncated ELF file\n"); 357192895Sjamie return (ENOEXEC); 358192895Sjamie } 359192895Sjamie 360192895Sjamie#define trunc_page_ps(va, ps) ((va) & ~(ps - 1)) 361192895Sjamie#define round_page_ps(va, ps) (((va) + (ps - 1)) & ~(ps - 1)) 362192895Sjamie 363193865Sjamie map_addr = trunc_page_ps((vm_offset_t)vmaddr, pagesize); 364193865Sjamie file_addr = trunc_page_ps(offset, pagesize); 365193865Sjamie 366193865Sjamie /* 367193865Sjamie * We have two choices. We can either clear the data in the last page 368193865Sjamie * of an oversized mapping, or we can start the anon mapping a page 369193865Sjamie * early and copy the initialized data into that first page. We 370193865Sjamie * choose the second.. 371193865Sjamie */ 372192895Sjamie if (memsz > filsz) 373192895Sjamie map_len = trunc_page_ps(offset + filsz, pagesize) - file_addr; 374185435Sbz else 375193865Sjamie map_len = round_page_ps(offset + filsz, pagesize) - file_addr; 376192895Sjamie 377192895Sjamie if (map_len != 0) { 378192895Sjamie vm_object_reference(object); 379192895Sjamie 380192895Sjamie /* cow flags: don't dump readonly sections in core */ 381192895Sjamie cow = MAP_COPY_ON_WRITE | MAP_PREFAULT | 382192895Sjamie (prot & VM_PROT_WRITE ? 
0 : MAP_DISABLE_COREDUMP); 383192895Sjamie 384192895Sjamie rv = __elfN(map_insert)(&vmspace->vm_map, 385192895Sjamie object, 386192895Sjamie file_addr, /* file offset */ 387192895Sjamie map_addr, /* virtual start */ 388192895Sjamie map_addr + map_len,/* virtual end */ 389192895Sjamie prot, 390192895Sjamie cow); 391192895Sjamie if (rv != KERN_SUCCESS) { 392192895Sjamie vm_object_deallocate(object); 393192895Sjamie return (EINVAL); 394192895Sjamie } 395192895Sjamie 396192895Sjamie /* we can stop now if we've covered it all */ 397192895Sjamie if (memsz == filsz) { 398192895Sjamie return (0); 399192895Sjamie } 400192895Sjamie } 401192895Sjamie 402192895Sjamie 403192895Sjamie /* 404192895Sjamie * We have to get the remaining bit of the file into the first part 405192895Sjamie * of the oversized map segment. This is normally because the .data 406192895Sjamie * segment in the file is extended to provide bss. It's a neat idea 407192895Sjamie * to try and save a page, but it's a pain in the behind to implement. 408192895Sjamie */ 409192895Sjamie copy_len = (offset + filsz) - trunc_page_ps(offset + filsz, pagesize); 410192895Sjamie map_addr = trunc_page_ps((vm_offset_t)vmaddr + filsz, pagesize); 411192895Sjamie map_len = round_page_ps((vm_offset_t)vmaddr + memsz, pagesize) - 412192895Sjamie map_addr; 413192895Sjamie 414192895Sjamie /* This had damn well better be true! 
*/ 415192895Sjamie if (map_len != 0) { 416192895Sjamie rv = __elfN(map_insert)(&vmspace->vm_map, NULL, 0, map_addr, 417192895Sjamie map_addr + map_len, VM_PROT_ALL, 0); 418192895Sjamie if (rv != KERN_SUCCESS) { 419191673Sjamie return (EINVAL); 420192895Sjamie } 421192895Sjamie } 422191673Sjamie 423191673Sjamie if (copy_len != 0) { 424192895Sjamie vm_offset_t off; 425192895Sjamie 426192895Sjamie sf = vm_imgact_map_page(object, offset + filsz); 427191673Sjamie if (sf == NULL) 428192895Sjamie return (EIO); 429192895Sjamie 430191673Sjamie /* send the page fragment to user space */ 431192895Sjamie off = trunc_page_ps(offset + filsz, pagesize) - 432192895Sjamie trunc_page(offset + filsz); 433192895Sjamie error = copyout((caddr_t)sf_buf_kva(sf) + off, 434191673Sjamie (caddr_t)map_addr, copy_len); 435192895Sjamie vm_imgact_unmap_page(sf); 436191673Sjamie if (error) { 437191673Sjamie return (error); 438191673Sjamie } 439192895Sjamie } 440191673Sjamie 441192895Sjamie /* 442191673Sjamie * set it to the specified protection. 443191673Sjamie * XXX had better undo the damage from pasting over the cracks here! 444192895Sjamie */ 445192895Sjamie vm_map_protect(&vmspace->vm_map, trunc_page(map_addr), 446192895Sjamie round_page(map_addr + map_len), prot, FALSE); 447192895Sjamie 448192895Sjamie return (0); 449192895Sjamie} 450192895Sjamie 451192895Sjamie/* 452192895Sjamie * Load the file "file" into memory. It may be either a shared object 453192895Sjamie * or an executable. 454192895Sjamie * 455192895Sjamie * The "addr" reference parameter is in/out. On entry, it specifies 456192895Sjamie * the address where a shared object should be loaded. If the file is 457192895Sjamie * an executable, this value is ignored. On exit, "addr" specifies 458192895Sjamie * where the file was actually loaded. 459192895Sjamie * 460192895Sjamie * The "entry" reference parameter is out only. On exit, it specifies 461192895Sjamie * the entry point for the loaded file. 
462192895Sjamie */ 463192895Sjamiestatic int 464192895Sjamie__elfN(load_file)(struct proc *p, const char *file, u_long *addr, 465192895Sjamie u_long *entry, size_t pagesize) 466192895Sjamie{ 467192895Sjamie struct { 468192895Sjamie struct nameidata nd; 469192895Sjamie struct vattr attr; 470192895Sjamie struct image_params image_params; 471192895Sjamie } *tempdata; 472192895Sjamie const Elf_Ehdr *hdr = NULL; 473191673Sjamie const Elf_Phdr *phdr = NULL; 474191673Sjamie struct nameidata *nd; 475191673Sjamie struct vmspace *vmspace = p->p_vmspace; 476191673Sjamie struct vattr *attr; 477192895Sjamie struct image_params *imgp; 478192895Sjamie vm_prot_t prot; 479191673Sjamie u_long rbase; 480192895Sjamie u_long base_addr = 0; 481192895Sjamie int vfslocked, error, i, numsegs; 482192895Sjamie 483192895Sjamie if (curthread->td_proc != p) 484192895Sjamie panic("elf_load_file - thread"); /* XXXKSE DIAGNOSTIC */ 485192895Sjamie 486192895Sjamie tempdata = malloc(sizeof(*tempdata), M_TEMP, M_WAITOK); 487192895Sjamie nd = &tempdata->nd; 488192895Sjamie attr = &tempdata->attr; 489191673Sjamie imgp = &tempdata->image_params; 490191673Sjamie 491191673Sjamie /* 492191673Sjamie * Initialize part of the common data 493192895Sjamie */ 494192895Sjamie imgp->proc = p; 495185435Sbz imgp->attr = attr; 496185435Sbz imgp->firstpage = NULL; 497192895Sjamie imgp->image_header = NULL; 498192895Sjamie imgp->object = NULL; 499192895Sjamie imgp->execlabel = NULL; 500192895Sjamie 501192895Sjamie /* XXXKSE */ 502192895Sjamie NDINIT(nd, LOOKUP, MPSAFE|LOCKLEAF|FOLLOW, UIO_SYSSPACE, file, 503192895Sjamie curthread); 504192895Sjamie vfslocked = 0; 505192895Sjamie if ((error = namei(nd)) != 0) { 506192895Sjamie nd->ni_vp = NULL; 507192895Sjamie goto fail; 508185435Sbz } 509192895Sjamie vfslocked = NDHASGIANT(nd); 510192895Sjamie NDFREE(nd, NDF_ONLY_PNBUF); 511191673Sjamie imgp->vp = nd->ni_vp; 512191673Sjamie 513191673Sjamie /* 514185435Sbz * Check permissions, modes, uid, etc on the file, and "open" it. 
515185435Sbz */ 516192895Sjamie error = exec_check_permissions(imgp); 517191673Sjamie if (error) 518191673Sjamie goto fail; 519191673Sjamie 520191673Sjamie error = exec_map_first_page(imgp); 521191673Sjamie if (error) 522191673Sjamie goto fail; 523191673Sjamie 524191673Sjamie /* 525225617Skmacy * Also make certain that the interpreter stays the same, so set 526185435Sbz * its VV_TEXT flag, too. 527191673Sjamie */ 528191673Sjamie nd->ni_vp->v_vflag |= VV_TEXT; 529191673Sjamie 530191673Sjamie imgp->object = nd->ni_vp->v_object; 531191673Sjamie 532191673Sjamie hdr = (const Elf_Ehdr *)imgp->image_header; 533191673Sjamie if ((error = __elfN(check_header)(hdr)) != 0) 534191673Sjamie goto fail; 535191673Sjamie if (hdr->e_type == ET_DYN) 536191673Sjamie rbase = *addr; 537191673Sjamie else if (hdr->e_type == ET_EXEC) 538191673Sjamie rbase = 0; 539191673Sjamie else { 540191673Sjamie error = ENOEXEC; 541191673Sjamie goto fail; 542191673Sjamie } 543191673Sjamie 544191673Sjamie /* Only support headers that fit within first page for now */ 545191673Sjamie /* (multiplication of two Elf_Half fields will not overflow) */ 546185435Sbz if ((hdr->e_phoff > PAGE_SIZE) || 547190466Sjamie (hdr->e_phentsize * hdr->e_phnum) > PAGE_SIZE - hdr->e_phoff) { 548185435Sbz error = ENOEXEC; 549185435Sbz goto fail; 550185435Sbz } 551185435Sbz 552191673Sjamie phdr = (const Elf_Phdr *)(imgp->image_header + hdr->e_phoff); 553191673Sjamie 554196135Sbz for (i = 0, numsegs = 0; i < hdr->e_phnum; i++) { 555191673Sjamie if (phdr[i].p_type == PT_LOAD) { /* Loadable segment */ 556196835Sjamie prot = 0; 557280632Sian if (phdr[i].p_flags & PF_X) 558192895Sjamie prot |= VM_PROT_EXECUTE; 559196135Sbz if (phdr[i].p_flags & PF_W) 560191673Sjamie prot |= VM_PROT_WRITE; 561192895Sjamie if (phdr[i].p_flags & PF_R) 562193066Sjamie prot |= VM_PROT_READ; 563298833Sjamie 564298833Sjamie if ((error = __elfN(load_section)(vmspace, 565298833Sjamie imgp->object, phdr[i].p_offset, 566231267Smm 
(caddr_t)(uintptr_t)phdr[i].p_vaddr + rbase, 567195870Sjamie phdr[i].p_memsz, phdr[i].p_filesz, prot, 568280632Sian pagesize)) != 0) 569230129Smm goto fail; 570191673Sjamie /* 571192895Sjamie * Establish the base address if this is the 572191673Sjamie * first segment. 573191673Sjamie */ 574195974Sjamie if (numsegs == 0) 575191673Sjamie base_addr = trunc_page(phdr[i].p_vaddr + 576191673Sjamie rbase); 577195974Sjamie numsegs++; 578191673Sjamie } 579224290Smckusick } 580224290Smckusick *addr = base_addr; 581191673Sjamie *entry = (unsigned long)hdr->e_entry + rbase; 582185435Sbz 583191673Sjamiefail: 584191673Sjamie if (imgp->firstpage) 585191673Sjamie exec_unmap_first_page(imgp); 586191673Sjamie 587191673Sjamie if (nd->ni_vp) 588298833Sjamie vput(nd->ni_vp); 589194762Sjamie 590192895Sjamie VFS_UNLOCK_GIANT(vfslocked); 591191673Sjamie free(tempdata, M_TEMP); 592191673Sjamie 593191673Sjamie return (error); 594185435Sbz} 595191673Sjamie 596191673Sjamiestatic int 597191673Sjamie__CONCAT(exec_, __elfN(imgact))(struct image_params *imgp) 598191673Sjamie{ 599185435Sbz const Elf_Ehdr *hdr = (const Elf_Ehdr *)imgp->image_header; 600191673Sjamie const Elf_Phdr *phdr; 601191673Sjamie Elf_Auxargs *elf_auxargs = NULL; 602191673Sjamie struct vmspace *vmspace; 603185435Sbz vm_prot_t prot; 604191673Sjamie u_long text_size = 0, data_size = 0, total_size = 0; 605191673Sjamie u_long text_addr = 0, data_addr = 0; 606191673Sjamie u_long seg_size, seg_addr; 607185435Sbz u_long addr, entry = 0, proghdr = 0; 608185435Sbz int error = 0, i; 609185435Sbz const char *interp = NULL; 610185435Sbz Elf_Brandinfo *brand_info; 611185435Sbz char *path; 612185435Sbz struct thread *td = curthread; 613230407Smm struct sysentvec *sv; 614191673Sjamie 615298833Sjamie /* 616298833Sjamie * Do we have a valid ELF header ? 617298833Sjamie * 618298833Sjamie * Only allow ET_EXEC & ET_DYN here, reject ET_DYN later 619298833Sjamie * if particular brand doesn't support it. 
620298833Sjamie */ 621298833Sjamie if (__elfN(check_header)(hdr) != 0 || 622191673Sjamie (hdr->e_type != ET_EXEC && hdr->e_type != ET_DYN)) 623191673Sjamie return (-1); 624191673Sjamie 625191673Sjamie /* 626191673Sjamie * From here on down, we return an errno, not -1, as we've 627191673Sjamie * detected an ELF file. 628191673Sjamie */ 629191673Sjamie 630191673Sjamie if ((hdr->e_phoff > PAGE_SIZE) || 631191673Sjamie (hdr->e_phoff + hdr->e_phentsize * hdr->e_phnum) > PAGE_SIZE) { 632191673Sjamie /* Only support headers in first page for now */ 633191673Sjamie return (ENOEXEC); 634191673Sjamie } 635191673Sjamie phdr = (const Elf_Phdr *)(imgp->image_header + hdr->e_phoff); 636194762Sjamie for (i = 0; i < hdr->e_phnum; i++) { 637194762Sjamie switch (phdr[i].p_type) { 638194762Sjamie case PT_INTERP: /* Path to interpreter */ 639194762Sjamie if (phdr[i].p_filesz > MAXPATHLEN || 640194762Sjamie phdr[i].p_offset + phdr[i].p_filesz > PAGE_SIZE) 641194762Sjamie return (ENOEXEC); 642194762Sjamie interp = imgp->image_header + phdr[i].p_offset; 643194762Sjamie break; 644194762Sjamie default: 645192895Sjamie break; 646212436Sjamie } 647212436Sjamie } 648212436Sjamie 649192895Sjamie brand_info = __elfN(get_brandinfo)(hdr, interp); 650212436Sjamie if (brand_info == NULL) { 651212436Sjamie uprintf("ELF binary type \"%u\" not known.\n", 652212436Sjamie hdr->e_ident[EI_OSABI]); 653212436Sjamie return (ENOEXEC); 654212436Sjamie } 655192895Sjamie if (hdr->e_type == ET_DYN && 656231267Smm (brand_info->flags & BI_CAN_EXEC_DYN) == 0) { 657231267Smm error = ENOEXEC; 658231267Smm goto fail; 659231267Smm } 660231267Smm sv = brand_info->sysvec; 661231267Smm if (interp != NULL && brand_info->interp_newpath != NULL) 662231267Smm interp = brand_info->interp_newpath; 663231267Smm 664191673Sjamie /* 665192895Sjamie * Avoid a possible deadlock if the current address space is destroyed 666192895Sjamie * and that address space maps the locked vnode. 
In the common case, 667192895Sjamie * the locked vnode's v_usecount is decremented but remains greater 668192895Sjamie * than zero. Consequently, the vnode lock is not needed by vrele(). 669192895Sjamie * However, in cases where the vnode lock is external, such as nullfs, 670192895Sjamie * v_usecount may become zero. 671192895Sjamie */ 672191673Sjamie VOP_UNLOCK(imgp->vp, 0, td); 673195870Sjamie 674195870Sjamie exec_new_vmspace(imgp, sv); 675195870Sjamie 676195870Sjamie vn_lock(imgp->vp, LK_EXCLUSIVE | LK_RETRY, td); 677195870Sjamie 678195870Sjamie vmspace = imgp->proc->p_vmspace; 679195870Sjamie 680195870Sjamie for (i = 0; i < hdr->e_phnum; i++) { 681195870Sjamie switch (phdr[i].p_type) { 682195870Sjamie case PT_LOAD: /* Loadable segment */ 683195870Sjamie prot = 0; 684195870Sjamie if (phdr[i].p_flags & PF_X) 685195870Sjamie prot |= VM_PROT_EXECUTE; 686195870Sjamie if (phdr[i].p_flags & PF_W) 687195870Sjamie prot |= VM_PROT_WRITE; 688195870Sjamie if (phdr[i].p_flags & PF_R) 689195870Sjamie prot |= VM_PROT_READ; 690195870Sjamie 691195870Sjamie#if defined(__ia64__) && __ELF_WORD_SIZE == 32 && defined(IA32_ME_HARDER) 692195870Sjamie /* 693195870Sjamie * Some x86 binaries assume read == executable, 694195870Sjamie * notably the M3 runtime and therefore cvsup 695195870Sjamie */ 696195870Sjamie if (prot & VM_PROT_READ) 697195870Sjamie prot |= VM_PROT_EXECUTE; 698195870Sjamie#endif 699195870Sjamie 700195870Sjamie if ((error = __elfN(load_section)(vmspace, 701211085Sjamie imgp->object, phdr[i].p_offset, 702211085Sjamie (caddr_t)(uintptr_t)phdr[i].p_vaddr, 703211085Sjamie phdr[i].p_memsz, phdr[i].p_filesz, prot, 704211085Sjamie sv->sv_pagesize)) != 0) 705211085Sjamie return (error); 706211085Sjamie 707194251Sjamie /* 708194251Sjamie * If this segment contains the program headers, 709194251Sjamie * remember their virtual address for the AT_PHDR 710194251Sjamie * aux entry. Static binaries don't usually include 711194251Sjamie * a PT_PHDR entry. 
712194251Sjamie */ 713194251Sjamie if (phdr[i].p_offset == 0 && 714195974Sjamie hdr->e_phoff + hdr->e_phnum * hdr->e_phentsize 715195974Sjamie <= phdr[i].p_filesz) 716195974Sjamie proghdr = phdr[i].p_vaddr + hdr->e_phoff; 717195974Sjamie 718195974Sjamie seg_addr = trunc_page(phdr[i].p_vaddr); 719195974Sjamie seg_size = round_page(phdr[i].p_memsz + 720195974Sjamie phdr[i].p_vaddr - seg_addr); 721195974Sjamie 722195974Sjamie /* 723195974Sjamie * Is this .text or .data? We can't use 724195974Sjamie * VM_PROT_WRITE or VM_PROT_EXEC, it breaks the 725195974Sjamie * alpha terribly and possibly does other bad 726195974Sjamie * things so we stick to the old way of figuring 727195974Sjamie * it out: If the segment contains the program 728191673Sjamie * entry point, it's a text segment, otherwise it 729192895Sjamie * is a data segment. 730192895Sjamie * 731192895Sjamie * Note that obreak() assumes that data_addr + 732192895Sjamie * data_size == end of data load area, and the ELF 733192895Sjamie * file format expects segments to be sorted by 734192895Sjamie * address. If multiple data segments exist, the 735192895Sjamie * last one will be used. 
		 */
			/*
			 * Classify the PT_LOAD segment: the one containing
			 * e_entry is treated as text, every other loadable
			 * segment is accounted as data.
			 */
			if (hdr->e_entry >= phdr[i].p_vaddr &&
			    hdr->e_entry < (phdr[i].p_vaddr +
			    phdr[i].p_memsz)) {
				text_size = seg_size;
				text_addr = seg_addr;
				entry = (u_long)hdr->e_entry;
			} else {
				data_size = seg_size;
				data_addr = seg_addr;
			}
			total_size += seg_size;
			break;
		case PT_PHDR: 	/* Program header table info */
			proghdr = phdr[i].p_vaddr;
			break;
		default:
			break;
		}
	}

	/*
	 * No separate data segment was seen (e.g. a single R+X load
	 * segment); alias the text segment so the data address/size
	 * recorded below are still meaningful.
	 */
	if (data_addr == 0 && data_size == 0) {
		data_addr = text_addr;
		data_size = text_size;
	}

	/*
	 * Check limits.  It should be safe to check the
	 * limits after loading the segments since we do
	 * not actually fault in all the segments pages.
	 */
	PROC_LOCK(imgp->proc);
	if (data_size > lim_cur(imgp->proc, RLIMIT_DATA) ||
	    text_size > maxtsiz ||
	    total_size > lim_cur(imgp->proc, RLIMIT_VMEM)) {
		PROC_UNLOCK(imgp->proc);
		return (ENOMEM);
	}

	/* Record the segment layout in the vmspace (page units). */
	vmspace->vm_tsize = text_size >> PAGE_SHIFT;
	vmspace->vm_taddr = (caddr_t)(uintptr_t)text_addr;
	vmspace->vm_dsize = data_size >> PAGE_SHIFT;
	vmspace->vm_daddr = (caddr_t)(uintptr_t)data_addr;

	/*
	 * We load the dynamic linker where a userland call
	 * to mmap(0, ...) would put it.  The rationale behind this
	 * calculation is that it leaves room for the heap to grow to
	 * its maximum allowed size.
	 */
	addr = round_page((vm_offset_t)imgp->proc->p_vmspace->vm_daddr +
	    lim_max(imgp->proc, RLIMIT_DATA));
	PROC_UNLOCK(imgp->proc);

	imgp->entry_addr = entry;

	imgp->proc->p_sysent = sv;
	if (interp != NULL) {
		/*
		 * The image vnode lock is dropped while the interpreter is
		 * loaded — NOTE(review): presumably because load_file()
		 * does its own vnode locking; confirm against load_file.
		 */
		VOP_UNLOCK(imgp->vp, 0, td);
		if (brand_info->emul_path != NULL &&
		    brand_info->emul_path[0] != '\0') {
			/*
			 * First try the interpreter relative to the brand's
			 * emulation root (e.g. for Linux binaries); on
			 * success, NULL out interp so the fallback below and
			 * the error report are skipped.
			 */
			path = malloc(MAXPATHLEN, M_TEMP, M_WAITOK);
			snprintf(path, MAXPATHLEN, "%s%s",
			    brand_info->emul_path, interp);
			error = __elfN(load_file)(imgp->proc, path, &addr,
			    &imgp->entry_addr, sv->sv_pagesize);
			free(path, M_TEMP);
			if (error == 0)
				interp = NULL;
		}
		/* Fall back to the interpreter path as recorded in the binary. */
		if (interp != NULL) {
			error = __elfN(load_file)(imgp->proc, interp, &addr,
			    &imgp->entry_addr, sv->sv_pagesize);
		}
		vn_lock(imgp->vp, LK_EXCLUSIVE | LK_RETRY, td);
		if (error != 0) {
			uprintf("ELF interpreter %s not found\n", interp);
			return (error);
		}
	}

	/*
	 * Construct auxargs table (used by the fixup routine)
	 */
	elf_auxargs = malloc(sizeof(Elf_Auxargs), M_TEMP, M_WAITOK);
	elf_auxargs->execfd = -1;		/* no fd passed to interpreter */
	elf_auxargs->phdr = proghdr;		/* from PT_PHDR above */
	elf_auxargs->phent = hdr->e_phentsize;
	elf_auxargs->phnum = hdr->e_phnum;
	elf_auxargs->pagesz = PAGE_SIZE;
	elf_auxargs->base = addr;		/* interpreter load base */
	elf_auxargs->flags = 0;
	elf_auxargs->entry = entry;
	elf_auxargs->trace = elf_trace;

	imgp->auxargs = elf_auxargs;
	imgp->interpreted = 0;

	return (error);
}

#define	suword	__CONCAT(suword, __ELF_WORD_SIZE)

/*
 * Copy the ELF auxiliary argument vector (AT_* entries) out to the new
 * process's stack, just past argv/envp, and push argc.  Consumes and
 * frees imgp->auxargs.  Returns 0.
 */
int
__elfN(freebsd_fixup)(register_t **stack_base, struct image_params *imgp)
{
	Elf_Auxargs *args = (Elf_Auxargs *)imgp->auxargs;
	Elf_Addr *base;
	Elf_Addr *pos;

	base = (Elf_Addr *)*stack_base;
	/* Skip argc's slot, argv[] and envp[] including their NULL terminators. */
	pos = base + (imgp->args->argc + imgp->args->envc + 2);

	if (args->trace) {
		AUXARGS_ENTRY(pos, AT_DEBUG, 1);
	}
	if (args->execfd != -1) {
		AUXARGS_ENTRY(pos, AT_EXECFD, args->execfd);
	}
	AUXARGS_ENTRY(pos, AT_PHDR, args->phdr);
	AUXARGS_ENTRY(pos, AT_PHENT, args->phent);
	AUXARGS_ENTRY(pos, AT_PHNUM, args->phnum);
	AUXARGS_ENTRY(pos, AT_PAGESZ, args->pagesz);
	AUXARGS_ENTRY(pos, AT_FLAGS, args->flags);
	AUXARGS_ENTRY(pos, AT_ENTRY, args->entry);
	AUXARGS_ENTRY(pos, AT_BASE, args->base);
	AUXARGS_ENTRY(pos, AT_NULL, 0);		/* vector terminator */

	free(imgp->auxargs, M_TEMP);
	imgp->auxargs = NULL;

	/* Push argc in the slot just below the argument vector. */
	base--;
	suword(base, (long)imgp->args->argc);
	*stack_base = (register_t *)base;
	return (0);
}

/*
 * Code for generating ELF core dumps.
 */

typedef void (*segment_callback)(vm_map_entry_t, void *);

/* Closure for cb_put_phdr().
*/ 880185435Sbzstruct phdr_closure { 881191673Sjamie Elf_Phdr *phdr; /* Program header to fill in */ 882191673Sjamie Elf_Off offset; /* Offset of segment in core file */ 883185435Sbz}; 884185435Sbz 885191673Sjamie/* Closure for cb_size_segment(). */ 886191673Sjamiestruct sseg_closure { 887277279Sjamie int count; /* Count of writable segments. */ 888191673Sjamie size_t size; /* Total size of all writable segments. */ 889191673Sjamie}; 890191673Sjamie 891191673Sjamiestatic void cb_put_phdr(vm_map_entry_t, void *); 892191673Sjamiestatic void cb_size_segment(vm_map_entry_t, void *); 893192895Sjamiestatic void each_writable_segment(struct thread *, segment_callback, void *); 894195870Sjamiestatic int __elfN(corehdr)(struct thread *, struct vnode *, struct ucred *, 895195870Sjamie int, void *, size_t); 896195870Sjamiestatic void __elfN(puthdr)(struct thread *, void *, size_t *, int); 897195870Sjamiestatic void __elfN(putnote)(void *, size_t *, const char *, int, 898195870Sjamie const void *, size_t); 899192895Sjamie 900192895Sjamieextern int osreldate; 901185435Sbz 902192895Sjamieint 903192895Sjamie__elfN(coredump)(td, vp, limit) 904185435Sbz struct thread *td; 905195974Sjamie struct vnode *vp; 906192895Sjamie off_t limit; 907192895Sjamie{ 908192895Sjamie struct ucred *cred = td->td_ucred; 909192895Sjamie int error = 0; 910192895Sjamie struct sseg_closure seginfo; 911192895Sjamie void *hdr; 912192895Sjamie size_t hdrsize; 913192895Sjamie 914192895Sjamie /* Size the program segments. */ 915192895Sjamie seginfo.count = 0; 916192895Sjamie seginfo.size = 0; 917192895Sjamie each_writable_segment(td, cb_size_segment, &seginfo); 918192895Sjamie 919192895Sjamie /* 920192895Sjamie * Calculate the size of the core file header area by making 921192895Sjamie * a dry run of generating it. Nothing is written, but the 922185435Sbz * size is calculated. 
923191673Sjamie */ 924185435Sbz hdrsize = 0; 925185435Sbz __elfN(puthdr)(td, (void *)NULL, &hdrsize, seginfo.count); 926195945Sjamie 927195945Sjamie if (hdrsize + seginfo.size >= limit) 928195945Sjamie return (EFAULT); 929195945Sjamie 930195945Sjamie /* 931195945Sjamie * Allocate memory for building the header, fill it up, 932195945Sjamie * and write it out. 933195945Sjamie */ 934195945Sjamie hdr = malloc(hdrsize, M_TEMP, M_WAITOK); 935230143Smm if (hdr == NULL) { 936191673Sjamie return (EINVAL); 937191673Sjamie } 938191673Sjamie error = __elfN(corehdr)(td, vp, cred, seginfo.count, hdr, hdrsize); 939191673Sjamie 940191673Sjamie /* Write the contents of all of the writable segments. */ 941191673Sjamie if (error == 0) { 942191673Sjamie Elf_Phdr *php; 943191673Sjamie off_t offset; 944191673Sjamie int i; 945191673Sjamie 946191673Sjamie php = (Elf_Phdr *)((char *)hdr + sizeof(Elf_Ehdr)) + 1; 947191673Sjamie offset = hdrsize; 948191673Sjamie for (i = 0; i < seginfo.count; i++) { 949191673Sjamie error = vn_rdwr_inchunks(UIO_WRITE, vp, 950191673Sjamie (caddr_t)(uintptr_t)php->p_vaddr, 951191673Sjamie php->p_filesz, offset, UIO_USERSPACE, 952191673Sjamie IO_UNIT | IO_DIRECT, cred, NOCRED, NULL, 953241896Skib curthread); /* XXXKSE */ 954230129Smm if (error != 0) 955230129Smm break; 956230129Smm offset += php->p_filesz; 957230129Smm php++; 958230129Smm } 959230129Smm } 960230407Smm free(hdr, M_TEMP); 961230407Smm 962230407Smm return (error); 963230407Smm} 964230407Smm 965230407Smm/* 966230129Smm * A callback for each_writable_segment() to write out the segment's 967230129Smm * program header entry. 
968230129Smm */ 969230129Smmstatic void 970230407Smmcb_put_phdr(entry, closure) 971230129Smm vm_map_entry_t entry; 972230129Smm void *closure; 973230129Smm{ 974230129Smm struct phdr_closure *phc = (struct phdr_closure *)closure; 975230129Smm Elf_Phdr *phdr = phc->phdr; 976230129Smm 977230129Smm phc->offset = round_page(phc->offset); 978230129Smm 979230129Smm phdr->p_type = PT_LOAD; 980230129Smm phdr->p_offset = phc->offset; 981192895Sjamie phdr->p_vaddr = entry->start; 982192895Sjamie phdr->p_paddr = 0; 983192895Sjamie phdr->p_filesz = phdr->p_memsz = entry->end - entry->start; 984192895Sjamie phdr->p_align = PAGE_SIZE; 985192895Sjamie phdr->p_flags = 0; 986192895Sjamie if (entry->protection & VM_PROT_READ) 987191673Sjamie phdr->p_flags |= PF_R; 988191673Sjamie if (entry->protection & VM_PROT_WRITE) 989185435Sbz phdr->p_flags |= PF_W; 990280632Sian if (entry->protection & VM_PROT_EXECUTE) 991280632Sian phdr->p_flags |= PF_X; 992280632Sian 993280632Sian phc->offset += phdr->p_filesz; 994280632Sian phc->phdr++; 995280632Sian} 996280632Sian 997280632Sian/* 998280632Sian * A callback for each_writable_segment() to gather information about 999280632Sian * the number of segments and their total size. 1000280632Sian */ 1001280632Sianstatic void 1002280632Siancb_size_segment(entry, closure) 1003280632Sian vm_map_entry_t entry; 1004280632Sian void *closure; 1005280632Sian{ 1006280632Sian struct sseg_closure *ssc = (struct sseg_closure *)closure; 1007280632Sian 1008280632Sian ssc->count++; 1009280632Sian ssc->size += entry->end - entry->start; 1010280632Sian} 1011280632Sian 1012280632Sian/* 1013280632Sian * For each writable segment in the process's memory map, call the given 1014280632Sian * function with a pointer to the map entry and some arbitrary 1015280632Sian * caller-supplied data. 
1016280632Sian */ 1017280632Sianstatic void 1018280632Sianeach_writable_segment(td, func, closure) 1019280632Sian struct thread *td; 1020280632Sian segment_callback func; 1021280632Sian void *closure; 1022280632Sian{ 1023280632Sian struct proc *p = td->td_proc; 1024280632Sian vm_map_t map = &p->p_vmspace->vm_map; 1025280632Sian vm_map_entry_t entry; 1026280632Sian 1027280632Sian for (entry = map->header.next; entry != &map->header; 1028280632Sian entry = entry->next) { 1029280632Sian vm_object_t obj; 1030191673Sjamie 1031298833Sjamie /* 1032191673Sjamie * Don't dump inaccessible mappings, deal with legacy 1033185435Sbz * coredump mode. 1034191673Sjamie * 1035298833Sjamie * Note that read-only segments related to the elf binary 1036196835Sjamie * are marked MAP_ENTRY_NOCOREDUMP now so we no longer 1037196835Sjamie * need to arbitrarily ignore such segments. 1038196835Sjamie */ 1039196835Sjamie if (elf_legacy_coredump) { 1040196835Sjamie if ((entry->protection & VM_PROT_RW) != VM_PROT_RW) 1041196835Sjamie continue; 1042298833Sjamie } else { 1043191673Sjamie if ((entry->protection & VM_PROT_ALL) == 0) 1044192895Sjamie continue; 1045192895Sjamie } 1046192895Sjamie 1047192895Sjamie /* 1048192895Sjamie * Dont include memory segment in the coredump if 1049192895Sjamie * MAP_NOCORE is set in mmap(2) or MADV_NOCORE in 1050192895Sjamie * madvise(2). Do not dump submaps (i.e. parts of the 1051191673Sjamie * kernel map). 1052191673Sjamie */ 1053191673Sjamie if (entry->eflags & (MAP_ENTRY_NOCOREDUMP|MAP_ENTRY_IS_SUB_MAP)) 1054191673Sjamie continue; 1055191673Sjamie 1056191673Sjamie if ((obj = entry->object.vm_object) == NULL) 1057191673Sjamie continue; 1058192895Sjamie 1059191673Sjamie /* Find the deepest backing object. */ 1060191673Sjamie while (obj->backing_object != NULL) 1061191673Sjamie obj = obj->backing_object; 1062191673Sjamie 1063191673Sjamie /* Ignore memory-mapped devices and such things. 
*/ 1064191673Sjamie if (obj->type != OBJT_DEFAULT && 1065191673Sjamie obj->type != OBJT_SWAP && 1066191673Sjamie obj->type != OBJT_VNODE) 1067192895Sjamie continue; 1068192895Sjamie 1069192895Sjamie (*func)(entry, closure); 1070192895Sjamie } 1071191673Sjamie} 1072191673Sjamie 1073191673Sjamie/* 1074191673Sjamie * Write the core file header to the file, including padding up to 1075191673Sjamie * the page boundary. 1076191673Sjamie */ 1077191673Sjamiestatic int 1078191673Sjamie__elfN(corehdr)(td, vp, cred, numsegs, hdr, hdrsize) 1079191673Sjamie struct thread *td; 1080191673Sjamie struct vnode *vp; 1081191673Sjamie struct ucred *cred; 1082191673Sjamie int numsegs; 1083191673Sjamie size_t hdrsize; 1084191673Sjamie void *hdr; 1085191673Sjamie{ 1086191673Sjamie size_t off; 1087191673Sjamie 1088192895Sjamie /* Fill in the header. */ 1089191673Sjamie bzero(hdr, hdrsize); 1090191673Sjamie off = 0; 1091191673Sjamie __elfN(puthdr)(td, hdr, &off, numsegs); 1092191673Sjamie 1093191673Sjamie /* Write it to the core file. 
*/ 1094191673Sjamie return (vn_rdwr_inchunks(UIO_WRITE, vp, hdr, hdrsize, (off_t)0, 1095191673Sjamie UIO_SYSSPACE, IO_UNIT | IO_DIRECT, cred, NOCRED, NULL, 1096191673Sjamie td)); /* XXXKSE */ 1097191673Sjamie} 1098191673Sjamie 1099191673Sjamie#if defined(COMPAT_IA32) && __ELF_WORD_SIZE == 32 1100191673Sjamietypedef struct prstatus32 elf_prstatus_t; 1101191673Sjamietypedef struct prpsinfo32 elf_prpsinfo_t; 1102191673Sjamietypedef struct fpreg32 elf_prfpregset_t; 1103191673Sjamietypedef struct fpreg32 elf_fpregset_t; 1104191673Sjamietypedef struct reg32 elf_gregset_t; 1105191673Sjamie#else 1106191673Sjamietypedef prstatus_t elf_prstatus_t; 1107191673Sjamietypedef prpsinfo_t elf_prpsinfo_t; 1108298833Sjamietypedef prfpregset_t elf_prfpregset_t; 1109191673Sjamietypedef prfpregset_t elf_fpregset_t; 1110196835Sjamietypedef gregset_t elf_gregset_t; 1111196835Sjamie#endif 1112196835Sjamie 1113196835Sjamiestatic void 1114192895Sjamie__elfN(puthdr)(struct thread *td, void *dst, size_t *off, int numsegs) 1115192895Sjamie{ 1116192895Sjamie struct { 1117192895Sjamie elf_prstatus_t status; 1118192895Sjamie elf_prfpregset_t fpregset; 1119192895Sjamie elf_prpsinfo_t psinfo; 1120196835Sjamie } *tempdata; 1121196835Sjamie elf_prstatus_t *status; 1122192895Sjamie elf_prfpregset_t *fpregset; 1123192895Sjamie elf_prpsinfo_t *psinfo; 1124192895Sjamie struct proc *p; 1125192895Sjamie struct thread *thr; 1126192895Sjamie size_t ehoff, noteoff, notesz, phoff; 1127192895Sjamie 1128192895Sjamie p = td->td_proc; 1129298833Sjamie 1130192895Sjamie ehoff = *off; 1131192895Sjamie *off += sizeof(Elf_Ehdr); 1132192895Sjamie 1133192895Sjamie phoff = *off; 1134192895Sjamie *off += (numsegs + 1) * sizeof(Elf_Phdr); 1135192895Sjamie 1136192895Sjamie noteoff = *off; 1137192895Sjamie /* 1138298833Sjamie * Don't allocate space for the notes if we're just calculating 1139192895Sjamie * the size of the header. We also don't collect the data. 
1140298833Sjamie */ 1141192895Sjamie if (dst != NULL) { 1142298833Sjamie tempdata = malloc(sizeof(*tempdata), M_TEMP, M_ZERO|M_WAITOK); 1143298833Sjamie status = &tempdata->status; 1144192895Sjamie fpregset = &tempdata->fpregset; 1145192895Sjamie psinfo = &tempdata->psinfo; 1146191673Sjamie } else { 1147192895Sjamie tempdata = NULL; 1148191673Sjamie status = NULL; 1149298833Sjamie fpregset = NULL; 1150191673Sjamie psinfo = NULL; 1151191673Sjamie } 1152191673Sjamie 1153191673Sjamie if (dst != NULL) { 1154191673Sjamie psinfo->pr_version = PRPSINFO_VERSION; 1155191673Sjamie psinfo->pr_psinfosz = sizeof(elf_prpsinfo_t); 1156191673Sjamie strlcpy(psinfo->pr_fname, p->p_comm, sizeof(psinfo->pr_fname)); 1157191673Sjamie /* 1158191673Sjamie * XXX - We don't fill in the command line arguments properly 1159191673Sjamie * yet. 1160191673Sjamie */ 1161191673Sjamie strlcpy(psinfo->pr_psargs, p->p_comm, 1162191673Sjamie sizeof(psinfo->pr_psargs)); 1163191673Sjamie } 1164191673Sjamie __elfN(putnote)(dst, off, "FreeBSD", NT_PRPSINFO, psinfo, 1165191673Sjamie sizeof *psinfo); 1166191673Sjamie 1167191673Sjamie /* 1168191673Sjamie * To have the debugger select the right thread (LWP) as the initial 1169192895Sjamie * thread, we dump the state of the thread passed to us in td first. 1170191673Sjamie * This is the thread that causes the core dump and thus likely to 1171191673Sjamie * be the right thread one wants to have selected in the debugger. 
1172191673Sjamie */ 1173191673Sjamie thr = td; 1174191673Sjamie while (thr != NULL) { 1175191673Sjamie if (dst != NULL) { 1176191673Sjamie status->pr_version = PRSTATUS_VERSION; 1177191673Sjamie status->pr_statussz = sizeof(elf_prstatus_t); 1178191673Sjamie status->pr_gregsetsz = sizeof(elf_gregset_t); 1179191673Sjamie status->pr_fpregsetsz = sizeof(elf_fpregset_t); 1180191673Sjamie status->pr_osreldate = osreldate; 1181191673Sjamie status->pr_cursig = p->p_sig; 1182191673Sjamie status->pr_pid = thr->td_tid; 1183191673Sjamie#if defined(COMPAT_IA32) && __ELF_WORD_SIZE == 32 1184191673Sjamie fill_regs32(thr, &status->pr_reg); 1185191673Sjamie fill_fpregs32(thr, fpregset); 1186191673Sjamie#else 1187191673Sjamie fill_regs(thr, &status->pr_reg); 1188191673Sjamie fill_fpregs(thr, fpregset); 1189191673Sjamie#endif 1190191673Sjamie } 1191191673Sjamie __elfN(putnote)(dst, off, "FreeBSD", NT_PRSTATUS, status, 1192191673Sjamie sizeof *status); 1193191673Sjamie __elfN(putnote)(dst, off, "FreeBSD", NT_FPREGSET, fpregset, 1194191673Sjamie sizeof *fpregset); 1195191673Sjamie /* 1196191673Sjamie * Allow for MD specific notes, as well as any MD 1197191673Sjamie * specific preparations for writing MI notes. 1198191673Sjamie */ 1199191673Sjamie __elfN(dump_thread)(thr, dst, off); 1200191673Sjamie 1201191673Sjamie thr = (thr == td) ? TAILQ_FIRST(&p->p_threads) : 1202191673Sjamie TAILQ_NEXT(thr, td_plist); 1203191673Sjamie if (thr == td) 1204191673Sjamie thr = TAILQ_NEXT(thr, td_plist); 1205191673Sjamie } 1206191673Sjamie 1207191673Sjamie notesz = *off - noteoff; 1208191673Sjamie 1209191673Sjamie if (dst != NULL) 1210191673Sjamie free(tempdata, M_TEMP); 1211191673Sjamie 1212185435Sbz /* Align up to a page boundary for the program segments. 
*/ 1213191673Sjamie *off = round_page(*off); 1214191673Sjamie 1215194762Sjamie if (dst != NULL) { 1216194762Sjamie Elf_Ehdr *ehdr; 1217194762Sjamie Elf_Phdr *phdr; 1218194762Sjamie struct phdr_closure phc; 1219194762Sjamie 1220194762Sjamie /* 1221191673Sjamie * Fill in the ELF header. 1222192895Sjamie */ 1223298832Sjamie ehdr = (Elf_Ehdr *)((char *)dst + ehoff); 1224192895Sjamie ehdr->e_ident[EI_MAG0] = ELFMAG0; 1225192895Sjamie ehdr->e_ident[EI_MAG1] = ELFMAG1; 1226298833Sjamie ehdr->e_ident[EI_MAG2] = ELFMAG2; 1227298833Sjamie ehdr->e_ident[EI_MAG3] = ELFMAG3; 1228192895Sjamie ehdr->e_ident[EI_CLASS] = ELF_CLASS; 1229192895Sjamie ehdr->e_ident[EI_DATA] = ELF_DATA; 1230192895Sjamie ehdr->e_ident[EI_VERSION] = EV_CURRENT; 1231192895Sjamie ehdr->e_ident[EI_OSABI] = ELFOSABI_FREEBSD; 1232192895Sjamie ehdr->e_ident[EI_ABIVERSION] = 0; 1233191673Sjamie ehdr->e_ident[EI_PAD] = 0; 1234191673Sjamie ehdr->e_type = ET_CORE; 1235191673Sjamie#if defined(COMPAT_IA32) && __ELF_WORD_SIZE == 32 1236191673Sjamie ehdr->e_machine = EM_386; 1237191673Sjamie#else 1238191673Sjamie ehdr->e_machine = ELF_ARCH; 1239191673Sjamie#endif 1240191673Sjamie ehdr->e_version = EV_CURRENT; 1241191673Sjamie ehdr->e_entry = 0; 1242191673Sjamie ehdr->e_phoff = phoff; 1243191673Sjamie ehdr->e_flags = 0; 1244191673Sjamie ehdr->e_ehsize = sizeof(Elf_Ehdr); 1245191673Sjamie ehdr->e_phentsize = sizeof(Elf_Phdr); 1246191673Sjamie ehdr->e_phnum = numsegs + 1; 1247191673Sjamie ehdr->e_shentsize = sizeof(Elf_Shdr); 1248191673Sjamie ehdr->e_shnum = 0; 1249191673Sjamie ehdr->e_shstrndx = SHN_UNDEF; 1250191673Sjamie 1251191673Sjamie /* 1252192895Sjamie * Fill in the program header entries. 1253192895Sjamie */ 1254192895Sjamie phdr = (Elf_Phdr *)((char *)dst + phoff); 1255191673Sjamie 1256191673Sjamie /* The note segement. 
*/ 1257191673Sjamie phdr->p_type = PT_NOTE; 1258191673Sjamie phdr->p_offset = noteoff; 1259191673Sjamie phdr->p_vaddr = 0; 1260191673Sjamie phdr->p_paddr = 0; 1261191673Sjamie phdr->p_filesz = notesz; 1262191673Sjamie phdr->p_memsz = 0; 1263191673Sjamie phdr->p_flags = 0; 1264191673Sjamie phdr->p_align = 0; 1265191673Sjamie phdr++; 1266191673Sjamie 1267191673Sjamie /* All the writable segments from the program. */ 1268191673Sjamie phc.phdr = phdr; 1269191673Sjamie phc.offset = *off; 1270191673Sjamie each_writable_segment(td, cb_put_phdr, &phc); 1271191673Sjamie } 1272191673Sjamie} 1273192895Sjamie 1274192895Sjamiestatic void 1275194762Sjamie__elfN(putnote)(void *dst, size_t *off, const char *name, int type, 1276185435Sbz const void *desc, size_t descsz) 1277192895Sjamie{ 1278191673Sjamie Elf_Note note; 1279192895Sjamie 1280192895Sjamie note.n_namesz = strlen(name) + 1; 1281298833Sjamie note.n_descsz = descsz; 1282298833Sjamie note.n_type = type; 1283191673Sjamie if (dst != NULL) 1284191673Sjamie bcopy(¬e, (char *)dst + *off, sizeof note); 1285192895Sjamie *off += sizeof note; 1286191673Sjamie if (dst != NULL) 1287191673Sjamie bcopy(name, (char *)dst + *off, note.n_namesz); 1288195944Sjamie *off += roundup2(note.n_namesz, sizeof(Elf_Size)); 1289195944Sjamie if (dst != NULL) 1290195945Sjamie bcopy(desc, (char *)dst + *off, note.n_descsz); 1291195945Sjamie *off += roundup2(note.n_descsz, sizeof(Elf_Size)); 1292195945Sjamie} 1293195945Sjamie 1294195945Sjamie/* 1295192895Sjamie * Tell kern_execve.c about it, with a little help from the linker. 1296195974Sjamie */ 1297195974Sjamiestatic struct execsw __elfN(execsw) = { 1298195974Sjamie __CONCAT(exec_, __elfN(imgact)), 1299195974Sjamie __XSTRING(__CONCAT(ELF, __ELF_WORD_SIZE)) 1300195974Sjamie}; 1301195974SjamieEXEC_SET(__CONCAT(elf, __ELF_WORD_SIZE), __elfN(execsw)); 1302195974Sjamie