imgact_elf.c revision 153741
1139804Simp/*-
2185435Sbz * Copyright (c) 2000 David O'Brien
3185435Sbz * Copyright (c) 1995-1996 Søren Schmidt
4191673Sjamie * Copyright (c) 1996 Peter Wemm
5185435Sbz * All rights reserved.
6190466Sjamie *
7185404Sbz * Redistribution and use in source and binary forms, with or without
8185404Sbz * modification, are permitted provided that the following conditions
9185404Sbz * are met:
10185404Sbz * 1. Redistributions of source code must retain the above copyright
11185404Sbz *    notice, this list of conditions and the following disclaimer
12185404Sbz *    in this position and unchanged.
13185404Sbz * 2. Redistributions in binary form must reproduce the above copyright
14185404Sbz *    notice, this list of conditions and the following disclaimer in the
15185404Sbz *    documentation and/or other materials provided with the distribution.
16185404Sbz * 3. The name of the author may not be used to endorse or promote products
17185404Sbz *    derived from this software without specific prior written permission
18185404Sbz *
19185404Sbz * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
20185404Sbz * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
21185404Sbz * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
22185404Sbz * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
23185404Sbz * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
24185404Sbz * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
25185404Sbz * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
26185404Sbz * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
2746197Sphk * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
2846155Sphk * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
29116182Sobrien */
30116182Sobrien
31116182Sobrien#include <sys/cdefs.h>
32193066Sjamie__FBSDID("$FreeBSD: head/sys/kern/imgact_elf.c 153741 2005-12-26 21:23:57Z sobomax $");
33185435Sbz
34185435Sbz#include "opt_compat.h"
35185435Sbz
36131177Spjd#include <sys/param.h>
3746155Sphk#include <sys/exec.h>
3846155Sphk#include <sys/fcntl.h>
3946155Sphk#include <sys/imgact.h>
4046155Sphk#include <sys/imgact_elf.h>
4146155Sphk#include <sys/kernel.h>
4246155Sphk#include <sys/lock.h>
4346155Sphk#include <sys/malloc.h>
44192895Sjamie#include <sys/mount.h>
45164032Srwatson#include <sys/mutex.h>
4646155Sphk#include <sys/mman.h>
47124882Srwatson#include <sys/namei.h>
48177785Skib#include <sys/pioctl.h>
4946155Sphk#include <sys/proc.h>
5087275Srwatson#include <sys/procfs.h>
5187275Srwatson#include <sys/resourcevar.h>
52220137Strasz#include <sys/sf_buf.h>
53221362Strasz#include <sys/systm.h>
54168401Spjd#include <sys/signalvar.h>
55193066Sjamie#include <sys/stat.h>
56113275Smike#include <sys/sx.h>
57147185Spjd#include <sys/syscall.h>
58113275Smike#include <sys/sysctl.h>
5946155Sphk#include <sys/sysent.h>
60113275Smike#include <sys/vnode.h>
6157163Srwatson
62113275Smike#include <vm/vm.h>
63196019Srwatson#include <vm/vm_kern.h>
6446155Sphk#include <vm/vm_param.h>
65196019Srwatson#include <vm/pmap.h>
66196019Srwatson#include <vm/vm_map.h>
6746155Sphk#include <vm/vm_object.h>
68196019Srwatson#include <vm/vm_extern.h>
69185435Sbz
70185435Sbz#include <machine/elf.h>
71185435Sbz#include <machine/md_var.h>
72185435Sbz
73185435Sbz#if defined(COMPAT_IA32) && __ELF_WORD_SIZE == 32
74185435Sbz#include <machine/fpu.h>
7546155Sphk#include <compat/ia32/ia32_reg.h>
76163606Srwatson#endif
77163606Srwatson
78195944Sjamie#define OLD_EI_BRAND	8
79195944Sjamie
8046155Sphkstatic int __elfN(check_header)(const Elf_Ehdr *hdr);
81227293Sedstatic Elf_Brandinfo *__elfN(get_brandinfo)(const Elf_Ehdr *hdr,
8246155Sphk    const char *interp);
83202468Sbzstatic int __elfN(load_file)(struct proc *p, const char *file, u_long *addr,
84202468Sbz    u_long *entry, size_t pagesize);
85202468Sbzstatic int __elfN(load_section)(struct vmspace *vmspace, vm_object_t object,
86202468Sbz    vm_offset_t offset, caddr_t vmaddr, size_t memsz, size_t filsz,
87202468Sbz    vm_prot_t prot, size_t pagesize);
88202468Sbzstatic int __CONCAT(exec_, __elfN(imgact))(struct image_params *imgp);
89202468Sbz
90202468SbzSYSCTL_NODE(_kern, OID_AUTO, __CONCAT(elf, __ELF_WORD_SIZE), CTLFLAG_RW, 0,
91202468Sbz    "");
92202468Sbz
93202468Sbzint __elfN(fallback_brand) = -1;
94202468SbzSYSCTL_INT(__CONCAT(_kern_elf, __ELF_WORD_SIZE), OID_AUTO,
95202468Sbz    fallback_brand, CTLFLAG_RW, &__elfN(fallback_brand), 0,
96202468Sbz    __XSTRING(__CONCAT(ELF, __ELF_WORD_SIZE)) " brand of last resort");
97202468SbzTUNABLE_INT("kern.elf" __XSTRING(__ELF_WORD_SIZE) ".fallback_brand",
98192895Sjamie    &__elfN(fallback_brand));
99192895Sjamie
100192895Sjamiestatic int elf_trace = 0;
101192895SjamieSYSCTL_INT(_debug, OID_AUTO, __elfN(trace), CTLFLAG_RW, &elf_trace, 0, "");
102192895Sjamie
103192895Sjamiestatic int elf_legacy_coredump = 0;
104192895SjamieSYSCTL_INT(_debug, OID_AUTO, __elfN(legacy_coredump), CTLFLAG_RW,
105192895Sjamie    &elf_legacy_coredump, 0, "");
106231267Smm
107194762Sjamiestatic Elf_Brandinfo *elf_brand_list[MAX_BRANDS];
108195944Sjamie
109201145Santoineint
110196176Sbz__elfN(insert_brand_entry)(Elf_Brandinfo *entry)
111202468Sbz{
112196176Sbz	int i;
113202468Sbz
114196176Sbz	for (i = 0; i < MAX_BRANDS; i++) {
115192895Sjamie		if (elf_brand_list[i] == NULL) {
116192895Sjamie			elf_brand_list[i] = entry;
117192895Sjamie			break;
11857163Srwatson		}
119221362Strasz	}
120168401Spjd	if (i == MAX_BRANDS)
121191673Sjamie		return (-1);
122191673Sjamie	return (0);
123221362Strasz}
124179881Sdelphij
125113275Smikeint
126191673Sjamie__elfN(remove_brand_entry)(Elf_Brandinfo *entry)
127190466Sjamie{
128191673Sjamie	int i;
129192895Sjamie
130192895Sjamie	for (i = 0; i < MAX_BRANDS; i++) {
131221362Strasz		if (elf_brand_list[i] == entry) {
132221362Strasz			elf_brand_list[i] = NULL;
133232598Strasz			break;
134221362Strasz		}
135221362Strasz	}
136185435Sbz	if (i == MAX_BRANDS)
137190466Sjamie		return (-1);
138192895Sjamie	return (0);
139185435Sbz}
140185435Sbz
141190466Sjamieint
142192895Sjamie__elfN(brand_inuse)(Elf_Brandinfo *entry)
143185435Sbz{
144113275Smike	struct proc *p;
145191673Sjamie	int rval = FALSE;
146191673Sjamie
147191673Sjamie	sx_slock(&allproc_lock);
148191673Sjamie	LIST_FOREACH(p, &allproc, p_list) {
149191673Sjamie		if (p->p_sysent == entry->sysvec) {
150191673Sjamie			rval = TRUE;
151113275Smike			break;
152192895Sjamie		}
153216861Sbz	}
154216861Sbz	sx_sunlock(&allproc_lock);
155216861Sbz
156192895Sjamie	return (rval);
157192895Sjamie}
158192895Sjamie
159202468Sbzstatic Elf_Brandinfo *
160202468Sbz__elfN(get_brandinfo)(const Elf_Ehdr *hdr, const char *interp)
161202468Sbz{
162202468Sbz	Elf_Brandinfo *bi;
163202468Sbz	int i;
164202468Sbz
165192895Sjamie	/*
166216861Sbz	 * We support three types of branding -- (1) the ELF EI_OSABI field
167192895Sjamie	 * that SCO added to the ELF spec, (2) FreeBSD 3.x's traditional string
168192895Sjamie	 * branding w/in the ELF header, and (3) path of the `interp_path'
169192895Sjamie	 * field.  We should also look for an ".note.ABI-tag" ELF section now
170202468Sbz	 * in all Linux ELF binaries, FreeBSD 4.1+, and some NetBSD ones.
171202468Sbz	 */
172202468Sbz
173202468Sbz	/* If the executable has a brand, search for it in the brand list. */
174202468Sbz	for (i = 0; i < MAX_BRANDS; i++) {
175202468Sbz		bi = elf_brand_list[i];
176195870Sjamie		if (bi != NULL && hdr->e_machine == bi->machine &&
177216861Sbz		    (hdr->e_ident[EI_OSABI] == bi->brand ||
178195870Sjamie		    strncmp((const char *)&hdr->e_ident[OLD_EI_BRAND],
179195870Sjamie		    bi->compat_3_brand, strlen(bi->compat_3_brand)) == 0))
180195870Sjamie			return (bi);
181195870Sjamie	}
182195870Sjamie
183195870Sjamie	/* Lacking a known brand, search for a recognized interpreter. */
184195870Sjamie	if (interp != NULL) {
185195870Sjamie		for (i = 0; i < MAX_BRANDS; i++) {
186195870Sjamie			bi = elf_brand_list[i];
187195870Sjamie			if (bi != NULL && hdr->e_machine == bi->machine &&
188192895Sjamie			    strcmp(interp, bi->interp_path) == 0)
189195870Sjamie				return (bi);
190192895Sjamie		}
191192895Sjamie	}
192195870Sjamie
193192895Sjamie	/* Lacking a recognized interpreter, try the default brand */
194192895Sjamie	for (i = 0; i < MAX_BRANDS; i++) {
195216861Sbz		bi = elf_brand_list[i];
196192895Sjamie		if (bi != NULL && hdr->e_machine == bi->machine &&
197192895Sjamie		    __elfN(fallback_brand) == bi->brand)
198192895Sjamie			return (bi);
199192895Sjamie	}
200192895Sjamie	return (NULL);
201192895Sjamie}
202192895Sjamie
203192895Sjamiestatic int
204192895Sjamie__elfN(check_header)(const Elf_Ehdr *hdr)
205232059Smm{
206232059Smm	Elf_Brandinfo *bi;
207232186Smm	int i;
208232278Smm
209254741Sdelphij	if (!IS_ELF(*hdr) ||
210277985Sjamie	    hdr->e_ident[EI_CLASS] != ELF_TARG_CLASS ||
211295951Saraujo	    hdr->e_ident[EI_DATA] != ELF_TARG_DATA ||
212295951Saraujo	    hdr->e_ident[EI_VERSION] != EV_CURRENT ||
213192895Sjamie	    hdr->e_phentsize != sizeof(Elf_Phdr) ||
214216861Sbz	    hdr->e_version != ELF_TARG_VER)
215192895Sjamie		return (ENOEXEC);
216192895Sjamie
217192895Sjamie	/*
218192895Sjamie	 * Make sure we have at least one brand for this machine.
219192895Sjamie	 */
220192895Sjamie
221192895Sjamie	for (i = 0; i < MAX_BRANDS; i++) {
222192895Sjamie		bi = elf_brand_list[i];
223192895Sjamie		if (bi != NULL && bi->machine == hdr->e_machine)
224232059Smm			break;
225232059Smm	}
226232186Smm	if (i == MAX_BRANDS)
227232278Smm		return (ENOEXEC);
228254741Sdelphij
229277985Sjamie	return (0);
230295951Saraujo}
231295951Saraujo
/*
 * Map the sub-page region [start, end) by materializing an anonymous
 * page in "map" and copying the backing file data into it from
 * "object" at "offset".  Used by __elfN(map_insert)() for the
 * unaligned head/tail fragments of a segment.  Returns a KERN_*
 * status.
 *
 * NOTE(review): "prot" is currently unused here; the anonymous page
 * is inserted with VM_PROT_ALL and the final protection is applied
 * later by the caller (load_section's vm_map_protect()) — confirm.
 */
static int
__elfN(map_partial)(vm_map_t map, vm_object_t object, vm_ooffset_t offset,
    vm_offset_t start, vm_offset_t end, vm_prot_t prot)
{
	struct sf_buf *sf;
	int error;
	vm_offset_t off;

	/*
	 * Create the page if it doesn't exist yet. Ignore errors.
	 */
	vm_map_lock(map);
	vm_map_insert(map, NULL, 0, trunc_page(start), round_page(end),
	    VM_PROT_ALL, VM_PROT_ALL, 0);
	vm_map_unlock(map);

	/*
	 * Find the page from the underlying object.
	 */
	if (object) {
		/* Temporarily map the file page into the kernel. */
		sf = vm_imgact_map_page(object, offset);
		if (sf == NULL)
			return (KERN_FAILURE);
		/* Offset of the fragment within its backing page. */
		off = offset - trunc_page(offset);
		/* Copy the fragment out to the new user-space page. */
		error = copyout((caddr_t)sf_buf_kva(sf) + off, (caddr_t)start,
		    end - start);
		vm_imgact_unmap_page(sf);
		if (error) {
			return (KERN_FAILURE);
		}
	}

	return (KERN_SUCCESS);
}
266185435Sbz
/*
 * Insert the mapping [start, end) of "object" at file offset "offset"
 * into "map" with protection "prot" and copy-on-write flags "cow".
 * Unaligned head and tail fragments are handled by
 * __elfN(map_partial)(); a mapping whose file offset is not page
 * aligned cannot be mmap'ed directly and is instead satisfied by
 * allocating anonymous memory and copying the data in by hand.
 * Returns a KERN_* status.
 */
static int
__elfN(map_insert)(vm_map_t map, vm_object_t object, vm_ooffset_t offset,
    vm_offset_t start, vm_offset_t end, vm_prot_t prot, int cow)
{
	struct sf_buf *sf;
	vm_offset_t off;
	vm_size_t sz;
	int error, rv;

	/* Handle an unaligned leading fragment, then advance to a
	 * page boundary. */
	if (start != trunc_page(start)) {
		rv = __elfN(map_partial)(map, object, offset, start,
		    round_page(start), prot);
		if (rv)
			return (rv);
		offset += round_page(start) - start;
		start = round_page(start);
	}
	/* Handle an unaligned trailing fragment, then trim back to a
	 * page boundary. */
	if (end != round_page(end)) {
		rv = __elfN(map_partial)(map, object, offset +
		    trunc_page(end) - start, trunc_page(end), end, prot);
		if (rv)
			return (rv);
		end = trunc_page(end);
	}
	if (end > start) {
		if (offset & PAGE_MASK) {
			/*
			 * The mapping is not page aligned. This means we have
			 * to copy the data. Sigh.
			 */
			rv = vm_map_find(map, NULL, 0, &start, end - start,
			    FALSE, prot | VM_PROT_WRITE, VM_PROT_ALL, 0);
			if (rv)
				return (rv);
			/* No backing object: anonymous zero-fill is enough. */
			if (object == NULL)
				return (KERN_SUCCESS);
			/* Copy the file data page-by-page via sf_bufs. */
			for (; start < end; start += sz) {
				sf = vm_imgact_map_page(object, offset);
				if (sf == NULL)
					return (KERN_FAILURE);
				off = offset - trunc_page(offset);
				sz = end - start;
				/* Clamp to what is left of this page. */
				if (sz > PAGE_SIZE - off)
					sz = PAGE_SIZE - off;
				error = copyout((caddr_t)sf_buf_kva(sf) + off,
				    (caddr_t)start, sz);
				vm_imgact_unmap_page(sf);
				if (error) {
					return (KERN_FAILURE);
				}
				offset += sz;
			}
			rv = KERN_SUCCESS;
		} else {
			/* Page-aligned: map the object pages directly. */
			vm_map_lock(map);
			rv = vm_map_insert(map, object, offset, start, end,
			    prot, VM_PROT_ALL, cow);
			vm_map_unlock(map);
		}
		return (rv);
	} else {
		/* The fragments covered everything; nothing left to map. */
		return (KERN_SUCCESS);
	}
}
331191673Sjamie
/*
 * Map one PT_LOAD segment of an ELF image into "vmspace".  The file
 * data (filsz bytes at "offset" in "object") is mapped at "vmaddr";
 * any excess up to memsz becomes zero-filled anonymous memory (bss).
 * "pagesize" is the ABI's page size and may exceed PAGE_SIZE for
 * superpage-aligned ABIs.  Returns 0 or an errno.
 */
static int
__elfN(load_section)(struct vmspace *vmspace,
	vm_object_t object, vm_offset_t offset,
	caddr_t vmaddr, size_t memsz, size_t filsz, vm_prot_t prot,
	size_t pagesize)
{
	struct sf_buf *sf;
	size_t map_len;
	vm_offset_t map_addr;
	int error, rv, cow;
	size_t copy_len;
	vm_offset_t file_addr;

	/*
	 * It's necessary to fail if the filsz + offset taken from the
	 * header is greater than the actual file pager object's size.
	 * If we were to allow this, then the vm_map_find() below would
	 * walk right off the end of the file object and into the ether.
	 *
	 * While I'm here, might as well check for something else that
	 * is invalid: filsz cannot be greater than memsz.
	 */
	if ((off_t)filsz + offset > object->un_pager.vnp.vnp_size ||
	    filsz > memsz) {
		uprintf("elf_load_section: truncated ELF file\n");
		return (ENOEXEC);
	}

/* Round to the ABI page size (which may be larger than PAGE_SIZE). */
#define trunc_page_ps(va, ps)	((va) & ~(ps - 1))
#define round_page_ps(va, ps)	(((va) + (ps - 1)) & ~(ps - 1))

	map_addr = trunc_page_ps((vm_offset_t)vmaddr, pagesize);
	file_addr = trunc_page_ps(offset, pagesize);

	/*
	 * We have two choices.  We can either clear the data in the last page
	 * of an oversized mapping, or we can start the anon mapping a page
	 * early and copy the initialized data into that first page.  We
	 * choose the second..
	 */
	if (memsz > filsz)
		map_len = trunc_page_ps(offset + filsz, pagesize) - file_addr;
	else
		map_len = round_page_ps(offset + filsz, pagesize) - file_addr;

	if (map_len != 0) {
		/* map_insert consumes this reference on success. */
		vm_object_reference(object);

		/* cow flags: don't dump readonly sections in core */
		cow = MAP_COPY_ON_WRITE | MAP_PREFAULT |
		    (prot & VM_PROT_WRITE ? 0 : MAP_DISABLE_COREDUMP);

		rv = __elfN(map_insert)(&vmspace->vm_map,
				      object,
				      file_addr,	/* file offset */
				      map_addr,		/* virtual start */
				      map_addr + map_len,/* virtual end */
				      prot,
				      cow);
		if (rv != KERN_SUCCESS) {
			vm_object_deallocate(object);
			return (EINVAL);
		}

		/* we can stop now if we've covered it all */
		if (memsz == filsz) {
			return (0);
		}
	}


	/*
	 * We have to get the remaining bit of the file into the first part
	 * of the oversized map segment.  This is normally because the .data
	 * segment in the file is extended to provide bss.  It's a neat idea
	 * to try and save a page, but it's a pain in the behind to implement.
	 */
	copy_len = (offset + filsz) - trunc_page_ps(offset + filsz, pagesize);
	map_addr = trunc_page_ps((vm_offset_t)vmaddr + filsz, pagesize);
	map_len = round_page_ps((vm_offset_t)vmaddr + memsz, pagesize) -
	    map_addr;

	/* This had damn well better be true! */
	if (map_len != 0) {
		/* Anonymous zero-fill for the bss portion. */
		rv = __elfN(map_insert)(&vmspace->vm_map, NULL, 0, map_addr,
		    map_addr + map_len, VM_PROT_ALL, 0);
		if (rv != KERN_SUCCESS) {
			return (EINVAL);
		}
	}

	if (copy_len != 0) {
		vm_offset_t off;

		sf = vm_imgact_map_page(object, offset + filsz);
		if (sf == NULL)
			return (EIO);

		/* send the page fragment to user space */
		off = trunc_page_ps(offset + filsz, pagesize) -
		    trunc_page(offset + filsz);
		error = copyout((caddr_t)sf_buf_kva(sf) + off,
		    (caddr_t)map_addr, copy_len);
		vm_imgact_unmap_page(sf);
		if (error) {
			return (error);
		}
	}

	/*
	 * set it to the specified protection.
	 * XXX had better undo the damage from pasting over the cracks here!
	 */
	vm_map_protect(&vmspace->vm_map, trunc_page(map_addr),
	    round_page(map_addr + map_len),  prot, FALSE);

	return (0);
}
450192895Sjamie
/*
 * Load the file "file" into memory.  It may be either a shared object
 * or an executable.
 *
 * The "addr" reference parameter is in/out.  On entry, it specifies
 * the address where a shared object should be loaded.  If the file is
 * an executable, this value is ignored.  On exit, "addr" specifies
 * where the file was actually loaded.
 *
 * The "entry" reference parameter is out only.  On exit, it specifies
 * the entry point for the loaded file.
 *
 * Returns 0 on success or an errno.  Used to load the ELF interpreter
 * (runtime linker) on behalf of the image being executed.
 */
static int
__elfN(load_file)(struct proc *p, const char *file, u_long *addr,
	u_long *entry, size_t pagesize)
{
	/* One heap allocation for all the large temporaries, to keep
	 * kernel stack usage down. */
	struct {
		struct nameidata nd;
		struct vattr attr;
		struct image_params image_params;
	} *tempdata;
	const Elf_Ehdr *hdr = NULL;
	const Elf_Phdr *phdr = NULL;
	struct nameidata *nd;
	struct vmspace *vmspace = p->p_vmspace;
	struct vattr *attr;
	struct image_params *imgp;
	vm_prot_t prot;
	u_long rbase;
	u_long base_addr = 0;
	int vfslocked, error, i, numsegs;

	if (curthread->td_proc != p)
		panic("elf_load_file - thread");	/* XXXKSE DIAGNOSTIC */

	tempdata = malloc(sizeof(*tempdata), M_TEMP, M_WAITOK);
	nd = &tempdata->nd;
	attr = &tempdata->attr;
	imgp = &tempdata->image_params;

	/*
	 * Initialize part of the common data
	 */
	imgp->proc = p;
	imgp->attr = attr;
	imgp->firstpage = NULL;
	imgp->image_header = NULL;
	imgp->object = NULL;
	imgp->execlabel = NULL;

	/* XXXKSE */
	NDINIT(nd, LOOKUP, MPSAFE|LOCKLEAF|FOLLOW, UIO_SYSSPACE, file,
	    curthread);
	vfslocked = 0;
	if ((error = namei(nd)) != 0) {
		/* Lookup failed: make sure the fail path sees no vnode. */
		nd->ni_vp = NULL;
		goto fail;
	}
	vfslocked = NDHASGIANT(nd);
	NDFREE(nd, NDF_ONLY_PNBUF);
	imgp->vp = nd->ni_vp;

	/*
	 * Check permissions, modes, uid, etc on the file, and "open" it.
	 */
	error = exec_check_permissions(imgp);
	if (error)
		goto fail;

	/* Wire the first page of the image for header parsing. */
	error = exec_map_first_page(imgp);
	if (error)
		goto fail;

	/*
	 * Also make certain that the interpreter stays the same, so set
	 * its VV_TEXT flag, too.
	 */
	nd->ni_vp->v_vflag |= VV_TEXT;

	imgp->object = nd->ni_vp->v_object;

	hdr = (const Elf_Ehdr *)imgp->image_header;
	if ((error = __elfN(check_header)(hdr)) != 0)
		goto fail;
	/* Shared objects relocate to *addr; executables load at their
	 * linked address (rbase 0). */
	if (hdr->e_type == ET_DYN)
		rbase = *addr;
	else if (hdr->e_type == ET_EXEC)
		rbase = 0;
	else {
		error = ENOEXEC;
		goto fail;
	}

	/* Only support headers that fit within first page for now      */
	/*    (multiplication of two Elf_Half fields will not overflow) */
	if ((hdr->e_phoff > PAGE_SIZE) ||
	    (hdr->e_phentsize * hdr->e_phnum) > PAGE_SIZE - hdr->e_phoff) {
		error = ENOEXEC;
		goto fail;
	}

	phdr = (const Elf_Phdr *)(imgp->image_header + hdr->e_phoff);

	/* Map each PT_LOAD segment with protections derived from its
	 * p_flags. */
	for (i = 0, numsegs = 0; i < hdr->e_phnum; i++) {
		if (phdr[i].p_type == PT_LOAD) {	/* Loadable segment */
			prot = 0;
			if (phdr[i].p_flags & PF_X)
  				prot |= VM_PROT_EXECUTE;
			if (phdr[i].p_flags & PF_W)
  				prot |= VM_PROT_WRITE;
			if (phdr[i].p_flags & PF_R)
  				prot |= VM_PROT_READ;

			if ((error = __elfN(load_section)(vmspace,
			    imgp->object, phdr[i].p_offset,
			    (caddr_t)(uintptr_t)phdr[i].p_vaddr + rbase,
			    phdr[i].p_memsz, phdr[i].p_filesz, prot,
			    pagesize)) != 0)
				goto fail;
			/*
			 * Establish the base address if this is the
			 * first segment.
			 */
			if (numsegs == 0)
  				base_addr = trunc_page(phdr[i].p_vaddr +
				    rbase);
			numsegs++;
		}
	}
	*addr = base_addr;
	*entry = (unsigned long)hdr->e_entry + rbase;

fail:
	if (imgp->firstpage)
		exec_unmap_first_page(imgp);

	/* vput releases both the reference and the lock from namei. */
	if (nd->ni_vp)
		vput(nd->ni_vp);

	VFS_UNLOCK_GIANT(vfslocked);
	free(tempdata, M_TEMP);

	return (error);
}
595191673Sjamie
596191673Sjamiestatic int
597191673Sjamie__CONCAT(exec_, __elfN(imgact))(struct image_params *imgp)
598191673Sjamie{
599185435Sbz	const Elf_Ehdr *hdr = (const Elf_Ehdr *)imgp->image_header;
600191673Sjamie	const Elf_Phdr *phdr;
601191673Sjamie	Elf_Auxargs *elf_auxargs = NULL;
602191673Sjamie	struct vmspace *vmspace;
603185435Sbz	vm_prot_t prot;
604191673Sjamie	u_long text_size = 0, data_size = 0, total_size = 0;
605191673Sjamie	u_long text_addr = 0, data_addr = 0;
606191673Sjamie	u_long seg_size, seg_addr;
607185435Sbz	u_long addr, entry = 0, proghdr = 0;
608185435Sbz	int error = 0, i;
609185435Sbz	const char *interp = NULL;
610185435Sbz	Elf_Brandinfo *brand_info;
611185435Sbz	char *path;
612185435Sbz	struct thread *td = curthread;
613230407Smm	struct sysentvec *sv;
614191673Sjamie
615298833Sjamie	/*
616298833Sjamie	 * Do we have a valid ELF header ?
617298833Sjamie	 *
618298833Sjamie	 * Only allow ET_EXEC & ET_DYN here, reject ET_DYN later
619298833Sjamie	 * if particular brand doesn't support it.
620298833Sjamie	 */
621298833Sjamie	if (__elfN(check_header)(hdr) != 0 ||
622191673Sjamie	    (hdr->e_type != ET_EXEC && hdr->e_type != ET_DYN))
623191673Sjamie		return (-1);
624191673Sjamie
625191673Sjamie	/*
626191673Sjamie	 * From here on down, we return an errno, not -1, as we've
627191673Sjamie	 * detected an ELF file.
628191673Sjamie	 */
629191673Sjamie
630191673Sjamie	if ((hdr->e_phoff > PAGE_SIZE) ||
631191673Sjamie	    (hdr->e_phoff + hdr->e_phentsize * hdr->e_phnum) > PAGE_SIZE) {
632191673Sjamie		/* Only support headers in first page for now */
633191673Sjamie		return (ENOEXEC);
634191673Sjamie	}
635191673Sjamie	phdr = (const Elf_Phdr *)(imgp->image_header + hdr->e_phoff);
636194762Sjamie	for (i = 0; i < hdr->e_phnum; i++) {
637194762Sjamie		switch (phdr[i].p_type) {
638194762Sjamie	  	case PT_INTERP:	/* Path to interpreter */
639194762Sjamie			if (phdr[i].p_filesz > MAXPATHLEN ||
640194762Sjamie			    phdr[i].p_offset + phdr[i].p_filesz > PAGE_SIZE)
641194762Sjamie				return (ENOEXEC);
642194762Sjamie			interp = imgp->image_header + phdr[i].p_offset;
643194762Sjamie			break;
644194762Sjamie		default:
645192895Sjamie			break;
646212436Sjamie		}
647212436Sjamie	}
648212436Sjamie
649192895Sjamie	brand_info = __elfN(get_brandinfo)(hdr, interp);
650212436Sjamie	if (brand_info == NULL) {
651212436Sjamie		uprintf("ELF binary type \"%u\" not known.\n",
652212436Sjamie		    hdr->e_ident[EI_OSABI]);
653212436Sjamie		return (ENOEXEC);
654212436Sjamie	}
655192895Sjamie	if (hdr->e_type == ET_DYN &&
656231267Smm	    (brand_info->flags & BI_CAN_EXEC_DYN) == 0) {
657231267Smm		error = ENOEXEC;
658231267Smm		goto fail;
659231267Smm	}
660231267Smm	sv = brand_info->sysvec;
661231267Smm	if (interp != NULL && brand_info->interp_newpath != NULL)
662231267Smm		interp = brand_info->interp_newpath;
663231267Smm
664191673Sjamie	/*
665192895Sjamie	 * Avoid a possible deadlock if the current address space is destroyed
666192895Sjamie	 * and that address space maps the locked vnode.  In the common case,
667192895Sjamie	 * the locked vnode's v_usecount is decremented but remains greater
668192895Sjamie	 * than zero.  Consequently, the vnode lock is not needed by vrele().
669192895Sjamie	 * However, in cases where the vnode lock is external, such as nullfs,
670192895Sjamie	 * v_usecount may become zero.
671192895Sjamie	 */
672191673Sjamie	VOP_UNLOCK(imgp->vp, 0, td);
673195870Sjamie
674195870Sjamie	exec_new_vmspace(imgp, sv);
675195870Sjamie
676195870Sjamie	vn_lock(imgp->vp, LK_EXCLUSIVE | LK_RETRY, td);
677195870Sjamie
678195870Sjamie	vmspace = imgp->proc->p_vmspace;
679195870Sjamie
680195870Sjamie	for (i = 0; i < hdr->e_phnum; i++) {
681195870Sjamie		switch (phdr[i].p_type) {
682195870Sjamie		case PT_LOAD:	/* Loadable segment */
683195870Sjamie			prot = 0;
684195870Sjamie			if (phdr[i].p_flags & PF_X)
685195870Sjamie  				prot |= VM_PROT_EXECUTE;
686195870Sjamie			if (phdr[i].p_flags & PF_W)
687195870Sjamie  				prot |= VM_PROT_WRITE;
688195870Sjamie			if (phdr[i].p_flags & PF_R)
689195870Sjamie  				prot |= VM_PROT_READ;
690195870Sjamie
691195870Sjamie#if defined(__ia64__) && __ELF_WORD_SIZE == 32 && defined(IA32_ME_HARDER)
692195870Sjamie			/*
693195870Sjamie			 * Some x86 binaries assume read == executable,
694195870Sjamie			 * notably the M3 runtime and therefore cvsup
695195870Sjamie			 */
696195870Sjamie			if (prot & VM_PROT_READ)
697195870Sjamie				prot |= VM_PROT_EXECUTE;
698195870Sjamie#endif
699195870Sjamie
700195870Sjamie			if ((error = __elfN(load_section)(vmspace,
701211085Sjamie			    imgp->object, phdr[i].p_offset,
702211085Sjamie			    (caddr_t)(uintptr_t)phdr[i].p_vaddr,
703211085Sjamie			    phdr[i].p_memsz, phdr[i].p_filesz, prot,
704211085Sjamie			    sv->sv_pagesize)) != 0)
705211085Sjamie				return (error);
706211085Sjamie
707194251Sjamie			/*
708194251Sjamie			 * If this segment contains the program headers,
709194251Sjamie			 * remember their virtual address for the AT_PHDR
710194251Sjamie			 * aux entry. Static binaries don't usually include
711194251Sjamie			 * a PT_PHDR entry.
712194251Sjamie			 */
713194251Sjamie			if (phdr[i].p_offset == 0 &&
714195974Sjamie			    hdr->e_phoff + hdr->e_phnum * hdr->e_phentsize
715195974Sjamie				<= phdr[i].p_filesz)
716195974Sjamie				proghdr = phdr[i].p_vaddr + hdr->e_phoff;
717195974Sjamie
718195974Sjamie			seg_addr = trunc_page(phdr[i].p_vaddr);
719195974Sjamie			seg_size = round_page(phdr[i].p_memsz +
720195974Sjamie			    phdr[i].p_vaddr - seg_addr);
721195974Sjamie
722195974Sjamie			/*
723195974Sjamie			 * Is this .text or .data?  We can't use
724195974Sjamie			 * VM_PROT_WRITE or VM_PROT_EXEC, it breaks the
725195974Sjamie			 * alpha terribly and possibly does other bad
726195974Sjamie			 * things so we stick to the old way of figuring
727195974Sjamie			 * it out:  If the segment contains the program
728191673Sjamie			 * entry point, it's a text segment, otherwise it
729192895Sjamie			 * is a data segment.
730192895Sjamie			 *
731192895Sjamie			 * Note that obreak() assumes that data_addr +
732192895Sjamie			 * data_size == end of data load area, and the ELF
733192895Sjamie			 * file format expects segments to be sorted by
734192895Sjamie			 * address.  If multiple data segments exist, the
735192895Sjamie			 * last one will be used.
736192895Sjamie			 */
737191673Sjamie			if (hdr->e_entry >= phdr[i].p_vaddr &&
738191673Sjamie			    hdr->e_entry < (phdr[i].p_vaddr +
739191673Sjamie			    phdr[i].p_memsz)) {
740191673Sjamie				text_size = seg_size;
741191673Sjamie				text_addr = seg_addr;
742191673Sjamie				entry = (u_long)hdr->e_entry;
743191673Sjamie			} else {
744191673Sjamie				data_size = seg_size;
745191673Sjamie				data_addr = seg_addr;
746191673Sjamie			}
747191673Sjamie			total_size += seg_size;
748191673Sjamie			break;
749191673Sjamie		case PT_PHDR: 	/* Program header table info */
750191673Sjamie			proghdr = phdr[i].p_vaddr;
751191673Sjamie			break;
752191673Sjamie		default:
753191673Sjamie			break;
754191673Sjamie		}
755191673Sjamie	}
756191673Sjamie
757191673Sjamie	if (data_addr == 0 && data_size == 0) {
758191673Sjamie		data_addr = text_addr;
759193066Sjamie		data_size = text_size;
760193066Sjamie	}
761191673Sjamie
762191673Sjamie	/*
763191673Sjamie	 * Check limits.  It should be safe to check the
764191673Sjamie	 * limits after loading the segments since we do
765191673Sjamie	 * not actually fault in all the segments pages.
766191673Sjamie	 */
767191673Sjamie	PROC_LOCK(imgp->proc);
768191673Sjamie	if (data_size > lim_cur(imgp->proc, RLIMIT_DATA) ||
769191673Sjamie	    text_size > maxtsiz ||
770191673Sjamie	    total_size > lim_cur(imgp->proc, RLIMIT_VMEM)) {
771193066Sjamie		PROC_UNLOCK(imgp->proc);
772193066Sjamie		return (ENOMEM);
773193066Sjamie	}
774193066Sjamie
775193066Sjamie	vmspace->vm_tsize = text_size >> PAGE_SHIFT;
776193066Sjamie	vmspace->vm_taddr = (caddr_t)(uintptr_t)text_addr;
777193066Sjamie	vmspace->vm_dsize = data_size >> PAGE_SHIFT;
778193066Sjamie	vmspace->vm_daddr = (caddr_t)(uintptr_t)data_addr;
779193066Sjamie
780193066Sjamie	/*
781193066Sjamie	 * We load the dynamic linker where a userland call
782193066Sjamie	 * to mmap(0, ...) would put it.  The rationale behind this
783193066Sjamie	 * calculation is that it leaves room for the heap to grow to
784193066Sjamie	 * its maximum allowed size.
785193066Sjamie	 */
786193066Sjamie	addr = round_page((vm_offset_t)imgp->proc->p_vmspace->vm_daddr +
787193066Sjamie	    lim_max(imgp->proc, RLIMIT_DATA));
788193066Sjamie	PROC_UNLOCK(imgp->proc);
789193066Sjamie
790193066Sjamie	imgp->entry_addr = entry;
791193066Sjamie
792193066Sjamie	imgp->proc->p_sysent = sv;
793193066Sjamie	if (interp != NULL) {
794193066Sjamie		VOP_UNLOCK(imgp->vp, 0, td);
795193066Sjamie		if (brand_info->emul_path != NULL &&
796193066Sjamie		    brand_info->emul_path[0] != '\0') {
797193066Sjamie			path = malloc(MAXPATHLEN, M_TEMP, M_WAITOK);
798193066Sjamie			snprintf(path, MAXPATHLEN, "%s%s",
799193066Sjamie			    brand_info->emul_path, interp);
800193066Sjamie			error = __elfN(load_file)(imgp->proc, path, &addr,
801193066Sjamie			    &imgp->entry_addr, sv->sv_pagesize);
802193066Sjamie			free(path, M_TEMP);
803193066Sjamie			if (error == 0)
804193066Sjamie				interp = NULL;
805193066Sjamie		}
806193066Sjamie		if (interp != NULL) {
807205014Snwhitehorn			error = __elfN(load_file)(imgp->proc, interp, &addr,
808217896Sdchagin			    &imgp->entry_addr, sv->sv_pagesize);
809193066Sjamie		}
810193066Sjamie		vn_lock(imgp->vp, LK_EXCLUSIVE | LK_RETRY, td);
811193066Sjamie		if (error != 0) {
812193066Sjamie			uprintf("ELF interpreter %s not found\n", interp);
813193066Sjamie			return (error);
814193066Sjamie		}
815193066Sjamie	}
816193066Sjamie
817193066Sjamie	/*
818193066Sjamie	 * Construct auxargs table (used by the fixup routine)
819193066Sjamie	 */
820193066Sjamie	elf_auxargs = malloc(sizeof(Elf_Auxargs), M_TEMP, M_WAITOK);
821193066Sjamie	elf_auxargs->execfd = -1;
822193066Sjamie	elf_auxargs->phdr = proghdr;
823193066Sjamie	elf_auxargs->phent = hdr->e_phentsize;
824193066Sjamie	elf_auxargs->phnum = hdr->e_phnum;
825193066Sjamie	elf_auxargs->pagesz = PAGE_SIZE;
826185435Sbz	elf_auxargs->base = addr;
827191673Sjamie	elf_auxargs->flags = 0;
828191673Sjamie	elf_auxargs->entry = entry;
829277279Sjamie	elf_auxargs->trace = elf_trace;
830191673Sjamie
831191673Sjamie	imgp->auxargs = elf_auxargs;
832191673Sjamie	imgp->interpreted = 0;
833191673Sjamie
834191673Sjamie	return (error);
835192895Sjamie}
836195870Sjamie
/* Expand to suword32 or suword64 to match the ELF word size in play. */
#define	suword __CONCAT(suword, __ELF_WORD_SIZE)
838195870Sjamie
839195870Sjamieint
840195870Sjamie__elfN(freebsd_fixup)(register_t **stack_base, struct image_params *imgp)
841192895Sjamie{
842192895Sjamie	Elf_Auxargs *args = (Elf_Auxargs *)imgp->auxargs;
843185435Sbz	Elf_Addr *base;
844192895Sjamie	Elf_Addr *pos;
845192895Sjamie
846185435Sbz	base = (Elf_Addr *)*stack_base;
847195974Sjamie	pos = base + (imgp->args->argc + imgp->args->envc + 2);
848192895Sjamie
849192895Sjamie	if (args->trace) {
850192895Sjamie		AUXARGS_ENTRY(pos, AT_DEBUG, 1);
851192895Sjamie	}
852192895Sjamie	if (args->execfd != -1) {
853202116Sbz		AUXARGS_ENTRY(pos, AT_EXECFD, args->execfd);
854202116Sbz	}
855202116Sbz	AUXARGS_ENTRY(pos, AT_PHDR, args->phdr);
856192895Sjamie	AUXARGS_ENTRY(pos, AT_PHENT, args->phent);
857192895Sjamie	AUXARGS_ENTRY(pos, AT_PHNUM, args->phnum);
858192895Sjamie	AUXARGS_ENTRY(pos, AT_PAGESZ, args->pagesz);
859192895Sjamie	AUXARGS_ENTRY(pos, AT_FLAGS, args->flags);
860192895Sjamie	AUXARGS_ENTRY(pos, AT_ENTRY, args->entry);
861192895Sjamie	AUXARGS_ENTRY(pos, AT_BASE, args->base);
862192895Sjamie	AUXARGS_ENTRY(pos, AT_NULL, 0);
863192895Sjamie
864192895Sjamie	free(imgp->auxargs, M_TEMP);
865192895Sjamie	imgp->auxargs = NULL;
866192895Sjamie
867192895Sjamie	base--;
868192895Sjamie	suword(base, (long)imgp->args->argc);
869192895Sjamie	*stack_base = (register_t *)base;
870192895Sjamie	return (0);
871192895Sjamie}
872192895Sjamie
873192895Sjamie/*
874192895Sjamie * Code for generating ELF core dumps.
875192895Sjamie */
876192895Sjamie
/* Callback invoked by each_writable_segment() for each dumpable map entry. */
typedef void (*segment_callback)(vm_map_entry_t, void *);
878192895Sjamie
/* Closure for cb_put_phdr(): next header slot and running file offset. */
struct phdr_closure {
	Elf_Phdr *phdr;		/* Program header to fill in */
	Elf_Off offset;		/* Offset of segment in core file */
};
884185435Sbz
/* Closure for cb_size_segment(): accumulated segment count and byte total. */
struct sseg_closure {
	int count;		/* Count of writable segments. */
	size_t size;		/* Total size of all writable segments. */
};
890191673Sjamie
891191673Sjamiestatic void cb_put_phdr(vm_map_entry_t, void *);
892191673Sjamiestatic void cb_size_segment(vm_map_entry_t, void *);
893192895Sjamiestatic void each_writable_segment(struct thread *, segment_callback, void *);
894195870Sjamiestatic int __elfN(corehdr)(struct thread *, struct vnode *, struct ucred *,
895195870Sjamie    int, void *, size_t);
896195870Sjamiestatic void __elfN(puthdr)(struct thread *, void *, size_t *, int);
897195870Sjamiestatic void __elfN(putnote)(void *, size_t *, const char *, int,
898195870Sjamie    const void *, size_t);
899192895Sjamie
900192895Sjamieextern int osreldate;
901185435Sbz
902192895Sjamieint
903192895Sjamie__elfN(coredump)(td, vp, limit)
904185435Sbz	struct thread *td;
905195974Sjamie	struct vnode *vp;
906192895Sjamie	off_t limit;
907192895Sjamie{
908192895Sjamie	struct ucred *cred = td->td_ucred;
909192895Sjamie	int error = 0;
910192895Sjamie	struct sseg_closure seginfo;
911192895Sjamie	void *hdr;
912192895Sjamie	size_t hdrsize;
913192895Sjamie
914192895Sjamie	/* Size the program segments. */
915192895Sjamie	seginfo.count = 0;
916192895Sjamie	seginfo.size = 0;
917192895Sjamie	each_writable_segment(td, cb_size_segment, &seginfo);
918192895Sjamie
919192895Sjamie	/*
920192895Sjamie	 * Calculate the size of the core file header area by making
921192895Sjamie	 * a dry run of generating it.  Nothing is written, but the
922185435Sbz	 * size is calculated.
923191673Sjamie	 */
924185435Sbz	hdrsize = 0;
925185435Sbz	__elfN(puthdr)(td, (void *)NULL, &hdrsize, seginfo.count);
926195945Sjamie
927195945Sjamie	if (hdrsize + seginfo.size >= limit)
928195945Sjamie		return (EFAULT);
929195945Sjamie
930195945Sjamie	/*
931195945Sjamie	 * Allocate memory for building the header, fill it up,
932195945Sjamie	 * and write it out.
933195945Sjamie	 */
934195945Sjamie	hdr = malloc(hdrsize, M_TEMP, M_WAITOK);
935230143Smm	if (hdr == NULL) {
936191673Sjamie		return (EINVAL);
937191673Sjamie	}
938191673Sjamie	error = __elfN(corehdr)(td, vp, cred, seginfo.count, hdr, hdrsize);
939191673Sjamie
940191673Sjamie	/* Write the contents of all of the writable segments. */
941191673Sjamie	if (error == 0) {
942191673Sjamie		Elf_Phdr *php;
943191673Sjamie		off_t offset;
944191673Sjamie		int i;
945191673Sjamie
946191673Sjamie		php = (Elf_Phdr *)((char *)hdr + sizeof(Elf_Ehdr)) + 1;
947191673Sjamie		offset = hdrsize;
948191673Sjamie		for (i = 0; i < seginfo.count; i++) {
949191673Sjamie			error = vn_rdwr_inchunks(UIO_WRITE, vp,
950191673Sjamie			    (caddr_t)(uintptr_t)php->p_vaddr,
951191673Sjamie			    php->p_filesz, offset, UIO_USERSPACE,
952191673Sjamie			    IO_UNIT | IO_DIRECT, cred, NOCRED, NULL,
953241896Skib			    curthread); /* XXXKSE */
954230129Smm			if (error != 0)
955230129Smm				break;
956230129Smm			offset += php->p_filesz;
957230129Smm			php++;
958230129Smm		}
959230129Smm	}
960230407Smm	free(hdr, M_TEMP);
961230407Smm
962230407Smm	return (error);
963230407Smm}
964230407Smm
965230407Smm/*
966230129Smm * A callback for each_writable_segment() to write out the segment's
967230129Smm * program header entry.
968230129Smm */
969230129Smmstatic void
970230407Smmcb_put_phdr(entry, closure)
971230129Smm	vm_map_entry_t entry;
972230129Smm	void *closure;
973230129Smm{
974230129Smm	struct phdr_closure *phc = (struct phdr_closure *)closure;
975230129Smm	Elf_Phdr *phdr = phc->phdr;
976230129Smm
977230129Smm	phc->offset = round_page(phc->offset);
978230129Smm
979230129Smm	phdr->p_type = PT_LOAD;
980230129Smm	phdr->p_offset = phc->offset;
981192895Sjamie	phdr->p_vaddr = entry->start;
982192895Sjamie	phdr->p_paddr = 0;
983192895Sjamie	phdr->p_filesz = phdr->p_memsz = entry->end - entry->start;
984192895Sjamie	phdr->p_align = PAGE_SIZE;
985192895Sjamie	phdr->p_flags = 0;
986192895Sjamie	if (entry->protection & VM_PROT_READ)
987191673Sjamie		phdr->p_flags |= PF_R;
988191673Sjamie	if (entry->protection & VM_PROT_WRITE)
989185435Sbz		phdr->p_flags |= PF_W;
990280632Sian	if (entry->protection & VM_PROT_EXECUTE)
991280632Sian		phdr->p_flags |= PF_X;
992280632Sian
993280632Sian	phc->offset += phdr->p_filesz;
994280632Sian	phc->phdr++;
995280632Sian}
996280632Sian
997280632Sian/*
998280632Sian * A callback for each_writable_segment() to gather information about
999280632Sian * the number of segments and their total size.
1000280632Sian */
1001280632Sianstatic void
1002280632Siancb_size_segment(entry, closure)
1003280632Sian	vm_map_entry_t entry;
1004280632Sian	void *closure;
1005280632Sian{
1006280632Sian	struct sseg_closure *ssc = (struct sseg_closure *)closure;
1007280632Sian
1008280632Sian	ssc->count++;
1009280632Sian	ssc->size += entry->end - entry->start;
1010280632Sian}
1011280632Sian
1012280632Sian/*
1013280632Sian * For each writable segment in the process's memory map, call the given
1014280632Sian * function with a pointer to the map entry and some arbitrary
1015280632Sian * caller-supplied data.
1016280632Sian */
1017280632Sianstatic void
1018280632Sianeach_writable_segment(td, func, closure)
1019280632Sian	struct thread *td;
1020280632Sian	segment_callback func;
1021280632Sian	void *closure;
1022280632Sian{
1023280632Sian	struct proc *p = td->td_proc;
1024280632Sian	vm_map_t map = &p->p_vmspace->vm_map;
1025280632Sian	vm_map_entry_t entry;
1026280632Sian
1027280632Sian	for (entry = map->header.next; entry != &map->header;
1028280632Sian	    entry = entry->next) {
1029280632Sian		vm_object_t obj;
1030191673Sjamie
1031298833Sjamie		/*
1032191673Sjamie		 * Don't dump inaccessible mappings, deal with legacy
1033185435Sbz		 * coredump mode.
1034191673Sjamie		 *
1035298833Sjamie		 * Note that read-only segments related to the elf binary
1036196835Sjamie		 * are marked MAP_ENTRY_NOCOREDUMP now so we no longer
1037196835Sjamie		 * need to arbitrarily ignore such segments.
1038196835Sjamie		 */
1039196835Sjamie		if (elf_legacy_coredump) {
1040196835Sjamie			if ((entry->protection & VM_PROT_RW) != VM_PROT_RW)
1041196835Sjamie				continue;
1042298833Sjamie		} else {
1043191673Sjamie			if ((entry->protection & VM_PROT_ALL) == 0)
1044192895Sjamie				continue;
1045192895Sjamie		}
1046192895Sjamie
1047192895Sjamie		/*
1048192895Sjamie		 * Dont include memory segment in the coredump if
1049192895Sjamie		 * MAP_NOCORE is set in mmap(2) or MADV_NOCORE in
1050192895Sjamie		 * madvise(2).  Do not dump submaps (i.e. parts of the
1051191673Sjamie		 * kernel map).
1052191673Sjamie		 */
1053191673Sjamie		if (entry->eflags & (MAP_ENTRY_NOCOREDUMP|MAP_ENTRY_IS_SUB_MAP))
1054191673Sjamie			continue;
1055191673Sjamie
1056191673Sjamie		if ((obj = entry->object.vm_object) == NULL)
1057191673Sjamie			continue;
1058192895Sjamie
1059191673Sjamie		/* Find the deepest backing object. */
1060191673Sjamie		while (obj->backing_object != NULL)
1061191673Sjamie			obj = obj->backing_object;
1062191673Sjamie
1063191673Sjamie		/* Ignore memory-mapped devices and such things. */
1064191673Sjamie		if (obj->type != OBJT_DEFAULT &&
1065191673Sjamie		    obj->type != OBJT_SWAP &&
1066191673Sjamie		    obj->type != OBJT_VNODE)
1067192895Sjamie			continue;
1068192895Sjamie
1069192895Sjamie		(*func)(entry, closure);
1070192895Sjamie	}
1071191673Sjamie}
1072191673Sjamie
1073191673Sjamie/*
1074191673Sjamie * Write the core file header to the file, including padding up to
1075191673Sjamie * the page boundary.
1076191673Sjamie */
1077191673Sjamiestatic int
1078191673Sjamie__elfN(corehdr)(td, vp, cred, numsegs, hdr, hdrsize)
1079191673Sjamie	struct thread *td;
1080191673Sjamie	struct vnode *vp;
1081191673Sjamie	struct ucred *cred;
1082191673Sjamie	int numsegs;
1083191673Sjamie	size_t hdrsize;
1084191673Sjamie	void *hdr;
1085191673Sjamie{
1086191673Sjamie	size_t off;
1087191673Sjamie
1088192895Sjamie	/* Fill in the header. */
1089191673Sjamie	bzero(hdr, hdrsize);
1090191673Sjamie	off = 0;
1091191673Sjamie	__elfN(puthdr)(td, hdr, &off, numsegs);
1092191673Sjamie
1093191673Sjamie	/* Write it to the core file. */
1094191673Sjamie	return (vn_rdwr_inchunks(UIO_WRITE, vp, hdr, hdrsize, (off_t)0,
1095191673Sjamie	    UIO_SYSSPACE, IO_UNIT | IO_DIRECT, cred, NOCRED, NULL,
1096191673Sjamie	    td)); /* XXXKSE */
1097191673Sjamie}
1098191673Sjamie
/*
 * Select the note payload layouts: a 32-bit ELF image dumped by a
 * 64-bit kernel with ia32 compatibility uses the 32-bit register and
 * psinfo structures; otherwise the native ones apply.
 */
#if defined(COMPAT_IA32) && __ELF_WORD_SIZE == 32
typedef struct prstatus32 elf_prstatus_t;
typedef struct prpsinfo32 elf_prpsinfo_t;
typedef struct fpreg32 elf_prfpregset_t;
typedef struct fpreg32 elf_fpregset_t;
typedef struct reg32 elf_gregset_t;
#else
typedef prstatus_t elf_prstatus_t;
typedef prpsinfo_t elf_prpsinfo_t;
typedef prfpregset_t elf_prfpregset_t;
typedef prfpregset_t elf_fpregset_t;
typedef gregset_t elf_gregset_t;
#endif
1112196835Sjamie
/*
 * Lay out (and, when dst != NULL, fill in) the core file header area:
 * ELF header, program headers, and the per-process/per-thread notes.
 * *off is advanced past everything emitted, so calling with dst == NULL
 * performs a pure size calculation (dry run) with no data collection.
 */
static void
__elfN(puthdr)(struct thread *td, void *dst, size_t *off, int numsegs)
{
	/* Scratch buffer for note payloads; heap-allocated to keep the
	 * kernel stack footprint small. */
	struct {
		elf_prstatus_t status;
		elf_prfpregset_t fpregset;
		elf_prpsinfo_t psinfo;
	} *tempdata;
	elf_prstatus_t *status;
	elf_prfpregset_t *fpregset;
	elf_prpsinfo_t *psinfo;
	struct proc *p;
	struct thread *thr;
	size_t ehoff, noteoff, notesz, phoff;

	p = td->td_proc;

	/* Reserve room for the ELF header... */
	ehoff = *off;
	*off += sizeof(Elf_Ehdr);

	/* ...and for one program header per segment plus the note header. */
	phoff = *off;
	*off += (numsegs + 1) * sizeof(Elf_Phdr);

	noteoff = *off;
	/*
	 * Don't allocate space for the notes if we're just calculating
	 * the size of the header. We also don't collect the data.
	 */
	if (dst != NULL) {
		tempdata = malloc(sizeof(*tempdata), M_TEMP, M_ZERO|M_WAITOK);
		status = &tempdata->status;
		fpregset = &tempdata->fpregset;
		psinfo = &tempdata->psinfo;
	} else {
		tempdata = NULL;
		status = NULL;
		fpregset = NULL;
		psinfo = NULL;
	}

	if (dst != NULL) {
		psinfo->pr_version = PRPSINFO_VERSION;
		psinfo->pr_psinfosz = sizeof(elf_prpsinfo_t);
		strlcpy(psinfo->pr_fname, p->p_comm, sizeof(psinfo->pr_fname));
		/*
		 * XXX - We don't fill in the command line arguments properly
		 * yet.
		 */
		strlcpy(psinfo->pr_psargs, p->p_comm,
		    sizeof(psinfo->pr_psargs));
	}
	/* One NT_PRPSINFO note for the whole process. */
	__elfN(putnote)(dst, off, "FreeBSD", NT_PRPSINFO, psinfo,
	    sizeof *psinfo);

	/*
	 * To have the debugger select the right thread (LWP) as the initial
	 * thread, we dump the state of the thread passed to us in td first.
	 * This is the thread that causes the core dump and thus likely to
	 * be the right thread one wants to have selected in the debugger.
	 */
	thr = td;
	while (thr != NULL) {
		if (dst != NULL) {
			status->pr_version = PRSTATUS_VERSION;
			status->pr_statussz = sizeof(elf_prstatus_t);
			status->pr_gregsetsz = sizeof(elf_gregset_t);
			status->pr_fpregsetsz = sizeof(elf_fpregset_t);
			status->pr_osreldate = osreldate;
			status->pr_cursig = p->p_sig;
			status->pr_pid = thr->td_tid;
#if defined(COMPAT_IA32) && __ELF_WORD_SIZE == 32
			fill_regs32(thr, &status->pr_reg);
			fill_fpregs32(thr, fpregset);
#else
			fill_regs(thr, &status->pr_reg);
			fill_fpregs(thr, fpregset);
#endif
		}
		/* Per-thread NT_PRSTATUS and NT_FPREGSET notes. */
		__elfN(putnote)(dst, off, "FreeBSD", NT_PRSTATUS, status,
		    sizeof *status);
		__elfN(putnote)(dst, off, "FreeBSD", NT_FPREGSET, fpregset,
		    sizeof *fpregset);
		/*
		 * Allow for MD specific notes, as well as any MD
		 * specific preparations for writing MI notes.
		 */
		__elfN(dump_thread)(thr, dst, off);

		/* After td, walk p_threads in order, skipping td itself. */
		thr = (thr == td) ? TAILQ_FIRST(&p->p_threads) :
		    TAILQ_NEXT(thr, td_plist);
		if (thr == td)
			thr = TAILQ_NEXT(thr, td_plist);
	}

	notesz = *off - noteoff;

	if (dst != NULL)
		free(tempdata, M_TEMP);

	/* Align up to a page boundary for the program segments. */
	*off = round_page(*off);

	if (dst != NULL) {
		Elf_Ehdr *ehdr;
		Elf_Phdr *phdr;
		struct phdr_closure phc;

		/*
		 * Fill in the ELF header.
		 */
		ehdr = (Elf_Ehdr *)((char *)dst + ehoff);
		ehdr->e_ident[EI_MAG0] = ELFMAG0;
		ehdr->e_ident[EI_MAG1] = ELFMAG1;
		ehdr->e_ident[EI_MAG2] = ELFMAG2;
		ehdr->e_ident[EI_MAG3] = ELFMAG3;
		ehdr->e_ident[EI_CLASS] = ELF_CLASS;
		ehdr->e_ident[EI_DATA] = ELF_DATA;
		ehdr->e_ident[EI_VERSION] = EV_CURRENT;
		ehdr->e_ident[EI_OSABI] = ELFOSABI_FREEBSD;
		ehdr->e_ident[EI_ABIVERSION] = 0;
		ehdr->e_ident[EI_PAD] = 0;
		ehdr->e_type = ET_CORE;
#if defined(COMPAT_IA32) && __ELF_WORD_SIZE == 32
		ehdr->e_machine = EM_386;
#else
		ehdr->e_machine = ELF_ARCH;
#endif
		ehdr->e_version = EV_CURRENT;
		ehdr->e_entry = 0;
		ehdr->e_phoff = phoff;
		ehdr->e_flags = 0;
		ehdr->e_ehsize = sizeof(Elf_Ehdr);
		ehdr->e_phentsize = sizeof(Elf_Phdr);
		ehdr->e_phnum = numsegs + 1;
		ehdr->e_shentsize = sizeof(Elf_Shdr);
		ehdr->e_shnum = 0;
		ehdr->e_shstrndx = SHN_UNDEF;

		/*
		 * Fill in the program header entries.
		 */
		phdr = (Elf_Phdr *)((char *)dst + phoff);

		/* The note segment. */
		phdr->p_type = PT_NOTE;
		phdr->p_offset = noteoff;
		phdr->p_vaddr = 0;
		phdr->p_paddr = 0;
		phdr->p_filesz = notesz;
		phdr->p_memsz = 0;
		phdr->p_flags = 0;
		phdr->p_align = 0;
		phdr++;

		/* All the writable segments from the program. */
		phc.phdr = phdr;
		phc.offset = *off;
		each_writable_segment(td, cb_put_phdr, &phc);
	}
}
1273192895Sjamie
1274192895Sjamiestatic void
1275194762Sjamie__elfN(putnote)(void *dst, size_t *off, const char *name, int type,
1276185435Sbz    const void *desc, size_t descsz)
1277192895Sjamie{
1278191673Sjamie	Elf_Note note;
1279192895Sjamie
1280192895Sjamie	note.n_namesz = strlen(name) + 1;
1281298833Sjamie	note.n_descsz = descsz;
1282298833Sjamie	note.n_type = type;
1283191673Sjamie	if (dst != NULL)
1284191673Sjamie		bcopy(&note, (char *)dst + *off, sizeof note);
1285192895Sjamie	*off += sizeof note;
1286191673Sjamie	if (dst != NULL)
1287191673Sjamie		bcopy(name, (char *)dst + *off, note.n_namesz);
1288195944Sjamie	*off += roundup2(note.n_namesz, sizeof(Elf_Size));
1289195944Sjamie	if (dst != NULL)
1290195945Sjamie		bcopy(desc, (char *)dst + *off, note.n_descsz);
1291195945Sjamie	*off += roundup2(note.n_descsz, sizeof(Elf_Size));
1292195945Sjamie}
1293195945Sjamie
1294195945Sjamie/*
1295192895Sjamie * Tell kern_execve.c about it, with a little help from the linker.
1296195974Sjamie */
static struct execsw __elfN(execsw) = {
	__CONCAT(exec_, __elfN(imgact)),	/* image activator function */
	__XSTRING(__CONCAT(ELF, __ELF_WORD_SIZE))	/* name, e.g. "ELF32" */
};
/* Register this activator with kern_execve.c via the linker set. */
EXEC_SET(__CONCAT(elf, __ELF_WORD_SIZE), __elfN(execsw));
1302195974Sjamie