imgact_elf.c revision 186225
11573Srgrimes/*-
21573Srgrimes * Copyright (c) 2000 David O'Brien
31573Srgrimes * Copyright (c) 1995-1996 Søren Schmidt
41573Srgrimes * Copyright (c) 1996 Peter Wemm
51573Srgrimes * All rights reserved.
61573Srgrimes *
71573Srgrimes * Redistribution and use in source and binary forms, with or without
81573Srgrimes * modification, are permitted provided that the following conditions
91573Srgrimes * are met:
101573Srgrimes * 1. Redistributions of source code must retain the above copyright
111573Srgrimes *    notice, this list of conditions and the following disclaimer
121573Srgrimes *    in this position and unchanged.
131573Srgrimes * 2. Redistributions in binary form must reproduce the above copyright
141573Srgrimes *    notice, this list of conditions and the following disclaimer in the
151573Srgrimes *    documentation and/or other materials provided with the distribution.
161573Srgrimes * 3. The name of the author may not be used to endorse or promote products
171573Srgrimes *    derived from this software without specific prior written permission
181573Srgrimes *
191573Srgrimes * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
201573Srgrimes * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
211573Srgrimes * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
221573Srgrimes * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
231573Srgrimes * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
241573Srgrimes * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
251573Srgrimes * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
261573Srgrimes * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
271573Srgrimes * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
281573Srgrimes * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
291573Srgrimes */
301573Srgrimes
311573Srgrimes#include <sys/cdefs.h>
321573Srgrimes__FBSDID("$FreeBSD: head/sys/kern/imgact_elf.c 186225 2008-12-17 13:13:35Z kib $");
331573Srgrimes
341573Srgrimes#include "opt_compat.h"
351573Srgrimes
361573Srgrimes#include <sys/param.h>
3792986Sobrien#include <sys/exec.h>
3892986Sobrien#include <sys/fcntl.h>
391573Srgrimes#include <sys/imgact.h>
401573Srgrimes#include <sys/imgact_elf.h>
411573Srgrimes#include <sys/kernel.h>
421573Srgrimes#include <sys/lock.h>
431573Srgrimes#include <sys/malloc.h>
441573Srgrimes#include <sys/mount.h>
451573Srgrimes#include <sys/mutex.h>
461573Srgrimes#include <sys/mman.h>
471573Srgrimes#include <sys/namei.h>
4811659Sphk#include <sys/pioctl.h>
491573Srgrimes#include <sys/proc.h>
50111010Snectar#include <sys/procfs.h>
51111010Snectar#include <sys/resourcevar.h>
52111010Snectar#include <sys/sf_buf.h>
531573Srgrimes#include <sys/systm.h>
541573Srgrimes#include <sys/signalvar.h>
551573Srgrimes#include <sys/stat.h>
561573Srgrimes#include <sys/sx.h>
571573Srgrimes#include <sys/syscall.h>
581573Srgrimes#include <sys/sysctl.h>
591573Srgrimes#include <sys/sysent.h>
601573Srgrimes#include <sys/vnode.h>
611573Srgrimes
621573Srgrimes#include <vm/vm.h>
631573Srgrimes#include <vm/vm_kern.h>
641573Srgrimes#include <vm/vm_param.h>
651573Srgrimes#include <vm/pmap.h>
661573Srgrimes#include <vm/vm_map.h>
671573Srgrimes#include <vm/vm_object.h>
681573Srgrimes#include <vm/vm_extern.h>
691573Srgrimes
701573Srgrimes#include <machine/elf.h>
711573Srgrimes#include <machine/md_var.h>
721573Srgrimes
731573Srgrimes#if defined(COMPAT_IA32) && __ELF_WORD_SIZE == 32
7442353Sdes#include <machine/fpu.h>
7542353Sdes#include <compat/ia32/ia32_reg.h>
7642353Sdes#endif
7742353Sdes
781573Srgrimes#define OLD_EI_BRAND	8
791573Srgrimes
801573Srgrimesstatic int __elfN(check_header)(const Elf_Ehdr *hdr);
811573Srgrimesstatic Elf_Brandinfo *__elfN(get_brandinfo)(const Elf_Ehdr *hdr,
821573Srgrimes    const char *interp);
831573Srgrimesstatic int __elfN(load_file)(struct proc *p, const char *file, u_long *addr,
8442353Sdes    u_long *entry, size_t pagesize);
8542353Sdesstatic int __elfN(load_section)(struct vmspace *vmspace, vm_object_t object,
8642353Sdes    vm_offset_t offset, caddr_t vmaddr, size_t memsz, size_t filsz,
8742353Sdes    vm_prot_t prot, size_t pagesize);
881573Srgrimesstatic int __CONCAT(exec_, __elfN(imgact))(struct image_params *imgp);
891573Srgrimes
901573SrgrimesSYSCTL_NODE(_kern, OID_AUTO, __CONCAT(elf, __ELF_WORD_SIZE), CTLFLAG_RW, 0,
911573Srgrimes    "");
921573Srgrimes
931573Srgrimesint __elfN(fallback_brand) = -1;
941573SrgrimesSYSCTL_INT(__CONCAT(_kern_elf, __ELF_WORD_SIZE), OID_AUTO,
951573Srgrimes    fallback_brand, CTLFLAG_RW, &__elfN(fallback_brand), 0,
961573Srgrimes    __XSTRING(__CONCAT(ELF, __ELF_WORD_SIZE)) " brand of last resort");
971573SrgrimesTUNABLE_INT("kern.elf" __XSTRING(__ELF_WORD_SIZE) ".fallback_brand",
981573Srgrimes    &__elfN(fallback_brand));
991573Srgrimes
1001573Srgrimesstatic int elf_trace = 0;
1011573SrgrimesSYSCTL_INT(_debug, OID_AUTO, __elfN(trace), CTLFLAG_RW, &elf_trace, 0, "");
1021573Srgrimes
1031573Srgrimesstatic int elf_legacy_coredump = 0;
1041573SrgrimesSYSCTL_INT(_debug, OID_AUTO, __elfN(legacy_coredump), CTLFLAG_RW,
1051573Srgrimes    &elf_legacy_coredump, 0, "");
1061573Srgrimes
1071573Srgrimesstatic Elf_Brandinfo *elf_brand_list[MAX_BRANDS];
1081573Srgrimes
1091573Srgrimes#define	trunc_page_ps(va, ps)	((va) & ~(ps - 1))
1101573Srgrimes#define	round_page_ps(va, ps)	(((va) + (ps - 1)) & ~(ps - 1))
1111573Srgrimes#define	aligned(a, t)	(trunc_page_ps((u_long)(a), sizeof(t)) == (u_long)(a))
1121573Srgrimes
1131573Srgrimesint
1141573Srgrimes__elfN(insert_brand_entry)(Elf_Brandinfo *entry)
1151573Srgrimes{
1161573Srgrimes	int i;
1171573Srgrimes
1181573Srgrimes	for (i = 0; i < MAX_BRANDS; i++) {
1191573Srgrimes		if (elf_brand_list[i] == NULL) {
1201573Srgrimes			elf_brand_list[i] = entry;
1211573Srgrimes			break;
1221573Srgrimes		}
1231573Srgrimes	}
1241573Srgrimes	if (i == MAX_BRANDS)
1251573Srgrimes		return (-1);
1261573Srgrimes	return (0);
1271573Srgrimes}
1281573Srgrimes
1291573Srgrimesint
1301573Srgrimes__elfN(remove_brand_entry)(Elf_Brandinfo *entry)
1311573Srgrimes{
1321573Srgrimes	int i;
1331573Srgrimes
1341573Srgrimes	for (i = 0; i < MAX_BRANDS; i++) {
1351573Srgrimes		if (elf_brand_list[i] == entry) {
1361573Srgrimes			elf_brand_list[i] = NULL;
1371573Srgrimes			break;
1381573Srgrimes		}
1391573Srgrimes	}
1401573Srgrimes	if (i == MAX_BRANDS)
1411573Srgrimes		return (-1);
1421573Srgrimes	return (0);
1431573Srgrimes}
1441573Srgrimes
/*
 * Return TRUE if any process is currently running under the given
 * brand's syscall vector, FALSE otherwise.  Presumably used to refuse
 * unloading an ABI module that is still in use — confirm callers.
 */
int
__elfN(brand_inuse)(Elf_Brandinfo *entry)
{
	struct proc *p;
	int rval = FALSE;

	/* Walk the global process list under the shared allproc lock. */
	sx_slock(&allproc_lock);
	FOREACH_PROC_IN_SYSTEM(p) {
		if (p->p_sysent == entry->sysvec) {
			rval = TRUE;
			break;
		}
	}
	sx_sunlock(&allproc_lock);

	return (rval);
}
1621573Srgrimes
1631573Srgrimesstatic Elf_Brandinfo *
1641573Srgrimes__elfN(get_brandinfo)(const Elf_Ehdr *hdr, const char *interp)
1651573Srgrimes{
1661573Srgrimes	Elf_Brandinfo *bi;
1671573Srgrimes	int i;
1681573Srgrimes
1691573Srgrimes	/*
1701573Srgrimes	 * We support three types of branding -- (1) the ELF EI_OSABI field
1711573Srgrimes	 * that SCO added to the ELF spec, (2) FreeBSD 3.x's traditional string
1721573Srgrimes	 * branding w/in the ELF header, and (3) path of the `interp_path'
1731573Srgrimes	 * field.  We should also look for an ".note.ABI-tag" ELF section now
1741573Srgrimes	 * in all Linux ELF binaries, FreeBSD 4.1+, and some NetBSD ones.
1751573Srgrimes	 */
1761573Srgrimes
1771573Srgrimes	/* If the executable has a brand, search for it in the brand list. */
1781573Srgrimes	for (i = 0; i < MAX_BRANDS; i++) {
1791573Srgrimes		bi = elf_brand_list[i];
1801573Srgrimes		if (bi != NULL && hdr->e_machine == bi->machine &&
1811573Srgrimes		    (hdr->e_ident[EI_OSABI] == bi->brand ||
1821573Srgrimes		    strncmp((const char *)&hdr->e_ident[OLD_EI_BRAND],
1831573Srgrimes		    bi->compat_3_brand, strlen(bi->compat_3_brand)) == 0))
1841573Srgrimes			return (bi);
1851573Srgrimes	}
1861573Srgrimes
1871573Srgrimes	/* Lacking a known brand, search for a recognized interpreter. */
188	if (interp != NULL) {
189		for (i = 0; i < MAX_BRANDS; i++) {
190			bi = elf_brand_list[i];
191			if (bi != NULL && hdr->e_machine == bi->machine &&
192			    strcmp(interp, bi->interp_path) == 0)
193				return (bi);
194		}
195	}
196
197	/* Lacking a recognized interpreter, try the default brand */
198	for (i = 0; i < MAX_BRANDS; i++) {
199		bi = elf_brand_list[i];
200		if (bi != NULL && hdr->e_machine == bi->machine &&
201		    __elfN(fallback_brand) == bi->brand)
202			return (bi);
203	}
204	return (NULL);
205}
206
207static int
208__elfN(check_header)(const Elf_Ehdr *hdr)
209{
210	Elf_Brandinfo *bi;
211	int i;
212
213	if (!IS_ELF(*hdr) ||
214	    hdr->e_ident[EI_CLASS] != ELF_TARG_CLASS ||
215	    hdr->e_ident[EI_DATA] != ELF_TARG_DATA ||
216	    hdr->e_ident[EI_VERSION] != EV_CURRENT ||
217	    hdr->e_phentsize != sizeof(Elf_Phdr) ||
218	    hdr->e_version != ELF_TARG_VER)
219		return (ENOEXEC);
220
221	/*
222	 * Make sure we have at least one brand for this machine.
223	 */
224
225	for (i = 0; i < MAX_BRANDS; i++) {
226		bi = elf_brand_list[i];
227		if (bi != NULL && bi->machine == hdr->e_machine)
228			break;
229	}
230	if (i == MAX_BRANDS)
231		return (ENOEXEC);
232
233	return (0);
234}
235
/*
 * Map a partial (non-page-aligned) fragment of a segment into "map"
 * by creating anonymous pages and hand-copying the file data into
 * them.  Note: the "prot" argument is unused here; the final
 * protection is applied by the caller (see load_section's
 * vm_map_protect()).  Returns a KERN_* status.
 */
static int
__elfN(map_partial)(vm_map_t map, vm_object_t object, vm_ooffset_t offset,
    vm_offset_t start, vm_offset_t end, vm_prot_t prot)
{
	struct sf_buf *sf;
	int error;
	vm_offset_t off;

	/*
	 * Create the page if it doesn't exist yet. Ignore errors.
	 */
	vm_map_lock(map);
	vm_map_insert(map, NULL, 0, trunc_page(start), round_page(end),
	    VM_PROT_ALL, VM_PROT_ALL, 0);
	vm_map_unlock(map);

	/*
	 * Find the page from the underlying object.
	 */
	if (object) {
		/* Temporarily map the backing page into KVA via sf_buf. */
		sf = vm_imgact_map_page(object, offset);
		if (sf == NULL)
			return (KERN_FAILURE);
		/* Byte offset of the fragment within its page. */
		off = offset - trunc_page(offset);
		error = copyout((caddr_t)sf_buf_kva(sf) + off, (caddr_t)start,
		    end - start);
		vm_imgact_unmap_page(sf);
		if (error) {
			return (KERN_FAILURE);
		}
	}

	return (KERN_SUCCESS);
}
270
/*
 * Insert the mapping [start, end) backed by "object" at file offset
 * "offset" into "map".  Unaligned head and tail fragments are peeled
 * off and handled by map_partial(); the aligned middle is either
 * mapped directly from the object (page-aligned offset) or copied by
 * hand (unaligned offset).  Returns a KERN_* status.
 */
static int
__elfN(map_insert)(vm_map_t map, vm_object_t object, vm_ooffset_t offset,
    vm_offset_t start, vm_offset_t end, vm_prot_t prot, int cow)
{
	struct sf_buf *sf;
	vm_offset_t off;
	vm_size_t sz;
	int error, rv;

	/* Peel off an unaligned head fragment, then advance start/offset
	 * in lockstep to the next page boundary. */
	if (start != trunc_page(start)) {
		rv = __elfN(map_partial)(map, object, offset, start,
		    round_page(start), prot);
		if (rv)
			return (rv);
		offset += round_page(start) - start;
		start = round_page(start);
	}
	/* Peel off an unaligned tail fragment likewise. */
	if (end != round_page(end)) {
		rv = __elfN(map_partial)(map, object, offset +
		    trunc_page(end) - start, trunc_page(end), end, prot);
		if (rv)
			return (rv);
		end = trunc_page(end);
	}
	if (end > start) {
		if (offset & PAGE_MASK) {
			/*
			 * The mapping is not page aligned. This means we have
			 * to copy the data. Sigh.
			 */
			rv = vm_map_find(map, NULL, 0, &start, end - start,
			    FALSE, prot | VM_PROT_WRITE, VM_PROT_ALL, 0);
			if (rv)
				return (rv);
			if (object == NULL)
				return (KERN_SUCCESS);
			/* Copy one backing page at a time; sz is capped so a
			 * copy never crosses a page boundary in the object. */
			for (; start < end; start += sz) {
				sf = vm_imgact_map_page(object, offset);
				if (sf == NULL)
					return (KERN_FAILURE);
				off = offset - trunc_page(offset);
				sz = end - start;
				if (sz > PAGE_SIZE - off)
					sz = PAGE_SIZE - off;
				error = copyout((caddr_t)sf_buf_kva(sf) + off,
				    (caddr_t)start, sz);
				vm_imgact_unmap_page(sf);
				if (error) {
					return (KERN_FAILURE);
				}
				offset += sz;
			}
			rv = KERN_SUCCESS;
		} else {
			/* Aligned: map the object pages directly.  The extra
			 * reference is dropped again if the insert fails. */
			vm_object_reference(object);
			vm_map_lock(map);
			rv = vm_map_insert(map, object, offset, start, end,
			    prot, VM_PROT_ALL, cow);
			vm_map_unlock(map);
			if (rv != KERN_SUCCESS)
				vm_object_deallocate(object);
		}
		return (rv);
	} else {
		return (KERN_SUCCESS);
	}
}
338
/*
 * Map one PT_LOAD segment into "vmspace": the file-backed portion
 * (filsz bytes at "offset") plus any zero-fill (bss) portion up to
 * memsz, at virtual address "vmaddr" with protection "prot", using
 * the brand's page size.  Returns 0 or an errno.
 */
static int
__elfN(load_section)(struct vmspace *vmspace,
	vm_object_t object, vm_offset_t offset,
	caddr_t vmaddr, size_t memsz, size_t filsz, vm_prot_t prot,
	size_t pagesize)
{
	struct sf_buf *sf;
	size_t map_len;
	vm_offset_t map_addr;
	int error, rv, cow;
	size_t copy_len;
	vm_offset_t file_addr;

	/*
	 * It's necessary to fail if the filsz + offset taken from the
	 * header is greater than the actual file pager object's size.
	 * If we were to allow this, then the vm_map_find() below would
	 * walk right off the end of the file object and into the ether.
	 *
	 * While I'm here, might as well check for something else that
	 * is invalid: filsz cannot be greater than memsz.
	 */
	if ((off_t)filsz + offset > object->un_pager.vnp.vnp_size ||
	    filsz > memsz) {
		uprintf("elf_load_section: truncated ELF file\n");
		return (ENOEXEC);
	}

	map_addr = trunc_page_ps((vm_offset_t)vmaddr, pagesize);
	file_addr = trunc_page_ps(offset, pagesize);

	/*
	 * We have two choices.  We can either clear the data in the last page
	 * of an oversized mapping, or we can start the anon mapping a page
	 * early and copy the initialized data into that first page.  We
	 * choose the second..
	 */
	if (memsz > filsz)
		map_len = trunc_page_ps(offset + filsz, pagesize) - file_addr;
	else
		map_len = round_page_ps(offset + filsz, pagesize) - file_addr;

	if (map_len != 0) {
		/* cow flags: don't dump readonly sections in core */
		cow = MAP_COPY_ON_WRITE | MAP_PREFAULT |
		    (prot & VM_PROT_WRITE ? 0 : MAP_DISABLE_COREDUMP);

		rv = __elfN(map_insert)(&vmspace->vm_map,
				      object,
				      file_addr,	/* file offset */
				      map_addr,		/* virtual start */
				      map_addr + map_len,/* virtual end */
				      prot,
				      cow);
		if (rv != KERN_SUCCESS)
			return (EINVAL);

		/* we can stop now if we've covered it all */
		if (memsz == filsz) {
			return (0);
		}
	}


	/*
	 * We have to get the remaining bit of the file into the first part
	 * of the oversized map segment.  This is normally because the .data
	 * segment in the file is extended to provide bss.  It's a neat idea
	 * to try and save a page, but it's a pain in the behind to implement.
	 */
	/* copy_len: initialized file bytes left over in the partial page
	 * that straddles the data/bss boundary. */
	copy_len = (offset + filsz) - trunc_page_ps(offset + filsz, pagesize);
	map_addr = trunc_page_ps((vm_offset_t)vmaddr + filsz, pagesize);
	map_len = round_page_ps((vm_offset_t)vmaddr + memsz, pagesize) -
	    map_addr;

	/* This had damn well better be true! */
	if (map_len != 0) {
		/* Anonymous zero-fill mapping for the bss region. */
		rv = __elfN(map_insert)(&vmspace->vm_map, NULL, 0, map_addr,
		    map_addr + map_len, VM_PROT_ALL, 0);
		if (rv != KERN_SUCCESS) {
			return (EINVAL);
		}
	}

	if (copy_len != 0) {
		vm_offset_t off;

		sf = vm_imgact_map_page(object, offset + filsz);
		if (sf == NULL)
			return (EIO);

		/* send the page fragment to user space */
		off = trunc_page_ps(offset + filsz, pagesize) -
		    trunc_page(offset + filsz);
		error = copyout((caddr_t)sf_buf_kva(sf) + off,
		    (caddr_t)map_addr, copy_len);
		vm_imgact_unmap_page(sf);
		if (error) {
			return (error);
		}
	}

	/*
	 * set it to the specified protection.
	 * XXX had better undo the damage from pasting over the cracks here!
	 */
	vm_map_protect(&vmspace->vm_map, trunc_page(map_addr),
	    round_page(map_addr + map_len),  prot, FALSE);

	return (0);
}
450
451/*
452 * Load the file "file" into memory.  It may be either a shared object
453 * or an executable.
454 *
455 * The "addr" reference parameter is in/out.  On entry, it specifies
456 * the address where a shared object should be loaded.  If the file is
457 * an executable, this value is ignored.  On exit, "addr" specifies
458 * where the file was actually loaded.
459 *
460 * The "entry" reference parameter is out only.  On exit, it specifies
461 * the entry point for the loaded file.
462 */
static int
__elfN(load_file)(struct proc *p, const char *file, u_long *addr,
	u_long *entry, size_t pagesize)
{
	struct {
		struct nameidata nd;
		struct vattr attr;
		struct image_params image_params;
	} *tempdata;
	const Elf_Ehdr *hdr = NULL;
	const Elf_Phdr *phdr = NULL;
	struct nameidata *nd;
	struct vmspace *vmspace = p->p_vmspace;
	struct vattr *attr;
	struct image_params *imgp;
	vm_prot_t prot;
	u_long rbase;
	u_long base_addr = 0;
	int vfslocked, error, i, numsegs;

	/* Heap-allocate the bulky lookup/exec state rather than putting
	 * it on the kernel stack. */
	tempdata = malloc(sizeof(*tempdata), M_TEMP, M_WAITOK);
	nd = &tempdata->nd;
	attr = &tempdata->attr;
	imgp = &tempdata->image_params;

	/*
	 * Initialize part of the common data
	 */
	imgp->proc = p;
	imgp->attr = attr;
	imgp->firstpage = NULL;
	imgp->image_header = NULL;
	imgp->object = NULL;
	imgp->execlabel = NULL;

	/* Look up and lock the interpreter/shared-object vnode. */
	NDINIT(nd, LOOKUP, MPSAFE|LOCKLEAF|FOLLOW, UIO_SYSSPACE, file,
	    curthread);
	vfslocked = 0;
	if ((error = namei(nd)) != 0) {
		nd->ni_vp = NULL;
		goto fail;
	}
	vfslocked = NDHASGIANT(nd);
	NDFREE(nd, NDF_ONLY_PNBUF);
	imgp->vp = nd->ni_vp;

	/*
	 * Check permissions, modes, uid, etc on the file, and "open" it.
	 */
	error = exec_check_permissions(imgp);
	if (error)
		goto fail;

	/* Map the first page so the headers can be inspected. */
	error = exec_map_first_page(imgp);
	if (error)
		goto fail;

	/*
	 * Also make certain that the interpreter stays the same, so set
	 * its VV_TEXT flag, too.
	 */
	nd->ni_vp->v_vflag |= VV_TEXT;

	imgp->object = nd->ni_vp->v_object;

	hdr = (const Elf_Ehdr *)imgp->image_header;
	if ((error = __elfN(check_header)(hdr)) != 0)
		goto fail;
	/* ET_DYN objects relocate to the caller-supplied base; ET_EXEC
	 * objects load at their linked addresses (rbase == 0). */
	if (hdr->e_type == ET_DYN)
		rbase = *addr;
	else if (hdr->e_type == ET_EXEC)
		rbase = 0;
	else {
		error = ENOEXEC;
		goto fail;
	}

	/* Only support headers that fit within first page for now      */
	/*    (multiplication of two Elf_Half fields will not overflow) */
	if ((hdr->e_phoff > PAGE_SIZE) ||
	    (hdr->e_phentsize * hdr->e_phnum) > PAGE_SIZE - hdr->e_phoff) {
		error = ENOEXEC;
		goto fail;
	}

	phdr = (const Elf_Phdr *)(imgp->image_header + hdr->e_phoff);
	if (!aligned(phdr, Elf_Addr)) {
		error = ENOEXEC;
		goto fail;
	}

	/* Map each PT_LOAD segment at its (relocated) address. */
	for (i = 0, numsegs = 0; i < hdr->e_phnum; i++) {
		if (phdr[i].p_type == PT_LOAD) {	/* Loadable segment */
			prot = 0;
			if (phdr[i].p_flags & PF_X)
  				prot |= VM_PROT_EXECUTE;
			if (phdr[i].p_flags & PF_W)
  				prot |= VM_PROT_WRITE;
			if (phdr[i].p_flags & PF_R)
  				prot |= VM_PROT_READ;

			if ((error = __elfN(load_section)(vmspace,
			    imgp->object, phdr[i].p_offset,
			    (caddr_t)(uintptr_t)phdr[i].p_vaddr + rbase,
			    phdr[i].p_memsz, phdr[i].p_filesz, prot,
			    pagesize)) != 0)
				goto fail;
			/*
			 * Establish the base address if this is the
			 * first segment.
			 */
			if (numsegs == 0)
  				base_addr = trunc_page(phdr[i].p_vaddr +
				    rbase);
			numsegs++;
		}
	}
	*addr = base_addr;
	*entry = (unsigned long)hdr->e_entry + rbase;

fail:
	/* Common unwind: release the first-page mapping, the (still
	 * locked) vnode, Giant if the filesystem needed it, and the
	 * temporary state. */
	if (imgp->firstpage)
		exec_unmap_first_page(imgp);

	if (nd->ni_vp)
		vput(nd->ni_vp);

	VFS_UNLOCK_GIANT(vfslocked);
	free(tempdata, M_TEMP);

	return (error);
}
595
static const char FREEBSD_ABI_VENDOR[] = "FreeBSD";

/*
 * The ELF image activator.  Recognizes an ELF executable, resolves its
 * brand, builds a fresh vmspace, maps the PT_LOAD segments, loads the
 * interpreter (if any), and constructs the auxargs block later pushed
 * onto the user stack by __elfN(freebsd_fixup)().  Returns -1 if the
 * image is not ELF (so other activators get a chance), else 0/errno.
 */
static int
__CONCAT(exec_, __elfN(imgact))(struct image_params *imgp)
{
	const Elf_Ehdr *hdr = (const Elf_Ehdr *)imgp->image_header;
	const Elf_Phdr *phdr, *pnote = NULL;
	Elf_Auxargs *elf_auxargs;
	struct vmspace *vmspace;
	vm_prot_t prot;
	u_long text_size = 0, data_size = 0, total_size = 0;
	u_long text_addr = 0, data_addr = 0;
	u_long seg_size, seg_addr;
	u_long addr, entry = 0, proghdr = 0;
	int error = 0, i;
	const char *interp = NULL, *newinterp = NULL;
	Elf_Brandinfo *brand_info;
	const Elf_Note *note, *note_end;
	char *path;
	const char *note_name;
	struct sysentvec *sv;

	/*
	 * Do we have a valid ELF header ?
	 *
	 * Only allow ET_EXEC & ET_DYN here, reject ET_DYN later
	 * if particular brand doesn't support it.
	 */
	if (__elfN(check_header)(hdr) != 0 ||
	    (hdr->e_type != ET_EXEC && hdr->e_type != ET_DYN))
		return (-1);

	/*
	 * From here on down, we return an errno, not -1, as we've
	 * detected an ELF file.
	 */

	if ((hdr->e_phoff > PAGE_SIZE) ||
	    (hdr->e_phoff + hdr->e_phentsize * hdr->e_phnum) > PAGE_SIZE) {
		/* Only support headers in first page for now */
		return (ENOEXEC);
	}
	phdr = (const Elf_Phdr *)(imgp->image_header + hdr->e_phoff);
	if (!aligned(phdr, Elf_Addr))
		return (ENOEXEC);
	/* Find the PT_INTERP entry, validating that the path lies wholly
	 * within the first page we have mapped. */
	for (i = 0; i < hdr->e_phnum; i++) {
		if (phdr[i].p_type == PT_INTERP) {
			/* Path to interpreter */
			if (phdr[i].p_filesz > MAXPATHLEN ||
			    phdr[i].p_offset + phdr[i].p_filesz > PAGE_SIZE)
				return (ENOEXEC);
			interp = imgp->image_header + phdr[i].p_offset;
			break;
		}
	}

	/* Resolve the ABI personality; refuse ET_DYN images for brands
	 * that cannot execute them directly. */
	brand_info = __elfN(get_brandinfo)(hdr, interp);
	if (brand_info == NULL) {
		uprintf("ELF binary type \"%u\" not known.\n",
		    hdr->e_ident[EI_OSABI]);
		return (ENOEXEC);
	}
	if (hdr->e_type == ET_DYN &&
	    (brand_info->flags & BI_CAN_EXEC_DYN) == 0)
		return (ENOEXEC);
	sv = brand_info->sysvec;
	if (interp != NULL && brand_info->interp_newpath != NULL)
		newinterp = brand_info->interp_newpath;

	/*
	 * Avoid a possible deadlock if the current address space is destroyed
	 * and that address space maps the locked vnode.  In the common case,
	 * the locked vnode's v_usecount is decremented but remains greater
	 * than zero.  Consequently, the vnode lock is not needed by vrele().
	 * However, in cases where the vnode lock is external, such as nullfs,
	 * v_usecount may become zero.
	 */
	VOP_UNLOCK(imgp->vp, 0);

	error = exec_new_vmspace(imgp, sv);
	imgp->proc->p_sysent = sv;

	/* Re-lock before the error check so the unwind path always sees
	 * the vnode locked, matching the caller's expectations. */
	vn_lock(imgp->vp, LK_EXCLUSIVE | LK_RETRY);
	if (error)
		return (error);

	vmspace = imgp->proc->p_vmspace;

	for (i = 0; i < hdr->e_phnum; i++) {
		switch (phdr[i].p_type) {
		case PT_LOAD:	/* Loadable segment */
			prot = 0;
			if (phdr[i].p_flags & PF_X)
  				prot |= VM_PROT_EXECUTE;
			if (phdr[i].p_flags & PF_W)
  				prot |= VM_PROT_WRITE;
			if (phdr[i].p_flags & PF_R)
  				prot |= VM_PROT_READ;

#if defined(__ia64__) && __ELF_WORD_SIZE == 32 && defined(IA32_ME_HARDER)
			/*
			 * Some x86 binaries assume read == executable,
			 * notably the M3 runtime and therefore cvsup
			 */
			if (prot & VM_PROT_READ)
				prot |= VM_PROT_EXECUTE;
#endif

			if ((error = __elfN(load_section)(vmspace,
			    imgp->object, phdr[i].p_offset,
			    (caddr_t)(uintptr_t)phdr[i].p_vaddr,
			    phdr[i].p_memsz, phdr[i].p_filesz, prot,
			    sv->sv_pagesize)) != 0)
				return (error);

			/*
			 * If this segment contains the program headers,
			 * remember their virtual address for the AT_PHDR
			 * aux entry. Static binaries don't usually include
			 * a PT_PHDR entry.
			 */
			if (phdr[i].p_offset == 0 &&
			    hdr->e_phoff + hdr->e_phnum * hdr->e_phentsize
				<= phdr[i].p_filesz)
				proghdr = phdr[i].p_vaddr + hdr->e_phoff;

			seg_addr = trunc_page(phdr[i].p_vaddr);
			seg_size = round_page(phdr[i].p_memsz +
			    phdr[i].p_vaddr - seg_addr);

			/*
			 * Is this .text or .data?  We can't use
			 * VM_PROT_WRITE or VM_PROT_EXEC, it breaks the
			 * alpha terribly and possibly does other bad
			 * things so we stick to the old way of figuring
			 * it out:  If the segment contains the program
			 * entry point, it's a text segment, otherwise it
			 * is a data segment.
			 *
			 * Note that obreak() assumes that data_addr +
			 * data_size == end of data load area, and the ELF
			 * file format expects segments to be sorted by
			 * address.  If multiple data segments exist, the
			 * last one will be used.
			 */
			if (hdr->e_entry >= phdr[i].p_vaddr &&
			    hdr->e_entry < (phdr[i].p_vaddr +
			    phdr[i].p_memsz)) {
				text_size = seg_size;
				text_addr = seg_addr;
				entry = (u_long)hdr->e_entry;
			} else {
				data_size = seg_size;
				data_addr = seg_addr;
			}
			total_size += seg_size;
			break;
		case PT_PHDR: 	/* Program header table info */
			proghdr = phdr[i].p_vaddr;
			break;
		case PT_NOTE:
			pnote = &phdr[i];
			break;
		default:
			break;
		}
	}

	/* Text-only images: make the data numbers mirror text so the
	 * vmspace accounting below stays sane. */
	if (data_addr == 0 && data_size == 0) {
		data_addr = text_addr;
		data_size = text_size;
	}

	/*
	 * Check limits.  It should be safe to check the
	 * limits after loading the segments since we do
	 * not actually fault in all the segments pages.
	 */
	PROC_LOCK(imgp->proc);
	if (data_size > lim_cur(imgp->proc, RLIMIT_DATA) ||
	    text_size > maxtsiz ||
	    total_size > lim_cur(imgp->proc, RLIMIT_VMEM)) {
		PROC_UNLOCK(imgp->proc);
		return (ENOMEM);
	}

	vmspace->vm_tsize = text_size >> PAGE_SHIFT;
	vmspace->vm_taddr = (caddr_t)(uintptr_t)text_addr;
	vmspace->vm_dsize = data_size >> PAGE_SHIFT;
	vmspace->vm_daddr = (caddr_t)(uintptr_t)data_addr;

	/*
	 * We load the dynamic linker where a userland call
	 * to mmap(0, ...) would put it.  The rationale behind this
	 * calculation is that it leaves room for the heap to grow to
	 * its maximum allowed size.
	 */
	addr = round_page((vm_offset_t)imgp->proc->p_vmspace->vm_daddr +
	    lim_max(imgp->proc, RLIMIT_DATA));
	PROC_UNLOCK(imgp->proc);

	imgp->entry_addr = entry;

	if (interp != NULL) {
		int have_interp = FALSE;
		/* The image vnode must be unlocked while load_file()
		 * performs its own lookup/lock of the interpreter. */
		VOP_UNLOCK(imgp->vp, 0);
		/* Try, in order: the brand's emulation-tree prefix, the
		 * brand's replacement interpreter, then the literal
		 * PT_INTERP path. */
		if (brand_info->emul_path != NULL &&
		    brand_info->emul_path[0] != '\0') {
			path = malloc(MAXPATHLEN, M_TEMP, M_WAITOK);
			snprintf(path, MAXPATHLEN, "%s%s",
			    brand_info->emul_path, interp);
			error = __elfN(load_file)(imgp->proc, path, &addr,
			    &imgp->entry_addr, sv->sv_pagesize);
			free(path, M_TEMP);
			if (error == 0)
				have_interp = TRUE;
		}
		if (!have_interp && newinterp != NULL) {
			error = __elfN(load_file)(imgp->proc, newinterp, &addr,
			    &imgp->entry_addr, sv->sv_pagesize);
			if (error == 0)
				have_interp = TRUE;
		}
		if (!have_interp) {
			error = __elfN(load_file)(imgp->proc, interp, &addr,
			    &imgp->entry_addr, sv->sv_pagesize);
		}
		vn_lock(imgp->vp, LK_EXCLUSIVE | LK_RETRY);
		if (error != 0) {
			uprintf("ELF interpreter %s not found\n", interp);
			return (error);
		}
	}

	/*
	 * Construct auxargs table (used by the fixup routine)
	 */
	elf_auxargs = malloc(sizeof(Elf_Auxargs), M_TEMP, M_WAITOK);
	elf_auxargs->execfd = -1;
	elf_auxargs->phdr = proghdr;
	elf_auxargs->phent = hdr->e_phentsize;
	elf_auxargs->phnum = hdr->e_phnum;
	elf_auxargs->pagesz = PAGE_SIZE;
	elf_auxargs->base = addr;
	elf_auxargs->flags = 0;
	elf_auxargs->entry = entry;
	elf_auxargs->trace = elf_trace;

	imgp->auxargs = elf_auxargs;
	imgp->interpreted = 0;

	/*
	 * Try to fetch the osreldate for FreeBSD binary from the ELF
	 * OSABI-note. Only the first page of the image is searched,
	 * the same as for headers.
	 */
	if (pnote != NULL && pnote->p_offset < PAGE_SIZE &&
	    pnote->p_offset + pnote->p_filesz < PAGE_SIZE ) {
		note = (const Elf_Note *)(imgp->image_header + pnote->p_offset);
		if (!aligned(note, Elf32_Addr)) {
			free(imgp->auxargs, M_TEMP);
			imgp->auxargs = NULL;
			return (ENOEXEC);
		}
		note_end = (const Elf_Note *)(imgp->image_header + pnote->p_offset +
		    pnote->p_filesz);
		/* Walk the note entries looking for the FreeBSD ABI tag
		 * (vendor "FreeBSD", type 1, 32-bit payload). */
		while (note < note_end) {
			if (note->n_namesz == sizeof(FREEBSD_ABI_VENDOR) &&
			    note->n_descsz == sizeof(int32_t) &&
			    note->n_type == 1 /* ABI_NOTETYPE */) {
				note_name = (const char *)(note + 1);
				if (strncmp(FREEBSD_ABI_VENDOR, note_name,
				    sizeof(FREEBSD_ABI_VENDOR)) == 0) {
					/* round_page_ps() doubles as a
					 * generic 4-byte roundup here. */
					imgp->proc->p_osrel = *(const int32_t *)
					    (note_name +
					    round_page_ps(sizeof(FREEBSD_ABI_VENDOR),
						sizeof(Elf32_Addr)));
					break;
				}
			}
			/* Advance past name and desc, each padded to
			 * 4-byte alignment per the ELF note format. */
			note = (const Elf_Note *)((const char *)(note + 1) +
			    round_page_ps(note->n_namesz, sizeof(Elf32_Addr)) +
			    round_page_ps(note->n_descsz, sizeof(Elf32_Addr)));
		}
	}

	return (error);
}
884
#define	suword __CONCAT(suword, __ELF_WORD_SIZE)

/*
 * Finish building the new process's user stack: write the ELF
 * auxiliary vector just above the argv/envp pointer arrays and push
 * argc below the current stack base.  Consumes and frees the auxargs
 * block built by the imgact.  Returns 0.
 */
int
__elfN(freebsd_fixup)(register_t **stack_base, struct image_params *imgp)
{
	Elf_Auxargs *args = (Elf_Auxargs *)imgp->auxargs;
	Elf_Addr *base;
	Elf_Addr *pos;

	base = (Elf_Addr *)*stack_base;
	/* Skip argc argv slots + envc envp slots + their two NULL
	 * terminators; the aux vector starts right after. */
	pos = base + (imgp->args->argc + imgp->args->envc + 2);

	if (args->execfd != -1) {
		AUXARGS_ENTRY(pos, AT_EXECFD, args->execfd);
	}
	AUXARGS_ENTRY(pos, AT_PHDR, args->phdr);
	AUXARGS_ENTRY(pos, AT_PHENT, args->phent);
	AUXARGS_ENTRY(pos, AT_PHNUM, args->phnum);
	AUXARGS_ENTRY(pos, AT_PAGESZ, args->pagesz);
	AUXARGS_ENTRY(pos, AT_FLAGS, args->flags);
	AUXARGS_ENTRY(pos, AT_ENTRY, args->entry);
	AUXARGS_ENTRY(pos, AT_BASE, args->base);
	AUXARGS_ENTRY(pos, AT_NULL, 0);

	free(imgp->auxargs, M_TEMP);
	imgp->auxargs = NULL;

	/* Push argc and move the stack base down one word. */
	base--;
	suword(base, (long)imgp->args->argc);
	*stack_base = (register_t *)base;
	return (0);
}
917
/*
 * Code for generating ELF core dumps.
 */

/* Per-map-entry callback type used by each_writable_segment(). */
typedef void (*segment_callback)(vm_map_entry_t, void *);

/* Closure for cb_put_phdr(). */
struct phdr_closure {
	Elf_Phdr *phdr;		/* Program header to fill in */
	Elf_Off offset;		/* Offset of segment in core file */
};

/* Closure for cb_size_segment(). */
struct sseg_closure {
	int count;		/* Count of writable segments. */
	size_t size;		/* Total size of all writable segments. */
};

static void cb_put_phdr(vm_map_entry_t, void *);
static void cb_size_segment(vm_map_entry_t, void *);
static void each_writable_segment(struct thread *, segment_callback, void *);
/* Build the core header area and write it to the vnode. */
static int __elfN(corehdr)(struct thread *, struct vnode *, struct ucred *,
    int, void *, size_t);
/* Fill in the header; with a NULL buffer only the size is computed
 * (see the dry run in coredump()). */
static void __elfN(puthdr)(struct thread *, void *, size_t *, int);
static void __elfN(putnote)(void *, size_t *, const char *, int,
    const void *, size_t);
944
945int
946__elfN(coredump)(td, vp, limit)
947	struct thread *td;
948	struct vnode *vp;
949	off_t limit;
950{
951	struct ucred *cred = td->td_ucred;
952	int error = 0;
953	struct sseg_closure seginfo;
954	void *hdr;
955	size_t hdrsize;
956
957	/* Size the program segments. */
958	seginfo.count = 0;
959	seginfo.size = 0;
960	each_writable_segment(td, cb_size_segment, &seginfo);
961
962	/*
963	 * Calculate the size of the core file header area by making
964	 * a dry run of generating it.  Nothing is written, but the
965	 * size is calculated.
966	 */
967	hdrsize = 0;
968	__elfN(puthdr)(td, (void *)NULL, &hdrsize, seginfo.count);
969
970	if (hdrsize + seginfo.size >= limit)
971		return (EFAULT);
972
973	/*
974	 * Allocate memory for building the header, fill it up,
975	 * and write it out.
976	 */
977	hdr = malloc(hdrsize, M_TEMP, M_WAITOK);
978	if (hdr == NULL) {
979		return (EINVAL);
980	}
981	error = __elfN(corehdr)(td, vp, cred, seginfo.count, hdr, hdrsize);
982
983	/* Write the contents of all of the writable segments. */
984	if (error == 0) {
985		Elf_Phdr *php;
986		off_t offset;
987		int i;
988
989		php = (Elf_Phdr *)((char *)hdr + sizeof(Elf_Ehdr)) + 1;
990		offset = hdrsize;
991		for (i = 0; i < seginfo.count; i++) {
992			error = vn_rdwr_inchunks(UIO_WRITE, vp,
993			    (caddr_t)(uintptr_t)php->p_vaddr,
994			    php->p_filesz, offset, UIO_USERSPACE,
995			    IO_UNIT | IO_DIRECT, cred, NOCRED, NULL,
996			    curthread);
997			if (error != 0)
998				break;
999			offset += php->p_filesz;
1000			php++;
1001		}
1002	}
1003	free(hdr, M_TEMP);
1004
1005	return (error);
1006}
1007
1008/*
1009 * A callback for each_writable_segment() to write out the segment's
1010 * program header entry.
1011 */
1012static void
1013cb_put_phdr(entry, closure)
1014	vm_map_entry_t entry;
1015	void *closure;
1016{
1017	struct phdr_closure *phc = (struct phdr_closure *)closure;
1018	Elf_Phdr *phdr = phc->phdr;
1019
1020	phc->offset = round_page(phc->offset);
1021
1022	phdr->p_type = PT_LOAD;
1023	phdr->p_offset = phc->offset;
1024	phdr->p_vaddr = entry->start;
1025	phdr->p_paddr = 0;
1026	phdr->p_filesz = phdr->p_memsz = entry->end - entry->start;
1027	phdr->p_align = PAGE_SIZE;
1028	phdr->p_flags = 0;
1029	if (entry->protection & VM_PROT_READ)
1030		phdr->p_flags |= PF_R;
1031	if (entry->protection & VM_PROT_WRITE)
1032		phdr->p_flags |= PF_W;
1033	if (entry->protection & VM_PROT_EXECUTE)
1034		phdr->p_flags |= PF_X;
1035
1036	phc->offset += phdr->p_filesz;
1037	phc->phdr++;
1038}
1039
1040/*
1041 * A callback for each_writable_segment() to gather information about
1042 * the number of segments and their total size.
1043 */
1044static void
1045cb_size_segment(entry, closure)
1046	vm_map_entry_t entry;
1047	void *closure;
1048{
1049	struct sseg_closure *ssc = (struct sseg_closure *)closure;
1050
1051	ssc->count++;
1052	ssc->size += entry->end - entry->start;
1053}
1054
1055/*
1056 * For each writable segment in the process's memory map, call the given
1057 * function with a pointer to the map entry and some arbitrary
1058 * caller-supplied data.
1059 */
1060static void
1061each_writable_segment(td, func, closure)
1062	struct thread *td;
1063	segment_callback func;
1064	void *closure;
1065{
1066	struct proc *p = td->td_proc;
1067	vm_map_t map = &p->p_vmspace->vm_map;
1068	vm_map_entry_t entry;
1069	vm_object_t backing_object, object;
1070	boolean_t ignore_entry;
1071
1072	vm_map_lock_read(map);
1073	for (entry = map->header.next; entry != &map->header;
1074	    entry = entry->next) {
1075		/*
1076		 * Don't dump inaccessible mappings, deal with legacy
1077		 * coredump mode.
1078		 *
1079		 * Note that read-only segments related to the elf binary
1080		 * are marked MAP_ENTRY_NOCOREDUMP now so we no longer
1081		 * need to arbitrarily ignore such segments.
1082		 */
1083		if (elf_legacy_coredump) {
1084			if ((entry->protection & VM_PROT_RW) != VM_PROT_RW)
1085				continue;
1086		} else {
1087			if ((entry->protection & VM_PROT_ALL) == 0)
1088				continue;
1089		}
1090
1091		/*
1092		 * Dont include memory segment in the coredump if
1093		 * MAP_NOCORE is set in mmap(2) or MADV_NOCORE in
1094		 * madvise(2).  Do not dump submaps (i.e. parts of the
1095		 * kernel map).
1096		 */
1097		if (entry->eflags & (MAP_ENTRY_NOCOREDUMP|MAP_ENTRY_IS_SUB_MAP))
1098			continue;
1099
1100		if ((object = entry->object.vm_object) == NULL)
1101			continue;
1102
1103		/* Ignore memory-mapped devices and such things. */
1104		VM_OBJECT_LOCK(object);
1105		while ((backing_object = object->backing_object) != NULL) {
1106			VM_OBJECT_LOCK(backing_object);
1107			VM_OBJECT_UNLOCK(object);
1108			object = backing_object;
1109		}
1110		ignore_entry = object->type != OBJT_DEFAULT &&
1111		    object->type != OBJT_SWAP && object->type != OBJT_VNODE;
1112		VM_OBJECT_UNLOCK(object);
1113		if (ignore_entry)
1114			continue;
1115
1116		(*func)(entry, closure);
1117	}
1118	vm_map_unlock_read(map);
1119}
1120
1121/*
1122 * Write the core file header to the file, including padding up to
1123 * the page boundary.
1124 */
1125static int
1126__elfN(corehdr)(td, vp, cred, numsegs, hdr, hdrsize)
1127	struct thread *td;
1128	struct vnode *vp;
1129	struct ucred *cred;
1130	int numsegs;
1131	size_t hdrsize;
1132	void *hdr;
1133{
1134	size_t off;
1135
1136	/* Fill in the header. */
1137	bzero(hdr, hdrsize);
1138	off = 0;
1139	__elfN(puthdr)(td, hdr, &off, numsegs);
1140
1141	/* Write it to the core file. */
1142	return (vn_rdwr_inchunks(UIO_WRITE, vp, hdr, hdrsize, (off_t)0,
1143	    UIO_SYSSPACE, IO_UNIT | IO_DIRECT, cred, NOCRED, NULL,
1144	    td));
1145}
1146
/*
 * Select the register/process-info structure layouts used in the core
 * dump notes.  When this is the 32-bit image activator built into a
 * kernel with COMPAT_IA32, the 32-bit variants are used so the note
 * contents match the dumped process's ABI.
 */
#if defined(COMPAT_IA32) && __ELF_WORD_SIZE == 32
typedef struct prstatus32 elf_prstatus_t;
typedef struct prpsinfo32 elf_prpsinfo_t;
typedef struct fpreg32 elf_prfpregset_t;
typedef struct fpreg32 elf_fpregset_t;
typedef struct reg32 elf_gregset_t;
#else
typedef prstatus_t elf_prstatus_t;
typedef prpsinfo_t elf_prpsinfo_t;
typedef prfpregset_t elf_prfpregset_t;
typedef prfpregset_t elf_fpregset_t;
typedef gregset_t elf_gregset_t;
#endif
1160
/*
 * Generate the core file header area at *off within dst: the ELF
 * header, the program headers, and the note segment.  When dst is
 * NULL this is a sizing dry run: nothing is written and no data is
 * collected, but *off is advanced exactly as it would be when
 * writing, so the caller learns the required header size.
 */
static void
__elfN(puthdr)(struct thread *td, void *dst, size_t *off, int numsegs)
{
	/* Scratch storage for the note payloads (write pass only). */
	struct {
		elf_prstatus_t status;
		elf_prfpregset_t fpregset;
		elf_prpsinfo_t psinfo;
	} *tempdata;
	elf_prstatus_t *status;
	elf_prfpregset_t *fpregset;
	elf_prpsinfo_t *psinfo;
	struct proc *p;
	struct thread *thr;
	size_t ehoff, noteoff, notesz, phoff;

	p = td->td_proc;

	/* Reserve space for the ELF header. */
	ehoff = *off;
	*off += sizeof(Elf_Ehdr);

	/* Reserve space for the program headers: one PT_NOTE + segments. */
	phoff = *off;
	*off += (numsegs + 1) * sizeof(Elf_Phdr);

	noteoff = *off;
	/*
	 * Don't allocate space for the notes if we're just calculating
	 * the size of the header. We also don't collect the data.
	 */
	if (dst != NULL) {
		tempdata = malloc(sizeof(*tempdata), M_TEMP, M_ZERO|M_WAITOK);
		status = &tempdata->status;
		fpregset = &tempdata->fpregset;
		psinfo = &tempdata->psinfo;
	} else {
		tempdata = NULL;
		status = NULL;
		fpregset = NULL;
		psinfo = NULL;
	}

	if (dst != NULL) {
		psinfo->pr_version = PRPSINFO_VERSION;
		psinfo->pr_psinfosz = sizeof(elf_prpsinfo_t);
		strlcpy(psinfo->pr_fname, p->p_comm, sizeof(psinfo->pr_fname));
		/*
		 * XXX - We don't fill in the command line arguments properly
		 * yet.
		 */
		strlcpy(psinfo->pr_psargs, p->p_comm,
		    sizeof(psinfo->pr_psargs));
	}
	__elfN(putnote)(dst, off, "FreeBSD", NT_PRPSINFO, psinfo,
	    sizeof *psinfo);

	/*
	 * To have the debugger select the right thread (LWP) as the initial
	 * thread, we dump the state of the thread passed to us in td first.
	 * This is the thread that causes the core dump and thus likely to
	 * be the right thread one wants to have selected in the debugger.
	 */
	thr = td;
	while (thr != NULL) {
		if (dst != NULL) {
			status->pr_version = PRSTATUS_VERSION;
			status->pr_statussz = sizeof(elf_prstatus_t);
			status->pr_gregsetsz = sizeof(elf_gregset_t);
			status->pr_fpregsetsz = sizeof(elf_fpregset_t);
			status->pr_osreldate = osreldate;
			status->pr_cursig = p->p_sig;
			status->pr_pid = thr->td_tid;
#if defined(COMPAT_IA32) && __ELF_WORD_SIZE == 32
			fill_regs32(thr, &status->pr_reg);
			fill_fpregs32(thr, fpregset);
#else
			fill_regs(thr, &status->pr_reg);
			fill_fpregs(thr, fpregset);
#endif
		}
		/* Even on the dry run these advance *off for sizing. */
		__elfN(putnote)(dst, off, "FreeBSD", NT_PRSTATUS, status,
		    sizeof *status);
		__elfN(putnote)(dst, off, "FreeBSD", NT_FPREGSET, fpregset,
		    sizeof *fpregset);
		/*
		 * Allow for MD specific notes, as well as any MD
		 * specific preparations for writing MI notes.
		 */
		__elfN(dump_thread)(thr, dst, off);

		/*
		 * Iterate the remaining threads in list order, skipping
		 * td when it comes around again since it was done first.
		 */
		thr = (thr == td) ? TAILQ_FIRST(&p->p_threads) :
		    TAILQ_NEXT(thr, td_plist);
		if (thr == td)
			thr = TAILQ_NEXT(thr, td_plist);
	}

	notesz = *off - noteoff;

	if (dst != NULL)
		free(tempdata, M_TEMP);

	/* Align up to a page boundary for the program segments. */
	*off = round_page(*off);

	if (dst != NULL) {
		Elf_Ehdr *ehdr;
		Elf_Phdr *phdr;
		struct phdr_closure phc;

		/*
		 * Fill in the ELF header.
		 */
		ehdr = (Elf_Ehdr *)((char *)dst + ehoff);
		ehdr->e_ident[EI_MAG0] = ELFMAG0;
		ehdr->e_ident[EI_MAG1] = ELFMAG1;
		ehdr->e_ident[EI_MAG2] = ELFMAG2;
		ehdr->e_ident[EI_MAG3] = ELFMAG3;
		ehdr->e_ident[EI_CLASS] = ELF_CLASS;
		ehdr->e_ident[EI_DATA] = ELF_DATA;
		ehdr->e_ident[EI_VERSION] = EV_CURRENT;
		ehdr->e_ident[EI_OSABI] = ELFOSABI_FREEBSD;
		ehdr->e_ident[EI_ABIVERSION] = 0;
		ehdr->e_ident[EI_PAD] = 0;
		ehdr->e_type = ET_CORE;
#if defined(COMPAT_IA32) && __ELF_WORD_SIZE == 32
		ehdr->e_machine = EM_386;
#else
		ehdr->e_machine = ELF_ARCH;
#endif
		ehdr->e_version = EV_CURRENT;
		ehdr->e_entry = 0;
		ehdr->e_phoff = phoff;
		ehdr->e_flags = 0;
		ehdr->e_ehsize = sizeof(Elf_Ehdr);
		ehdr->e_phentsize = sizeof(Elf_Phdr);
		ehdr->e_phnum = numsegs + 1;
		ehdr->e_shentsize = sizeof(Elf_Shdr);
		ehdr->e_shnum = 0;
		ehdr->e_shstrndx = SHN_UNDEF;

		/*
		 * Fill in the program header entries.
		 */
		phdr = (Elf_Phdr *)((char *)dst + phoff);

		/* The note segement. */
		phdr->p_type = PT_NOTE;
		phdr->p_offset = noteoff;
		phdr->p_vaddr = 0;
		phdr->p_paddr = 0;
		phdr->p_filesz = notesz;
		phdr->p_memsz = 0;
		phdr->p_flags = 0;
		phdr->p_align = 0;
		phdr++;

		/* All the writable segments from the program. */
		phc.phdr = phdr;
		phc.offset = *off;
		each_writable_segment(td, cb_put_phdr, &phc);
	}
}
1321
1322static void
1323__elfN(putnote)(void *dst, size_t *off, const char *name, int type,
1324    const void *desc, size_t descsz)
1325{
1326	Elf_Note note;
1327
1328	note.n_namesz = strlen(name) + 1;
1329	note.n_descsz = descsz;
1330	note.n_type = type;
1331	if (dst != NULL)
1332		bcopy(&note, (char *)dst + *off, sizeof note);
1333	*off += sizeof note;
1334	if (dst != NULL)
1335		bcopy(name, (char *)dst + *off, note.n_namesz);
1336	*off += roundup2(note.n_namesz, sizeof(Elf_Size));
1337	if (dst != NULL)
1338		bcopy(desc, (char *)dst + *off, note.n_descsz);
1339	*off += roundup2(note.n_descsz, sizeof(Elf_Size));
1340}
1341
1342/*
1343 * Tell kern_execve.c about it, with a little help from the linker.
1344 */
1345static struct execsw __elfN(execsw) = {
1346	__CONCAT(exec_, __elfN(imgact)),
1347	__XSTRING(__CONCAT(ELF, __ELF_WORD_SIZE))
1348};
1349EXEC_SET(__CONCAT(elf, __ELF_WORD_SIZE), __elfN(execsw));
1350