imgact_elf.c revision 196512
1/*-
2 * Copyright (c) 2000 David O'Brien
3 * Copyright (c) 1995-1996 Søren Schmidt
4 * Copyright (c) 1996 Peter Wemm
5 * All rights reserved.
6 *
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following conditions
9 * are met:
10 * 1. Redistributions of source code must retain the above copyright
11 *    notice, this list of conditions and the following disclaimer
12 *    in this position and unchanged.
13 * 2. Redistributions in binary form must reproduce the above copyright
14 *    notice, this list of conditions and the following disclaimer in the
15 *    documentation and/or other materials provided with the distribution.
16 * 3. The name of the author may not be used to endorse or promote products
17 *    derived from this software without specific prior written permission
18 *
19 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
20 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
21 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
22 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
23 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
24 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
25 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
26 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
27 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
28 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
29 */
30
31#include <sys/cdefs.h>
32__FBSDID("$FreeBSD: head/sys/kern/imgact_elf.c 196512 2009-08-24 16:19:47Z bz $");
33
34#include "opt_compat.h"
35
36#include <sys/param.h>
37#include <sys/exec.h>
38#include <sys/fcntl.h>
39#include <sys/imgact.h>
40#include <sys/imgact_elf.h>
41#include <sys/kernel.h>
42#include <sys/lock.h>
43#include <sys/malloc.h>
44#include <sys/mount.h>
45#include <sys/mutex.h>
46#include <sys/mman.h>
47#include <sys/namei.h>
48#include <sys/pioctl.h>
49#include <sys/proc.h>
50#include <sys/procfs.h>
51#include <sys/resourcevar.h>
52#include <sys/sf_buf.h>
53#include <sys/systm.h>
54#include <sys/signalvar.h>
55#include <sys/stat.h>
56#include <sys/sx.h>
57#include <sys/syscall.h>
58#include <sys/sysctl.h>
59#include <sys/sysent.h>
60#include <sys/vnode.h>
61
62#include <vm/vm.h>
63#include <vm/vm_kern.h>
64#include <vm/vm_param.h>
65#include <vm/pmap.h>
66#include <vm/vm_map.h>
67#include <vm/vm_object.h>
68#include <vm/vm_extern.h>
69
70#include <machine/elf.h>
71#include <machine/md_var.h>
72
73#if defined(COMPAT_IA32) && __ELF_WORD_SIZE == 32
74#include <machine/fpu.h>
75#include <compat/ia32/ia32_reg.h>
76#endif
77
78#define OLD_EI_BRAND	8
79
80static int __elfN(check_header)(const Elf_Ehdr *hdr);
81static Elf_Brandinfo *__elfN(get_brandinfo)(struct image_params *imgp,
82    const char *interp, int32_t *osrel);
83static int __elfN(load_file)(struct proc *p, const char *file, u_long *addr,
84    u_long *entry, size_t pagesize);
85static int __elfN(load_section)(struct vmspace *vmspace, vm_object_t object,
86    vm_offset_t offset, caddr_t vmaddr, size_t memsz, size_t filsz,
87    vm_prot_t prot, size_t pagesize);
88static int __CONCAT(exec_, __elfN(imgact))(struct image_params *imgp);
89static boolean_t __elfN(freebsd_trans_osrel)(const Elf_Note *note,
90    int32_t *osrel);
91static boolean_t kfreebsd_trans_osrel(const Elf_Note *note, int32_t *osrel);
92static boolean_t __elfN(check_note)(struct image_params *imgp,
93    Elf_Brandnote *checknote, int32_t *osrel);
94
95SYSCTL_NODE(_kern, OID_AUTO, __CONCAT(elf, __ELF_WORD_SIZE), CTLFLAG_RW, 0,
96    "");
97
98int __elfN(fallback_brand) = -1;
99SYSCTL_INT(__CONCAT(_kern_elf, __ELF_WORD_SIZE), OID_AUTO,
100    fallback_brand, CTLFLAG_RW, &__elfN(fallback_brand), 0,
101    __XSTRING(__CONCAT(ELF, __ELF_WORD_SIZE)) " brand of last resort");
102TUNABLE_INT("kern.elf" __XSTRING(__ELF_WORD_SIZE) ".fallback_brand",
103    &__elfN(fallback_brand));
104
105static int elf_legacy_coredump = 0;
106SYSCTL_INT(_debug, OID_AUTO, __elfN(legacy_coredump), CTLFLAG_RW,
107    &elf_legacy_coredump, 0, "");
108
109static Elf_Brandinfo *elf_brand_list[MAX_BRANDS];
110
/*
 * Page-size aware rounding helpers.  "ps" must be a power of two.
 *
 * trunc_page_ps(va, ps)  rounds va down to a multiple of ps.
 * round_page_ps(va, ps)  rounds va up to a multiple of ps.
 * aligned(a, t)          true when address a is a multiple of sizeof(t).
 *
 * Every macro argument is parenthesized so that an expression such as
 * "1 << shift" can be passed as the page size without operator
 * precedence changing the result.
 */
#define	trunc_page_ps(va, ps)	((va) & ~((ps) - 1))
#define	round_page_ps(va, ps)	(((va) + ((ps) - 1)) & ~((ps) - 1))
#define	aligned(a, t)	(trunc_page_ps((u_long)(a), sizeof(t)) == (u_long)(a))
114
115static const char FREEBSD_ABI_VENDOR[] = "FreeBSD";
116
117Elf_Brandnote __elfN(freebsd_brandnote) = {
118	.hdr.n_namesz	= sizeof(FREEBSD_ABI_VENDOR),
119	.hdr.n_descsz	= sizeof(int32_t),
120	.hdr.n_type	= 1,
121	.vendor		= FREEBSD_ABI_VENDOR,
122	.flags		= BN_TRANSLATE_OSREL,
123	.trans_osrel	= __elfN(freebsd_trans_osrel)
124};
125
126static boolean_t
127__elfN(freebsd_trans_osrel)(const Elf_Note *note, int32_t *osrel)
128{
129	uintptr_t p;
130
131	p = (uintptr_t)(note + 1);
132	p += roundup2(note->n_namesz, sizeof(Elf32_Addr));
133	*osrel = *(const int32_t *)(p);
134
135	return (TRUE);
136}
137
138static const char GNU_ABI_VENDOR[] = "GNU";
139static int GNU_KFREEBSD_ABI_DESC = 3;
140
141Elf_Brandnote __elfN(kfreebsd_brandnote) = {
142	.hdr.n_namesz	= sizeof(GNU_ABI_VENDOR),
143	.hdr.n_descsz	= 16,	/* XXX at least 16 */
144	.hdr.n_type	= 1,
145	.vendor		= GNU_ABI_VENDOR,
146	.flags		= BN_TRANSLATE_OSREL,
147	.trans_osrel	= kfreebsd_trans_osrel
148};
149
150static boolean_t
151kfreebsd_trans_osrel(const Elf_Note *note, int32_t *osrel)
152{
153	const Elf32_Word *desc;
154	uintptr_t p;
155
156	p = (uintptr_t)(note + 1);
157	p += roundup2(note->n_namesz, sizeof(Elf32_Addr));
158
159	desc = (const Elf32_Word *)p;
160	if (desc[0] != GNU_KFREEBSD_ABI_DESC)
161		return (FALSE);
162
163	/*
164	 * Debian GNU/kFreeBSD embed the earliest compatible kernel version
165	 * (__FreeBSD_version: <major><two digit minor>Rxx) in the LSB way.
166	 */
167	*osrel = desc[1] * 100000 + desc[2] * 1000 + desc[3];
168
169	return (TRUE);
170}
171
172int
173__elfN(insert_brand_entry)(Elf_Brandinfo *entry)
174{
175	int i;
176
177	for (i = 0; i < MAX_BRANDS; i++) {
178		if (elf_brand_list[i] == NULL) {
179			elf_brand_list[i] = entry;
180			break;
181		}
182	}
183	if (i == MAX_BRANDS)
184		return (-1);
185	return (0);
186}
187
188int
189__elfN(remove_brand_entry)(Elf_Brandinfo *entry)
190{
191	int i;
192
193	for (i = 0; i < MAX_BRANDS; i++) {
194		if (elf_brand_list[i] == entry) {
195			elf_brand_list[i] = NULL;
196			break;
197		}
198	}
199	if (i == MAX_BRANDS)
200		return (-1);
201	return (0);
202}
203
204int
205__elfN(brand_inuse)(Elf_Brandinfo *entry)
206{
207	struct proc *p;
208	int rval = FALSE;
209
210	sx_slock(&allproc_lock);
211	FOREACH_PROC_IN_SYSTEM(p) {
212		if (p->p_sysent == entry->sysvec) {
213			rval = TRUE;
214			break;
215		}
216	}
217	sx_sunlock(&allproc_lock);
218
219	return (rval);
220}
221
222static Elf_Brandinfo *
223__elfN(get_brandinfo)(struct image_params *imgp, const char *interp,
224    int32_t *osrel)
225{
226	const Elf_Ehdr *hdr = (const Elf_Ehdr *)imgp->image_header;
227	Elf_Brandinfo *bi;
228	boolean_t ret;
229	int i;
230
231	/*
232	 * We support four types of branding -- (1) the ELF EI_OSABI field
233	 * that SCO added to the ELF spec, (2) FreeBSD 3.x's traditional string
234	 * branding w/in the ELF header, (3) path of the `interp_path'
235	 * field, and (4) the ".note.ABI-tag" ELF section.
236	 */
237
238	/* Look for an ".note.ABI-tag" ELF section */
239	for (i = 0; i < MAX_BRANDS; i++) {
240		bi = elf_brand_list[i];
241		if (bi != NULL && hdr->e_machine == bi->machine &&
242		    (bi->flags & BI_BRAND_NOTE) != 0) {
243			ret = __elfN(check_note)(imgp, bi->brand_note, osrel);
244			if (ret)
245				return (bi);
246		}
247	}
248
249	/* If the executable has a brand, search for it in the brand list. */
250	for (i = 0; i < MAX_BRANDS; i++) {
251		bi = elf_brand_list[i];
252		if (bi != NULL && hdr->e_machine == bi->machine &&
253		    (hdr->e_ident[EI_OSABI] == bi->brand ||
254		    strncmp((const char *)&hdr->e_ident[OLD_EI_BRAND],
255		    bi->compat_3_brand, strlen(bi->compat_3_brand)) == 0))
256			return (bi);
257	}
258
259	/* Lacking a known brand, search for a recognized interpreter. */
260	if (interp != NULL) {
261		for (i = 0; i < MAX_BRANDS; i++) {
262			bi = elf_brand_list[i];
263			if (bi != NULL && hdr->e_machine == bi->machine &&
264			    strcmp(interp, bi->interp_path) == 0)
265				return (bi);
266		}
267	}
268
269	/* Lacking a recognized interpreter, try the default brand */
270	for (i = 0; i < MAX_BRANDS; i++) {
271		bi = elf_brand_list[i];
272		if (bi != NULL && hdr->e_machine == bi->machine &&
273		    __elfN(fallback_brand) == bi->brand)
274			return (bi);
275	}
276	return (NULL);
277}
278
279static int
280__elfN(check_header)(const Elf_Ehdr *hdr)
281{
282	Elf_Brandinfo *bi;
283	int i;
284
285	if (!IS_ELF(*hdr) ||
286	    hdr->e_ident[EI_CLASS] != ELF_TARG_CLASS ||
287	    hdr->e_ident[EI_DATA] != ELF_TARG_DATA ||
288	    hdr->e_ident[EI_VERSION] != EV_CURRENT ||
289	    hdr->e_phentsize != sizeof(Elf_Phdr) ||
290	    hdr->e_version != ELF_TARG_VER)
291		return (ENOEXEC);
292
293	/*
294	 * Make sure we have at least one brand for this machine.
295	 */
296
297	for (i = 0; i < MAX_BRANDS; i++) {
298		bi = elf_brand_list[i];
299		if (bi != NULL && bi->machine == hdr->e_machine)
300			break;
301	}
302	if (i == MAX_BRANDS)
303		return (ENOEXEC);
304
305	return (0);
306}
307
308static int
309__elfN(map_partial)(vm_map_t map, vm_object_t object, vm_ooffset_t offset,
310    vm_offset_t start, vm_offset_t end, vm_prot_t prot)
311{
312	struct sf_buf *sf;
313	int error;
314	vm_offset_t off;
315
316	/*
317	 * Create the page if it doesn't exist yet. Ignore errors.
318	 */
319	vm_map_lock(map);
320	vm_map_insert(map, NULL, 0, trunc_page(start), round_page(end),
321	    VM_PROT_ALL, VM_PROT_ALL, 0);
322	vm_map_unlock(map);
323
324	/*
325	 * Find the page from the underlying object.
326	 */
327	if (object) {
328		sf = vm_imgact_map_page(object, offset);
329		if (sf == NULL)
330			return (KERN_FAILURE);
331		off = offset - trunc_page(offset);
332		error = copyout((caddr_t)sf_buf_kva(sf) + off, (caddr_t)start,
333		    end - start);
334		vm_imgact_unmap_page(sf);
335		if (error) {
336			return (KERN_FAILURE);
337		}
338	}
339
340	return (KERN_SUCCESS);
341}
342
343static int
344__elfN(map_insert)(vm_map_t map, vm_object_t object, vm_ooffset_t offset,
345    vm_offset_t start, vm_offset_t end, vm_prot_t prot, int cow)
346{
347	struct sf_buf *sf;
348	vm_offset_t off;
349	vm_size_t sz;
350	int error, rv;
351
352	if (start != trunc_page(start)) {
353		rv = __elfN(map_partial)(map, object, offset, start,
354		    round_page(start), prot);
355		if (rv)
356			return (rv);
357		offset += round_page(start) - start;
358		start = round_page(start);
359	}
360	if (end != round_page(end)) {
361		rv = __elfN(map_partial)(map, object, offset +
362		    trunc_page(end) - start, trunc_page(end), end, prot);
363		if (rv)
364			return (rv);
365		end = trunc_page(end);
366	}
367	if (end > start) {
368		if (offset & PAGE_MASK) {
369			/*
370			 * The mapping is not page aligned. This means we have
371			 * to copy the data. Sigh.
372			 */
373			rv = vm_map_find(map, NULL, 0, &start, end - start,
374			    FALSE, prot | VM_PROT_WRITE, VM_PROT_ALL, 0);
375			if (rv)
376				return (rv);
377			if (object == NULL)
378				return (KERN_SUCCESS);
379			for (; start < end; start += sz) {
380				sf = vm_imgact_map_page(object, offset);
381				if (sf == NULL)
382					return (KERN_FAILURE);
383				off = offset - trunc_page(offset);
384				sz = end - start;
385				if (sz > PAGE_SIZE - off)
386					sz = PAGE_SIZE - off;
387				error = copyout((caddr_t)sf_buf_kva(sf) + off,
388				    (caddr_t)start, sz);
389				vm_imgact_unmap_page(sf);
390				if (error) {
391					return (KERN_FAILURE);
392				}
393				offset += sz;
394			}
395			rv = KERN_SUCCESS;
396		} else {
397			vm_object_reference(object);
398			vm_map_lock(map);
399			rv = vm_map_insert(map, object, offset, start, end,
400			    prot, VM_PROT_ALL, cow);
401			vm_map_unlock(map);
402			if (rv != KERN_SUCCESS)
403				vm_object_deallocate(object);
404		}
405		return (rv);
406	} else {
407		return (KERN_SUCCESS);
408	}
409}
410
411static int
412__elfN(load_section)(struct vmspace *vmspace,
413	vm_object_t object, vm_offset_t offset,
414	caddr_t vmaddr, size_t memsz, size_t filsz, vm_prot_t prot,
415	size_t pagesize)
416{
417	struct sf_buf *sf;
418	size_t map_len;
419	vm_offset_t map_addr;
420	int error, rv, cow;
421	size_t copy_len;
422	vm_offset_t file_addr;
423
424	/*
425	 * It's necessary to fail if the filsz + offset taken from the
426	 * header is greater than the actual file pager object's size.
427	 * If we were to allow this, then the vm_map_find() below would
428	 * walk right off the end of the file object and into the ether.
429	 *
430	 * While I'm here, might as well check for something else that
431	 * is invalid: filsz cannot be greater than memsz.
432	 */
433	if ((off_t)filsz + offset > object->un_pager.vnp.vnp_size ||
434	    filsz > memsz) {
435		uprintf("elf_load_section: truncated ELF file\n");
436		return (ENOEXEC);
437	}
438
439	map_addr = trunc_page_ps((vm_offset_t)vmaddr, pagesize);
440	file_addr = trunc_page_ps(offset, pagesize);
441
442	/*
443	 * We have two choices.  We can either clear the data in the last page
444	 * of an oversized mapping, or we can start the anon mapping a page
445	 * early and copy the initialized data into that first page.  We
446	 * choose the second..
447	 */
448	if (memsz > filsz)
449		map_len = trunc_page_ps(offset + filsz, pagesize) - file_addr;
450	else
451		map_len = round_page_ps(offset + filsz, pagesize) - file_addr;
452
453	if (map_len != 0) {
454		/* cow flags: don't dump readonly sections in core */
455		cow = MAP_COPY_ON_WRITE | MAP_PREFAULT |
456		    (prot & VM_PROT_WRITE ? 0 : MAP_DISABLE_COREDUMP);
457
458		rv = __elfN(map_insert)(&vmspace->vm_map,
459				      object,
460				      file_addr,	/* file offset */
461				      map_addr,		/* virtual start */
462				      map_addr + map_len,/* virtual end */
463				      prot,
464				      cow);
465		if (rv != KERN_SUCCESS)
466			return (EINVAL);
467
468		/* we can stop now if we've covered it all */
469		if (memsz == filsz) {
470			return (0);
471		}
472	}
473
474
475	/*
476	 * We have to get the remaining bit of the file into the first part
477	 * of the oversized map segment.  This is normally because the .data
478	 * segment in the file is extended to provide bss.  It's a neat idea
479	 * to try and save a page, but it's a pain in the behind to implement.
480	 */
481	copy_len = (offset + filsz) - trunc_page_ps(offset + filsz, pagesize);
482	map_addr = trunc_page_ps((vm_offset_t)vmaddr + filsz, pagesize);
483	map_len = round_page_ps((vm_offset_t)vmaddr + memsz, pagesize) -
484	    map_addr;
485
486	/* This had damn well better be true! */
487	if (map_len != 0) {
488		rv = __elfN(map_insert)(&vmspace->vm_map, NULL, 0, map_addr,
489		    map_addr + map_len, VM_PROT_ALL, 0);
490		if (rv != KERN_SUCCESS) {
491			return (EINVAL);
492		}
493	}
494
495	if (copy_len != 0) {
496		vm_offset_t off;
497
498		sf = vm_imgact_map_page(object, offset + filsz);
499		if (sf == NULL)
500			return (EIO);
501
502		/* send the page fragment to user space */
503		off = trunc_page_ps(offset + filsz, pagesize) -
504		    trunc_page(offset + filsz);
505		error = copyout((caddr_t)sf_buf_kva(sf) + off,
506		    (caddr_t)map_addr, copy_len);
507		vm_imgact_unmap_page(sf);
508		if (error) {
509			return (error);
510		}
511	}
512
513	/*
514	 * set it to the specified protection.
515	 * XXX had better undo the damage from pasting over the cracks here!
516	 */
517	vm_map_protect(&vmspace->vm_map, trunc_page(map_addr),
518	    round_page(map_addr + map_len),  prot, FALSE);
519
520	return (0);
521}
522
523/*
524 * Load the file "file" into memory.  It may be either a shared object
525 * or an executable.
526 *
527 * The "addr" reference parameter is in/out.  On entry, it specifies
528 * the address where a shared object should be loaded.  If the file is
529 * an executable, this value is ignored.  On exit, "addr" specifies
530 * where the file was actually loaded.
531 *
532 * The "entry" reference parameter is out only.  On exit, it specifies
533 * the entry point for the loaded file.
534 */
535static int
536__elfN(load_file)(struct proc *p, const char *file, u_long *addr,
537	u_long *entry, size_t pagesize)
538{
539	struct {
540		struct nameidata nd;
541		struct vattr attr;
542		struct image_params image_params;
543	} *tempdata;
544	const Elf_Ehdr *hdr = NULL;
545	const Elf_Phdr *phdr = NULL;
546	struct nameidata *nd;
547	struct vmspace *vmspace = p->p_vmspace;
548	struct vattr *attr;
549	struct image_params *imgp;
550	vm_prot_t prot;
551	u_long rbase;
552	u_long base_addr = 0;
553	int vfslocked, error, i, numsegs;
554
555	tempdata = malloc(sizeof(*tempdata), M_TEMP, M_WAITOK);
556	nd = &tempdata->nd;
557	attr = &tempdata->attr;
558	imgp = &tempdata->image_params;
559
560	/*
561	 * Initialize part of the common data
562	 */
563	imgp->proc = p;
564	imgp->attr = attr;
565	imgp->firstpage = NULL;
566	imgp->image_header = NULL;
567	imgp->object = NULL;
568	imgp->execlabel = NULL;
569
570	NDINIT(nd, LOOKUP, MPSAFE|LOCKLEAF|FOLLOW, UIO_SYSSPACE, file,
571	    curthread);
572	vfslocked = 0;
573	if ((error = namei(nd)) != 0) {
574		nd->ni_vp = NULL;
575		goto fail;
576	}
577	vfslocked = NDHASGIANT(nd);
578	NDFREE(nd, NDF_ONLY_PNBUF);
579	imgp->vp = nd->ni_vp;
580
581	/*
582	 * Check permissions, modes, uid, etc on the file, and "open" it.
583	 */
584	error = exec_check_permissions(imgp);
585	if (error)
586		goto fail;
587
588	error = exec_map_first_page(imgp);
589	if (error)
590		goto fail;
591
592	/*
593	 * Also make certain that the interpreter stays the same, so set
594	 * its VV_TEXT flag, too.
595	 */
596	nd->ni_vp->v_vflag |= VV_TEXT;
597
598	imgp->object = nd->ni_vp->v_object;
599
600	hdr = (const Elf_Ehdr *)imgp->image_header;
601	if ((error = __elfN(check_header)(hdr)) != 0)
602		goto fail;
603	if (hdr->e_type == ET_DYN)
604		rbase = *addr;
605	else if (hdr->e_type == ET_EXEC)
606		rbase = 0;
607	else {
608		error = ENOEXEC;
609		goto fail;
610	}
611
612	/* Only support headers that fit within first page for now      */
613	/*    (multiplication of two Elf_Half fields will not overflow) */
614	if ((hdr->e_phoff > PAGE_SIZE) ||
615	    (hdr->e_phentsize * hdr->e_phnum) > PAGE_SIZE - hdr->e_phoff) {
616		error = ENOEXEC;
617		goto fail;
618	}
619
620	phdr = (const Elf_Phdr *)(imgp->image_header + hdr->e_phoff);
621	if (!aligned(phdr, Elf_Addr)) {
622		error = ENOEXEC;
623		goto fail;
624	}
625
626	for (i = 0, numsegs = 0; i < hdr->e_phnum; i++) {
627		if (phdr[i].p_type == PT_LOAD) {	/* Loadable segment */
628			prot = 0;
629			if (phdr[i].p_flags & PF_X)
630  				prot |= VM_PROT_EXECUTE;
631			if (phdr[i].p_flags & PF_W)
632  				prot |= VM_PROT_WRITE;
633			if (phdr[i].p_flags & PF_R)
634  				prot |= VM_PROT_READ;
635
636			if ((error = __elfN(load_section)(vmspace,
637			    imgp->object, phdr[i].p_offset,
638			    (caddr_t)(uintptr_t)phdr[i].p_vaddr + rbase,
639			    phdr[i].p_memsz, phdr[i].p_filesz, prot,
640			    pagesize)) != 0)
641				goto fail;
642			/*
643			 * Establish the base address if this is the
644			 * first segment.
645			 */
646			if (numsegs == 0)
647  				base_addr = trunc_page(phdr[i].p_vaddr +
648				    rbase);
649			numsegs++;
650		}
651	}
652	*addr = base_addr;
653	*entry = (unsigned long)hdr->e_entry + rbase;
654
655fail:
656	if (imgp->firstpage)
657		exec_unmap_first_page(imgp);
658
659	if (nd->ni_vp)
660		vput(nd->ni_vp);
661
662	VFS_UNLOCK_GIANT(vfslocked);
663	free(tempdata, M_TEMP);
664
665	return (error);
666}
667
668static int
669__CONCAT(exec_, __elfN(imgact))(struct image_params *imgp)
670{
671	const Elf_Ehdr *hdr = (const Elf_Ehdr *)imgp->image_header;
672	const Elf_Phdr *phdr;
673	Elf_Auxargs *elf_auxargs;
674	struct vmspace *vmspace;
675	vm_prot_t prot;
676	u_long text_size = 0, data_size = 0, total_size = 0;
677	u_long text_addr = 0, data_addr = 0;
678	u_long seg_size, seg_addr;
679	u_long addr, entry = 0, proghdr = 0;
680	int32_t osrel = 0;
681	int error = 0, i;
682	const char *interp = NULL, *newinterp = NULL;
683	Elf_Brandinfo *brand_info;
684	char *path;
685	struct sysentvec *sv;
686
687	/*
688	 * Do we have a valid ELF header ?
689	 *
690	 * Only allow ET_EXEC & ET_DYN here, reject ET_DYN later
691	 * if particular brand doesn't support it.
692	 */
693	if (__elfN(check_header)(hdr) != 0 ||
694	    (hdr->e_type != ET_EXEC && hdr->e_type != ET_DYN))
695		return (-1);
696
697	/*
698	 * From here on down, we return an errno, not -1, as we've
699	 * detected an ELF file.
700	 */
701
702	if ((hdr->e_phoff > PAGE_SIZE) ||
703	    (hdr->e_phoff + hdr->e_phentsize * hdr->e_phnum) > PAGE_SIZE) {
704		/* Only support headers in first page for now */
705		return (ENOEXEC);
706	}
707	phdr = (const Elf_Phdr *)(imgp->image_header + hdr->e_phoff);
708	if (!aligned(phdr, Elf_Addr))
709		return (ENOEXEC);
710	for (i = 0; i < hdr->e_phnum; i++) {
711		if (phdr[i].p_type == PT_INTERP) {
712			/* Path to interpreter */
713			if (phdr[i].p_filesz > MAXPATHLEN ||
714			    phdr[i].p_offset + phdr[i].p_filesz > PAGE_SIZE)
715				return (ENOEXEC);
716			interp = imgp->image_header + phdr[i].p_offset;
717			break;
718		}
719	}
720
721	brand_info = __elfN(get_brandinfo)(imgp, interp, &osrel);
722	if (brand_info == NULL) {
723		uprintf("ELF binary type \"%u\" not known.\n",
724		    hdr->e_ident[EI_OSABI]);
725		return (ENOEXEC);
726	}
727	if (hdr->e_type == ET_DYN &&
728	    (brand_info->flags & BI_CAN_EXEC_DYN) == 0)
729		return (ENOEXEC);
730	sv = brand_info->sysvec;
731	if (interp != NULL && brand_info->interp_newpath != NULL)
732		newinterp = brand_info->interp_newpath;
733
734	/*
735	 * Avoid a possible deadlock if the current address space is destroyed
736	 * and that address space maps the locked vnode.  In the common case,
737	 * the locked vnode's v_usecount is decremented but remains greater
738	 * than zero.  Consequently, the vnode lock is not needed by vrele().
739	 * However, in cases where the vnode lock is external, such as nullfs,
740	 * v_usecount may become zero.
741	 */
742	VOP_UNLOCK(imgp->vp, 0);
743
744	error = exec_new_vmspace(imgp, sv);
745	imgp->proc->p_sysent = sv;
746
747	vn_lock(imgp->vp, LK_EXCLUSIVE | LK_RETRY);
748	if (error)
749		return (error);
750
751	vmspace = imgp->proc->p_vmspace;
752
753	for (i = 0; i < hdr->e_phnum; i++) {
754		switch (phdr[i].p_type) {
755		case PT_LOAD:	/* Loadable segment */
756			prot = 0;
757			if (phdr[i].p_flags & PF_X)
758  				prot |= VM_PROT_EXECUTE;
759			if (phdr[i].p_flags & PF_W)
760  				prot |= VM_PROT_WRITE;
761			if (phdr[i].p_flags & PF_R)
762  				prot |= VM_PROT_READ;
763
764#if defined(__ia64__) && __ELF_WORD_SIZE == 32 && defined(IA32_ME_HARDER)
765			/*
766			 * Some x86 binaries assume read == executable,
767			 * notably the M3 runtime and therefore cvsup
768			 */
769			if (prot & VM_PROT_READ)
770				prot |= VM_PROT_EXECUTE;
771#endif
772
773			if ((error = __elfN(load_section)(vmspace,
774			    imgp->object, phdr[i].p_offset,
775			    (caddr_t)(uintptr_t)phdr[i].p_vaddr,
776			    phdr[i].p_memsz, phdr[i].p_filesz, prot,
777			    sv->sv_pagesize)) != 0)
778				return (error);
779
780			/*
781			 * If this segment contains the program headers,
782			 * remember their virtual address for the AT_PHDR
783			 * aux entry. Static binaries don't usually include
784			 * a PT_PHDR entry.
785			 */
786			if (phdr[i].p_offset == 0 &&
787			    hdr->e_phoff + hdr->e_phnum * hdr->e_phentsize
788				<= phdr[i].p_filesz)
789				proghdr = phdr[i].p_vaddr + hdr->e_phoff;
790
791			seg_addr = trunc_page(phdr[i].p_vaddr);
792			seg_size = round_page(phdr[i].p_memsz +
793			    phdr[i].p_vaddr - seg_addr);
794
795			/*
796			 * Is this .text or .data?  We can't use
797			 * VM_PROT_WRITE or VM_PROT_EXEC, it breaks the
798			 * alpha terribly and possibly does other bad
799			 * things so we stick to the old way of figuring
800			 * it out:  If the segment contains the program
801			 * entry point, it's a text segment, otherwise it
802			 * is a data segment.
803			 *
804			 * Note that obreak() assumes that data_addr +
805			 * data_size == end of data load area, and the ELF
806			 * file format expects segments to be sorted by
807			 * address.  If multiple data segments exist, the
808			 * last one will be used.
809			 */
810			if (hdr->e_entry >= phdr[i].p_vaddr &&
811			    hdr->e_entry < (phdr[i].p_vaddr +
812			    phdr[i].p_memsz)) {
813				text_size = seg_size;
814				text_addr = seg_addr;
815				entry = (u_long)hdr->e_entry;
816			} else {
817				data_size = seg_size;
818				data_addr = seg_addr;
819			}
820			total_size += seg_size;
821			break;
822		case PT_PHDR: 	/* Program header table info */
823			proghdr = phdr[i].p_vaddr;
824			break;
825		default:
826			break;
827		}
828	}
829
830	if (data_addr == 0 && data_size == 0) {
831		data_addr = text_addr;
832		data_size = text_size;
833	}
834
835	/*
836	 * Check limits.  It should be safe to check the
837	 * limits after loading the segments since we do
838	 * not actually fault in all the segments pages.
839	 */
840	PROC_LOCK(imgp->proc);
841	if (data_size > lim_cur(imgp->proc, RLIMIT_DATA) ||
842	    text_size > maxtsiz ||
843	    total_size > lim_cur(imgp->proc, RLIMIT_VMEM)) {
844		PROC_UNLOCK(imgp->proc);
845		return (ENOMEM);
846	}
847
848	vmspace->vm_tsize = text_size >> PAGE_SHIFT;
849	vmspace->vm_taddr = (caddr_t)(uintptr_t)text_addr;
850	vmspace->vm_dsize = data_size >> PAGE_SHIFT;
851	vmspace->vm_daddr = (caddr_t)(uintptr_t)data_addr;
852
853	/*
854	 * We load the dynamic linker where a userland call
855	 * to mmap(0, ...) would put it.  The rationale behind this
856	 * calculation is that it leaves room for the heap to grow to
857	 * its maximum allowed size.
858	 */
859	addr = round_page((vm_offset_t)imgp->proc->p_vmspace->vm_daddr +
860	    lim_max(imgp->proc, RLIMIT_DATA));
861	PROC_UNLOCK(imgp->proc);
862
863	imgp->entry_addr = entry;
864
865	if (interp != NULL) {
866		int have_interp = FALSE;
867		VOP_UNLOCK(imgp->vp, 0);
868		if (brand_info->emul_path != NULL &&
869		    brand_info->emul_path[0] != '\0') {
870			path = malloc(MAXPATHLEN, M_TEMP, M_WAITOK);
871			snprintf(path, MAXPATHLEN, "%s%s",
872			    brand_info->emul_path, interp);
873			error = __elfN(load_file)(imgp->proc, path, &addr,
874			    &imgp->entry_addr, sv->sv_pagesize);
875			free(path, M_TEMP);
876			if (error == 0)
877				have_interp = TRUE;
878		}
879		if (!have_interp && newinterp != NULL) {
880			error = __elfN(load_file)(imgp->proc, newinterp, &addr,
881			    &imgp->entry_addr, sv->sv_pagesize);
882			if (error == 0)
883				have_interp = TRUE;
884		}
885		if (!have_interp) {
886			error = __elfN(load_file)(imgp->proc, interp, &addr,
887			    &imgp->entry_addr, sv->sv_pagesize);
888		}
889		vn_lock(imgp->vp, LK_EXCLUSIVE | LK_RETRY);
890		if (error != 0) {
891			uprintf("ELF interpreter %s not found\n", interp);
892			return (error);
893		}
894	} else
895		addr = 0;
896
897	/*
898	 * Construct auxargs table (used by the fixup routine)
899	 */
900	elf_auxargs = malloc(sizeof(Elf_Auxargs), M_TEMP, M_WAITOK);
901	elf_auxargs->execfd = -1;
902	elf_auxargs->phdr = proghdr;
903	elf_auxargs->phent = hdr->e_phentsize;
904	elf_auxargs->phnum = hdr->e_phnum;
905	elf_auxargs->pagesz = PAGE_SIZE;
906	elf_auxargs->base = addr;
907	elf_auxargs->flags = 0;
908	elf_auxargs->entry = entry;
909
910	imgp->auxargs = elf_auxargs;
911	imgp->interpreted = 0;
912	imgp->proc->p_osrel = osrel;
913
914	return (error);
915}
916
917#define	suword __CONCAT(suword, __ELF_WORD_SIZE)
918
919int
920__elfN(freebsd_fixup)(register_t **stack_base, struct image_params *imgp)
921{
922	Elf_Auxargs *args = (Elf_Auxargs *)imgp->auxargs;
923	Elf_Addr *base;
924	Elf_Addr *pos;
925
926	base = (Elf_Addr *)*stack_base;
927	pos = base + (imgp->args->argc + imgp->args->envc + 2);
928
929	if (args->execfd != -1)
930		AUXARGS_ENTRY(pos, AT_EXECFD, args->execfd);
931	AUXARGS_ENTRY(pos, AT_PHDR, args->phdr);
932	AUXARGS_ENTRY(pos, AT_PHENT, args->phent);
933	AUXARGS_ENTRY(pos, AT_PHNUM, args->phnum);
934	AUXARGS_ENTRY(pos, AT_PAGESZ, args->pagesz);
935	AUXARGS_ENTRY(pos, AT_FLAGS, args->flags);
936	AUXARGS_ENTRY(pos, AT_ENTRY, args->entry);
937	AUXARGS_ENTRY(pos, AT_BASE, args->base);
938	if (imgp->execpathp != 0)
939		AUXARGS_ENTRY(pos, AT_EXECPATH, imgp->execpathp);
940	AUXARGS_ENTRY(pos, AT_NULL, 0);
941
942	free(imgp->auxargs, M_TEMP);
943	imgp->auxargs = NULL;
944
945	base--;
946	suword(base, (long)imgp->args->argc);
947	*stack_base = (register_t *)base;
948	return (0);
949}
950
951/*
952 * Code for generating ELF core dumps.
953 */
954
955typedef void (*segment_callback)(vm_map_entry_t, void *);
956
957/* Closure for cb_put_phdr(). */
958struct phdr_closure {
959	Elf_Phdr *phdr;		/* Program header to fill in */
960	Elf_Off offset;		/* Offset of segment in core file */
961};
962
963/* Closure for cb_size_segment(). */
964struct sseg_closure {
965	int count;		/* Count of writable segments. */
966	size_t size;		/* Total size of all writable segments. */
967};
968
969static void cb_put_phdr(vm_map_entry_t, void *);
970static void cb_size_segment(vm_map_entry_t, void *);
971static void each_writable_segment(struct thread *, segment_callback, void *);
972static int __elfN(corehdr)(struct thread *, struct vnode *, struct ucred *,
973    int, void *, size_t);
974static void __elfN(puthdr)(struct thread *, void *, size_t *, int);
975static void __elfN(putnote)(void *, size_t *, const char *, int,
976    const void *, size_t);
977
978int
979__elfN(coredump)(td, vp, limit)
980	struct thread *td;
981	struct vnode *vp;
982	off_t limit;
983{
984	struct ucred *cred = td->td_ucred;
985	int error = 0;
986	struct sseg_closure seginfo;
987	void *hdr;
988	size_t hdrsize;
989
990	/* Size the program segments. */
991	seginfo.count = 0;
992	seginfo.size = 0;
993	each_writable_segment(td, cb_size_segment, &seginfo);
994
995	/*
996	 * Calculate the size of the core file header area by making
997	 * a dry run of generating it.  Nothing is written, but the
998	 * size is calculated.
999	 */
1000	hdrsize = 0;
1001	__elfN(puthdr)(td, (void *)NULL, &hdrsize, seginfo.count);
1002
1003	if (hdrsize + seginfo.size >= limit)
1004		return (EFAULT);
1005
1006	/*
1007	 * Allocate memory for building the header, fill it up,
1008	 * and write it out.
1009	 */
1010	hdr = malloc(hdrsize, M_TEMP, M_WAITOK);
1011	if (hdr == NULL) {
1012		return (EINVAL);
1013	}
1014	error = __elfN(corehdr)(td, vp, cred, seginfo.count, hdr, hdrsize);
1015
1016	/* Write the contents of all of the writable segments. */
1017	if (error == 0) {
1018		Elf_Phdr *php;
1019		off_t offset;
1020		int i;
1021
1022		php = (Elf_Phdr *)((char *)hdr + sizeof(Elf_Ehdr)) + 1;
1023		offset = hdrsize;
1024		for (i = 0; i < seginfo.count; i++) {
1025			error = vn_rdwr_inchunks(UIO_WRITE, vp,
1026			    (caddr_t)(uintptr_t)php->p_vaddr,
1027			    php->p_filesz, offset, UIO_USERSPACE,
1028			    IO_UNIT | IO_DIRECT, cred, NOCRED, NULL,
1029			    curthread);
1030			if (error != 0)
1031				break;
1032			offset += php->p_filesz;
1033			php++;
1034		}
1035	}
1036	free(hdr, M_TEMP);
1037
1038	return (error);
1039}
1040
1041/*
1042 * A callback for each_writable_segment() to write out the segment's
1043 * program header entry.
1044 */
1045static void
1046cb_put_phdr(entry, closure)
1047	vm_map_entry_t entry;
1048	void *closure;
1049{
1050	struct phdr_closure *phc = (struct phdr_closure *)closure;
1051	Elf_Phdr *phdr = phc->phdr;
1052
1053	phc->offset = round_page(phc->offset);
1054
1055	phdr->p_type = PT_LOAD;
1056	phdr->p_offset = phc->offset;
1057	phdr->p_vaddr = entry->start;
1058	phdr->p_paddr = 0;
1059	phdr->p_filesz = phdr->p_memsz = entry->end - entry->start;
1060	phdr->p_align = PAGE_SIZE;
1061	phdr->p_flags = 0;
1062	if (entry->protection & VM_PROT_READ)
1063		phdr->p_flags |= PF_R;
1064	if (entry->protection & VM_PROT_WRITE)
1065		phdr->p_flags |= PF_W;
1066	if (entry->protection & VM_PROT_EXECUTE)
1067		phdr->p_flags |= PF_X;
1068
1069	phc->offset += phdr->p_filesz;
1070	phc->phdr++;
1071}
1072
1073/*
1074 * A callback for each_writable_segment() to gather information about
1075 * the number of segments and their total size.
1076 */
1077static void
1078cb_size_segment(entry, closure)
1079	vm_map_entry_t entry;
1080	void *closure;
1081{
1082	struct sseg_closure *ssc = (struct sseg_closure *)closure;
1083
1084	ssc->count++;
1085	ssc->size += entry->end - entry->start;
1086}
1087
1088/*
1089 * For each writable segment in the process's memory map, call the given
1090 * function with a pointer to the map entry and some arbitrary
1091 * caller-supplied data.
1092 */
1093static void
1094each_writable_segment(td, func, closure)
1095	struct thread *td;
1096	segment_callback func;
1097	void *closure;
1098{
1099	struct proc *p = td->td_proc;
1100	vm_map_t map = &p->p_vmspace->vm_map;
1101	vm_map_entry_t entry;
1102	vm_object_t backing_object, object;
1103	boolean_t ignore_entry;
1104
1105	vm_map_lock_read(map);
1106	for (entry = map->header.next; entry != &map->header;
1107	    entry = entry->next) {
1108		/*
1109		 * Don't dump inaccessible mappings, deal with legacy
1110		 * coredump mode.
1111		 *
1112		 * Note that read-only segments related to the elf binary
1113		 * are marked MAP_ENTRY_NOCOREDUMP now so we no longer
1114		 * need to arbitrarily ignore such segments.
1115		 */
1116		if (elf_legacy_coredump) {
1117			if ((entry->protection & VM_PROT_RW) != VM_PROT_RW)
1118				continue;
1119		} else {
1120			if ((entry->protection & VM_PROT_ALL) == 0)
1121				continue;
1122		}
1123
1124		/*
1125		 * Dont include memory segment in the coredump if
1126		 * MAP_NOCORE is set in mmap(2) or MADV_NOCORE in
1127		 * madvise(2).  Do not dump submaps (i.e. parts of the
1128		 * kernel map).
1129		 */
1130		if (entry->eflags & (MAP_ENTRY_NOCOREDUMP|MAP_ENTRY_IS_SUB_MAP))
1131			continue;
1132
1133		if ((object = entry->object.vm_object) == NULL)
1134			continue;
1135
1136		/* Ignore memory-mapped devices and such things. */
1137		VM_OBJECT_LOCK(object);
1138		while ((backing_object = object->backing_object) != NULL) {
1139			VM_OBJECT_LOCK(backing_object);
1140			VM_OBJECT_UNLOCK(object);
1141			object = backing_object;
1142		}
1143		ignore_entry = object->type != OBJT_DEFAULT &&
1144		    object->type != OBJT_SWAP && object->type != OBJT_VNODE;
1145		VM_OBJECT_UNLOCK(object);
1146		if (ignore_entry)
1147			continue;
1148
1149		(*func)(entry, closure);
1150	}
1151	vm_map_unlock_read(map);
1152}
1153
1154/*
1155 * Write the core file header to the file, including padding up to
1156 * the page boundary.
1157 */
1158static int
1159__elfN(corehdr)(td, vp, cred, numsegs, hdr, hdrsize)
1160	struct thread *td;
1161	struct vnode *vp;
1162	struct ucred *cred;
1163	int numsegs;
1164	size_t hdrsize;
1165	void *hdr;
1166{
1167	size_t off;
1168
1169	/* Fill in the header. */
1170	bzero(hdr, hdrsize);
1171	off = 0;
1172	__elfN(puthdr)(td, hdr, &off, numsegs);
1173
1174	/* Write it to the core file. */
1175	return (vn_rdwr_inchunks(UIO_WRITE, vp, hdr, hdrsize, (off_t)0,
1176	    UIO_SYSSPACE, IO_UNIT | IO_DIRECT, cred, NOCRED, NULL,
1177	    td));
1178}
1179
1180#if defined(COMPAT_IA32) && __ELF_WORD_SIZE == 32
1181typedef struct prstatus32 elf_prstatus_t;
1182typedef struct prpsinfo32 elf_prpsinfo_t;
1183typedef struct fpreg32 elf_prfpregset_t;
1184typedef struct fpreg32 elf_fpregset_t;
1185typedef struct reg32 elf_gregset_t;
1186#else
1187typedef prstatus_t elf_prstatus_t;
1188typedef prpsinfo_t elf_prpsinfo_t;
1189typedef prfpregset_t elf_prfpregset_t;
1190typedef prfpregset_t elf_fpregset_t;
1191typedef gregset_t elf_gregset_t;
1192#endif
1193
1194static void
1195__elfN(puthdr)(struct thread *td, void *dst, size_t *off, int numsegs)
1196{
1197	struct {
1198		elf_prstatus_t status;
1199		elf_prfpregset_t fpregset;
1200		elf_prpsinfo_t psinfo;
1201	} *tempdata;
1202	elf_prstatus_t *status;
1203	elf_prfpregset_t *fpregset;
1204	elf_prpsinfo_t *psinfo;
1205	struct proc *p;
1206	struct thread *thr;
1207	size_t ehoff, noteoff, notesz, phoff;
1208
1209	p = td->td_proc;
1210
1211	ehoff = *off;
1212	*off += sizeof(Elf_Ehdr);
1213
1214	phoff = *off;
1215	*off += (numsegs + 1) * sizeof(Elf_Phdr);
1216
1217	noteoff = *off;
1218	/*
1219	 * Don't allocate space for the notes if we're just calculating
1220	 * the size of the header. We also don't collect the data.
1221	 */
1222	if (dst != NULL) {
1223		tempdata = malloc(sizeof(*tempdata), M_TEMP, M_ZERO|M_WAITOK);
1224		status = &tempdata->status;
1225		fpregset = &tempdata->fpregset;
1226		psinfo = &tempdata->psinfo;
1227	} else {
1228		tempdata = NULL;
1229		status = NULL;
1230		fpregset = NULL;
1231		psinfo = NULL;
1232	}
1233
1234	if (dst != NULL) {
1235		psinfo->pr_version = PRPSINFO_VERSION;
1236		psinfo->pr_psinfosz = sizeof(elf_prpsinfo_t);
1237		strlcpy(psinfo->pr_fname, p->p_comm, sizeof(psinfo->pr_fname));
1238		/*
1239		 * XXX - We don't fill in the command line arguments properly
1240		 * yet.
1241		 */
1242		strlcpy(psinfo->pr_psargs, p->p_comm,
1243		    sizeof(psinfo->pr_psargs));
1244	}
1245	__elfN(putnote)(dst, off, "FreeBSD", NT_PRPSINFO, psinfo,
1246	    sizeof *psinfo);
1247
1248	/*
1249	 * To have the debugger select the right thread (LWP) as the initial
1250	 * thread, we dump the state of the thread passed to us in td first.
1251	 * This is the thread that causes the core dump and thus likely to
1252	 * be the right thread one wants to have selected in the debugger.
1253	 */
1254	thr = td;
1255	while (thr != NULL) {
1256		if (dst != NULL) {
1257			status->pr_version = PRSTATUS_VERSION;
1258			status->pr_statussz = sizeof(elf_prstatus_t);
1259			status->pr_gregsetsz = sizeof(elf_gregset_t);
1260			status->pr_fpregsetsz = sizeof(elf_fpregset_t);
1261			status->pr_osreldate = osreldate;
1262			status->pr_cursig = p->p_sig;
1263			status->pr_pid = thr->td_tid;
1264#if defined(COMPAT_IA32) && __ELF_WORD_SIZE == 32
1265			fill_regs32(thr, &status->pr_reg);
1266			fill_fpregs32(thr, fpregset);
1267#else
1268			fill_regs(thr, &status->pr_reg);
1269			fill_fpregs(thr, fpregset);
1270#endif
1271		}
1272		__elfN(putnote)(dst, off, "FreeBSD", NT_PRSTATUS, status,
1273		    sizeof *status);
1274		__elfN(putnote)(dst, off, "FreeBSD", NT_FPREGSET, fpregset,
1275		    sizeof *fpregset);
1276		/*
1277		 * Allow for MD specific notes, as well as any MD
1278		 * specific preparations for writing MI notes.
1279		 */
1280		__elfN(dump_thread)(thr, dst, off);
1281
1282		thr = (thr == td) ? TAILQ_FIRST(&p->p_threads) :
1283		    TAILQ_NEXT(thr, td_plist);
1284		if (thr == td)
1285			thr = TAILQ_NEXT(thr, td_plist);
1286	}
1287
1288	notesz = *off - noteoff;
1289
1290	if (dst != NULL)
1291		free(tempdata, M_TEMP);
1292
1293	/* Align up to a page boundary for the program segments. */
1294	*off = round_page(*off);
1295
1296	if (dst != NULL) {
1297		Elf_Ehdr *ehdr;
1298		Elf_Phdr *phdr;
1299		struct phdr_closure phc;
1300
1301		/*
1302		 * Fill in the ELF header.
1303		 */
1304		ehdr = (Elf_Ehdr *)((char *)dst + ehoff);
1305		ehdr->e_ident[EI_MAG0] = ELFMAG0;
1306		ehdr->e_ident[EI_MAG1] = ELFMAG1;
1307		ehdr->e_ident[EI_MAG2] = ELFMAG2;
1308		ehdr->e_ident[EI_MAG3] = ELFMAG3;
1309		ehdr->e_ident[EI_CLASS] = ELF_CLASS;
1310		ehdr->e_ident[EI_DATA] = ELF_DATA;
1311		ehdr->e_ident[EI_VERSION] = EV_CURRENT;
1312		ehdr->e_ident[EI_OSABI] = ELFOSABI_FREEBSD;
1313		ehdr->e_ident[EI_ABIVERSION] = 0;
1314		ehdr->e_ident[EI_PAD] = 0;
1315		ehdr->e_type = ET_CORE;
1316#if defined(COMPAT_IA32) && __ELF_WORD_SIZE == 32
1317		ehdr->e_machine = EM_386;
1318#else
1319		ehdr->e_machine = ELF_ARCH;
1320#endif
1321		ehdr->e_version = EV_CURRENT;
1322		ehdr->e_entry = 0;
1323		ehdr->e_phoff = phoff;
1324		ehdr->e_flags = 0;
1325		ehdr->e_ehsize = sizeof(Elf_Ehdr);
1326		ehdr->e_phentsize = sizeof(Elf_Phdr);
1327		ehdr->e_phnum = numsegs + 1;
1328		ehdr->e_shentsize = sizeof(Elf_Shdr);
1329		ehdr->e_shnum = 0;
1330		ehdr->e_shstrndx = SHN_UNDEF;
1331
1332		/*
1333		 * Fill in the program header entries.
1334		 */
1335		phdr = (Elf_Phdr *)((char *)dst + phoff);
1336
1337		/* The note segement. */
1338		phdr->p_type = PT_NOTE;
1339		phdr->p_offset = noteoff;
1340		phdr->p_vaddr = 0;
1341		phdr->p_paddr = 0;
1342		phdr->p_filesz = notesz;
1343		phdr->p_memsz = 0;
1344		phdr->p_flags = 0;
1345		phdr->p_align = 0;
1346		phdr++;
1347
1348		/* All the writable segments from the program. */
1349		phc.phdr = phdr;
1350		phc.offset = *off;
1351		each_writable_segment(td, cb_put_phdr, &phc);
1352	}
1353}
1354
1355static void
1356__elfN(putnote)(void *dst, size_t *off, const char *name, int type,
1357    const void *desc, size_t descsz)
1358{
1359	Elf_Note note;
1360
1361	note.n_namesz = strlen(name) + 1;
1362	note.n_descsz = descsz;
1363	note.n_type = type;
1364	if (dst != NULL)
1365		bcopy(&note, (char *)dst + *off, sizeof note);
1366	*off += sizeof note;
1367	if (dst != NULL)
1368		bcopy(name, (char *)dst + *off, note.n_namesz);
1369	*off += roundup2(note.n_namesz, sizeof(Elf_Size));
1370	if (dst != NULL)
1371		bcopy(desc, (char *)dst + *off, note.n_descsz);
1372	*off += roundup2(note.n_descsz, sizeof(Elf_Size));
1373}
1374
1375/*
1376 * Try to find the appropriate ABI-note section for checknote,
1377 * fetch the osreldate for binary from the ELF OSABI-note. Only the
1378 * first page of the image is searched, the same as for headers.
1379 */
1380static boolean_t
1381__elfN(check_note)(struct image_params *imgp, Elf_Brandnote *checknote,
1382    int32_t *osrel)
1383{
1384	const Elf_Note *note, *note0, *note_end;
1385	const Elf_Phdr *phdr, *pnote;
1386	const Elf_Ehdr *hdr;
1387	const char *note_name;
1388	int i;
1389
1390	pnote = NULL;
1391	hdr = (const Elf_Ehdr *)imgp->image_header;
1392	phdr = (const Elf_Phdr *)(imgp->image_header + hdr->e_phoff);
1393
1394	for (i = 0; i < hdr->e_phnum; i++) {
1395		if (phdr[i].p_type == PT_NOTE) {
1396			pnote = &phdr[i];
1397			break;
1398		}
1399	}
1400
1401	if (pnote == NULL || pnote->p_offset >= PAGE_SIZE ||
1402	    pnote->p_offset + pnote->p_filesz >= PAGE_SIZE)
1403		return (FALSE);
1404
1405	note = note0 = (const Elf_Note *)(imgp->image_header + pnote->p_offset);
1406	note_end = (const Elf_Note *)(imgp->image_header +
1407	    pnote->p_offset + pnote->p_filesz);
1408	for (i = 0; i < 100 && note >= note0 && note < note_end; i++) {
1409		if (!aligned(note, Elf32_Addr))
1410			return (FALSE);
1411		if (note->n_namesz != checknote->hdr.n_namesz ||
1412		    note->n_descsz != checknote->hdr.n_descsz ||
1413		    note->n_type != checknote->hdr.n_type)
1414			goto nextnote;
1415		note_name = (const char *)(note + 1);
1416		if (strncmp(checknote->vendor, note_name,
1417		    checknote->hdr.n_namesz) != 0)
1418			goto nextnote;
1419
1420		/*
1421		 * Fetch the osreldate for binary
1422		 * from the ELF OSABI-note if necessary.
1423		 */
1424		if ((checknote->flags & BN_TRANSLATE_OSREL) != 0 &&
1425		    checknote->trans_osrel != NULL)
1426			return (checknote->trans_osrel(note, osrel));
1427		return (TRUE);
1428
1429nextnote:
1430		note = (const Elf_Note *)((const char *)(note + 1) +
1431		    roundup2(note->n_namesz, sizeof(Elf32_Addr)) +
1432		    roundup2(note->n_descsz, sizeof(Elf32_Addr)));
1433	}
1434
1435	return (FALSE);
1436}
1437
1438/*
1439 * Tell kern_execve.c about it, with a little help from the linker.
1440 */
1441static struct execsw __elfN(execsw) = {
1442	__CONCAT(exec_, __elfN(imgact)),
1443	__XSTRING(__CONCAT(ELF, __ELF_WORD_SIZE))
1444};
1445EXEC_SET(__CONCAT(elf, __ELF_WORD_SIZE), __elfN(execsw));
1446