/*-
 * Copyright (c) 2000 David O'Brien
 * Copyright (c) 1995-1996 Søren Schmidt
 * Copyright (c) 1996 Peter Wemm
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer
 *    in this position and unchanged.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. The name of the author may not be used to endorse or promote products
 *    derived from this software without specific prior written permission
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */

#include <sys/cdefs.h>
__FBSDID("$FreeBSD: head/sys/kern/imgact_elf.c 186235 2008-12-17 16:54:29Z peter $");

#include "opt_compat.h"

#include <sys/param.h>
#include <sys/exec.h>
#include <sys/fcntl.h>
#include <sys/imgact.h>
#include <sys/imgact_elf.h>
#include <sys/kernel.h>
#include <sys/lock.h>
#include <sys/malloc.h>
#include <sys/mount.h>
#include <sys/mutex.h>
#include <sys/mman.h>
#include <sys/namei.h>
#include <sys/pioctl.h>
#include <sys/proc.h>
#include <sys/procfs.h>
#include <sys/resourcevar.h>
#include <sys/sf_buf.h>
#include <sys/systm.h>
#include <sys/signalvar.h>
#include <sys/stat.h>
#include <sys/sx.h>
#include <sys/syscall.h>
#include <sys/sysctl.h>
#include <sys/sysent.h>
#include <sys/vnode.h>

#include <vm/vm.h>
#include <vm/vm_kern.h>
#include <vm/vm_param.h>
#include <vm/pmap.h>
#include <vm/vm_map.h>
#include <vm/vm_object.h>
#include <vm/vm_extern.h>

#include <machine/elf.h>
#include <machine/md_var.h>

#if defined(COMPAT_IA32) && __ELF_WORD_SIZE == 32
#include <machine/fpu.h>
#include <compat/ia32/ia32_reg.h>
#endif

#define OLD_EI_BRAND	8

static int __elfN(check_header)(const Elf_Ehdr *hdr);
static Elf_Brandinfo *__elfN(get_brandinfo)(const Elf_Ehdr *hdr,
    const char *interp);
static int __elfN(load_file)(struct proc *p, const char *file, u_long *addr,
    u_long *entry, size_t pagesize);
static int __elfN(load_section)(struct vmspace *vmspace, vm_object_t object,
    vm_offset_t offset, caddr_t vmaddr, size_t memsz, size_t filsz,
    vm_prot_t prot, size_t pagesize);
static int __CONCAT(exec_, __elfN(imgact))(struct image_params *imgp);

SYSCTL_NODE(_kern, OID_AUTO, __CONCAT(elf, __ELF_WORD_SIZE), CTLFLAG_RW, 0,
    "");

int __elfN(fallback_brand) = -1;
SYSCTL_INT(__CONCAT(_kern_elf, __ELF_WORD_SIZE), OID_AUTO,
    fallback_brand, CTLFLAG_RW, &__elfN(fallback_brand), 0,
    __XSTRING(__CONCAT(ELF, __ELF_WORD_SIZE)) " brand of last resort");
TUNABLE_INT("kern.elf" __XSTRING(__ELF_WORD_SIZE) ".fallback_brand",
    &__elfN(fallback_brand));

static int elf_legacy_coredump = 0;
SYSCTL_INT(_debug, OID_AUTO, __elfN(legacy_coredump), CTLFLAG_RW,
    &elf_legacy_coredump, 0, "");

static Elf_Brandinfo *elf_brand_list[MAX_BRANDS];

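/*
 * Page-rounding helpers that take an explicit page size: trunc_page_ps()
 * and round_page_ps() mirror trunc_page()/round_page(), and aligned()
 * tests that a value is suitably aligned for the given type.
 */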
#define	trunc_page_ps(va, ps)	((va) & ~(ps - 1))
#define	round_page_ps(va, ps)	(((va) + (ps - 1)) & ~(ps - 1))
#define	aligned(a, t)	(trunc_page_ps((u_long)(a), sizeof(t)) == (u_long)(a))

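/*
 * Register a brand entry in the first free slot of elf_brand_list.
 * Returns 0 on success, or -1 if the table is full.
 */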
int
__elfN(insert_brand_entry)(Elf_Brandinfo *entry)
{
	int i;

	for (i = 0; i < MAX_BRANDS; i++) {
		if (elf_brand_list[i] == NULL) {
			elf_brand_list[i] = entry;
			break;
		}
	}
	if (i == MAX_BRANDS)
		return (-1);
	return (0);
}

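/*
 * Remove a previously registered brand entry.  Returns 0 on success,
 * or -1 if the entry was not found.
 */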
int
__elfN(remove_brand_entry)(Elf_Brandinfo *entry)
{
	int i;

	for (i = 0; i < MAX_BRANDS; i++) {
		if (elf_brand_list[i] == entry) {
			elf_brand_list[i] = NULL;
			break;
		}
	}
	if (i == MAX_BRANDS)
		return (-1);
	return (0);
}

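/*
 * Return TRUE if some process currently runs on the brand's syscall
 * vector, FALSE otherwise.
 */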
int
__elfN(brand_inuse)(Elf_Brandinfo *entry)
{
	struct proc *p;
	int rval = FALSE;

	sx_slock(&allproc_lock);
	FOREACH_PROC_IN_SYSTEM(p) {
		if (p->p_sysent == entry->sysvec) {
			rval = TRUE;
			break;
		}
	}
	sx_sunlock(&allproc_lock);

	return (rval);
}

static Elf_Brandinfo *
__elfN(get_brandinfo)(const Elf_Ehdr *hdr, const char *interp)
{
	Elf_Brandinfo *bi;
	int i;

	/*
	 * We support three types of branding -- (1) the ELF EI_OSABI field
	 * that SCO added to the ELF spec, (2) FreeBSD 3.x's traditional string
	 * branding within the ELF header, and (3) the path of the requested
	 * interpreter, matched against each brand's `interp_path' field.  We
	 * should also look for the ".note.ABI-tag" ELF section, now present
	 * in all Linux ELF binaries, FreeBSD 4.1+, and some NetBSD binaries.
	 */

	/* If the executable has a brand, search for it in the brand list. */
	for (i = 0; i < MAX_BRANDS; i++) {
		bi = elf_brand_list[i];
		if (bi != NULL && hdr->e_machine == bi->machine &&
		    (hdr->e_ident[EI_OSABI] == bi->brand ||
		    strncmp((const char *)&hdr->e_ident[OLD_EI_BRAND],
		    bi->compat_3_brand, strlen(bi->compat_3_brand)) == 0))
			return (bi);
	}

	/* Lacking a known brand, search for a recognized interpreter. */
	if (interp != NULL) {
		for (i = 0; i < MAX_BRANDS; i++) {
			bi = elf_brand_list[i];
			if (bi != NULL && hdr->e_machine == bi->machine &&
			    strcmp(interp, bi->interp_path) == 0)
				return (bi);
		}
	}

	/* Lacking a recognized interpreter, try the fallback brand. */
	for (i = 0; i < MAX_BRANDS; i++) {
		bi = elf_brand_list[i];
		if (bi != NULL && hdr->e_machine == bi->machine &&
		    __elfN(fallback_brand) == bi->brand)
			return (bi);
	}
	return (NULL);
}

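/*
 * Check the ELF header: the magic, class, byte order, version, and
 * program header entry size must all match this kernel's expectations,
 * and at least one registered brand must support the target machine.
 */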
static int
__elfN(check_header)(const Elf_Ehdr *hdr)
{
	Elf_Brandinfo *bi;
	int i;

	if (!IS_ELF(*hdr) ||
	    hdr->e_ident[EI_CLASS] != ELF_TARG_CLASS ||
	    hdr->e_ident[EI_DATA] != ELF_TARG_DATA ||
	    hdr->e_ident[EI_VERSION] != EV_CURRENT ||
	    hdr->e_phentsize != sizeof(Elf_Phdr) ||
	    hdr->e_version != ELF_TARG_VER)
		return (ENOEXEC);

	/*
	 * Make sure we have at least one brand for this machine.
	 */
	for (i = 0; i < MAX_BRANDS; i++) {
		bi = elf_brand_list[i];
		if (bi != NULL && bi->machine == hdr->e_machine)
			break;
	}
	if (i == MAX_BRANDS)
		return (ENOEXEC);

	return (0);
}

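/*
 * Map the sub-page region [start, end).  A shared file mapping cannot be
 * used here, so anonymous zero-fill pages are inserted and the file data
 * is copied in by hand.
 */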
static int
__elfN(map_partial)(vm_map_t map, vm_object_t object, vm_ooffset_t offset,
    vm_offset_t start, vm_offset_t end, vm_prot_t prot)
{
	struct sf_buf *sf;
	int error;
	vm_offset_t off;

	/*
	 * Create the page if it doesn't exist yet.  Ignore errors.
	 */
	vm_map_lock(map);
	vm_map_insert(map, NULL, 0, trunc_page(start), round_page(end),
	    VM_PROT_ALL, VM_PROT_ALL, 0);
	vm_map_unlock(map);

	/*
	 * Find the page from the underlying object.
	 */
	if (object) {
		sf = vm_imgact_map_page(object, offset);
		if (sf == NULL)
			return (KERN_FAILURE);
		off = offset - trunc_page(offset);
		error = copyout((caddr_t)sf_buf_kva(sf) + off, (caddr_t)start,
		    end - start);
		vm_imgact_unmap_page(sf);
		if (error) {
			return (KERN_FAILURE);
		}
	}

	return (KERN_SUCCESS);
}

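/*
 * Insert the mapping [start, end) of the object at the given offset into
 * the map.  Unaligned head and tail fragments go through
 * __elfN(map_partial)(); the page-aligned middle is mapped directly from
 * the object, or copied by hand when the file offset is not page aligned.
 */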
static int
__elfN(map_insert)(vm_map_t map, vm_object_t object, vm_ooffset_t offset,
    vm_offset_t start, vm_offset_t end, vm_prot_t prot, int cow)
{
	struct sf_buf *sf;
	vm_offset_t off;
	vm_size_t sz;
	int error, rv;

	if (start != trunc_page(start)) {
		rv = __elfN(map_partial)(map, object, offset, start,
		    round_page(start), prot);
		if (rv)
			return (rv);
		offset += round_page(start) - start;
		start = round_page(start);
	}
	if (end != round_page(end)) {
		rv = __elfN(map_partial)(map, object, offset +
		    trunc_page(end) - start, trunc_page(end), end, prot);
		if (rv)
			return (rv);
		end = trunc_page(end);
	}
	if (end > start) {
		if (offset & PAGE_MASK) {
			/*
			 * The mapping is not page aligned.  This means we have
			 * to copy the data.  Sigh.
			 */
			rv = vm_map_find(map, NULL, 0, &start, end - start,
			    FALSE, prot | VM_PROT_WRITE, VM_PROT_ALL, 0);
			if (rv)
				return (rv);
			if (object == NULL)
				return (KERN_SUCCESS);
			for (; start < end; start += sz) {
				sf = vm_imgact_map_page(object, offset);
				if (sf == NULL)
					return (KERN_FAILURE);
				off = offset - trunc_page(offset);
				sz = end - start;
				if (sz > PAGE_SIZE - off)
					sz = PAGE_SIZE - off;
				error = copyout((caddr_t)sf_buf_kva(sf) + off,
				    (caddr_t)start, sz);
				vm_imgact_unmap_page(sf);
				if (error) {
					return (KERN_FAILURE);
				}
				offset += sz;
			}
			rv = KERN_SUCCESS;
		} else {
			vm_object_reference(object);
			vm_map_lock(map);
			rv = vm_map_insert(map, object, offset, start, end,
			    prot, VM_PROT_ALL, cow);
			vm_map_unlock(map);
			if (rv != KERN_SUCCESS)
				vm_object_deallocate(object);
		}
		return (rv);
	} else {
		return (KERN_SUCCESS);
	}
}

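/*
 * Map a single PT_LOAD segment into the process address space: the
 * file-backed part described by (offset, filsz) is mapped at vmaddr,
 * any remaining bss up to memsz is zero-filled, and the whole range is
 * then set to the requested protection.
 */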
static int
__elfN(load_section)(struct vmspace *vmspace,
	vm_object_t object, vm_offset_t offset,
	caddr_t vmaddr, size_t memsz, size_t filsz, vm_prot_t prot,
	size_t pagesize)
{
	struct sf_buf *sf;
	size_t map_len;
	vm_offset_t map_addr;
	int error, rv, cow;
	size_t copy_len;
	vm_offset_t file_addr;

	/*
	 * It's necessary to fail if the filsz + offset taken from the
	 * header is greater than the actual file pager object's size.
	 * If we were to allow this, then the vm_map_find() below would
	 * walk right off the end of the file object and into the ether.
	 *
	 * While I'm here, might as well check for something else that
	 * is invalid: filsz cannot be greater than memsz.
	 */
	if ((off_t)filsz + offset > object->un_pager.vnp.vnp_size ||
	    filsz > memsz) {
		uprintf("elf_load_section: truncated ELF file\n");
		return (ENOEXEC);
	}

	map_addr = trunc_page_ps((vm_offset_t)vmaddr, pagesize);
	file_addr = trunc_page_ps(offset, pagesize);

	/*
	 * We have two choices.  We can either clear the data in the last page
	 * of an oversized mapping, or we can start the anon mapping a page
	 * early and copy the initialized data into that first page.  We
	 * choose the second.
	 */
	if (memsz > filsz)
		map_len = trunc_page_ps(offset + filsz, pagesize) - file_addr;
	else
		map_len = round_page_ps(offset + filsz, pagesize) - file_addr;

	if (map_len != 0) {
		/* cow flags: don't dump readonly sections in core */
		cow = MAP_COPY_ON_WRITE | MAP_PREFAULT |
		    (prot & VM_PROT_WRITE ? 0 : MAP_DISABLE_COREDUMP);

		rv = __elfN(map_insert)(&vmspace->vm_map,
				      object,
				      file_addr,	/* file offset */
				      map_addr,		/* virtual start */
				      map_addr + map_len,/* virtual end */
				      prot,
				      cow);
		if (rv != KERN_SUCCESS)
			return (EINVAL);

		/* We can stop now if we've covered it all. */
		if (memsz == filsz) {
			return (0);
		}
	}

	/*
	 * We have to get the remaining bit of the file into the first part
	 * of the oversized map segment.  This is normally because the .data
	 * segment in the file is extended to provide bss.  It's a neat idea
	 * to try and save a page, but it's a pain in the behind to implement.
	 */
	copy_len = (offset + filsz) - trunc_page_ps(offset + filsz, pagesize);
	map_addr = trunc_page_ps((vm_offset_t)vmaddr + filsz, pagesize);
	map_len = round_page_ps((vm_offset_t)vmaddr + memsz, pagesize) -
	    map_addr;

	/* This had damn well better be true! */
	if (map_len != 0) {
		rv = __elfN(map_insert)(&vmspace->vm_map, NULL, 0, map_addr,
		    map_addr + map_len, VM_PROT_ALL, 0);
		if (rv != KERN_SUCCESS) {
			return (EINVAL);
		}
	}

	if (copy_len != 0) {
		vm_offset_t off;

		sf = vm_imgact_map_page(object, offset + filsz);
		if (sf == NULL)
			return (EIO);

		/* Send the page fragment to user space. */
		off = trunc_page_ps(offset + filsz, pagesize) -
		    trunc_page(offset + filsz);
		error = copyout((caddr_t)sf_buf_kva(sf) + off,
		    (caddr_t)map_addr, copy_len);
		vm_imgact_unmap_page(sf);
		if (error) {
			return (error);
		}
	}

	/*
	 * Set it to the specified protection.
	 * XXX had better undo the damage from pasting over the cracks here!
	 */
	vm_map_protect(&vmspace->vm_map, trunc_page(map_addr),
	    round_page(map_addr + map_len), prot, FALSE);

	return (0);
}

/*
 * Load the file "file" into memory.  It may be either a shared object
 * or an executable.
 *
 * The "addr" reference parameter is in/out.  On entry, it specifies
 * the address where a shared object should be loaded.  If the file is
 * an executable, this value is ignored.  On exit, "addr" specifies
 * where the file was actually loaded.
 *
 * The "entry" reference parameter is out only.  On exit, it specifies
 * the entry point for the loaded file.
 */
static int
__elfN(load_file)(struct proc *p, const char *file, u_long *addr,
	u_long *entry, size_t pagesize)
{
	struct {
		struct nameidata nd;
		struct vattr attr;
		struct image_params image_params;
	} *tempdata;
	const Elf_Ehdr *hdr = NULL;
	const Elf_Phdr *phdr = NULL;
	struct nameidata *nd;
	struct vmspace *vmspace = p->p_vmspace;
	struct vattr *attr;
	struct image_params *imgp;
	vm_prot_t prot;
	u_long rbase;
	u_long base_addr = 0;
	int vfslocked, error, i, numsegs;

	tempdata = malloc(sizeof(*tempdata), M_TEMP, M_WAITOK);
	nd = &tempdata->nd;
	attr = &tempdata->attr;
	imgp = &tempdata->image_params;

	/*
	 * Initialize part of the common data.
	 */
	imgp->proc = p;
	imgp->attr = attr;
	imgp->firstpage = NULL;
	imgp->image_header = NULL;
	imgp->object = NULL;
	imgp->execlabel = NULL;

	NDINIT(nd, LOOKUP, MPSAFE|LOCKLEAF|FOLLOW, UIO_SYSSPACE, file,
	    curthread);
	vfslocked = 0;
	if ((error = namei(nd)) != 0) {
		nd->ni_vp = NULL;
		goto fail;
	}
	vfslocked = NDHASGIANT(nd);
	NDFREE(nd, NDF_ONLY_PNBUF);
	imgp->vp = nd->ni_vp;

	/*
	 * Check permissions, modes, uid, etc on the file, and "open" it.
	 */
	error = exec_check_permissions(imgp);
	if (error)
		goto fail;

	error = exec_map_first_page(imgp);
	if (error)
		goto fail;

	/*
	 * Also make certain that the interpreter stays the same, so set
	 * its VV_TEXT flag, too.
	 */
	nd->ni_vp->v_vflag |= VV_TEXT;

	imgp->object = nd->ni_vp->v_object;

	hdr = (const Elf_Ehdr *)imgp->image_header;
	if ((error = __elfN(check_header)(hdr)) != 0)
		goto fail;
	if (hdr->e_type == ET_DYN)
		rbase = *addr;
	else if (hdr->e_type == ET_EXEC)
		rbase = 0;
	else {
		error = ENOEXEC;
		goto fail;
	}

	/*
	 * Only support headers that fit within the first page for now
	 * (the multiplication of two Elf_Half fields will not overflow).
	 */
	if ((hdr->e_phoff > PAGE_SIZE) ||
	    (hdr->e_phentsize * hdr->e_phnum) > PAGE_SIZE - hdr->e_phoff) {
		error = ENOEXEC;
		goto fail;
	}

	phdr = (const Elf_Phdr *)(imgp->image_header + hdr->e_phoff);
	if (!aligned(phdr, Elf_Addr)) {
		error = ENOEXEC;
		goto fail;
	}

	for (i = 0, numsegs = 0; i < hdr->e_phnum; i++) {
		if (phdr[i].p_type == PT_LOAD) {	/* Loadable segment */
			prot = 0;
			if (phdr[i].p_flags & PF_X)
				prot |= VM_PROT_EXECUTE;
			if (phdr[i].p_flags & PF_W)
				prot |= VM_PROT_WRITE;
			if (phdr[i].p_flags & PF_R)
				prot |= VM_PROT_READ;

			if ((error = __elfN(load_section)(vmspace,
			    imgp->object, phdr[i].p_offset,
			    (caddr_t)(uintptr_t)phdr[i].p_vaddr + rbase,
			    phdr[i].p_memsz, phdr[i].p_filesz, prot,
			    pagesize)) != 0)
				goto fail;
			/*
			 * Establish the base address if this is the
			 * first segment.
			 */
			if (numsegs == 0)
				base_addr = trunc_page(phdr[i].p_vaddr +
				    rbase);
			numsegs++;
		}
	}
	*addr = base_addr;
	*entry = (unsigned long)hdr->e_entry + rbase;

fail:
	if (imgp->firstpage)
		exec_unmap_first_page(imgp);

	if (nd->ni_vp)
		vput(nd->ni_vp);

	VFS_UNLOCK_GIANT(vfslocked);
	free(tempdata, M_TEMP);

	return (error);
}

static const char FREEBSD_ABI_VENDOR[] = "FreeBSD";

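/*
 * The ELF image activator: validate the header, pick a brand, map the
 * PT_LOAD segments, load the interpreter if one is requested, construct
 * the auxargs table, and record the FreeBSD osrel note if present.
 */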
static int
__CONCAT(exec_, __elfN(imgact))(struct image_params *imgp)
{
	const Elf_Ehdr *hdr = (const Elf_Ehdr *)imgp->image_header;
	const Elf_Phdr *phdr, *pnote = NULL;
	Elf_Auxargs *elf_auxargs;
	struct vmspace *vmspace;
	vm_prot_t prot;
	u_long text_size = 0, data_size = 0, total_size = 0;
	u_long text_addr = 0, data_addr = 0;
	u_long seg_size, seg_addr;
	u_long addr, entry = 0, proghdr = 0;
	int error = 0, i;
	const char *interp = NULL, *newinterp = NULL;
	Elf_Brandinfo *brand_info;
	const Elf_Note *note, *note_end;
	char *path;
	const char *note_name;
	struct sysentvec *sv;

	/*
	 * Do we have a valid ELF header?
	 *
	 * Only allow ET_EXEC & ET_DYN here; reject ET_DYN later
	 * if the particular brand doesn't support it.
	 */
	if (__elfN(check_header)(hdr) != 0 ||
	    (hdr->e_type != ET_EXEC && hdr->e_type != ET_DYN))
		return (-1);

	/*
	 * From here on down, we return an errno, not -1, as we've
	 * detected an ELF file.
	 */

	if ((hdr->e_phoff > PAGE_SIZE) ||
	    (hdr->e_phoff + hdr->e_phentsize * hdr->e_phnum) > PAGE_SIZE) {
		/* Only support headers in first page for now */
		return (ENOEXEC);
	}
	phdr = (const Elf_Phdr *)(imgp->image_header + hdr->e_phoff);
	if (!aligned(phdr, Elf_Addr))
		return (ENOEXEC);
	for (i = 0; i < hdr->e_phnum; i++) {
		if (phdr[i].p_type == PT_INTERP) {
			/* Path to interpreter */
			if (phdr[i].p_filesz > MAXPATHLEN ||
			    phdr[i].p_offset + phdr[i].p_filesz > PAGE_SIZE)
				return (ENOEXEC);
			interp = imgp->image_header + phdr[i].p_offset;
			break;
		}
	}

	brand_info = __elfN(get_brandinfo)(hdr, interp);
	if (brand_info == NULL) {
		uprintf("ELF binary type \"%u\" not known.\n",
		    hdr->e_ident[EI_OSABI]);
		return (ENOEXEC);
	}
	if (hdr->e_type == ET_DYN &&
	    (brand_info->flags & BI_CAN_EXEC_DYN) == 0)
		return (ENOEXEC);
	sv = brand_info->sysvec;
	if (interp != NULL && brand_info->interp_newpath != NULL)
		newinterp = brand_info->interp_newpath;

	/*
	 * Avoid a possible deadlock if the current address space is destroyed
	 * and that address space maps the locked vnode.  In the common case,
	 * the locked vnode's v_usecount is decremented but remains greater
	 * than zero.  Consequently, the vnode lock is not needed by vrele().
	 * However, in cases where the vnode lock is external, such as nullfs,
	 * v_usecount may become zero.
	 */
	VOP_UNLOCK(imgp->vp, 0);

	error = exec_new_vmspace(imgp, sv);
	imgp->proc->p_sysent = sv;

	vn_lock(imgp->vp, LK_EXCLUSIVE | LK_RETRY);
	if (error)
		return (error);

	vmspace = imgp->proc->p_vmspace;

	for (i = 0; i < hdr->e_phnum; i++) {
		switch (phdr[i].p_type) {
		case PT_LOAD:	/* Loadable segment */
			prot = 0;
			if (phdr[i].p_flags & PF_X)
				prot |= VM_PROT_EXECUTE;
			if (phdr[i].p_flags & PF_W)
				prot |= VM_PROT_WRITE;
			if (phdr[i].p_flags & PF_R)
				prot |= VM_PROT_READ;

#if defined(__ia64__) && __ELF_WORD_SIZE == 32 && defined(IA32_ME_HARDER)
			/*
			 * Some x86 binaries assume read == executable,
			 * notably the M3 runtime and therefore cvsup.
			 */
			if (prot & VM_PROT_READ)
				prot |= VM_PROT_EXECUTE;
#endif

			if ((error = __elfN(load_section)(vmspace,
			    imgp->object, phdr[i].p_offset,
			    (caddr_t)(uintptr_t)phdr[i].p_vaddr,
			    phdr[i].p_memsz, phdr[i].p_filesz, prot,
			    sv->sv_pagesize)) != 0)
				return (error);

			/*
			 * If this segment contains the program headers,
			 * remember their virtual address for the AT_PHDR
			 * aux entry.  Static binaries don't usually include
			 * a PT_PHDR entry.
			 */
			if (phdr[i].p_offset == 0 &&
			    hdr->e_phoff + hdr->e_phnum * hdr->e_phentsize
				<= phdr[i].p_filesz)
				proghdr = phdr[i].p_vaddr + hdr->e_phoff;

			seg_addr = trunc_page(phdr[i].p_vaddr);
			seg_size = round_page(phdr[i].p_memsz +
			    phdr[i].p_vaddr - seg_addr);

			/*
			 * Is this .text or .data?  We can't use
			 * VM_PROT_WRITE or VM_PROT_EXEC, it breaks the
			 * alpha terribly and possibly does other bad
			 * things so we stick to the old way of figuring
			 * it out:  If the segment contains the program
			 * entry point, it's a text segment, otherwise it
			 * is a data segment.
			 *
			 * Note that obreak() assumes that data_addr +
			 * data_size == end of data load area, and the ELF
			 * file format expects segments to be sorted by
			 * address.  If multiple data segments exist, the
			 * last one will be used.
			 */
			if (hdr->e_entry >= phdr[i].p_vaddr &&
			    hdr->e_entry < (phdr[i].p_vaddr +
			    phdr[i].p_memsz)) {
				text_size = seg_size;
				text_addr = seg_addr;
				entry = (u_long)hdr->e_entry;
			} else {
				data_size = seg_size;
				data_addr = seg_addr;
			}
			total_size += seg_size;
			break;
		case PT_PHDR:	/* Program header table info */
			proghdr = phdr[i].p_vaddr;
			break;
		case PT_NOTE:
			pnote = &phdr[i];
			break;
		default:
			break;
		}
	}

	if (data_addr == 0 && data_size == 0) {
		data_addr = text_addr;
		data_size = text_size;
	}

	/*
	 * Check limits.  It should be safe to check the
	 * limits after loading the segments since we do
	 * not actually fault in all the segments' pages.
	 */
	PROC_LOCK(imgp->proc);
	if (data_size > lim_cur(imgp->proc, RLIMIT_DATA) ||
	    text_size > maxtsiz ||
	    total_size > lim_cur(imgp->proc, RLIMIT_VMEM)) {
		PROC_UNLOCK(imgp->proc);
		return (ENOMEM);
	}

	vmspace->vm_tsize = text_size >> PAGE_SHIFT;
	vmspace->vm_taddr = (caddr_t)(uintptr_t)text_addr;
	vmspace->vm_dsize = data_size >> PAGE_SHIFT;
	vmspace->vm_daddr = (caddr_t)(uintptr_t)data_addr;

	/*
	 * We load the dynamic linker where a userland call
	 * to mmap(0, ...) would put it.  The rationale behind this
	 * calculation is that it leaves room for the heap to grow to
	 * its maximum allowed size.
	 */
	addr = round_page((vm_offset_t)imgp->proc->p_vmspace->vm_daddr +
	    lim_max(imgp->proc, RLIMIT_DATA));
	PROC_UNLOCK(imgp->proc);

	imgp->entry_addr = entry;

	if (interp != NULL) {
		int have_interp = FALSE;
		VOP_UNLOCK(imgp->vp, 0);
		if (brand_info->emul_path != NULL &&
		    brand_info->emul_path[0] != '\0') {
			path = malloc(MAXPATHLEN, M_TEMP, M_WAITOK);
			snprintf(path, MAXPATHLEN, "%s%s",
			    brand_info->emul_path, interp);
			error = __elfN(load_file)(imgp->proc, path, &addr,
			    &imgp->entry_addr, sv->sv_pagesize);
			free(path, M_TEMP);
			if (error == 0)
				have_interp = TRUE;
		}
		if (!have_interp && newinterp != NULL) {
			error = __elfN(load_file)(imgp->proc, newinterp, &addr,
			    &imgp->entry_addr, sv->sv_pagesize);
			if (error == 0)
				have_interp = TRUE;
		}
		if (!have_interp) {
			error = __elfN(load_file)(imgp->proc, interp, &addr,
			    &imgp->entry_addr, sv->sv_pagesize);
		}
		vn_lock(imgp->vp, LK_EXCLUSIVE | LK_RETRY);
		if (error != 0) {
			uprintf("ELF interpreter %s not found\n", interp);
			return (error);
		}
	}

	/*
	 * Construct auxargs table (used by the fixup routine).
	 */
	elf_auxargs = malloc(sizeof(Elf_Auxargs), M_TEMP, M_WAITOK);
	elf_auxargs->execfd = -1;
	elf_auxargs->phdr = proghdr;
	elf_auxargs->phent = hdr->e_phentsize;
	elf_auxargs->phnum = hdr->e_phnum;
	elf_auxargs->pagesz = PAGE_SIZE;
	elf_auxargs->base = addr;
	elf_auxargs->flags = 0;
	elf_auxargs->entry = entry;

	imgp->auxargs = elf_auxargs;
	imgp->interpreted = 0;

	/*
	 * Try to fetch the osreldate for a FreeBSD binary from the ELF
	 * OSABI note.  Only the first page of the image is searched,
	 * the same as for the headers.
	 */
	if (pnote != NULL && pnote->p_offset < PAGE_SIZE &&
	    pnote->p_offset + pnote->p_filesz < PAGE_SIZE) {
		note = (const Elf_Note *)(imgp->image_header + pnote->p_offset);
		if (!aligned(note, Elf32_Addr)) {
			free(imgp->auxargs, M_TEMP);
			imgp->auxargs = NULL;
			return (ENOEXEC);
		}
		note_end = (const Elf_Note *)(imgp->image_header + pnote->p_offset +
		    pnote->p_filesz);
		while (note < note_end) {
			if (note->n_namesz == sizeof(FREEBSD_ABI_VENDOR) &&
			    note->n_descsz == sizeof(int32_t) &&
			    note->n_type == 1 /* ABI_NOTETYPE */) {
				note_name = (const char *)(note + 1);
				if (strncmp(FREEBSD_ABI_VENDOR, note_name,
				    sizeof(FREEBSD_ABI_VENDOR)) == 0) {
					imgp->proc->p_osrel = *(const int32_t *)
					    (note_name +
					    round_page_ps(sizeof(FREEBSD_ABI_VENDOR),
						sizeof(Elf32_Addr)));
					break;
				}
			}
			note = (const Elf_Note *)((const char *)(note + 1) +
			    round_page_ps(note->n_namesz, sizeof(Elf32_Addr)) +
			    round_page_ps(note->n_descsz, sizeof(Elf32_Addr)));
		}
	}

	return (error);
}

#define	suword __CONCAT(suword, __ELF_WORD_SIZE)

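/*
 * Lay out the ELF auxiliary argument vector after the argv and envp
 * pointer arrays on the new stack, and push argc for the process to
 * find at the new stack base.
 */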
int
__elfN(freebsd_fixup)(register_t **stack_base, struct image_params *imgp)
{
	Elf_Auxargs *args = (Elf_Auxargs *)imgp->auxargs;
	Elf_Addr *base;
	Elf_Addr *pos;

	base = (Elf_Addr *)*stack_base;
	pos = base + (imgp->args->argc + imgp->args->envc + 2);

	if (args->execfd != -1)
		AUXARGS_ENTRY(pos, AT_EXECFD, args->execfd);
	AUXARGS_ENTRY(pos, AT_PHDR, args->phdr);
	AUXARGS_ENTRY(pos, AT_PHENT, args->phent);
	AUXARGS_ENTRY(pos, AT_PHNUM, args->phnum);
	AUXARGS_ENTRY(pos, AT_PAGESZ, args->pagesz);
	AUXARGS_ENTRY(pos, AT_FLAGS, args->flags);
	AUXARGS_ENTRY(pos, AT_ENTRY, args->entry);
	AUXARGS_ENTRY(pos, AT_BASE, args->base);
	AUXARGS_ENTRY(pos, AT_NULL, 0);

	free(imgp->auxargs, M_TEMP);
	imgp->auxargs = NULL;

	base--;
	suword(base, (long)imgp->args->argc);
	*stack_base = (register_t *)base;
	return (0);
}

/*
 * Code for generating ELF core dumps.
 */

typedef void (*segment_callback)(vm_map_entry_t, void *);

/* Closure for cb_put_phdr(). */
struct phdr_closure {
	Elf_Phdr *phdr;		/* Program header to fill in */
	Elf_Off offset;		/* Offset of segment in core file */
};

/* Closure for cb_size_segment(). */
struct sseg_closure {
	int count;		/* Count of writable segments. */
	size_t size;		/* Total size of all writable segments. */
};

static void cb_put_phdr(vm_map_entry_t, void *);
static void cb_size_segment(vm_map_entry_t, void *);
static void each_writable_segment(struct thread *, segment_callback, void *);
static int __elfN(corehdr)(struct thread *, struct vnode *, struct ucred *,
    int, void *, size_t);
static void __elfN(puthdr)(struct thread *, void *, size_t *, int);
static void __elfN(putnote)(void *, size_t *, const char *, int,
    const void *, size_t);

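/*
 * Write an ELF core dump of the process to the vnode vp: size the
 * writable segments, build and write the header area, then write the
 * contents of each segment.  Fails with EFAULT if the dump would exceed
 * the given size limit.
 */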
int
__elfN(coredump)(struct thread *td, struct vnode *vp, off_t limit)
{
	struct ucred *cred = td->td_ucred;
	int error = 0;
	struct sseg_closure seginfo;
	void *hdr;
	size_t hdrsize;

	/* Size the program segments. */
	seginfo.count = 0;
	seginfo.size = 0;
	each_writable_segment(td, cb_size_segment, &seginfo);

	/*
	 * Calculate the size of the core file header area by making
	 * a dry run of generating it.  Nothing is written, but the
	 * size is calculated.
	 */
	hdrsize = 0;
	__elfN(puthdr)(td, (void *)NULL, &hdrsize, seginfo.count);

	if (hdrsize + seginfo.size >= limit)
		return (EFAULT);

	/*
	 * Allocate memory for building the header, fill it up,
	 * and write it out.
	 */
	hdr = malloc(hdrsize, M_TEMP, M_WAITOK);
	if (hdr == NULL) {
		return (EINVAL);
	}
	error = __elfN(corehdr)(td, vp, cred, seginfo.count, hdr, hdrsize);

	/* Write the contents of all of the writable segments. */
	if (error == 0) {
		Elf_Phdr *php;
		off_t offset;
		int i;

		php = (Elf_Phdr *)((char *)hdr + sizeof(Elf_Ehdr)) + 1;
		offset = hdrsize;
		for (i = 0; i < seginfo.count; i++) {
			error = vn_rdwr_inchunks(UIO_WRITE, vp,
			    (caddr_t)(uintptr_t)php->p_vaddr,
			    php->p_filesz, offset, UIO_USERSPACE,
			    IO_UNIT | IO_DIRECT, cred, NOCRED, NULL,
			    curthread);
			if (error != 0)
				break;
			offset += php->p_filesz;
			php++;
		}
	}
	free(hdr, M_TEMP);

	return (error);
}

/*
 * A callback for each_writable_segment() to write out the segment's
 * program header entry.
 */
static void
cb_put_phdr(vm_map_entry_t entry, void *closure)
{
	struct phdr_closure *phc = (struct phdr_closure *)closure;
	Elf_Phdr *phdr = phc->phdr;

	phc->offset = round_page(phc->offset);

	phdr->p_type = PT_LOAD;
	phdr->p_offset = phc->offset;
	phdr->p_vaddr = entry->start;
	phdr->p_paddr = 0;
	phdr->p_filesz = phdr->p_memsz = entry->end - entry->start;
	phdr->p_align = PAGE_SIZE;
	phdr->p_flags = 0;
	if (entry->protection & VM_PROT_READ)
		phdr->p_flags |= PF_R;
	if (entry->protection & VM_PROT_WRITE)
		phdr->p_flags |= PF_W;
	if (entry->protection & VM_PROT_EXECUTE)
		phdr->p_flags |= PF_X;

	phc->offset += phdr->p_filesz;
	phc->phdr++;
}

/*
 * A callback for each_writable_segment() to gather information about
 * the number of segments and their total size.
 */
static void
cb_size_segment(vm_map_entry_t entry, void *closure)
{
	struct sseg_closure *ssc = (struct sseg_closure *)closure;

	ssc->count++;
	ssc->size += entry->end - entry->start;
}

/*
 * For each writable segment in the process's memory map, call the given
 * function with a pointer to the map entry and some arbitrary
 * caller-supplied data.
 */
static void
each_writable_segment(struct thread *td, segment_callback func, void *closure)
{
	struct proc *p = td->td_proc;
	vm_map_t map = &p->p_vmspace->vm_map;
	vm_map_entry_t entry;
	vm_object_t backing_object, object;
	boolean_t ignore_entry;

	vm_map_lock_read(map);
	for (entry = map->header.next; entry != &map->header;
	    entry = entry->next) {
		/*
		 * Don't dump inaccessible mappings, deal with legacy
		 * coredump mode.
		 *
		 * Note that read-only segments related to the elf binary
		 * are marked MAP_ENTRY_NOCOREDUMP now so we no longer
		 * need to arbitrarily ignore such segments.
		 */
		if (elf_legacy_coredump) {
			if ((entry->protection & VM_PROT_RW) != VM_PROT_RW)
				continue;
		} else {
			if ((entry->protection & VM_PROT_ALL) == 0)
				continue;
		}

		/*
		 * Don't include a memory segment in the coredump if
		 * MAP_NOCORE is set in mmap(2) or MADV_NOCORE in
		 * madvise(2).  Do not dump submaps (i.e. parts of the
		 * kernel map).
		 */
		if (entry->eflags & (MAP_ENTRY_NOCOREDUMP|MAP_ENTRY_IS_SUB_MAP))
			continue;

		if ((object = entry->object.vm_object) == NULL)
			continue;

		/* Ignore memory-mapped devices and such things. */
		VM_OBJECT_LOCK(object);
		while ((backing_object = object->backing_object) != NULL) {
			VM_OBJECT_LOCK(backing_object);
			VM_OBJECT_UNLOCK(object);
			object = backing_object;
		}
		ignore_entry = object->type != OBJT_DEFAULT &&
		    object->type != OBJT_SWAP && object->type != OBJT_VNODE;
		VM_OBJECT_UNLOCK(object);
		if (ignore_entry)
			continue;

		(*func)(entry, closure);
	}
	vm_map_unlock_read(map);
}

/*
 * Write the core file header to the file, including padding up to
 * the page boundary.
 */
static int
__elfN(corehdr)(struct thread *td, struct vnode *vp, struct ucred *cred,
    int numsegs, void *hdr, size_t hdrsize)
{
	size_t off;

	/* Fill in the header. */
	bzero(hdr, hdrsize);
	off = 0;
	__elfN(puthdr)(td, hdr, &off, numsegs);

	/* Write it to the core file. */
	return (vn_rdwr_inchunks(UIO_WRITE, vp, hdr, hdrsize, (off_t)0,
	    UIO_SYSSPACE, IO_UNIT | IO_DIRECT, cred, NOCRED, NULL,
	    td));
}

#if defined(COMPAT_IA32) && __ELF_WORD_SIZE == 32
typedef struct prstatus32 elf_prstatus_t;
typedef struct prpsinfo32 elf_prpsinfo_t;
typedef struct fpreg32 elf_prfpregset_t;
typedef struct fpreg32 elf_fpregset_t;
typedef struct reg32 elf_gregset_t;
#else
typedef prstatus_t elf_prstatus_t;
typedef prpsinfo_t elf_prpsinfo_t;
typedef prfpregset_t elf_prfpregset_t;
typedef prfpregset_t elf_fpregset_t;
typedef gregset_t elf_gregset_t;
#endif

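/*
 * Generate the core file header area at dst, or merely compute its size
 * into *off when dst is NULL: the ELF header, one program header for the
 * notes plus one per segment, a PRPSINFO note for the process, and
 * PRSTATUS/FPREGSET notes for every thread, with the dumping thread first.
 */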
static void
__elfN(puthdr)(struct thread *td, void *dst, size_t *off, int numsegs)
{
	struct {
		elf_prstatus_t status;
		elf_prfpregset_t fpregset;
		elf_prpsinfo_t psinfo;
	} *tempdata;
	elf_prstatus_t *status;
	elf_prfpregset_t *fpregset;
	elf_prpsinfo_t *psinfo;
	struct proc *p;
	struct thread *thr;
	size_t ehoff, noteoff, notesz, phoff;

	p = td->td_proc;

	ehoff = *off;
	*off += sizeof(Elf_Ehdr);

	phoff = *off;
	*off += (numsegs + 1) * sizeof(Elf_Phdr);

	noteoff = *off;
	/*
	 * Don't allocate space for the notes if we're just calculating
	 * the size of the header.  We also don't collect the data.
	 */
	if (dst != NULL) {
		tempdata = malloc(sizeof(*tempdata), M_TEMP, M_ZERO|M_WAITOK);
		status = &tempdata->status;
		fpregset = &tempdata->fpregset;
		psinfo = &tempdata->psinfo;
	} else {
		tempdata = NULL;
		status = NULL;
		fpregset = NULL;
		psinfo = NULL;
	}

	if (dst != NULL) {
		psinfo->pr_version = PRPSINFO_VERSION;
		psinfo->pr_psinfosz = sizeof(elf_prpsinfo_t);
		strlcpy(psinfo->pr_fname, p->p_comm, sizeof(psinfo->pr_fname));
		/*
		 * XXX - We don't fill in the command line arguments properly
		 * yet.
		 */
		strlcpy(psinfo->pr_psargs, p->p_comm,
		    sizeof(psinfo->pr_psargs));
	}
	__elfN(putnote)(dst, off, "FreeBSD", NT_PRPSINFO, psinfo,
	    sizeof *psinfo);

	/*
	 * To have the debugger select the right thread (LWP) as the initial
	 * thread, we dump the state of the thread passed to us in td first.
	 * This is the thread that causes the core dump and thus likely to
	 * be the right thread one wants to have selected in the debugger.
	 */
	thr = td;
	while (thr != NULL) {
		if (dst != NULL) {
			status->pr_version = PRSTATUS_VERSION;
			status->pr_statussz = sizeof(elf_prstatus_t);
			status->pr_gregsetsz = sizeof(elf_gregset_t);
			status->pr_fpregsetsz = sizeof(elf_fpregset_t);
			status->pr_osreldate = osreldate;
			status->pr_cursig = p->p_sig;
			status->pr_pid = thr->td_tid;
#if defined(COMPAT_IA32) && __ELF_WORD_SIZE == 32
			fill_regs32(thr, &status->pr_reg);
			fill_fpregs32(thr, fpregset);
#else
			fill_regs(thr, &status->pr_reg);
			fill_fpregs(thr, fpregset);
#endif
		}
		__elfN(putnote)(dst, off, "FreeBSD", NT_PRSTATUS, status,
		    sizeof *status);
		__elfN(putnote)(dst, off, "FreeBSD", NT_FPREGSET, fpregset,
		    sizeof *fpregset);
		/*
		 * Allow for MD specific notes, as well as any MD
		 * specific preparations for writing MI notes.
		 */
		__elfN(dump_thread)(thr, dst, off);

		thr = (thr == td) ? TAILQ_FIRST(&p->p_threads) :
		    TAILQ_NEXT(thr, td_plist);
		if (thr == td)
			thr = TAILQ_NEXT(thr, td_plist);
	}

	notesz = *off - noteoff;

	if (dst != NULL)
		free(tempdata, M_TEMP);

	/* Align up to a page boundary for the program segments. */
	*off = round_page(*off);

	if (dst != NULL) {
		Elf_Ehdr *ehdr;
		Elf_Phdr *phdr;
		struct phdr_closure phc;

		/*
		 * Fill in the ELF header.
		 */
		ehdr = (Elf_Ehdr *)((char *)dst + ehoff);
		ehdr->e_ident[EI_MAG0] = ELFMAG0;
		ehdr->e_ident[EI_MAG1] = ELFMAG1;
		ehdr->e_ident[EI_MAG2] = ELFMAG2;
		ehdr->e_ident[EI_MAG3] = ELFMAG3;
		ehdr->e_ident[EI_CLASS] = ELF_CLASS;
		ehdr->e_ident[EI_DATA] = ELF_DATA;
		ehdr->e_ident[EI_VERSION] = EV_CURRENT;
		ehdr->e_ident[EI_OSABI] = ELFOSABI_FREEBSD;
		ehdr->e_ident[EI_ABIVERSION] = 0;
		ehdr->e_ident[EI_PAD] = 0;
		ehdr->e_type = ET_CORE;
#if defined(COMPAT_IA32) && __ELF_WORD_SIZE == 32
		ehdr->e_machine = EM_386;
#else
		ehdr->e_machine = ELF_ARCH;
#endif
		ehdr->e_version = EV_CURRENT;
		ehdr->e_entry = 0;
		ehdr->e_phoff = phoff;
		ehdr->e_flags = 0;
		ehdr->e_ehsize = sizeof(Elf_Ehdr);
		ehdr->e_phentsize = sizeof(Elf_Phdr);
		ehdr->e_phnum = numsegs + 1;
		ehdr->e_shentsize = sizeof(Elf_Shdr);
		ehdr->e_shnum = 0;
		ehdr->e_shstrndx = SHN_UNDEF;

		/*
		 * Fill in the program header entries.
		 */
		phdr = (Elf_Phdr *)((char *)dst + phoff);

		/* The note segment. */
		phdr->p_type = PT_NOTE;
		phdr->p_offset = noteoff;
		phdr->p_vaddr = 0;
		phdr->p_paddr = 0;
		phdr->p_filesz = notesz;
		phdr->p_memsz = 0;
		phdr->p_flags = 0;
		phdr->p_align = 0;
		phdr++;

		/* All the writable segments from the program. */
		phc.phdr = phdr;
		phc.offset = *off;
		each_writable_segment(td, cb_put_phdr, &phc);
	}
}

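/*
 * Emit one ELF note: the header, then the name and descriptor, each
 * padded to Elf_Size alignment.  With dst == NULL only *off is advanced.
 */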
static void
__elfN(putnote)(void *dst, size_t *off, const char *name, int type,
    const void *desc, size_t descsz)
{
	Elf_Note note;

	note.n_namesz = strlen(name) + 1;
	note.n_descsz = descsz;
	note.n_type = type;
	if (dst != NULL)
		bcopy(&note, (char *)dst + *off, sizeof note);
	*off += sizeof note;
	if (dst != NULL)
		bcopy(name, (char *)dst + *off, note.n_namesz);
	*off += roundup2(note.n_namesz, sizeof(Elf_Size));
	if (dst != NULL)
		bcopy(desc, (char *)dst + *off, note.n_descsz);
	*off += roundup2(note.n_descsz, sizeof(Elf_Size));
}

/*
 * Tell kern_execve.c about it, with a little help from the linker.
 */
static struct execsw __elfN(execsw) = {
	__CONCAT(exec_, __elfN(imgact)),
	__XSTRING(__CONCAT(ELF, __ELF_WORD_SIZE))
};
EXEC_SET(__CONCAT(elf, __ELF_WORD_SIZE), __elfN(execsw));