imgact_elf.c revision 189771
1/*-
2 * Copyright (c) 2000 David O'Brien
3 * Copyright (c) 1995-1996 Søren Schmidt
4 * Copyright (c) 1996 Peter Wemm
5 * All rights reserved.
6 *
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following conditions
9 * are met:
10 * 1. Redistributions of source code must retain the above copyright
11 *    notice, this list of conditions and the following disclaimer
12 *    in this position and unchanged.
13 * 2. Redistributions in binary form must reproduce the above copyright
14 *    notice, this list of conditions and the following disclaimer in the
15 *    documentation and/or other materials provided with the distribution.
16 * 3. The name of the author may not be used to endorse or promote products
17 *    derived from this software without specific prior written permission
18 *
19 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
20 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
21 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
22 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
23 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
24 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
25 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
26 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
27 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
28 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
29 */
30
31#include <sys/cdefs.h>
32__FBSDID("$FreeBSD: head/sys/kern/imgact_elf.c 189771 2009-03-13 16:40:51Z dchagin $");
33
34#include "opt_compat.h"
35
36#include <sys/param.h>
37#include <sys/exec.h>
38#include <sys/fcntl.h>
39#include <sys/imgact.h>
40#include <sys/imgact_elf.h>
41#include <sys/kernel.h>
42#include <sys/lock.h>
43#include <sys/malloc.h>
44#include <sys/mount.h>
45#include <sys/mutex.h>
46#include <sys/mman.h>
47#include <sys/namei.h>
48#include <sys/pioctl.h>
49#include <sys/proc.h>
50#include <sys/procfs.h>
51#include <sys/resourcevar.h>
52#include <sys/sf_buf.h>
53#include <sys/systm.h>
54#include <sys/signalvar.h>
55#include <sys/stat.h>
56#include <sys/sx.h>
57#include <sys/syscall.h>
58#include <sys/sysctl.h>
59#include <sys/sysent.h>
60#include <sys/vnode.h>
61
62#include <vm/vm.h>
63#include <vm/vm_kern.h>
64#include <vm/vm_param.h>
65#include <vm/pmap.h>
66#include <vm/vm_map.h>
67#include <vm/vm_object.h>
68#include <vm/vm_extern.h>
69
70#include <machine/elf.h>
71#include <machine/md_var.h>
72
73#if defined(COMPAT_IA32) && __ELF_WORD_SIZE == 32
74#include <machine/fpu.h>
75#include <compat/ia32/ia32_reg.h>
76#endif
77
78#define OLD_EI_BRAND	8
79
80static int __elfN(check_header)(const Elf_Ehdr *hdr);
81static Elf_Brandinfo *__elfN(get_brandinfo)(struct image_params *imgp,
82    const char *interp, int32_t *osrel);
83static int __elfN(load_file)(struct proc *p, const char *file, u_long *addr,
84    u_long *entry, size_t pagesize);
85static int __elfN(load_section)(struct vmspace *vmspace, vm_object_t object,
86    vm_offset_t offset, caddr_t vmaddr, size_t memsz, size_t filsz,
87    vm_prot_t prot, size_t pagesize);
88static int __CONCAT(exec_, __elfN(imgact))(struct image_params *imgp);
89static boolean_t __elfN(check_note)(struct image_params *imgp,
90    Elf_Brandnote *checknote, int32_t *osrel);
91
92SYSCTL_NODE(_kern, OID_AUTO, __CONCAT(elf, __ELF_WORD_SIZE), CTLFLAG_RW, 0,
93    "");
94
95int __elfN(fallback_brand) = -1;
96SYSCTL_INT(__CONCAT(_kern_elf, __ELF_WORD_SIZE), OID_AUTO,
97    fallback_brand, CTLFLAG_RW, &__elfN(fallback_brand), 0,
98    __XSTRING(__CONCAT(ELF, __ELF_WORD_SIZE)) " brand of last resort");
99TUNABLE_INT("kern.elf" __XSTRING(__ELF_WORD_SIZE) ".fallback_brand",
100    &__elfN(fallback_brand));
101
102static int elf_legacy_coredump = 0;
103SYSCTL_INT(_debug, OID_AUTO, __elfN(legacy_coredump), CTLFLAG_RW,
104    &elf_legacy_coredump, 0, "");
105
106static Elf_Brandinfo *elf_brand_list[MAX_BRANDS];
107
108#define	trunc_page_ps(va, ps)	((va) & ~(ps - 1))
109#define	round_page_ps(va, ps)	(((va) + (ps - 1)) & ~(ps - 1))
110#define	aligned(a, t)	(trunc_page_ps((u_long)(a), sizeof(t)) == (u_long)(a))
111
112static const char FREEBSD_ABI_VENDOR[] = "FreeBSD";
113
114Elf_Brandnote __elfN(freebsd_brandnote) = {
115	.hdr.n_namesz	= sizeof(FREEBSD_ABI_VENDOR),
116	.hdr.n_descsz	= sizeof(int32_t),
117	.hdr.n_type	= 1,
118	.vendor		= FREEBSD_ABI_VENDOR,
119	.flags		= BN_CAN_FETCH_OSREL
120};
121
122int
123__elfN(insert_brand_entry)(Elf_Brandinfo *entry)
124{
125	int i;
126
127	for (i = 0; i < MAX_BRANDS; i++) {
128		if (elf_brand_list[i] == NULL) {
129			elf_brand_list[i] = entry;
130			break;
131		}
132	}
133	if (i == MAX_BRANDS)
134		return (-1);
135	return (0);
136}
137
138int
139__elfN(remove_brand_entry)(Elf_Brandinfo *entry)
140{
141	int i;
142
143	for (i = 0; i < MAX_BRANDS; i++) {
144		if (elf_brand_list[i] == entry) {
145			elf_brand_list[i] = NULL;
146			break;
147		}
148	}
149	if (i == MAX_BRANDS)
150		return (-1);
151	return (0);
152}
153
154int
155__elfN(brand_inuse)(Elf_Brandinfo *entry)
156{
157	struct proc *p;
158	int rval = FALSE;
159
160	sx_slock(&allproc_lock);
161	FOREACH_PROC_IN_SYSTEM(p) {
162		if (p->p_sysent == entry->sysvec) {
163			rval = TRUE;
164			break;
165		}
166	}
167	sx_sunlock(&allproc_lock);
168
169	return (rval);
170}
171
/*
 * Pick the Elf_Brandinfo that should be used to execute this image.
 * The matching passes below run in decreasing order of confidence and
 * their order is ABI-significant -- do not rearrange them.  On a
 * note-based match, *osrel may be filled in by __elfN(check_note)().
 * Returns NULL when no registered brand matches.
 */
static Elf_Brandinfo *
__elfN(get_brandinfo)(struct image_params *imgp, const char *interp,
    int32_t *osrel)
{
	const Elf_Ehdr *hdr = (const Elf_Ehdr *)imgp->image_header;
	Elf_Brandinfo *bi;
	boolean_t ret;
	int i;

	/*
	 * We support four types of branding -- (1) the ELF EI_OSABI field
	 * that SCO added to the ELF spec, (2) FreeBSD 3.x's traditional string
	 * branding w/in the ELF header, (3) path of the `interp_path'
	 * field, and (4) the ".note.ABI-tag" ELF section.
	 */

	/* Look for an ".note.ABI-tag" ELF section */
	for (i = 0; i < MAX_BRANDS; i++) {
		bi = elf_brand_list[i];
		/* Only consult brands for this machine that carry a note. */
		if (bi != NULL && hdr->e_machine == bi->machine &&
		    bi->brand_note != NULL) {
			ret = __elfN(check_note)(imgp, bi->brand_note, osrel);
			if (ret)
				return (bi);
		}
	}

	/* If the executable has a brand, search for it in the brand list. */
	for (i = 0; i < MAX_BRANDS; i++) {
		bi = elf_brand_list[i];
		/* Match either the EI_OSABI byte or the old string brand. */
		if (bi != NULL && hdr->e_machine == bi->machine &&
		    (hdr->e_ident[EI_OSABI] == bi->brand ||
		    strncmp((const char *)&hdr->e_ident[OLD_EI_BRAND],
		    bi->compat_3_brand, strlen(bi->compat_3_brand)) == 0))
			return (bi);
	}

	/* Lacking a known brand, search for a recognized interpreter. */
	if (interp != NULL) {
		for (i = 0; i < MAX_BRANDS; i++) {
			bi = elf_brand_list[i];
			if (bi != NULL && hdr->e_machine == bi->machine &&
			    strcmp(interp, bi->interp_path) == 0)
				return (bi);
		}
	}

	/* Lacking a recognized interpreter, try the default brand */
	for (i = 0; i < MAX_BRANDS; i++) {
		bi = elf_brand_list[i];
		if (bi != NULL && hdr->e_machine == bi->machine &&
		    __elfN(fallback_brand) == bi->brand)
			return (bi);
	}
	return (NULL);
}
228
229static int
230__elfN(check_header)(const Elf_Ehdr *hdr)
231{
232	Elf_Brandinfo *bi;
233	int i;
234
235	if (!IS_ELF(*hdr) ||
236	    hdr->e_ident[EI_CLASS] != ELF_TARG_CLASS ||
237	    hdr->e_ident[EI_DATA] != ELF_TARG_DATA ||
238	    hdr->e_ident[EI_VERSION] != EV_CURRENT ||
239	    hdr->e_phentsize != sizeof(Elf_Phdr) ||
240	    hdr->e_version != ELF_TARG_VER)
241		return (ENOEXEC);
242
243	/*
244	 * Make sure we have at least one brand for this machine.
245	 */
246
247	for (i = 0; i < MAX_BRANDS; i++) {
248		bi = elf_brand_list[i];
249		if (bi != NULL && bi->machine == hdr->e_machine)
250			break;
251	}
252	if (i == MAX_BRANDS)
253		return (ENOEXEC);
254
255	return (0);
256}
257
/*
 * Map a sub-page fragment at the edge of a segment.  An anonymous
 * page span covering [start, end) is inserted into the map and, when
 * a backing object is supplied, the file bytes at "offset" are copied
 * into it through a temporary sf_buf kernel mapping.  Note that the
 * "prot" argument is accepted for symmetry with __elfN(map_insert)()
 * but is not used here: the fragment is mapped VM_PROT_ALL.
 * Returns a KERN_* status.
 */
static int
__elfN(map_partial)(vm_map_t map, vm_object_t object, vm_ooffset_t offset,
    vm_offset_t start, vm_offset_t end, vm_prot_t prot)
{
	struct sf_buf *sf;
	int error;
	vm_offset_t off;

	/*
	 * Create the page if it doesn't exist yet. Ignore errors.
	 */
	vm_map_lock(map);
	vm_map_insert(map, NULL, 0, trunc_page(start), round_page(end),
	    VM_PROT_ALL, VM_PROT_ALL, 0);
	vm_map_unlock(map);

	/*
	 * Find the page from the underlying object.
	 */
	if (object) {
		sf = vm_imgact_map_page(object, offset);
		if (sf == NULL)
			return (KERN_FAILURE);
		/* Copy only the fragment's bytes within its page. */
		off = offset - trunc_page(offset);
		error = copyout((caddr_t)sf_buf_kva(sf) + off, (caddr_t)start,
		    end - start);
		vm_imgact_unmap_page(sf);
		if (error) {
			return (KERN_FAILURE);
		}
	}

	return (KERN_SUCCESS);
}
292
/*
 * Insert the range [start, end) of "object", at byte offset "offset",
 * into "map" with protection "prot".  Ragged (sub-page) edges are
 * handled by __elfN(map_partial)(); a file offset that is not page
 * aligned forces copying into anonymous memory instead of mapping
 * the object's pages directly.  Returns a KERN_* status.
 */
static int
__elfN(map_insert)(vm_map_t map, vm_object_t object, vm_ooffset_t offset,
    vm_offset_t start, vm_offset_t end, vm_prot_t prot, int cow)
{
	struct sf_buf *sf;
	vm_offset_t off;
	vm_size_t sz;
	int error, rv;

	/* Handle a ragged leading edge with a partial-page copy. */
	if (start != trunc_page(start)) {
		rv = __elfN(map_partial)(map, object, offset, start,
		    round_page(start), prot);
		if (rv)
			return (rv);
		offset += round_page(start) - start;
		start = round_page(start);
	}
	/* Likewise for a ragged trailing edge. */
	if (end != round_page(end)) {
		rv = __elfN(map_partial)(map, object, offset +
		    trunc_page(end) - start, trunc_page(end), end, prot);
		if (rv)
			return (rv);
		end = trunc_page(end);
	}
	if (end > start) {
		if (offset & PAGE_MASK) {
			/*
			 * The mapping is not page aligned. This means we have
			 * to copy the data. Sigh.
			 */
			rv = vm_map_find(map, NULL, 0, &start, end - start,
			    FALSE, prot | VM_PROT_WRITE, VM_PROT_ALL, 0);
			if (rv)
				return (rv);
			if (object == NULL)
				return (KERN_SUCCESS);
			/* Copy the file data a page (or less) at a time. */
			for (; start < end; start += sz) {
				sf = vm_imgact_map_page(object, offset);
				if (sf == NULL)
					return (KERN_FAILURE);
				off = offset - trunc_page(offset);
				sz = end - start;
				if (sz > PAGE_SIZE - off)
					sz = PAGE_SIZE - off;
				error = copyout((caddr_t)sf_buf_kva(sf) + off,
				    (caddr_t)start, sz);
				vm_imgact_unmap_page(sf);
				if (error) {
					return (KERN_FAILURE);
				}
				offset += sz;
			}
			rv = KERN_SUCCESS;
		} else {
			/* Page aligned: map the object's pages directly. */
			vm_object_reference(object);
			vm_map_lock(map);
			rv = vm_map_insert(map, object, offset, start, end,
			    prot, VM_PROT_ALL, cow);
			vm_map_unlock(map);
			/* Drop the extra reference if the insert failed. */
			if (rv != KERN_SUCCESS)
				vm_object_deallocate(object);
		}
		return (rv);
	} else {
		return (KERN_SUCCESS);
	}
}
360
/*
 * Map one loadable ELF segment into the process address space.
 * "offset"/"filsz" describe the file-backed portion and "memsz" the
 * total in-memory size; any excess beyond filsz is zero-filled (bss).
 * "pagesize" may be larger than the machine page size to honor the
 * sysentvec's alignment.  Returns 0 on success or an errno value.
 */
static int
__elfN(load_section)(struct vmspace *vmspace,
	vm_object_t object, vm_offset_t offset,
	caddr_t vmaddr, size_t memsz, size_t filsz, vm_prot_t prot,
	size_t pagesize)
{
	struct sf_buf *sf;
	size_t map_len;
	vm_offset_t map_addr;
	int error, rv, cow;
	size_t copy_len;
	vm_offset_t file_addr;

	/*
	 * It's necessary to fail if the filsz + offset taken from the
	 * header is greater than the actual file pager object's size.
	 * If we were to allow this, then the vm_map_find() below would
	 * walk right off the end of the file object and into the ether.
	 *
	 * While I'm here, might as well check for something else that
	 * is invalid: filsz cannot be greater than memsz.
	 */
	if ((off_t)filsz + offset > object->un_pager.vnp.vnp_size ||
	    filsz > memsz) {
		uprintf("elf_load_section: truncated ELF file\n");
		return (ENOEXEC);
	}

	map_addr = trunc_page_ps((vm_offset_t)vmaddr, pagesize);
	file_addr = trunc_page_ps(offset, pagesize);

	/*
	 * We have two choices.  We can either clear the data in the last page
	 * of an oversized mapping, or we can start the anon mapping a page
	 * early and copy the initialized data into that first page.  We
	 * choose the second..
	 */
	if (memsz > filsz)
		map_len = trunc_page_ps(offset + filsz, pagesize) - file_addr;
	else
		map_len = round_page_ps(offset + filsz, pagesize) - file_addr;

	if (map_len != 0) {
		/* cow flags: don't dump readonly sections in core */
		cow = MAP_COPY_ON_WRITE | MAP_PREFAULT |
		    (prot & VM_PROT_WRITE ? 0 : MAP_DISABLE_COREDUMP);

		rv = __elfN(map_insert)(&vmspace->vm_map,
				      object,
				      file_addr,	/* file offset */
				      map_addr,		/* virtual start */
				      map_addr + map_len,/* virtual end */
				      prot,
				      cow);
		if (rv != KERN_SUCCESS)
			return (EINVAL);

		/* we can stop now if we've covered it all */
		if (memsz == filsz) {
			return (0);
		}
	}


	/*
	 * We have to get the remaining bit of the file into the first part
	 * of the oversized map segment.  This is normally because the .data
	 * segment in the file is extended to provide bss.  It's a neat idea
	 * to try and save a page, but it's a pain in the behind to implement.
	 */
	copy_len = (offset + filsz) - trunc_page_ps(offset + filsz, pagesize);
	map_addr = trunc_page_ps((vm_offset_t)vmaddr + filsz, pagesize);
	map_len = round_page_ps((vm_offset_t)vmaddr + memsz, pagesize) -
	    map_addr;

	/* This had damn well better be true! */
	if (map_len != 0) {
		/* Anonymous zero-fill mapping for the bss portion. */
		rv = __elfN(map_insert)(&vmspace->vm_map, NULL, 0, map_addr,
		    map_addr + map_len, VM_PROT_ALL, 0);
		if (rv != KERN_SUCCESS) {
			return (EINVAL);
		}
	}

	if (copy_len != 0) {
		vm_offset_t off;

		/* Copy the tail of the file data into the first bss page. */
		sf = vm_imgact_map_page(object, offset + filsz);
		if (sf == NULL)
			return (EIO);

		/* send the page fragment to user space */
		off = trunc_page_ps(offset + filsz, pagesize) -
		    trunc_page(offset + filsz);
		error = copyout((caddr_t)sf_buf_kva(sf) + off,
		    (caddr_t)map_addr, copy_len);
		vm_imgact_unmap_page(sf);
		if (error) {
			return (error);
		}
	}

	/*
	 * set it to the specified protection.
	 * XXX had better undo the damage from pasting over the cracks here!
	 */
	vm_map_protect(&vmspace->vm_map, trunc_page(map_addr),
	    round_page(map_addr + map_len),  prot, FALSE);

	return (0);
}
472
473/*
474 * Load the file "file" into memory.  It may be either a shared object
475 * or an executable.
476 *
477 * The "addr" reference parameter is in/out.  On entry, it specifies
478 * the address where a shared object should be loaded.  If the file is
479 * an executable, this value is ignored.  On exit, "addr" specifies
480 * where the file was actually loaded.
481 *
482 * The "entry" reference parameter is out only.  On exit, it specifies
483 * the entry point for the loaded file.
484 */
static int
__elfN(load_file)(struct proc *p, const char *file, u_long *addr,
	u_long *entry, size_t pagesize)
{
	/* Heap-allocate the large locals to keep kernel stack usage low. */
	struct {
		struct nameidata nd;
		struct vattr attr;
		struct image_params image_params;
	} *tempdata;
	const Elf_Ehdr *hdr = NULL;
	const Elf_Phdr *phdr = NULL;
	struct nameidata *nd;
	struct vmspace *vmspace = p->p_vmspace;
	struct vattr *attr;
	struct image_params *imgp;
	vm_prot_t prot;
	u_long rbase;
	u_long base_addr = 0;
	int vfslocked, error, i, numsegs;

	tempdata = malloc(sizeof(*tempdata), M_TEMP, M_WAITOK);
	nd = &tempdata->nd;
	attr = &tempdata->attr;
	imgp = &tempdata->image_params;

	/*
	 * Initialize part of the common data
	 */
	imgp->proc = p;
	imgp->attr = attr;
	imgp->firstpage = NULL;
	imgp->image_header = NULL;
	imgp->object = NULL;
	imgp->execlabel = NULL;

	/* Look up the interpreter path; on success ni_vp is locked. */
	NDINIT(nd, LOOKUP, MPSAFE|LOCKLEAF|FOLLOW, UIO_SYSSPACE, file,
	    curthread);
	vfslocked = 0;
	if ((error = namei(nd)) != 0) {
		/* Clear ni_vp so the cleanup path below skips vput(). */
		nd->ni_vp = NULL;
		goto fail;
	}
	vfslocked = NDHASGIANT(nd);
	NDFREE(nd, NDF_ONLY_PNBUF);
	imgp->vp = nd->ni_vp;

	/*
	 * Check permissions, modes, uid, etc on the file, and "open" it.
	 */
	error = exec_check_permissions(imgp);
	if (error)
		goto fail;

	error = exec_map_first_page(imgp);
	if (error)
		goto fail;

	/*
	 * Also make certain that the interpreter stays the same, so set
	 * its VV_TEXT flag, too.
	 */
	nd->ni_vp->v_vflag |= VV_TEXT;

	imgp->object = nd->ni_vp->v_object;

	hdr = (const Elf_Ehdr *)imgp->image_header;
	if ((error = __elfN(check_header)(hdr)) != 0)
		goto fail;
	/* Shared objects relocate to *addr; executables are absolute. */
	if (hdr->e_type == ET_DYN)
		rbase = *addr;
	else if (hdr->e_type == ET_EXEC)
		rbase = 0;
	else {
		error = ENOEXEC;
		goto fail;
	}

	/* Only support headers that fit within first page for now      */
	/*    (multiplication of two Elf_Half fields will not overflow) */
	if ((hdr->e_phoff > PAGE_SIZE) ||
	    (hdr->e_phentsize * hdr->e_phnum) > PAGE_SIZE - hdr->e_phoff) {
		error = ENOEXEC;
		goto fail;
	}

	phdr = (const Elf_Phdr *)(imgp->image_header + hdr->e_phoff);
	if (!aligned(phdr, Elf_Addr)) {
		error = ENOEXEC;
		goto fail;
	}

	/* Map every PT_LOAD segment, relocated by rbase. */
	for (i = 0, numsegs = 0; i < hdr->e_phnum; i++) {
		if (phdr[i].p_type == PT_LOAD) {	/* Loadable segment */
			prot = 0;
			if (phdr[i].p_flags & PF_X)
  				prot |= VM_PROT_EXECUTE;
			if (phdr[i].p_flags & PF_W)
  				prot |= VM_PROT_WRITE;
			if (phdr[i].p_flags & PF_R)
  				prot |= VM_PROT_READ;

			if ((error = __elfN(load_section)(vmspace,
			    imgp->object, phdr[i].p_offset,
			    (caddr_t)(uintptr_t)phdr[i].p_vaddr + rbase,
			    phdr[i].p_memsz, phdr[i].p_filesz, prot,
			    pagesize)) != 0)
				goto fail;
			/*
			 * Establish the base address if this is the
			 * first segment.
			 */
			if (numsegs == 0)
  				base_addr = trunc_page(phdr[i].p_vaddr +
				    rbase);
			numsegs++;
		}
	}
	*addr = base_addr;
	*entry = (unsigned long)hdr->e_entry + rbase;

fail:
	/* Common cleanup: runs on both the success and error paths. */
	if (imgp->firstpage)
		exec_unmap_first_page(imgp);

	if (nd->ni_vp)
		vput(nd->ni_vp);

	VFS_UNLOCK_GIANT(vfslocked);
	free(tempdata, M_TEMP);

	return (error);
}
617
/*
 * The ELF image activator.  Validates the header, selects a brand,
 * replaces the process address space, maps all PT_LOAD segments,
 * loads the dynamic linker when PT_INTERP is present, and builds the
 * auxargs later consumed by __elfN(freebsd_fixup)().  Returns -1 if
 * the image is not ELF at all; afterwards, 0 or an errno value.
 */
static int
__CONCAT(exec_, __elfN(imgact))(struct image_params *imgp)
{
	const Elf_Ehdr *hdr = (const Elf_Ehdr *)imgp->image_header;
	const Elf_Phdr *phdr;
	Elf_Auxargs *elf_auxargs;
	struct vmspace *vmspace;
	vm_prot_t prot;
	u_long text_size = 0, data_size = 0, total_size = 0;
	u_long text_addr = 0, data_addr = 0;
	u_long seg_size, seg_addr;
	u_long addr, entry = 0, proghdr = 0;
	int32_t osrel = 0;
	int error = 0, i;
	const char *interp = NULL, *newinterp = NULL;
	Elf_Brandinfo *brand_info;
	char *path;
	struct sysentvec *sv;

	/*
	 * Do we have a valid ELF header ?
	 *
	 * Only allow ET_EXEC & ET_DYN here, reject ET_DYN later
	 * if particular brand doesn't support it.
	 */
	if (__elfN(check_header)(hdr) != 0 ||
	    (hdr->e_type != ET_EXEC && hdr->e_type != ET_DYN))
		return (-1);

	/*
	 * From here on down, we return an errno, not -1, as we've
	 * detected an ELF file.
	 */

	if ((hdr->e_phoff > PAGE_SIZE) ||
	    (hdr->e_phoff + hdr->e_phentsize * hdr->e_phnum) > PAGE_SIZE) {
		/* Only support headers in first page for now */
		return (ENOEXEC);
	}
	phdr = (const Elf_Phdr *)(imgp->image_header + hdr->e_phoff);
	if (!aligned(phdr, Elf_Addr))
		return (ENOEXEC);
	/* Find the interpreter (runtime linker) path, if any. */
	for (i = 0; i < hdr->e_phnum; i++) {
		if (phdr[i].p_type == PT_INTERP) {
			/* Path to interpreter */
			if (phdr[i].p_filesz > MAXPATHLEN ||
			    phdr[i].p_offset + phdr[i].p_filesz > PAGE_SIZE)
				return (ENOEXEC);
			interp = imgp->image_header + phdr[i].p_offset;
			break;
		}
	}

	brand_info = __elfN(get_brandinfo)(imgp, interp, &osrel);
	if (brand_info == NULL) {
		uprintf("ELF binary type \"%u\" not known.\n",
		    hdr->e_ident[EI_OSABI]);
		return (ENOEXEC);
	}
	if (hdr->e_type == ET_DYN &&
	    (brand_info->flags & BI_CAN_EXEC_DYN) == 0)
		return (ENOEXEC);
	sv = brand_info->sysvec;
	if (interp != NULL && brand_info->interp_newpath != NULL)
		newinterp = brand_info->interp_newpath;

	/*
	 * Avoid a possible deadlock if the current address space is destroyed
	 * and that address space maps the locked vnode.  In the common case,
	 * the locked vnode's v_usecount is decremented but remains greater
	 * than zero.  Consequently, the vnode lock is not needed by vrele().
	 * However, in cases where the vnode lock is external, such as nullfs,
	 * v_usecount may become zero.
	 */
	VOP_UNLOCK(imgp->vp, 0);

	error = exec_new_vmspace(imgp, sv);
	imgp->proc->p_sysent = sv;

	/* Relock the image vnode before checking the result. */
	vn_lock(imgp->vp, LK_EXCLUSIVE | LK_RETRY);
	if (error)
		return (error);

	vmspace = imgp->proc->p_vmspace;

	for (i = 0; i < hdr->e_phnum; i++) {
		switch (phdr[i].p_type) {
		case PT_LOAD:	/* Loadable segment */
			prot = 0;
			if (phdr[i].p_flags & PF_X)
  				prot |= VM_PROT_EXECUTE;
			if (phdr[i].p_flags & PF_W)
  				prot |= VM_PROT_WRITE;
			if (phdr[i].p_flags & PF_R)
  				prot |= VM_PROT_READ;

#if defined(__ia64__) && __ELF_WORD_SIZE == 32 && defined(IA32_ME_HARDER)
			/*
			 * Some x86 binaries assume read == executable,
			 * notably the M3 runtime and therefore cvsup
			 */
			if (prot & VM_PROT_READ)
				prot |= VM_PROT_EXECUTE;
#endif

			if ((error = __elfN(load_section)(vmspace,
			    imgp->object, phdr[i].p_offset,
			    (caddr_t)(uintptr_t)phdr[i].p_vaddr,
			    phdr[i].p_memsz, phdr[i].p_filesz, prot,
			    sv->sv_pagesize)) != 0)
				return (error);

			/*
			 * If this segment contains the program headers,
			 * remember their virtual address for the AT_PHDR
			 * aux entry. Static binaries don't usually include
			 * a PT_PHDR entry.
			 */
			if (phdr[i].p_offset == 0 &&
			    hdr->e_phoff + hdr->e_phnum * hdr->e_phentsize
				<= phdr[i].p_filesz)
				proghdr = phdr[i].p_vaddr + hdr->e_phoff;

			seg_addr = trunc_page(phdr[i].p_vaddr);
			seg_size = round_page(phdr[i].p_memsz +
			    phdr[i].p_vaddr - seg_addr);

			/*
			 * Is this .text or .data?  We can't use
			 * VM_PROT_WRITE or VM_PROT_EXEC, it breaks the
			 * alpha terribly and possibly does other bad
			 * things so we stick to the old way of figuring
			 * it out:  If the segment contains the program
			 * entry point, it's a text segment, otherwise it
			 * is a data segment.
			 *
			 * Note that obreak() assumes that data_addr +
			 * data_size == end of data load area, and the ELF
			 * file format expects segments to be sorted by
			 * address.  If multiple data segments exist, the
			 * last one will be used.
			 */
			if (hdr->e_entry >= phdr[i].p_vaddr &&
			    hdr->e_entry < (phdr[i].p_vaddr +
			    phdr[i].p_memsz)) {
				text_size = seg_size;
				text_addr = seg_addr;
				entry = (u_long)hdr->e_entry;
			} else {
				data_size = seg_size;
				data_addr = seg_addr;
			}
			total_size += seg_size;
			break;
		case PT_PHDR: 	/* Program header table info */
			proghdr = phdr[i].p_vaddr;
			break;
		default:
			break;
		}
	}

	/* No data segment found: fall back to the text segment. */
	if (data_addr == 0 && data_size == 0) {
		data_addr = text_addr;
		data_size = text_size;
	}

	/*
	 * Check limits.  It should be safe to check the
	 * limits after loading the segments since we do
	 * not actually fault in all the segments pages.
	 */
	PROC_LOCK(imgp->proc);
	if (data_size > lim_cur(imgp->proc, RLIMIT_DATA) ||
	    text_size > maxtsiz ||
	    total_size > lim_cur(imgp->proc, RLIMIT_VMEM)) {
		PROC_UNLOCK(imgp->proc);
		return (ENOMEM);
	}

	vmspace->vm_tsize = text_size >> PAGE_SHIFT;
	vmspace->vm_taddr = (caddr_t)(uintptr_t)text_addr;
	vmspace->vm_dsize = data_size >> PAGE_SHIFT;
	vmspace->vm_daddr = (caddr_t)(uintptr_t)data_addr;

	/*
	 * We load the dynamic linker where a userland call
	 * to mmap(0, ...) would put it.  The rationale behind this
	 * calculation is that it leaves room for the heap to grow to
	 * its maximum allowed size.
	 */
	addr = round_page((vm_offset_t)imgp->proc->p_vmspace->vm_daddr +
	    lim_max(imgp->proc, RLIMIT_DATA));
	PROC_UNLOCK(imgp->proc);

	imgp->entry_addr = entry;

	if (interp != NULL) {
		int have_interp = FALSE;
		/* Drop the image vnode lock while loading the interpreter. */
		VOP_UNLOCK(imgp->vp, 0);
		/* First try the brand's emulation-prefixed path, ... */
		if (brand_info->emul_path != NULL &&
		    brand_info->emul_path[0] != '\0') {
			path = malloc(MAXPATHLEN, M_TEMP, M_WAITOK);
			snprintf(path, MAXPATHLEN, "%s%s",
			    brand_info->emul_path, interp);
			error = __elfN(load_file)(imgp->proc, path, &addr,
			    &imgp->entry_addr, sv->sv_pagesize);
			free(path, M_TEMP);
			if (error == 0)
				have_interp = TRUE;
		}
		/* ... then the brand's replacement interpreter, ... */
		if (!have_interp && newinterp != NULL) {
			error = __elfN(load_file)(imgp->proc, newinterp, &addr,
			    &imgp->entry_addr, sv->sv_pagesize);
			if (error == 0)
				have_interp = TRUE;
		}
		/* ... and finally the path named by PT_INTERP itself. */
		if (!have_interp) {
			error = __elfN(load_file)(imgp->proc, interp, &addr,
			    &imgp->entry_addr, sv->sv_pagesize);
		}
		vn_lock(imgp->vp, LK_EXCLUSIVE | LK_RETRY);
		if (error != 0) {
			uprintf("ELF interpreter %s not found\n", interp);
			return (error);
		}
	} else
		addr = 0;

	/*
	 * Construct auxargs table (used by the fixup routine)
	 */
	elf_auxargs = malloc(sizeof(Elf_Auxargs), M_TEMP, M_WAITOK);
	elf_auxargs->execfd = -1;
	elf_auxargs->phdr = proghdr;
	elf_auxargs->phent = hdr->e_phentsize;
	elf_auxargs->phnum = hdr->e_phnum;
	elf_auxargs->pagesz = PAGE_SIZE;
	elf_auxargs->base = addr;
	elf_auxargs->flags = 0;
	elf_auxargs->entry = entry;

	imgp->auxargs = elf_auxargs;
	imgp->interpreted = 0;
	imgp->proc->p_osrel = osrel;

	return (error);
}
866
867#define	suword __CONCAT(suword, __ELF_WORD_SIZE)
868
/*
 * Finish building the new process stack: append the ELF auxiliary
 * vector after the argv/envp pointer arrays and push argc.  On
 * return, *stack_base points at the argc slot.
 */
int
__elfN(freebsd_fixup)(register_t **stack_base, struct image_params *imgp)
{
	Elf_Auxargs *args = (Elf_Auxargs *)imgp->auxargs;
	Elf_Addr *base;
	Elf_Addr *pos;

	base = (Elf_Addr *)*stack_base;
	/*
	 * Skip over the argc+envc pointer slots plus the two NULL
	 * terminators of the argv and envp arrays; the aux vector
	 * starts right after them.
	 */
	pos = base + (imgp->args->argc + imgp->args->envc + 2);

	if (args->execfd != -1)
		AUXARGS_ENTRY(pos, AT_EXECFD, args->execfd);
	AUXARGS_ENTRY(pos, AT_PHDR, args->phdr);
	AUXARGS_ENTRY(pos, AT_PHENT, args->phent);
	AUXARGS_ENTRY(pos, AT_PHNUM, args->phnum);
	AUXARGS_ENTRY(pos, AT_PAGESZ, args->pagesz);
	AUXARGS_ENTRY(pos, AT_FLAGS, args->flags);
	AUXARGS_ENTRY(pos, AT_ENTRY, args->entry);
	AUXARGS_ENTRY(pos, AT_BASE, args->base);
	/* AT_NULL terminates the aux vector. */
	AUXARGS_ENTRY(pos, AT_NULL, 0);

	free(imgp->auxargs, M_TEMP);
	imgp->auxargs = NULL;

	/* Push argc just below the argv array. */
	base--;
	suword(base, (long)imgp->args->argc);
	*stack_base = (register_t *)base;
	return (0);
}
898
899/*
900 * Code for generating ELF core dumps.
901 */
902
903typedef void (*segment_callback)(vm_map_entry_t, void *);
904
905/* Closure for cb_put_phdr(). */
906struct phdr_closure {
907	Elf_Phdr *phdr;		/* Program header to fill in */
908	Elf_Off offset;		/* Offset of segment in core file */
909};
910
911/* Closure for cb_size_segment(). */
912struct sseg_closure {
913	int count;		/* Count of writable segments. */
914	size_t size;		/* Total size of all writable segments. */
915};
916
917static void cb_put_phdr(vm_map_entry_t, void *);
918static void cb_size_segment(vm_map_entry_t, void *);
919static void each_writable_segment(struct thread *, segment_callback, void *);
920static int __elfN(corehdr)(struct thread *, struct vnode *, struct ucred *,
921    int, void *, size_t);
922static void __elfN(puthdr)(struct thread *, void *, size_t *, int);
923static void __elfN(putnote)(void *, size_t *, const char *, int,
924    const void *, size_t);
925
/*
 * Write an ELF core dump of td's process to vnode "vp", refusing to
 * exceed "limit" bytes.  The header area is sized by a dry run of
 * __elfN(puthdr)(), then built and written, then the contents of all
 * dumpable writable segments are appended.  Returns 0 or an errno.
 */
int
__elfN(coredump)(td, vp, limit)
	struct thread *td;
	struct vnode *vp;
	off_t limit;
{
	struct ucred *cred = td->td_ucred;
	int error = 0;
	struct sseg_closure seginfo;
	void *hdr;
	size_t hdrsize;

	/* Size the program segments. */
	seginfo.count = 0;
	seginfo.size = 0;
	each_writable_segment(td, cb_size_segment, &seginfo);

	/*
	 * Calculate the size of the core file header area by making
	 * a dry run of generating it.  Nothing is written, but the
	 * size is calculated.
	 */
	hdrsize = 0;
	__elfN(puthdr)(td, (void *)NULL, &hdrsize, seginfo.count);

	/* Honor the process's core file size limit. */
	if (hdrsize + seginfo.size >= limit)
		return (EFAULT);

	/*
	 * Allocate memory for building the header, fill it up,
	 * and write it out.
	 */
	hdr = malloc(hdrsize, M_TEMP, M_WAITOK);
	if (hdr == NULL) {
		/*
		 * NOTE(review): an M_WAITOK allocation should not return
		 * NULL, so this looks like a defensive no-op -- confirm.
		 */
		return (EINVAL);
	}
	error = __elfN(corehdr)(td, vp, cred, seginfo.count, hdr, hdrsize);

	/* Write the contents of all of the writable segments. */
	if (error == 0) {
		Elf_Phdr *php;
		off_t offset;
		int i;

		/*
		 * Skip the first program header ("+ 1"); the remaining
		 * entries describe the writable segments, whose data
		 * starts in the file right after the header area.
		 */
		php = (Elf_Phdr *)((char *)hdr + sizeof(Elf_Ehdr)) + 1;
		offset = hdrsize;
		for (i = 0; i < seginfo.count; i++) {
			error = vn_rdwr_inchunks(UIO_WRITE, vp,
			    (caddr_t)(uintptr_t)php->p_vaddr,
			    php->p_filesz, offset, UIO_USERSPACE,
			    IO_UNIT | IO_DIRECT, cred, NOCRED, NULL,
			    curthread);
			if (error != 0)
				break;
			offset += php->p_filesz;
			php++;
		}
	}
	free(hdr, M_TEMP);

	return (error);
}
988
989/*
990 * A callback for each_writable_segment() to write out the segment's
991 * program header entry.
992 */
993static void
994cb_put_phdr(entry, closure)
995	vm_map_entry_t entry;
996	void *closure;
997{
998	struct phdr_closure *phc = (struct phdr_closure *)closure;
999	Elf_Phdr *phdr = phc->phdr;
1000
1001	phc->offset = round_page(phc->offset);
1002
1003	phdr->p_type = PT_LOAD;
1004	phdr->p_offset = phc->offset;
1005	phdr->p_vaddr = entry->start;
1006	phdr->p_paddr = 0;
1007	phdr->p_filesz = phdr->p_memsz = entry->end - entry->start;
1008	phdr->p_align = PAGE_SIZE;
1009	phdr->p_flags = 0;
1010	if (entry->protection & VM_PROT_READ)
1011		phdr->p_flags |= PF_R;
1012	if (entry->protection & VM_PROT_WRITE)
1013		phdr->p_flags |= PF_W;
1014	if (entry->protection & VM_PROT_EXECUTE)
1015		phdr->p_flags |= PF_X;
1016
1017	phc->offset += phdr->p_filesz;
1018	phc->phdr++;
1019}
1020
1021/*
1022 * A callback for each_writable_segment() to gather information about
1023 * the number of segments and their total size.
1024 */
1025static void
1026cb_size_segment(entry, closure)
1027	vm_map_entry_t entry;
1028	void *closure;
1029{
1030	struct sseg_closure *ssc = (struct sseg_closure *)closure;
1031
1032	ssc->count++;
1033	ssc->size += entry->end - entry->start;
1034}
1035
1036/*
1037 * For each writable segment in the process's memory map, call the given
1038 * function with a pointer to the map entry and some arbitrary
1039 * caller-supplied data.
1040 */
static void
each_writable_segment(td, func, closure)
	struct thread *td;
	segment_callback func;
	void *closure;
{
	struct proc *p = td->td_proc;
	vm_map_t map = &p->p_vmspace->vm_map;
	vm_map_entry_t entry;
	vm_object_t backing_object, object;
	boolean_t ignore_entry;

	/* The whole walk happens under the map's read lock. */
	vm_map_lock_read(map);
	for (entry = map->header.next; entry != &map->header;
	    entry = entry->next) {
		/*
		 * Don't dump inaccessible mappings, deal with legacy
		 * coredump mode.
		 *
		 * Note that read-only segments related to the elf binary
		 * are marked MAP_ENTRY_NOCOREDUMP now so we no longer
		 * need to arbitrarily ignore such segments.
		 */
		if (elf_legacy_coredump) {
			if ((entry->protection & VM_PROT_RW) != VM_PROT_RW)
				continue;
		} else {
			if ((entry->protection & VM_PROT_ALL) == 0)
				continue;
		}

		/*
		 * Dont include memory segment in the coredump if
		 * MAP_NOCORE is set in mmap(2) or MADV_NOCORE in
		 * madvise(2).  Do not dump submaps (i.e. parts of the
		 * kernel map).
		 */
		if (entry->eflags & (MAP_ENTRY_NOCOREDUMP|MAP_ENTRY_IS_SUB_MAP))
			continue;

		if ((object = entry->object.vm_object) == NULL)
			continue;

		/* Ignore memory-mapped devices and such things. */
		VM_OBJECT_LOCK(object);
		/*
		 * Walk to the bottom of the backing-object chain, keeping
		 * the next object locked before unlocking the current one.
		 */
		while ((backing_object = object->backing_object) != NULL) {
			VM_OBJECT_LOCK(backing_object);
			VM_OBJECT_UNLOCK(object);
			object = backing_object;
		}
		/* Only default, swap, and vnode objects are dumpable. */
		ignore_entry = object->type != OBJT_DEFAULT &&
		    object->type != OBJT_SWAP && object->type != OBJT_VNODE;
		VM_OBJECT_UNLOCK(object);
		if (ignore_entry)
			continue;

		(*func)(entry, closure);
	}
	vm_map_unlock_read(map);
}
1101
1102/*
1103 * Write the core file header to the file, including padding up to
1104 * the page boundary.
1105 */
1106static int
1107__elfN(corehdr)(td, vp, cred, numsegs, hdr, hdrsize)
1108	struct thread *td;
1109	struct vnode *vp;
1110	struct ucred *cred;
1111	int numsegs;
1112	size_t hdrsize;
1113	void *hdr;
1114{
1115	size_t off;
1116
1117	/* Fill in the header. */
1118	bzero(hdr, hdrsize);
1119	off = 0;
1120	__elfN(puthdr)(td, hdr, &off, numsegs);
1121
1122	/* Write it to the core file. */
1123	return (vn_rdwr_inchunks(UIO_WRITE, vp, hdr, hdrsize, (off_t)0,
1124	    UIO_SYSSPACE, IO_UNIT | IO_DIRECT, cred, NOCRED, NULL,
1125	    td));
1126}
1127
#if defined(COMPAT_IA32) && __ELF_WORD_SIZE == 32
/*
 * Dumping a 32-bit image from a 64-bit kernel (COMPAT_IA32): the core
 * notes must carry the 32-bit register/status/psinfo layouts.
 */
typedef struct prstatus32 elf_prstatus_t;
typedef struct prpsinfo32 elf_prpsinfo_t;
typedef struct fpreg32 elf_prfpregset_t;
typedef struct fpreg32 elf_fpregset_t;
typedef struct reg32 elf_gregset_t;
#else
/* Native word size: use the machine's own note layouts. */
typedef prstatus_t elf_prstatus_t;
typedef prpsinfo_t elf_prpsinfo_t;
typedef prfpregset_t elf_prfpregset_t;
typedef prfpregset_t elf_fpregset_t;
typedef gregset_t elf_gregset_t;
#endif
1141
/*
 * Build the core file header (ELF header, program headers, and notes)
 * into dst at *off, advancing *off past everything written.  When dst
 * is NULL no data is written or collected: the function only walks the
 * same layout to compute the total header size via *off.
 */
static void
__elfN(puthdr)(struct thread *td, void *dst, size_t *off, int numsegs)
{
	/* Scratch space for note payloads; allocated only when writing. */
	struct {
		elf_prstatus_t status;
		elf_prfpregset_t fpregset;
		elf_prpsinfo_t psinfo;
	} *tempdata;
	elf_prstatus_t *status;
	elf_prfpregset_t *fpregset;
	elf_prpsinfo_t *psinfo;
	struct proc *p;
	struct thread *thr;
	size_t ehoff, noteoff, notesz, phoff;

	p = td->td_proc;

	/* Reserve space for the ELF header. */
	ehoff = *off;
	*off += sizeof(Elf_Ehdr);

	/* Reserve space for the phdrs: one PT_NOTE plus one per segment. */
	phoff = *off;
	*off += (numsegs + 1) * sizeof(Elf_Phdr);

	noteoff = *off;
	/*
	 * Don't allocate space for the notes if we're just calculating
	 * the size of the header. We also don't collect the data.
	 */
	if (dst != NULL) {
		tempdata = malloc(sizeof(*tempdata), M_TEMP, M_ZERO|M_WAITOK);
		status = &tempdata->status;
		fpregset = &tempdata->fpregset;
		psinfo = &tempdata->psinfo;
	} else {
		tempdata = NULL;
		status = NULL;
		fpregset = NULL;
		psinfo = NULL;
	}

	if (dst != NULL) {
		psinfo->pr_version = PRPSINFO_VERSION;
		psinfo->pr_psinfosz = sizeof(elf_prpsinfo_t);
		strlcpy(psinfo->pr_fname, p->p_comm, sizeof(psinfo->pr_fname));
		/*
		 * XXX - We don't fill in the command line arguments properly
		 * yet.
		 */
		strlcpy(psinfo->pr_psargs, p->p_comm,
		    sizeof(psinfo->pr_psargs));
	}
	/* One NT_PRPSINFO note for the whole process. */
	__elfN(putnote)(dst, off, "FreeBSD", NT_PRPSINFO, psinfo,
	    sizeof *psinfo);

	/*
	 * To have the debugger select the right thread (LWP) as the initial
	 * thread, we dump the state of the thread passed to us in td first.
	 * This is the thread that causes the core dump and thus likely to
	 * be the right thread one wants to have selected in the debugger.
	 */
	thr = td;
	while (thr != NULL) {
		if (dst != NULL) {
			status->pr_version = PRSTATUS_VERSION;
			status->pr_statussz = sizeof(elf_prstatus_t);
			status->pr_gregsetsz = sizeof(elf_gregset_t);
			status->pr_fpregsetsz = sizeof(elf_fpregset_t);
			status->pr_osreldate = osreldate;
			status->pr_cursig = p->p_sig;
			status->pr_pid = thr->td_tid;
#if defined(COMPAT_IA32) && __ELF_WORD_SIZE == 32
			fill_regs32(thr, &status->pr_reg);
			fill_fpregs32(thr, fpregset);
#else
			fill_regs(thr, &status->pr_reg);
			fill_fpregs(thr, fpregset);
#endif
		}
		/* Per-thread NT_PRSTATUS and NT_FPREGSET notes. */
		__elfN(putnote)(dst, off, "FreeBSD", NT_PRSTATUS, status,
		    sizeof *status);
		__elfN(putnote)(dst, off, "FreeBSD", NT_FPREGSET, fpregset,
		    sizeof *fpregset);
		/*
		 * Allow for MD specific notes, as well as any MD
		 * specific preparations for writing MI notes.
		 */
		__elfN(dump_thread)(thr, dst, off);

		/*
		 * Iterate td first, then all other threads, skipping td
		 * when the list walk reaches it again.
		 */
		thr = (thr == td) ? TAILQ_FIRST(&p->p_threads) :
		    TAILQ_NEXT(thr, td_plist);
		if (thr == td)
			thr = TAILQ_NEXT(thr, td_plist);
	}

	notesz = *off - noteoff;

	if (dst != NULL)
		free(tempdata, M_TEMP);

	/* Align up to a page boundary for the program segments. */
	*off = round_page(*off);

	if (dst != NULL) {
		Elf_Ehdr *ehdr;
		Elf_Phdr *phdr;
		struct phdr_closure phc;

		/*
		 * Fill in the ELF header.
		 */
		ehdr = (Elf_Ehdr *)((char *)dst + ehoff);
		ehdr->e_ident[EI_MAG0] = ELFMAG0;
		ehdr->e_ident[EI_MAG1] = ELFMAG1;
		ehdr->e_ident[EI_MAG2] = ELFMAG2;
		ehdr->e_ident[EI_MAG3] = ELFMAG3;
		ehdr->e_ident[EI_CLASS] = ELF_CLASS;
		ehdr->e_ident[EI_DATA] = ELF_DATA;
		ehdr->e_ident[EI_VERSION] = EV_CURRENT;
		ehdr->e_ident[EI_OSABI] = ELFOSABI_FREEBSD;
		ehdr->e_ident[EI_ABIVERSION] = 0;
		ehdr->e_ident[EI_PAD] = 0;
		ehdr->e_type = ET_CORE;
#if defined(COMPAT_IA32) && __ELF_WORD_SIZE == 32
		ehdr->e_machine = EM_386;
#else
		ehdr->e_machine = ELF_ARCH;
#endif
		ehdr->e_version = EV_CURRENT;
		ehdr->e_entry = 0;
		ehdr->e_phoff = phoff;
		ehdr->e_flags = 0;
		ehdr->e_ehsize = sizeof(Elf_Ehdr);
		ehdr->e_phentsize = sizeof(Elf_Phdr);
		ehdr->e_phnum = numsegs + 1;
		ehdr->e_shentsize = sizeof(Elf_Shdr);
		ehdr->e_shnum = 0;
		ehdr->e_shstrndx = SHN_UNDEF;

		/*
		 * Fill in the program header entries.
		 */
		phdr = (Elf_Phdr *)((char *)dst + phoff);

		/* The note segement. */
		phdr->p_type = PT_NOTE;
		phdr->p_offset = noteoff;
		phdr->p_vaddr = 0;
		phdr->p_paddr = 0;
		phdr->p_filesz = notesz;
		phdr->p_memsz = 0;
		phdr->p_flags = 0;
		phdr->p_align = 0;
		phdr++;

		/* All the writable segments from the program. */
		phc.phdr = phdr;
		phc.offset = *off;
		each_writable_segment(td, cb_put_phdr, &phc);
	}
}
1302
1303static void
1304__elfN(putnote)(void *dst, size_t *off, const char *name, int type,
1305    const void *desc, size_t descsz)
1306{
1307	Elf_Note note;
1308
1309	note.n_namesz = strlen(name) + 1;
1310	note.n_descsz = descsz;
1311	note.n_type = type;
1312	if (dst != NULL)
1313		bcopy(&note, (char *)dst + *off, sizeof note);
1314	*off += sizeof note;
1315	if (dst != NULL)
1316		bcopy(name, (char *)dst + *off, note.n_namesz);
1317	*off += roundup2(note.n_namesz, sizeof(Elf_Size));
1318	if (dst != NULL)
1319		bcopy(desc, (char *)dst + *off, note.n_descsz);
1320	*off += roundup2(note.n_descsz, sizeof(Elf_Size));
1321}
1322
1323/*
1324 * Try to find the appropriate ABI-note section for checknote,
1325 * fetch the osreldate for binary from the ELF OSABI-note. Only the
1326 * first page of the image is searched, the same as for headers.
1327 */
static boolean_t
__elfN(check_note)(struct image_params *imgp, Elf_Brandnote *checknote,
    int32_t *osrel)
{
	const Elf_Note *note, *note_end;
	const Elf32_Phdr *phdr, *pnote;
	const Elf32_Ehdr *hdr;
	const char *note_name;
	int i;

	pnote = NULL;
	hdr = (const Elf32_Ehdr *)imgp->image_header;
	phdr = (const Elf32_Phdr *)(imgp->image_header + hdr->e_phoff);

	/* Find the first PT_NOTE program header. */
	for (i = 0; i < hdr->e_phnum; i++) {
		if (phdr[i].p_type == PT_NOTE) {
			pnote = &phdr[i];
			break;
		}
	}

	/*
	 * The note must lie entirely within the first page, since only
	 * that much of the image is mapped at imgp->image_header.
	 */
	if (pnote == NULL || pnote->p_offset >= PAGE_SIZE ||
	    pnote->p_offset + pnote->p_filesz >= PAGE_SIZE)
		return (FALSE);

	note = (const Elf_Note *)(imgp->image_header + pnote->p_offset);
	/* Reject misaligned note data rather than fault on access. */
	if (!aligned(note, Elf32_Addr))
		return (FALSE);
	note_end = (const Elf_Note *)(imgp->image_header +
	    pnote->p_offset + pnote->p_filesz);
	/* Scan each note in the segment for a match against checknote. */
	while (note < note_end) {
		if (note->n_namesz != checknote->hdr.n_namesz ||
		    note->n_descsz != checknote->hdr.n_descsz ||
		    note->n_type != checknote->hdr.n_type)
			goto nextnote;
		note_name = (const char *)(note + 1);
		if (strncmp(checknote->vendor, note_name,
		    checknote->hdr.n_namesz) != 0)
			goto nextnote;

		/*
		 * Fetch the osreldate for binary
		 * from the ELF OSABI-note if necessary.
		 */
		if ((checknote->flags & BN_CAN_FETCH_OSREL) != 0 &&
		    osrel != NULL)
			/* Descriptor follows the Elf32_Addr-padded name. */
			*osrel = *(const int32_t *) (note_name +
			    roundup2(checknote->hdr.n_namesz,
			    sizeof(Elf32_Addr)));
		return (TRUE);

nextnote:
		/* Advance past the padded name and descriptor. */
		note = (const Elf_Note *)((const char *)(note + 1) +
		    roundup2(note->n_namesz, sizeof(Elf32_Addr)) +
		    roundup2(note->n_descsz, sizeof(Elf32_Addr)));
	}

	return (FALSE);
}
1387
1388/*
1389 * Tell kern_execve.c about it, with a little help from the linker.
1390 */
static struct execsw __elfN(execsw) = {
	__CONCAT(exec_, __elfN(imgact)),	/* image activator function */
	__XSTRING(__CONCAT(ELF, __ELF_WORD_SIZE))	/* name: "ELF32"/"ELF64" */
};
EXEC_SET(__CONCAT(elf, __ELF_WORD_SIZE), __elfN(execsw));
1396