imgact_elf.c revision 166073
12116Sjkh/*-
22116Sjkh * Copyright (c) 2000 David O'Brien
3152535Sbde * Copyright (c) 1995-1996 Søren Schmidt
42116Sjkh * Copyright (c) 1996 Peter Wemm
52116Sjkh * All rights reserved.
62116Sjkh *
72116Sjkh * Redistribution and use in source and binary forms, with or without
82116Sjkh * modification, are permitted provided that the following conditions
92116Sjkh * are met:
102116Sjkh * 1. Redistributions of source code must retain the above copyright
112116Sjkh *    notice, this list of conditions and the following disclaimer
128870Srgrimes *    in this position and unchanged.
132116Sjkh * 2. Redistributions in binary form must reproduce the above copyright
142116Sjkh *    notice, this list of conditions and the following disclaimer in the
152116Sjkh *    documentation and/or other materials provided with the distribution.
162116Sjkh * 3. The name of the author may not be used to endorse or promote products
17176410Sbde *    derived from this software without specific prior written permission
18176410Sbde *
192116Sjkh * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
202116Sjkh * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
218870Srgrimes * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
22176552Sbde * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
23176552Sbde * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
24152535Sbde * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
252116Sjkh * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
262116Sjkh * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
27176465Sbde * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
28176465Sbde * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
292116Sjkh */
302116Sjkh
312116Sjkh#include <sys/cdefs.h>
322116Sjkh__FBSDID("$FreeBSD: head/sys/kern/imgact_elf.c 166073 2007-01-17 14:58:53Z delphij $");
33151864Sbde
34239192Sdim#include "opt_compat.h"
352116Sjkh
362116Sjkh#include <sys/param.h>
372116Sjkh#include <sys/exec.h>
38151855Sbde#include <sys/fcntl.h>
39151864Sbde#include <sys/imgact.h>
40176640Sbde#include <sys/imgact_elf.h>
41176640Sbde#include <sys/kernel.h>
42151855Sbde#include <sys/lock.h>
43239192Sdim#include <sys/malloc.h>
44239195Sdim#include <sys/mount.h>
45176569Sbde#include <sys/mutex.h>
46239192Sdim#include <sys/mman.h>
47176552Sbde#include <sys/namei.h>
482116Sjkh#include <sys/pioctl.h>
49176476Sbde#include <sys/proc.h>
50176558Sbde#include <sys/procfs.h>
51152707Sbde#include <sys/resourcevar.h>
52152707Sbde#include <sys/sf_buf.h>
532116Sjkh#include <sys/systm.h>
542116Sjkh#include <sys/signalvar.h>
552116Sjkh#include <sys/stat.h>
56152535Sbde#include <sys/sx.h>
57176640Sbde#include <sys/syscall.h>
58176465Sbde#include <sys/sysctl.h>
59176476Sbde#include <sys/sysent.h>
60176465Sbde#include <sys/vnode.h>
61176467Sbde
62176465Sbde#include <vm/vm.h>
63176465Sbde#include <vm/vm_kern.h>
64176467Sbde#include <vm/vm_param.h>
65176465Sbde#include <vm/pmap.h>
66176476Sbde#include <vm/vm_map.h>
67151864Sbde#include <vm/vm_object.h>
68176552Sbde#include <vm/vm_extern.h>
69176476Sbde
702116Sjkh#include <machine/elf.h>
718870Srgrimes#include <machine/md_var.h>
722116Sjkh
732116Sjkh#if defined(COMPAT_IA32) && __ELF_WORD_SIZE == 32
742116Sjkh#include <machine/fpu.h>
75176552Sbde#include <compat/ia32/ia32_reg.h>
762116Sjkh#endif
77152707Sbde
/*
 * Byte index into e_ident[] at which get_brandinfo() looks for the old
 * string-style brand (compared against Elf_Brandinfo.compat_3_brand).
 */
#define OLD_EI_BRAND	8

/* Forward declarations of the file-local helpers defined below. */
static int __elfN(check_header)(const Elf_Ehdr *hdr);
static Elf_Brandinfo *__elfN(get_brandinfo)(const Elf_Ehdr *hdr,
    const char *interp);
static int __elfN(load_file)(struct proc *p, const char *file, u_long *addr,
    u_long *entry, size_t pagesize);
static int __elfN(load_section)(struct vmspace *vmspace, vm_object_t object,
    vm_offset_t offset, caddr_t vmaddr, size_t memsz, size_t filsz,
    vm_prot_t prot, size_t pagesize);
static int __CONCAT(exec_, __elfN(imgact))(struct image_params *imgp);

/* Parent sysctl node for the per-word-size ELF knobs declared below. */
SYSCTL_NODE(_kern, OID_AUTO, __CONCAT(elf, __ELF_WORD_SIZE), CTLFLAG_RW, 0,
    "");

/*
 * Brand value that get_brandinfo() falls back to when neither the ELF
 * header nor the interpreter path selects a brand.  Exposed both as a
 * read/write sysctl and as a tunable; the initial value is -1.
 */
int __elfN(fallback_brand) = -1;
SYSCTL_INT(__CONCAT(_kern_elf, __ELF_WORD_SIZE), OID_AUTO,
    fallback_brand, CTLFLAG_RW, &__elfN(fallback_brand), 0,
    __XSTRING(__CONCAT(ELF, __ELF_WORD_SIZE)) " brand of last resort");
TUNABLE_INT("kern.elf" __XSTRING(__ELF_WORD_SIZE) ".fallback_brand",
    &__elfN(fallback_brand));

/*
 * Copied into Elf_Auxargs.trace by the image activator; a non-zero value
 * makes freebsd_fixup() emit an AT_DEBUG aux entry.
 */
static int elf_trace = 0;
SYSCTL_INT(_debug, OID_AUTO, __elfN(trace), CTLFLAG_RW, &elf_trace, 0, "");

/*
 * When non-zero, each_writable_segment() only visits map entries that are
 * both readable and writable instead of every accessible entry.
 */
static int elf_legacy_coredump = 0;
SYSCTL_INT(_debug, OID_AUTO, __elfN(legacy_coredump), CTLFLAG_RW,
    &elf_legacy_coredump, 0, "");

/*
 * Table of registered brands.  A NULL slot is free; see
 * insert_brand_entry() and remove_brand_entry().
 */
static Elf_Brandinfo *elf_brand_list[MAX_BRANDS];
108
109int
110__elfN(insert_brand_entry)(Elf_Brandinfo *entry)
111{
112	int i;
113
114	for (i = 0; i < MAX_BRANDS; i++) {
115		if (elf_brand_list[i] == NULL) {
116			elf_brand_list[i] = entry;
117			break;
118		}
119	}
120	if (i == MAX_BRANDS)
121		return (-1);
122	return (0);
123}
124
125int
126__elfN(remove_brand_entry)(Elf_Brandinfo *entry)
127{
128	int i;
129
130	for (i = 0; i < MAX_BRANDS; i++) {
131		if (elf_brand_list[i] == entry) {
132			elf_brand_list[i] = NULL;
133			break;
134		}
135	}
136	if (i == MAX_BRANDS)
137		return (-1);
138	return (0);
139}
140
141int
142__elfN(brand_inuse)(Elf_Brandinfo *entry)
143{
144	struct proc *p;
145	int rval = FALSE;
146
147	sx_slock(&allproc_lock);
148	FOREACH_PROC_IN_SYSTEM(p) {
149		if (p->p_sysent == entry->sysvec) {
150			rval = TRUE;
151			break;
152		}
153	}
154	sx_sunlock(&allproc_lock);
155
156	return (rval);
157}
158
159static Elf_Brandinfo *
160__elfN(get_brandinfo)(const Elf_Ehdr *hdr, const char *interp)
161{
162	Elf_Brandinfo *bi;
163	int i;
164
165	/*
166	 * We support three types of branding -- (1) the ELF EI_OSABI field
167	 * that SCO added to the ELF spec, (2) FreeBSD 3.x's traditional string
168	 * branding w/in the ELF header, and (3) path of the `interp_path'
169	 * field.  We should also look for an ".note.ABI-tag" ELF section now
170	 * in all Linux ELF binaries, FreeBSD 4.1+, and some NetBSD ones.
171	 */
172
173	/* If the executable has a brand, search for it in the brand list. */
174	for (i = 0; i < MAX_BRANDS; i++) {
175		bi = elf_brand_list[i];
176		if (bi != NULL && hdr->e_machine == bi->machine &&
177		    (hdr->e_ident[EI_OSABI] == bi->brand ||
178		    strncmp((const char *)&hdr->e_ident[OLD_EI_BRAND],
179		    bi->compat_3_brand, strlen(bi->compat_3_brand)) == 0))
180			return (bi);
181	}
182
183	/* Lacking a known brand, search for a recognized interpreter. */
184	if (interp != NULL) {
185		for (i = 0; i < MAX_BRANDS; i++) {
186			bi = elf_brand_list[i];
187			if (bi != NULL && hdr->e_machine == bi->machine &&
188			    strcmp(interp, bi->interp_path) == 0)
189				return (bi);
190		}
191	}
192
193	/* Lacking a recognized interpreter, try the default brand */
194	for (i = 0; i < MAX_BRANDS; i++) {
195		bi = elf_brand_list[i];
196		if (bi != NULL && hdr->e_machine == bi->machine &&
197		    __elfN(fallback_brand) == bi->brand)
198			return (bi);
199	}
200	return (NULL);
201}
202
203static int
204__elfN(check_header)(const Elf_Ehdr *hdr)
205{
206	Elf_Brandinfo *bi;
207	int i;
208
209	if (!IS_ELF(*hdr) ||
210	    hdr->e_ident[EI_CLASS] != ELF_TARG_CLASS ||
211	    hdr->e_ident[EI_DATA] != ELF_TARG_DATA ||
212	    hdr->e_ident[EI_VERSION] != EV_CURRENT ||
213	    hdr->e_phentsize != sizeof(Elf_Phdr) ||
214	    hdr->e_version != ELF_TARG_VER)
215		return (ENOEXEC);
216
217	/*
218	 * Make sure we have at least one brand for this machine.
219	 */
220
221	for (i = 0; i < MAX_BRANDS; i++) {
222		bi = elf_brand_list[i];
223		if (bi != NULL && bi->machine == hdr->e_machine)
224			break;
225	}
226	if (i == MAX_BRANDS)
227		return (ENOEXEC);
228
229	return (0);
230}
231
232static int
233__elfN(map_partial)(vm_map_t map, vm_object_t object, vm_ooffset_t offset,
234    vm_offset_t start, vm_offset_t end, vm_prot_t prot)
235{
236	struct sf_buf *sf;
237	int error;
238	vm_offset_t off;
239
240	/*
241	 * Create the page if it doesn't exist yet. Ignore errors.
242	 */
243	vm_map_lock(map);
244	vm_map_insert(map, NULL, 0, trunc_page(start), round_page(end),
245	    VM_PROT_ALL, VM_PROT_ALL, 0);
246	vm_map_unlock(map);
247
248	/*
249	 * Find the page from the underlying object.
250	 */
251	if (object) {
252		sf = vm_imgact_map_page(object, offset);
253		if (sf == NULL)
254			return (KERN_FAILURE);
255		off = offset - trunc_page(offset);
256		error = copyout((caddr_t)sf_buf_kva(sf) + off, (caddr_t)start,
257		    end - start);
258		vm_imgact_unmap_page(sf);
259		if (error) {
260			return (KERN_FAILURE);
261		}
262	}
263
264	return (KERN_SUCCESS);
265}
266
267static int
268__elfN(map_insert)(vm_map_t map, vm_object_t object, vm_ooffset_t offset,
269    vm_offset_t start, vm_offset_t end, vm_prot_t prot, int cow)
270{
271	struct sf_buf *sf;
272	vm_offset_t off;
273	vm_size_t sz;
274	int error, rv;
275
276	if (start != trunc_page(start)) {
277		rv = __elfN(map_partial)(map, object, offset, start,
278		    round_page(start), prot);
279		if (rv)
280			return (rv);
281		offset += round_page(start) - start;
282		start = round_page(start);
283	}
284	if (end != round_page(end)) {
285		rv = __elfN(map_partial)(map, object, offset +
286		    trunc_page(end) - start, trunc_page(end), end, prot);
287		if (rv)
288			return (rv);
289		end = trunc_page(end);
290	}
291	if (end > start) {
292		if (offset & PAGE_MASK) {
293			/*
294			 * The mapping is not page aligned. This means we have
295			 * to copy the data. Sigh.
296			 */
297			rv = vm_map_find(map, NULL, 0, &start, end - start,
298			    FALSE, prot | VM_PROT_WRITE, VM_PROT_ALL, 0);
299			if (rv)
300				return (rv);
301			if (object == NULL)
302				return (KERN_SUCCESS);
303			for (; start < end; start += sz) {
304				sf = vm_imgact_map_page(object, offset);
305				if (sf == NULL)
306					return (KERN_FAILURE);
307				off = offset - trunc_page(offset);
308				sz = end - start;
309				if (sz > PAGE_SIZE - off)
310					sz = PAGE_SIZE - off;
311				error = copyout((caddr_t)sf_buf_kva(sf) + off,
312				    (caddr_t)start, sz);
313				vm_imgact_unmap_page(sf);
314				if (error) {
315					return (KERN_FAILURE);
316				}
317				offset += sz;
318			}
319			rv = KERN_SUCCESS;
320		} else {
321			vm_object_reference(object);
322			vm_map_lock(map);
323			rv = vm_map_insert(map, object, offset, start, end,
324			    prot, VM_PROT_ALL, cow);
325			vm_map_unlock(map);
326			if (rv != KERN_SUCCESS)
327				vm_object_deallocate(object);
328		}
329		return (rv);
330	} else {
331		return (KERN_SUCCESS);
332	}
333}
334
335static int
336__elfN(load_section)(struct vmspace *vmspace,
337	vm_object_t object, vm_offset_t offset,
338	caddr_t vmaddr, size_t memsz, size_t filsz, vm_prot_t prot,
339	size_t pagesize)
340{
341	struct sf_buf *sf;
342	size_t map_len;
343	vm_offset_t map_addr;
344	int error, rv, cow;
345	size_t copy_len;
346	vm_offset_t file_addr;
347
348	/*
349	 * It's necessary to fail if the filsz + offset taken from the
350	 * header is greater than the actual file pager object's size.
351	 * If we were to allow this, then the vm_map_find() below would
352	 * walk right off the end of the file object and into the ether.
353	 *
354	 * While I'm here, might as well check for something else that
355	 * is invalid: filsz cannot be greater than memsz.
356	 */
357	if ((off_t)filsz + offset > object->un_pager.vnp.vnp_size ||
358	    filsz > memsz) {
359		uprintf("elf_load_section: truncated ELF file\n");
360		return (ENOEXEC);
361	}
362
363#define trunc_page_ps(va, ps)	((va) & ~(ps - 1))
364#define round_page_ps(va, ps)	(((va) + (ps - 1)) & ~(ps - 1))
365
366	map_addr = trunc_page_ps((vm_offset_t)vmaddr, pagesize);
367	file_addr = trunc_page_ps(offset, pagesize);
368
369	/*
370	 * We have two choices.  We can either clear the data in the last page
371	 * of an oversized mapping, or we can start the anon mapping a page
372	 * early and copy the initialized data into that first page.  We
373	 * choose the second..
374	 */
375	if (memsz > filsz)
376		map_len = trunc_page_ps(offset + filsz, pagesize) - file_addr;
377	else
378		map_len = round_page_ps(offset + filsz, pagesize) - file_addr;
379
380	if (map_len != 0) {
381		/* cow flags: don't dump readonly sections in core */
382		cow = MAP_COPY_ON_WRITE | MAP_PREFAULT |
383		    (prot & VM_PROT_WRITE ? 0 : MAP_DISABLE_COREDUMP);
384
385		rv = __elfN(map_insert)(&vmspace->vm_map,
386				      object,
387				      file_addr,	/* file offset */
388				      map_addr,		/* virtual start */
389				      map_addr + map_len,/* virtual end */
390				      prot,
391				      cow);
392		if (rv != KERN_SUCCESS)
393			return (EINVAL);
394
395		/* we can stop now if we've covered it all */
396		if (memsz == filsz) {
397			return (0);
398		}
399	}
400
401
402	/*
403	 * We have to get the remaining bit of the file into the first part
404	 * of the oversized map segment.  This is normally because the .data
405	 * segment in the file is extended to provide bss.  It's a neat idea
406	 * to try and save a page, but it's a pain in the behind to implement.
407	 */
408	copy_len = (offset + filsz) - trunc_page_ps(offset + filsz, pagesize);
409	map_addr = trunc_page_ps((vm_offset_t)vmaddr + filsz, pagesize);
410	map_len = round_page_ps((vm_offset_t)vmaddr + memsz, pagesize) -
411	    map_addr;
412
413	/* This had damn well better be true! */
414	if (map_len != 0) {
415		rv = __elfN(map_insert)(&vmspace->vm_map, NULL, 0, map_addr,
416		    map_addr + map_len, VM_PROT_ALL, 0);
417		if (rv != KERN_SUCCESS) {
418			return (EINVAL);
419		}
420	}
421
422	if (copy_len != 0) {
423		vm_offset_t off;
424
425		sf = vm_imgact_map_page(object, offset + filsz);
426		if (sf == NULL)
427			return (EIO);
428
429		/* send the page fragment to user space */
430		off = trunc_page_ps(offset + filsz, pagesize) -
431		    trunc_page(offset + filsz);
432		error = copyout((caddr_t)sf_buf_kva(sf) + off,
433		    (caddr_t)map_addr, copy_len);
434		vm_imgact_unmap_page(sf);
435		if (error) {
436			return (error);
437		}
438	}
439
440	/*
441	 * set it to the specified protection.
442	 * XXX had better undo the damage from pasting over the cracks here!
443	 */
444	vm_map_protect(&vmspace->vm_map, trunc_page(map_addr),
445	    round_page(map_addr + map_len),  prot, FALSE);
446
447	return (0);
448}
449
450/*
451 * Load the file "file" into memory.  It may be either a shared object
452 * or an executable.
453 *
454 * The "addr" reference parameter is in/out.  On entry, it specifies
455 * the address where a shared object should be loaded.  If the file is
456 * an executable, this value is ignored.  On exit, "addr" specifies
457 * where the file was actually loaded.
458 *
459 * The "entry" reference parameter is out only.  On exit, it specifies
460 * the entry point for the loaded file.
461 */
462static int
463__elfN(load_file)(struct proc *p, const char *file, u_long *addr,
464	u_long *entry, size_t pagesize)
465{
466	struct {
467		struct nameidata nd;
468		struct vattr attr;
469		struct image_params image_params;
470	} *tempdata;
471	const Elf_Ehdr *hdr = NULL;
472	const Elf_Phdr *phdr = NULL;
473	struct nameidata *nd;
474	struct vmspace *vmspace = p->p_vmspace;
475	struct vattr *attr;
476	struct image_params *imgp;
477	vm_prot_t prot;
478	u_long rbase;
479	u_long base_addr = 0;
480	int vfslocked, error, i, numsegs;
481
482	if (curthread->td_proc != p)
483		panic("elf_load_file - thread");	/* XXXKSE DIAGNOSTIC */
484
485	tempdata = malloc(sizeof(*tempdata), M_TEMP, M_WAITOK);
486	nd = &tempdata->nd;
487	attr = &tempdata->attr;
488	imgp = &tempdata->image_params;
489
490	/*
491	 * Initialize part of the common data
492	 */
493	imgp->proc = p;
494	imgp->attr = attr;
495	imgp->firstpage = NULL;
496	imgp->image_header = NULL;
497	imgp->object = NULL;
498	imgp->execlabel = NULL;
499
500	/* XXXKSE */
501	NDINIT(nd, LOOKUP, MPSAFE|LOCKLEAF|FOLLOW, UIO_SYSSPACE, file,
502	    curthread);
503	vfslocked = 0;
504	if ((error = namei(nd)) != 0) {
505		nd->ni_vp = NULL;
506		goto fail;
507	}
508	vfslocked = NDHASGIANT(nd);
509	NDFREE(nd, NDF_ONLY_PNBUF);
510	imgp->vp = nd->ni_vp;
511
512	/*
513	 * Check permissions, modes, uid, etc on the file, and "open" it.
514	 */
515	error = exec_check_permissions(imgp);
516	if (error)
517		goto fail;
518
519	error = exec_map_first_page(imgp);
520	if (error)
521		goto fail;
522
523	/*
524	 * Also make certain that the interpreter stays the same, so set
525	 * its VV_TEXT flag, too.
526	 */
527	nd->ni_vp->v_vflag |= VV_TEXT;
528
529	imgp->object = nd->ni_vp->v_object;
530
531	hdr = (const Elf_Ehdr *)imgp->image_header;
532	if ((error = __elfN(check_header)(hdr)) != 0)
533		goto fail;
534	if (hdr->e_type == ET_DYN)
535		rbase = *addr;
536	else if (hdr->e_type == ET_EXEC)
537		rbase = 0;
538	else {
539		error = ENOEXEC;
540		goto fail;
541	}
542
543	/* Only support headers that fit within first page for now      */
544	/*    (multiplication of two Elf_Half fields will not overflow) */
545	if ((hdr->e_phoff > PAGE_SIZE) ||
546	    (hdr->e_phentsize * hdr->e_phnum) > PAGE_SIZE - hdr->e_phoff) {
547		error = ENOEXEC;
548		goto fail;
549	}
550
551	phdr = (const Elf_Phdr *)(imgp->image_header + hdr->e_phoff);
552
553	for (i = 0, numsegs = 0; i < hdr->e_phnum; i++) {
554		if (phdr[i].p_type == PT_LOAD) {	/* Loadable segment */
555			prot = 0;
556			if (phdr[i].p_flags & PF_X)
557  				prot |= VM_PROT_EXECUTE;
558			if (phdr[i].p_flags & PF_W)
559  				prot |= VM_PROT_WRITE;
560			if (phdr[i].p_flags & PF_R)
561  				prot |= VM_PROT_READ;
562
563			if ((error = __elfN(load_section)(vmspace,
564			    imgp->object, phdr[i].p_offset,
565			    (caddr_t)(uintptr_t)phdr[i].p_vaddr + rbase,
566			    phdr[i].p_memsz, phdr[i].p_filesz, prot,
567			    pagesize)) != 0)
568				goto fail;
569			/*
570			 * Establish the base address if this is the
571			 * first segment.
572			 */
573			if (numsegs == 0)
574  				base_addr = trunc_page(phdr[i].p_vaddr +
575				    rbase);
576			numsegs++;
577		}
578	}
579	*addr = base_addr;
580	*entry = (unsigned long)hdr->e_entry + rbase;
581
582fail:
583	if (imgp->firstpage)
584		exec_unmap_first_page(imgp);
585
586	if (nd->ni_vp)
587		vput(nd->ni_vp);
588
589	VFS_UNLOCK_GIANT(vfslocked);
590	free(tempdata, M_TEMP);
591
592	return (error);
593}
594
595static int
596__CONCAT(exec_, __elfN(imgact))(struct image_params *imgp)
597{
598	const Elf_Ehdr *hdr = (const Elf_Ehdr *)imgp->image_header;
599	const Elf_Phdr *phdr;
600	Elf_Auxargs *elf_auxargs;
601	struct vmspace *vmspace;
602	vm_prot_t prot;
603	u_long text_size = 0, data_size = 0, total_size = 0;
604	u_long text_addr = 0, data_addr = 0;
605	u_long seg_size, seg_addr;
606	u_long addr, entry = 0, proghdr = 0;
607	int error = 0, i;
608	const char *interp = NULL;
609	Elf_Brandinfo *brand_info;
610	char *path;
611	struct thread *td = curthread;
612	struct sysentvec *sv;
613
614	/*
615	 * Do we have a valid ELF header ?
616	 *
617	 * Only allow ET_EXEC & ET_DYN here, reject ET_DYN later
618	 * if particular brand doesn't support it.
619	 */
620	if (__elfN(check_header)(hdr) != 0 ||
621	    (hdr->e_type != ET_EXEC && hdr->e_type != ET_DYN))
622		return (-1);
623
624	/*
625	 * From here on down, we return an errno, not -1, as we've
626	 * detected an ELF file.
627	 */
628
629	if ((hdr->e_phoff > PAGE_SIZE) ||
630	    (hdr->e_phoff + hdr->e_phentsize * hdr->e_phnum) > PAGE_SIZE) {
631		/* Only support headers in first page for now */
632		return (ENOEXEC);
633	}
634	phdr = (const Elf_Phdr *)(imgp->image_header + hdr->e_phoff);
635	for (i = 0; i < hdr->e_phnum; i++) {
636		if (phdr[i].p_type == PT_INTERP) {
637			/* Path to interpreter */
638			if (phdr[i].p_filesz > MAXPATHLEN ||
639			    phdr[i].p_offset + phdr[i].p_filesz > PAGE_SIZE)
640				return (ENOEXEC);
641			interp = imgp->image_header + phdr[i].p_offset;
642			break;
643		}
644	}
645
646	brand_info = __elfN(get_brandinfo)(hdr, interp);
647	if (brand_info == NULL) {
648		uprintf("ELF binary type \"%u\" not known.\n",
649		    hdr->e_ident[EI_OSABI]);
650		return (ENOEXEC);
651	}
652	if (hdr->e_type == ET_DYN &&
653	    (brand_info->flags & BI_CAN_EXEC_DYN) == 0)
654		return (ENOEXEC);
655	sv = brand_info->sysvec;
656	if (interp != NULL && brand_info->interp_newpath != NULL)
657		interp = brand_info->interp_newpath;
658
659	/*
660	 * Avoid a possible deadlock if the current address space is destroyed
661	 * and that address space maps the locked vnode.  In the common case,
662	 * the locked vnode's v_usecount is decremented but remains greater
663	 * than zero.  Consequently, the vnode lock is not needed by vrele().
664	 * However, in cases where the vnode lock is external, such as nullfs,
665	 * v_usecount may become zero.
666	 */
667	VOP_UNLOCK(imgp->vp, 0, td);
668
669	exec_new_vmspace(imgp, sv);
670
671	vn_lock(imgp->vp, LK_EXCLUSIVE | LK_RETRY, td);
672
673	vmspace = imgp->proc->p_vmspace;
674
675	for (i = 0; i < hdr->e_phnum; i++) {
676		switch (phdr[i].p_type) {
677		case PT_LOAD:	/* Loadable segment */
678			prot = 0;
679			if (phdr[i].p_flags & PF_X)
680  				prot |= VM_PROT_EXECUTE;
681			if (phdr[i].p_flags & PF_W)
682  				prot |= VM_PROT_WRITE;
683			if (phdr[i].p_flags & PF_R)
684  				prot |= VM_PROT_READ;
685
686#if defined(__ia64__) && __ELF_WORD_SIZE == 32 && defined(IA32_ME_HARDER)
687			/*
688			 * Some x86 binaries assume read == executable,
689			 * notably the M3 runtime and therefore cvsup
690			 */
691			if (prot & VM_PROT_READ)
692				prot |= VM_PROT_EXECUTE;
693#endif
694
695			if ((error = __elfN(load_section)(vmspace,
696			    imgp->object, phdr[i].p_offset,
697			    (caddr_t)(uintptr_t)phdr[i].p_vaddr,
698			    phdr[i].p_memsz, phdr[i].p_filesz, prot,
699			    sv->sv_pagesize)) != 0)
700				return (error);
701
702			/*
703			 * If this segment contains the program headers,
704			 * remember their virtual address for the AT_PHDR
705			 * aux entry. Static binaries don't usually include
706			 * a PT_PHDR entry.
707			 */
708			if (phdr[i].p_offset == 0 &&
709			    hdr->e_phoff + hdr->e_phnum * hdr->e_phentsize
710				<= phdr[i].p_filesz)
711				proghdr = phdr[i].p_vaddr + hdr->e_phoff;
712
713			seg_addr = trunc_page(phdr[i].p_vaddr);
714			seg_size = round_page(phdr[i].p_memsz +
715			    phdr[i].p_vaddr - seg_addr);
716
717			/*
718			 * Is this .text or .data?  We can't use
719			 * VM_PROT_WRITE or VM_PROT_EXEC, it breaks the
720			 * alpha terribly and possibly does other bad
721			 * things so we stick to the old way of figuring
722			 * it out:  If the segment contains the program
723			 * entry point, it's a text segment, otherwise it
724			 * is a data segment.
725			 *
726			 * Note that obreak() assumes that data_addr +
727			 * data_size == end of data load area, and the ELF
728			 * file format expects segments to be sorted by
729			 * address.  If multiple data segments exist, the
730			 * last one will be used.
731			 */
732			if (hdr->e_entry >= phdr[i].p_vaddr &&
733			    hdr->e_entry < (phdr[i].p_vaddr +
734			    phdr[i].p_memsz)) {
735				text_size = seg_size;
736				text_addr = seg_addr;
737				entry = (u_long)hdr->e_entry;
738			} else {
739				data_size = seg_size;
740				data_addr = seg_addr;
741			}
742			total_size += seg_size;
743			break;
744		case PT_PHDR: 	/* Program header table info */
745			proghdr = phdr[i].p_vaddr;
746			break;
747		default:
748			break;
749		}
750	}
751
752	if (data_addr == 0 && data_size == 0) {
753		data_addr = text_addr;
754		data_size = text_size;
755	}
756
757	/*
758	 * Check limits.  It should be safe to check the
759	 * limits after loading the segments since we do
760	 * not actually fault in all the segments pages.
761	 */
762	PROC_LOCK(imgp->proc);
763	if (data_size > lim_cur(imgp->proc, RLIMIT_DATA) ||
764	    text_size > maxtsiz ||
765	    total_size > lim_cur(imgp->proc, RLIMIT_VMEM)) {
766		PROC_UNLOCK(imgp->proc);
767		return (ENOMEM);
768	}
769
770	vmspace->vm_tsize = text_size >> PAGE_SHIFT;
771	vmspace->vm_taddr = (caddr_t)(uintptr_t)text_addr;
772	vmspace->vm_dsize = data_size >> PAGE_SHIFT;
773	vmspace->vm_daddr = (caddr_t)(uintptr_t)data_addr;
774
775	/*
776	 * We load the dynamic linker where a userland call
777	 * to mmap(0, ...) would put it.  The rationale behind this
778	 * calculation is that it leaves room for the heap to grow to
779	 * its maximum allowed size.
780	 */
781	addr = round_page((vm_offset_t)imgp->proc->p_vmspace->vm_daddr +
782	    lim_max(imgp->proc, RLIMIT_DATA));
783	PROC_UNLOCK(imgp->proc);
784
785	imgp->entry_addr = entry;
786
787	imgp->proc->p_sysent = sv;
788	if (interp != NULL) {
789		VOP_UNLOCK(imgp->vp, 0, td);
790		if (brand_info->emul_path != NULL &&
791		    brand_info->emul_path[0] != '\0') {
792			path = malloc(MAXPATHLEN, M_TEMP, M_WAITOK);
793			snprintf(path, MAXPATHLEN, "%s%s",
794			    brand_info->emul_path, interp);
795			error = __elfN(load_file)(imgp->proc, path, &addr,
796			    &imgp->entry_addr, sv->sv_pagesize);
797			free(path, M_TEMP);
798			if (error == 0)
799				interp = NULL;
800		}
801		if (interp != NULL) {
802			error = __elfN(load_file)(imgp->proc, interp, &addr,
803			    &imgp->entry_addr, sv->sv_pagesize);
804		}
805		vn_lock(imgp->vp, LK_EXCLUSIVE | LK_RETRY, td);
806		if (error != 0) {
807			uprintf("ELF interpreter %s not found\n", interp);
808			return (error);
809		}
810	}
811
812	/*
813	 * Construct auxargs table (used by the fixup routine)
814	 */
815	elf_auxargs = malloc(sizeof(Elf_Auxargs), M_TEMP, M_WAITOK);
816	elf_auxargs->execfd = -1;
817	elf_auxargs->phdr = proghdr;
818	elf_auxargs->phent = hdr->e_phentsize;
819	elf_auxargs->phnum = hdr->e_phnum;
820	elf_auxargs->pagesz = PAGE_SIZE;
821	elf_auxargs->base = addr;
822	elf_auxargs->flags = 0;
823	elf_auxargs->entry = entry;
824	elf_auxargs->trace = elf_trace;
825
826	imgp->auxargs = elf_auxargs;
827	imgp->interpreted = 0;
828
829	return (error);
830}
831
832#define	suword __CONCAT(suword, __ELF_WORD_SIZE)
833
834int
835__elfN(freebsd_fixup)(register_t **stack_base, struct image_params *imgp)
836{
837	Elf_Auxargs *args = (Elf_Auxargs *)imgp->auxargs;
838	Elf_Addr *base;
839	Elf_Addr *pos;
840
841	base = (Elf_Addr *)*stack_base;
842	pos = base + (imgp->args->argc + imgp->args->envc + 2);
843
844	if (args->trace) {
845		AUXARGS_ENTRY(pos, AT_DEBUG, 1);
846	}
847	if (args->execfd != -1) {
848		AUXARGS_ENTRY(pos, AT_EXECFD, args->execfd);
849	}
850	AUXARGS_ENTRY(pos, AT_PHDR, args->phdr);
851	AUXARGS_ENTRY(pos, AT_PHENT, args->phent);
852	AUXARGS_ENTRY(pos, AT_PHNUM, args->phnum);
853	AUXARGS_ENTRY(pos, AT_PAGESZ, args->pagesz);
854	AUXARGS_ENTRY(pos, AT_FLAGS, args->flags);
855	AUXARGS_ENTRY(pos, AT_ENTRY, args->entry);
856	AUXARGS_ENTRY(pos, AT_BASE, args->base);
857	AUXARGS_ENTRY(pos, AT_NULL, 0);
858
859	free(imgp->auxargs, M_TEMP);
860	imgp->auxargs = NULL;
861
862	base--;
863	suword(base, (long)imgp->args->argc);
864	*stack_base = (register_t *)base;
865	return (0);
866}
867
868/*
869 * Code for generating ELF core dumps.
870 */
871
/*
 * Callback type used by each_writable_segment(): invoked with a map entry
 * and the caller-supplied closure pointer.
 */
typedef void (*segment_callback)(vm_map_entry_t, void *);

/* Closure for cb_put_phdr(). */
struct phdr_closure {
	Elf_Phdr *phdr;		/* Program header to fill in */
	Elf_Off offset;		/* Offset of segment in core file */
};

/* Closure for cb_size_segment(). */
struct sseg_closure {
	int count;		/* Count of writable segments. */
	size_t size;		/* Total size of all writable segments. */
};

/* Forward declarations of the core dump helpers. */
static void cb_put_phdr(vm_map_entry_t, void *);
static void cb_size_segment(vm_map_entry_t, void *);
static void each_writable_segment(struct thread *, segment_callback, void *);
static int __elfN(corehdr)(struct thread *, struct vnode *, struct ucred *,
    int, void *, size_t);
static void __elfN(puthdr)(struct thread *, void *, size_t *, int);
static void __elfN(putnote)(void *, size_t *, const char *, int,
    const void *, size_t);

/* Defined elsewhere in the kernel. */
extern int osreldate;
896
897int
898__elfN(coredump)(td, vp, limit)
899	struct thread *td;
900	struct vnode *vp;
901	off_t limit;
902{
903	struct ucred *cred = td->td_ucred;
904	int error = 0;
905	struct sseg_closure seginfo;
906	void *hdr;
907	size_t hdrsize;
908
909	/* Size the program segments. */
910	seginfo.count = 0;
911	seginfo.size = 0;
912	each_writable_segment(td, cb_size_segment, &seginfo);
913
914	/*
915	 * Calculate the size of the core file header area by making
916	 * a dry run of generating it.  Nothing is written, but the
917	 * size is calculated.
918	 */
919	hdrsize = 0;
920	__elfN(puthdr)(td, (void *)NULL, &hdrsize, seginfo.count);
921
922	if (hdrsize + seginfo.size >= limit)
923		return (EFAULT);
924
925	/*
926	 * Allocate memory for building the header, fill it up,
927	 * and write it out.
928	 */
929	hdr = malloc(hdrsize, M_TEMP, M_WAITOK);
930	if (hdr == NULL) {
931		return (EINVAL);
932	}
933	error = __elfN(corehdr)(td, vp, cred, seginfo.count, hdr, hdrsize);
934
935	/* Write the contents of all of the writable segments. */
936	if (error == 0) {
937		Elf_Phdr *php;
938		off_t offset;
939		int i;
940
941		php = (Elf_Phdr *)((char *)hdr + sizeof(Elf_Ehdr)) + 1;
942		offset = hdrsize;
943		for (i = 0; i < seginfo.count; i++) {
944			error = vn_rdwr_inchunks(UIO_WRITE, vp,
945			    (caddr_t)(uintptr_t)php->p_vaddr,
946			    php->p_filesz, offset, UIO_USERSPACE,
947			    IO_UNIT | IO_DIRECT, cred, NOCRED, NULL,
948			    curthread); /* XXXKSE */
949			if (error != 0)
950				break;
951			offset += php->p_filesz;
952			php++;
953		}
954	}
955	free(hdr, M_TEMP);
956
957	return (error);
958}
959
960/*
961 * A callback for each_writable_segment() to write out the segment's
962 * program header entry.
963 */
964static void
965cb_put_phdr(entry, closure)
966	vm_map_entry_t entry;
967	void *closure;
968{
969	struct phdr_closure *phc = (struct phdr_closure *)closure;
970	Elf_Phdr *phdr = phc->phdr;
971
972	phc->offset = round_page(phc->offset);
973
974	phdr->p_type = PT_LOAD;
975	phdr->p_offset = phc->offset;
976	phdr->p_vaddr = entry->start;
977	phdr->p_paddr = 0;
978	phdr->p_filesz = phdr->p_memsz = entry->end - entry->start;
979	phdr->p_align = PAGE_SIZE;
980	phdr->p_flags = 0;
981	if (entry->protection & VM_PROT_READ)
982		phdr->p_flags |= PF_R;
983	if (entry->protection & VM_PROT_WRITE)
984		phdr->p_flags |= PF_W;
985	if (entry->protection & VM_PROT_EXECUTE)
986		phdr->p_flags |= PF_X;
987
988	phc->offset += phdr->p_filesz;
989	phc->phdr++;
990}
991
992/*
993 * A callback for each_writable_segment() to gather information about
994 * the number of segments and their total size.
995 */
996static void
997cb_size_segment(entry, closure)
998	vm_map_entry_t entry;
999	void *closure;
1000{
1001	struct sseg_closure *ssc = (struct sseg_closure *)closure;
1002
1003	ssc->count++;
1004	ssc->size += entry->end - entry->start;
1005}
1006
1007/*
1008 * For each writable segment in the process's memory map, call the given
1009 * function with a pointer to the map entry and some arbitrary
1010 * caller-supplied data.
1011 */
1012static void
1013each_writable_segment(td, func, closure)
1014	struct thread *td;
1015	segment_callback func;
1016	void *closure;
1017{
1018	struct proc *p = td->td_proc;
1019	vm_map_t map = &p->p_vmspace->vm_map;
1020	vm_map_entry_t entry;
1021	vm_object_t backing_object, object;
1022	boolean_t ignore_entry;
1023
1024	vm_map_lock_read(map);
1025	for (entry = map->header.next; entry != &map->header;
1026	    entry = entry->next) {
1027		/*
1028		 * Don't dump inaccessible mappings, deal with legacy
1029		 * coredump mode.
1030		 *
1031		 * Note that read-only segments related to the elf binary
1032		 * are marked MAP_ENTRY_NOCOREDUMP now so we no longer
1033		 * need to arbitrarily ignore such segments.
1034		 */
1035		if (elf_legacy_coredump) {
1036			if ((entry->protection & VM_PROT_RW) != VM_PROT_RW)
1037				continue;
1038		} else {
1039			if ((entry->protection & VM_PROT_ALL) == 0)
1040				continue;
1041		}
1042
1043		/*
1044		 * Dont include memory segment in the coredump if
1045		 * MAP_NOCORE is set in mmap(2) or MADV_NOCORE in
1046		 * madvise(2).  Do not dump submaps (i.e. parts of the
1047		 * kernel map).
1048		 */
1049		if (entry->eflags & (MAP_ENTRY_NOCOREDUMP|MAP_ENTRY_IS_SUB_MAP))
1050			continue;
1051
1052		if ((object = entry->object.vm_object) == NULL)
1053			continue;
1054
1055		/* Ignore memory-mapped devices and such things. */
1056		VM_OBJECT_LOCK(object);
1057		while ((backing_object = object->backing_object) != NULL) {
1058			VM_OBJECT_LOCK(backing_object);
1059			VM_OBJECT_UNLOCK(object);
1060			object = backing_object;
1061		}
1062		ignore_entry = object->type != OBJT_DEFAULT &&
1063		    object->type != OBJT_SWAP && object->type != OBJT_VNODE;
1064		VM_OBJECT_UNLOCK(object);
1065		if (ignore_entry)
1066			continue;
1067
1068		(*func)(entry, closure);
1069	}
1070	vm_map_unlock_read(map);
1071}
1072
1073/*
1074 * Write the core file header to the file, including padding up to
1075 * the page boundary.
1076 */
1077static int
1078__elfN(corehdr)(td, vp, cred, numsegs, hdr, hdrsize)
1079	struct thread *td;
1080	struct vnode *vp;
1081	struct ucred *cred;
1082	int numsegs;
1083	size_t hdrsize;
1084	void *hdr;
1085{
1086	size_t off;
1087
1088	/* Fill in the header. */
1089	bzero(hdr, hdrsize);
1090	off = 0;
1091	__elfN(puthdr)(td, hdr, &off, numsegs);
1092
1093	/* Write it to the core file. */
1094	return (vn_rdwr_inchunks(UIO_WRITE, vp, hdr, hdrsize, (off_t)0,
1095	    UIO_SYSSPACE, IO_UNIT | IO_DIRECT, cred, NOCRED, NULL,
1096	    td)); /* XXXKSE */
1097}
1098
/*
 * Types of the note payloads placed in the core file header.  When a
 * 32-bit ELF image is handled by a kernel built with COMPAT_IA32, the
 * explicit 32-bit structure layouts are used; in every other
 * configuration the native types are used.  In both cases
 * elf_prfpregset_t and elf_fpregset_t name the same underlying type.
 */
#if defined(COMPAT_IA32) && __ELF_WORD_SIZE == 32
typedef struct prstatus32 elf_prstatus_t;
typedef struct prpsinfo32 elf_prpsinfo_t;
typedef struct fpreg32 elf_prfpregset_t;
typedef struct fpreg32 elf_fpregset_t;
typedef struct reg32 elf_gregset_t;
#else
typedef prstatus_t elf_prstatus_t;
typedef prpsinfo_t elf_prpsinfo_t;
typedef prfpregset_t elf_prfpregset_t;
typedef prfpregset_t elf_fpregset_t;
typedef gregset_t elf_gregset_t;
#endif
1112
1113static void
1114__elfN(puthdr)(struct thread *td, void *dst, size_t *off, int numsegs)
1115{
1116	struct {
1117		elf_prstatus_t status;
1118		elf_prfpregset_t fpregset;
1119		elf_prpsinfo_t psinfo;
1120	} *tempdata;
1121	elf_prstatus_t *status;
1122	elf_prfpregset_t *fpregset;
1123	elf_prpsinfo_t *psinfo;
1124	struct proc *p;
1125	struct thread *thr;
1126	size_t ehoff, noteoff, notesz, phoff;
1127
1128	p = td->td_proc;
1129
1130	ehoff = *off;
1131	*off += sizeof(Elf_Ehdr);
1132
1133	phoff = *off;
1134	*off += (numsegs + 1) * sizeof(Elf_Phdr);
1135
1136	noteoff = *off;
1137	/*
1138	 * Don't allocate space for the notes if we're just calculating
1139	 * the size of the header. We also don't collect the data.
1140	 */
1141	if (dst != NULL) {
1142		tempdata = malloc(sizeof(*tempdata), M_TEMP, M_ZERO|M_WAITOK);
1143		status = &tempdata->status;
1144		fpregset = &tempdata->fpregset;
1145		psinfo = &tempdata->psinfo;
1146	} else {
1147		tempdata = NULL;
1148		status = NULL;
1149		fpregset = NULL;
1150		psinfo = NULL;
1151	}
1152
1153	if (dst != NULL) {
1154		psinfo->pr_version = PRPSINFO_VERSION;
1155		psinfo->pr_psinfosz = sizeof(elf_prpsinfo_t);
1156		strlcpy(psinfo->pr_fname, p->p_comm, sizeof(psinfo->pr_fname));
1157		/*
1158		 * XXX - We don't fill in the command line arguments properly
1159		 * yet.
1160		 */
1161		strlcpy(psinfo->pr_psargs, p->p_comm,
1162		    sizeof(psinfo->pr_psargs));
1163	}
1164	__elfN(putnote)(dst, off, "FreeBSD", NT_PRPSINFO, psinfo,
1165	    sizeof *psinfo);
1166
1167	/*
1168	 * To have the debugger select the right thread (LWP) as the initial
1169	 * thread, we dump the state of the thread passed to us in td first.
1170	 * This is the thread that causes the core dump and thus likely to
1171	 * be the right thread one wants to have selected in the debugger.
1172	 */
1173	thr = td;
1174	while (thr != NULL) {
1175		if (dst != NULL) {
1176			status->pr_version = PRSTATUS_VERSION;
1177			status->pr_statussz = sizeof(elf_prstatus_t);
1178			status->pr_gregsetsz = sizeof(elf_gregset_t);
1179			status->pr_fpregsetsz = sizeof(elf_fpregset_t);
1180			status->pr_osreldate = osreldate;
1181			status->pr_cursig = p->p_sig;
1182			status->pr_pid = thr->td_tid;
1183#if defined(COMPAT_IA32) && __ELF_WORD_SIZE == 32
1184			fill_regs32(thr, &status->pr_reg);
1185			fill_fpregs32(thr, fpregset);
1186#else
1187			fill_regs(thr, &status->pr_reg);
1188			fill_fpregs(thr, fpregset);
1189#endif
1190		}
1191		__elfN(putnote)(dst, off, "FreeBSD", NT_PRSTATUS, status,
1192		    sizeof *status);
1193		__elfN(putnote)(dst, off, "FreeBSD", NT_FPREGSET, fpregset,
1194		    sizeof *fpregset);
1195		/*
1196		 * Allow for MD specific notes, as well as any MD
1197		 * specific preparations for writing MI notes.
1198		 */
1199		__elfN(dump_thread)(thr, dst, off);
1200
1201		thr = (thr == td) ? TAILQ_FIRST(&p->p_threads) :
1202		    TAILQ_NEXT(thr, td_plist);
1203		if (thr == td)
1204			thr = TAILQ_NEXT(thr, td_plist);
1205	}
1206
1207	notesz = *off - noteoff;
1208
1209	if (dst != NULL)
1210		free(tempdata, M_TEMP);
1211
1212	/* Align up to a page boundary for the program segments. */
1213	*off = round_page(*off);
1214
1215	if (dst != NULL) {
1216		Elf_Ehdr *ehdr;
1217		Elf_Phdr *phdr;
1218		struct phdr_closure phc;
1219
1220		/*
1221		 * Fill in the ELF header.
1222		 */
1223		ehdr = (Elf_Ehdr *)((char *)dst + ehoff);
1224		ehdr->e_ident[EI_MAG0] = ELFMAG0;
1225		ehdr->e_ident[EI_MAG1] = ELFMAG1;
1226		ehdr->e_ident[EI_MAG2] = ELFMAG2;
1227		ehdr->e_ident[EI_MAG3] = ELFMAG3;
1228		ehdr->e_ident[EI_CLASS] = ELF_CLASS;
1229		ehdr->e_ident[EI_DATA] = ELF_DATA;
1230		ehdr->e_ident[EI_VERSION] = EV_CURRENT;
1231		ehdr->e_ident[EI_OSABI] = ELFOSABI_FREEBSD;
1232		ehdr->e_ident[EI_ABIVERSION] = 0;
1233		ehdr->e_ident[EI_PAD] = 0;
1234		ehdr->e_type = ET_CORE;
1235#if defined(COMPAT_IA32) && __ELF_WORD_SIZE == 32
1236		ehdr->e_machine = EM_386;
1237#else
1238		ehdr->e_machine = ELF_ARCH;
1239#endif
1240		ehdr->e_version = EV_CURRENT;
1241		ehdr->e_entry = 0;
1242		ehdr->e_phoff = phoff;
1243		ehdr->e_flags = 0;
1244		ehdr->e_ehsize = sizeof(Elf_Ehdr);
1245		ehdr->e_phentsize = sizeof(Elf_Phdr);
1246		ehdr->e_phnum = numsegs + 1;
1247		ehdr->e_shentsize = sizeof(Elf_Shdr);
1248		ehdr->e_shnum = 0;
1249		ehdr->e_shstrndx = SHN_UNDEF;
1250
1251		/*
1252		 * Fill in the program header entries.
1253		 */
1254		phdr = (Elf_Phdr *)((char *)dst + phoff);
1255
1256		/* The note segement. */
1257		phdr->p_type = PT_NOTE;
1258		phdr->p_offset = noteoff;
1259		phdr->p_vaddr = 0;
1260		phdr->p_paddr = 0;
1261		phdr->p_filesz = notesz;
1262		phdr->p_memsz = 0;
1263		phdr->p_flags = 0;
1264		phdr->p_align = 0;
1265		phdr++;
1266
1267		/* All the writable segments from the program. */
1268		phc.phdr = phdr;
1269		phc.offset = *off;
1270		each_writable_segment(td, cb_put_phdr, &phc);
1271	}
1272}
1273
1274static void
1275__elfN(putnote)(void *dst, size_t *off, const char *name, int type,
1276    const void *desc, size_t descsz)
1277{
1278	Elf_Note note;
1279
1280	note.n_namesz = strlen(name) + 1;
1281	note.n_descsz = descsz;
1282	note.n_type = type;
1283	if (dst != NULL)
1284		bcopy(&note, (char *)dst + *off, sizeof note);
1285	*off += sizeof note;
1286	if (dst != NULL)
1287		bcopy(name, (char *)dst + *off, note.n_namesz);
1288	*off += roundup2(note.n_namesz, sizeof(Elf_Size));
1289	if (dst != NULL)
1290		bcopy(desc, (char *)dst + *off, note.n_descsz);
1291	*off += roundup2(note.n_descsz, sizeof(Elf_Size));
1292}
1293
1294/*
1295 * Tell kern_execve.c about it, with a little help from the linker.
1296 */
1297static struct execsw __elfN(execsw) = {
1298	__CONCAT(exec_, __elfN(imgact)),
1299	__XSTRING(__CONCAT(ELF, __ELF_WORD_SIZE))
1300};
1301EXEC_SET(__CONCAT(elf, __ELF_WORD_SIZE), __elfN(execsw));
1302