imgact_elf.c revision 72200
1/*-
2 * Copyright (c) 1995-1996 Søren Schmidt
3 * Copyright (c) 1996 Peter Wemm
4 * All rights reserved.
5 *
6 * Redistribution and use in source and binary forms, with or without
7 * modification, are permitted provided that the following conditions
8 * are met:
9 * 1. Redistributions of source code must retain the above copyright
10 *    notice, this list of conditions and the following disclaimer
11 *    in this position and unchanged.
12 * 2. Redistributions in binary form must reproduce the above copyright
13 *    notice, this list of conditions and the following disclaimer in the
14 *    documentation and/or other materials provided with the distribution.
15 * 3. The name of the author may not be used to endorse or promote products
16 *    derived from this software without specific prior written permission
17 *
18 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
19 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
20 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
21 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
22 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
23 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
24 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
25 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
26 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
27 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
28 *
29 * $FreeBSD: head/sys/kern/imgact_elf.c 72200 2001-02-09 06:11:45Z bmilekic $
30 */
31
32#include "opt_rlimit.h"
33
34#include <sys/param.h>
35#include <sys/exec.h>
36#include <sys/fcntl.h>
37#include <sys/imgact.h>
38#include <sys/imgact_elf.h>
39#include <sys/kernel.h>
40#include <sys/malloc.h>
41#include <sys/mutex.h>
42#include <sys/mman.h>
43#include <sys/namei.h>
44#include <sys/pioctl.h>
45#include <sys/proc.h>
46#include <sys/procfs.h>
47#include <sys/resourcevar.h>
48#include <sys/systm.h>
49#include <sys/signalvar.h>
50#include <sys/stat.h>
51#include <sys/syscall.h>
52#include <sys/sysctl.h>
53#include <sys/sysent.h>
54#include <sys/vnode.h>
55
56#include <vm/vm.h>
57#include <vm/vm_kern.h>
58#include <vm/vm_param.h>
59#include <vm/pmap.h>
60#include <sys/lock.h>
61#include <vm/vm_map.h>
62#include <vm/vm_object.h>
63#include <vm/vm_extern.h>
64
65#include <machine/elf.h>
66#include <machine/md_var.h>
67
68#define OLD_EI_BRAND	8
69
70__ElfType(Brandinfo);
71__ElfType(Auxargs);
72
73static int elf_check_header __P((const Elf_Ehdr *hdr));
74static int elf_freebsd_fixup __P((register_t **stack_base,
75    struct image_params *imgp));
76static int elf_load_file __P((struct proc *p, const char *file, u_long *addr,
77    u_long *entry));
78static int elf_load_section __P((struct proc *p,
79    struct vmspace *vmspace, struct vnode *vp,
80    vm_offset_t offset, caddr_t vmaddr, size_t memsz, size_t filsz,
81    vm_prot_t prot));
82static int exec_elf_imgact __P((struct image_params *imgp));
83
84static int elf_trace = 0;
85SYSCTL_INT(_debug, OID_AUTO, elf_trace, CTLFLAG_RW, &elf_trace, 0, "");
86
87struct sysentvec elf_freebsd_sysvec = {
88        SYS_MAXSYSCALL,
89        sysent,
90        0,
91        0,
92        0,
93        0,
94        0,
95        0,
96        elf_freebsd_fixup,
97        sendsig,
98        sigcode,
99        &szsigcode,
100        0,
101	"FreeBSD ELF",
102	elf_coredump,
103	NULL,
104	MINSIGSTKSZ
105};
106
107static Elf_Brandinfo freebsd_brand_info = {
108						ELFOSABI_FREEBSD,
109						"",
110						"/usr/libexec/ld-elf.so.1",
111						&elf_freebsd_sysvec
112					  };
113static Elf_Brandinfo *elf_brand_list[MAX_BRANDS] = {
114							&freebsd_brand_info,
115							NULL, NULL, NULL,
116							NULL, NULL, NULL, NULL
117						    };
118
119int
120elf_insert_brand_entry(Elf_Brandinfo *entry)
121{
122	int i;
123
124	for (i=1; i<MAX_BRANDS; i++) {
125		if (elf_brand_list[i] == NULL) {
126			elf_brand_list[i] = entry;
127			break;
128		}
129	}
130	if (i == MAX_BRANDS)
131		return -1;
132	return 0;
133}
134
135int
136elf_remove_brand_entry(Elf_Brandinfo *entry)
137{
138	int i;
139
140	for (i=1; i<MAX_BRANDS; i++) {
141		if (elf_brand_list[i] == entry) {
142			elf_brand_list[i] = NULL;
143			break;
144		}
145	}
146	if (i == MAX_BRANDS)
147		return -1;
148	return 0;
149}
150
151int
152elf_brand_inuse(Elf_Brandinfo *entry)
153{
154	struct proc *p;
155	int rval = FALSE;
156
157	ALLPROC_LOCK(AP_SHARED);
158	LIST_FOREACH(p, &allproc, p_list) {
159		if (p->p_sysent == entry->sysvec) {
160			rval = TRUE;
161			break;
162		}
163	}
164	ALLPROC_LOCK(AP_RELEASE);
165
166	return (rval);
167}
168
169static int
170elf_check_header(const Elf_Ehdr *hdr)
171{
172	if (!IS_ELF(*hdr) ||
173	    hdr->e_ident[EI_CLASS] != ELF_TARG_CLASS ||
174	    hdr->e_ident[EI_DATA] != ELF_TARG_DATA ||
175	    hdr->e_ident[EI_VERSION] != EV_CURRENT)
176		return ENOEXEC;
177
178	if (!ELF_MACHINE_OK(hdr->e_machine))
179		return ENOEXEC;
180
181	if (hdr->e_version != ELF_TARG_VER)
182		return ENOEXEC;
183
184	return 0;
185}
186
187static int
188elf_load_section(struct proc *p, struct vmspace *vmspace, struct vnode *vp, vm_offset_t offset, caddr_t vmaddr, size_t memsz, size_t filsz, vm_prot_t prot)
189{
190	size_t map_len;
191	vm_offset_t map_addr;
192	int error, rv;
193	size_t copy_len;
194	vm_object_t object;
195	vm_offset_t file_addr;
196	vm_offset_t data_buf = 0;
197
198	VOP_GETVOBJECT(vp, &object);
199	error = 0;
200
201	/*
202	 * It's necessary to fail if the filsz + offset taken from the
203	 * header is greater than the actual file pager object's size.
204	 * If we were to allow this, then the vm_map_find() below would
205	 * walk right off the end of the file object and into the ether.
206	 *
207	 * While I'm here, might as well check for something else that
208	 * is invalid: filsz cannot be greater than memsz.
209	 */
210	if ((off_t)filsz + offset > object->un_pager.vnp.vnp_size ||
211	    filsz > memsz) {
212		uprintf("elf_load_section: truncated ELF file\n");
213		return (ENOEXEC);
214	}
215
216	map_addr = trunc_page((vm_offset_t)vmaddr);
217	file_addr = trunc_page(offset);
218
219	/*
220	 * We have two choices.  We can either clear the data in the last page
221	 * of an oversized mapping, or we can start the anon mapping a page
222	 * early and copy the initialized data into that first page.  We
223	 * choose the second..
224	 */
225	if (memsz > filsz)
226		map_len = trunc_page(offset+filsz) - file_addr;
227	else
228		map_len = round_page(offset+filsz) - file_addr;
229
230	if (map_len != 0) {
231		vm_object_reference(object);
232		vm_map_lock(&vmspace->vm_map);
233		rv = vm_map_insert(&vmspace->vm_map,
234				      object,
235				      file_addr,	/* file offset */
236				      map_addr,		/* virtual start */
237				      map_addr + map_len,/* virtual end */
238				      prot,
239				      VM_PROT_ALL,
240				      MAP_COPY_ON_WRITE | MAP_PREFAULT);
241		vm_map_unlock(&vmspace->vm_map);
242		if (rv != KERN_SUCCESS) {
243			vm_object_deallocate(object);
244			return EINVAL;
245		}
246
247		/* we can stop now if we've covered it all */
248		if (memsz == filsz)
249			return 0;
250	}
251
252
253	/*
254	 * We have to get the remaining bit of the file into the first part
255	 * of the oversized map segment.  This is normally because the .data
256	 * segment in the file is extended to provide bss.  It's a neat idea
257	 * to try and save a page, but it's a pain in the behind to implement.
258	 */
259	copy_len = (offset + filsz) - trunc_page(offset + filsz);
260	map_addr = trunc_page((vm_offset_t)vmaddr + filsz);
261	map_len = round_page((vm_offset_t)vmaddr + memsz) - map_addr;
262
263	/* This had damn well better be true! */
264        if (map_len != 0) {
265		vm_map_lock(&vmspace->vm_map);
266		rv = vm_map_insert(&vmspace->vm_map, NULL, 0,
267					map_addr, map_addr + map_len,
268					VM_PROT_ALL, VM_PROT_ALL, 0);
269		vm_map_unlock(&vmspace->vm_map);
270		if (rv != KERN_SUCCESS)
271			return EINVAL;
272	}
273
274	if (copy_len != 0) {
275		vm_object_reference(object);
276		rv = vm_map_find(exec_map,
277				 object,
278				 trunc_page(offset + filsz),
279				 &data_buf,
280				 PAGE_SIZE,
281				 TRUE,
282				 VM_PROT_READ,
283				 VM_PROT_ALL,
284				 MAP_COPY_ON_WRITE | MAP_PREFAULT_PARTIAL);
285		if (rv != KERN_SUCCESS) {
286			vm_object_deallocate(object);
287			return EINVAL;
288		}
289
290		/* send the page fragment to user space */
291		error = copyout((caddr_t)data_buf, (caddr_t)map_addr, copy_len);
292		vm_map_remove(exec_map, data_buf, data_buf + PAGE_SIZE);
293		if (error)
294			return (error);
295	}
296
297	/*
298	 * set it to the specified protection
299	 */
300	vm_map_protect(&vmspace->vm_map, map_addr, map_addr + map_len,  prot,
301		       FALSE);
302
303	return error;
304}
305
306/*
307 * Load the file "file" into memory.  It may be either a shared object
308 * or an executable.
309 *
310 * The "addr" reference parameter is in/out.  On entry, it specifies
311 * the address where a shared object should be loaded.  If the file is
312 * an executable, this value is ignored.  On exit, "addr" specifies
313 * where the file was actually loaded.
314 *
315 * The "entry" reference parameter is out only.  On exit, it specifies
316 * the entry point for the loaded file.
317 */
318static int
319elf_load_file(struct proc *p, const char *file, u_long *addr, u_long *entry)
320{
321	const Elf_Ehdr *hdr = NULL;
322	const Elf_Phdr *phdr = NULL;
323	struct nameidata nd;
324	struct vmspace *vmspace = p->p_vmspace;
325	struct vattr attr;
326	struct image_params image_params, *imgp;
327	vm_prot_t prot;
328	u_long rbase;
329	u_long base_addr = 0;
330	int error, i, numsegs;
331
332	imgp = &image_params;
333	/*
334	 * Initialize part of the common data
335	 */
336	imgp->proc = p;
337	imgp->uap = NULL;
338	imgp->attr = &attr;
339	imgp->firstpage = NULL;
340	imgp->image_header = (char *)kmem_alloc_wait(exec_map, PAGE_SIZE);
341
342	if (imgp->image_header == NULL) {
343		nd.ni_vp = NULL;
344		error = ENOMEM;
345		goto fail;
346	}
347
348        NDINIT(&nd, LOOKUP, LOCKLEAF|FOLLOW, UIO_SYSSPACE, file, p);
349
350	if ((error = namei(&nd)) != 0) {
351		nd.ni_vp = NULL;
352		goto fail;
353	}
354	NDFREE(&nd, NDF_ONLY_PNBUF);
355	imgp->vp = nd.ni_vp;
356
357	/*
358	 * Check permissions, modes, uid, etc on the file, and "open" it.
359	 */
360	error = exec_check_permissions(imgp);
361	if (error) {
362		VOP_UNLOCK(nd.ni_vp, 0, p);
363		goto fail;
364	}
365
366	error = exec_map_first_page(imgp);
367	/*
368	 * Also make certain that the interpreter stays the same, so set
369	 * its VTEXT flag, too.
370	 */
371	if (error == 0)
372		nd.ni_vp->v_flag |= VTEXT;
373	VOP_UNLOCK(nd.ni_vp, 0, p);
374	if (error)
375                goto fail;
376
377	hdr = (const Elf_Ehdr *)imgp->image_header;
378	if ((error = elf_check_header(hdr)) != 0)
379		goto fail;
380	if (hdr->e_type == ET_DYN)
381		rbase = *addr;
382	else if (hdr->e_type == ET_EXEC)
383		rbase = 0;
384	else {
385		error = ENOEXEC;
386		goto fail;
387	}
388
389	/* Only support headers that fit within first page for now */
390	if ((hdr->e_phoff > PAGE_SIZE) ||
391	    (hdr->e_phoff + hdr->e_phentsize * hdr->e_phnum) > PAGE_SIZE) {
392		error = ENOEXEC;
393		goto fail;
394	}
395
396	phdr = (const Elf_Phdr *)(imgp->image_header + hdr->e_phoff);
397
398	for (i = 0, numsegs = 0; i < hdr->e_phnum; i++) {
399		if (phdr[i].p_type == PT_LOAD) {	/* Loadable segment */
400			prot = 0;
401			if (phdr[i].p_flags & PF_X)
402  				prot |= VM_PROT_EXECUTE;
403			if (phdr[i].p_flags & PF_W)
404  				prot |= VM_PROT_WRITE;
405			if (phdr[i].p_flags & PF_R)
406  				prot |= VM_PROT_READ;
407
408			if ((error = elf_load_section(p, vmspace, nd.ni_vp,
409  						     phdr[i].p_offset,
410  						     (caddr_t)phdr[i].p_vaddr +
411							rbase,
412  						     phdr[i].p_memsz,
413  						     phdr[i].p_filesz, prot)) != 0)
414				goto fail;
415			/*
416			 * Establish the base address if this is the
417			 * first segment.
418			 */
419			if (numsegs == 0)
420  				base_addr = trunc_page(phdr[i].p_vaddr + rbase);
421			numsegs++;
422		}
423	}
424	*addr = base_addr;
425	*entry=(unsigned long)hdr->e_entry + rbase;
426
427fail:
428	if (imgp->firstpage)
429		exec_unmap_first_page(imgp);
430	if (imgp->image_header)
431		kmem_free_wakeup(exec_map, (vm_offset_t)imgp->image_header,
432			PAGE_SIZE);
433	if (nd.ni_vp)
434		vrele(nd.ni_vp);
435
436	return error;
437}
438
439static int fallback_elf_brand = ELFOSABI_FREEBSD;
440SYSCTL_INT(_kern, OID_AUTO, fallback_elf_brand, CTLFLAG_RW,
441		&fallback_elf_brand, ELFOSABI_FREEBSD,
442		"ELF brand of last resort");
443
444static int
445exec_elf_imgact(struct image_params *imgp)
446{
447	const Elf_Ehdr *hdr = (const Elf_Ehdr *) imgp->image_header;
448	const Elf_Phdr *phdr;
449	Elf_Auxargs *elf_auxargs = NULL;
450	struct vmspace *vmspace;
451	vm_prot_t prot;
452	u_long text_size = 0, data_size = 0;
453	u_long text_addr = 0, data_addr = 0;
454	u_long addr, entry = 0, proghdr = 0;
455	int error, i;
456	const char *interp = NULL;
457	Elf_Brandinfo *brand_info;
458	char path[MAXPATHLEN];
459
460	/*
461	 * Do we have a valid ELF header ?
462	 */
463	if (elf_check_header(hdr) != 0 || hdr->e_type != ET_EXEC)
464		return -1;
465
466	/*
467	 * From here on down, we return an errno, not -1, as we've
468	 * detected an ELF file.
469	 */
470
471	if ((hdr->e_phoff > PAGE_SIZE) ||
472	    (hdr->e_phoff + hdr->e_phentsize * hdr->e_phnum) > PAGE_SIZE) {
473		/* Only support headers in first page for now */
474		return ENOEXEC;
475	}
476	phdr = (const Elf_Phdr*)(imgp->image_header + hdr->e_phoff);
477
478	/*
479	 * From this point on, we may have resources that need to be freed.
480	 */
481
482	/*
483	 * Yeah, I'm paranoid.  There is every reason in the world to get
484	 * VTEXT now since from here on out, there are places we can have
485	 * a context switch.  Better safe than sorry; I really don't want
486	 * the file to change while it's being loaded.
487	 */
488	mtx_lock(&imgp->vp->v_interlock);
489	imgp->vp->v_flag |= VTEXT;
490	mtx_unlock(&imgp->vp->v_interlock);
491
492	if ((error = exec_extract_strings(imgp)) != 0)
493		goto fail;
494
495	exec_new_vmspace(imgp);
496
497	vmspace = imgp->proc->p_vmspace;
498
499	for (i = 0; i < hdr->e_phnum; i++) {
500		switch(phdr[i].p_type) {
501
502		case PT_LOAD:	/* Loadable segment */
503			prot = 0;
504			if (phdr[i].p_flags & PF_X)
505  				prot |= VM_PROT_EXECUTE;
506			if (phdr[i].p_flags & PF_W)
507  				prot |= VM_PROT_WRITE;
508			if (phdr[i].p_flags & PF_R)
509  				prot |= VM_PROT_READ;
510
511			if ((error = elf_load_section(imgp->proc,
512						     vmspace, imgp->vp,
513  						     phdr[i].p_offset,
514  						     (caddr_t)phdr[i].p_vaddr,
515  						     phdr[i].p_memsz,
516  						     phdr[i].p_filesz, prot)) != 0)
517  				goto fail;
518
519			/*
520			 * Is this .text or .data ??
521			 *
522			 * We only handle one each of those yet XXX
523			 */
524			if (hdr->e_entry >= phdr[i].p_vaddr &&
525			hdr->e_entry <(phdr[i].p_vaddr+phdr[i].p_memsz)) {
526  				text_addr = trunc_page(phdr[i].p_vaddr);
527  				text_size = round_page(phdr[i].p_memsz +
528						       phdr[i].p_vaddr -
529						       text_addr);
530				entry = (u_long)hdr->e_entry;
531			} else {
532  				data_addr = trunc_page(phdr[i].p_vaddr);
533  				data_size = round_page(phdr[i].p_memsz +
534						       phdr[i].p_vaddr -
535						       data_addr);
536			}
537			break;
538	  	case PT_INTERP:	/* Path to interpreter */
539			if (phdr[i].p_filesz > MAXPATHLEN ||
540			    phdr[i].p_offset + phdr[i].p_filesz > PAGE_SIZE) {
541				error = ENOEXEC;
542				goto fail;
543			}
544			interp = imgp->image_header + phdr[i].p_offset;
545			break;
546		case PT_PHDR: 	/* Program header table info */
547			proghdr = phdr[i].p_vaddr;
548			break;
549		default:
550			break;
551		}
552	}
553
554	vmspace->vm_tsize = text_size >> PAGE_SHIFT;
555	vmspace->vm_taddr = (caddr_t)(uintptr_t)text_addr;
556	vmspace->vm_dsize = data_size >> PAGE_SHIFT;
557	vmspace->vm_daddr = (caddr_t)(uintptr_t)data_addr;
558
559	addr = ELF_RTLD_ADDR(vmspace);
560
561	imgp->entry_addr = entry;
562
563	brand_info = NULL;
564
565	/* XXX  For now we look for the magic "FreeBSD" that we used to put
566	 * into the ELF header at the EI_ABIVERSION location.  If found use
567	 * that information rather than figuring out the ABI from proper
568	 * branding.  This should be removed for 5.0-RELEASE. The Linux caes
569	 * can be figured out from the `interp_path' field.
570	 */
571	if (strcmp("FreeBSD", (const char *)&hdr->e_ident[OLD_EI_BRAND]) == 0)
572		brand_info = &freebsd_brand_info;
573
574	/* If the executable has a brand, search for it in the brand list. */
575	if (brand_info == NULL) {
576		for (i = 0;  i < MAX_BRANDS;  i++) {
577			Elf_Brandinfo *bi = elf_brand_list[i];
578
579			if (bi != NULL && hdr->e_ident[EI_OSABI] == bi->brand) {
580				brand_info = bi;
581				break;
582			}
583		}
584	}
585
586	/* Lacking a known brand, search for a recognized interpreter. */
587	if (brand_info == NULL && interp != NULL) {
588		for (i = 0;  i < MAX_BRANDS;  i++) {
589			Elf_Brandinfo *bi = elf_brand_list[i];
590
591			if (bi != NULL &&
592			    strcmp(interp, bi->interp_path) == 0) {
593				brand_info = bi;
594				break;
595			}
596		}
597	}
598
599	/* Lacking a recognized interpreter, try the default brand */
600	if (brand_info == NULL) {
601		for (i = 0; i < MAX_BRANDS; i++) {
602			Elf_Brandinfo *bi = elf_brand_list[i];
603
604			if (bi != NULL && fallback_elf_brand == bi->brand) {
605				brand_info = bi;
606				break;
607			}
608		}
609	}
610
611	/* XXX - Assume FreeBSD after the branding method change. */
612	if (brand_info == NULL)
613		brand_info = &freebsd_brand_info;
614
615	if (brand_info == NULL) {
616		uprintf("ELF binary type \"%u\" not known.\n",
617		    hdr->e_ident[EI_OSABI]);
618		error = ENOEXEC;
619		goto fail;
620	}
621
622	imgp->proc->p_sysent = brand_info->sysvec;
623	if (interp != NULL) {
624	        snprintf(path, sizeof(path), "%s%s",
625			 brand_info->emul_path, interp);
626		if ((error = elf_load_file(imgp->proc, path, &addr,
627					   &imgp->entry_addr)) != 0) {
628		        if ((error = elf_load_file(imgp->proc, interp, &addr,
629						   &imgp->entry_addr)) != 0) {
630			        uprintf("ELF interpreter %s not found\n", path);
631				goto fail;
632			}
633                }
634	}
635
636	/*
637	 * Construct auxargs table (used by the fixup routine)
638	 */
639	elf_auxargs = malloc(sizeof(Elf_Auxargs), M_TEMP, M_WAITOK);
640	elf_auxargs->execfd = -1;
641	elf_auxargs->phdr = proghdr;
642	elf_auxargs->phent = hdr->e_phentsize;
643	elf_auxargs->phnum = hdr->e_phnum;
644	elf_auxargs->pagesz = PAGE_SIZE;
645	elf_auxargs->base = addr;
646	elf_auxargs->flags = 0;
647	elf_auxargs->entry = entry;
648	elf_auxargs->trace = elf_trace;
649
650	imgp->auxargs = elf_auxargs;
651	imgp->interpreted = 0;
652
653fail:
654	return error;
655}
656
657static int
658elf_freebsd_fixup(register_t **stack_base, struct image_params *imgp)
659{
660	Elf_Auxargs *args = (Elf_Auxargs *)imgp->auxargs;
661	register_t *pos;
662
663	pos = *stack_base + (imgp->argc + imgp->envc + 2);
664
665	if (args->trace) {
666		AUXARGS_ENTRY(pos, AT_DEBUG, 1);
667	}
668	if (args->execfd != -1) {
669		AUXARGS_ENTRY(pos, AT_EXECFD, args->execfd);
670	}
671	AUXARGS_ENTRY(pos, AT_PHDR, args->phdr);
672	AUXARGS_ENTRY(pos, AT_PHENT, args->phent);
673	AUXARGS_ENTRY(pos, AT_PHNUM, args->phnum);
674	AUXARGS_ENTRY(pos, AT_PAGESZ, args->pagesz);
675	AUXARGS_ENTRY(pos, AT_FLAGS, args->flags);
676	AUXARGS_ENTRY(pos, AT_ENTRY, args->entry);
677	AUXARGS_ENTRY(pos, AT_BASE, args->base);
678	AUXARGS_ENTRY(pos, AT_NULL, 0);
679
680	free(imgp->auxargs, M_TEMP);
681	imgp->auxargs = NULL;
682
683	(*stack_base)--;
684	suword(*stack_base, (long) imgp->argc);
685	return 0;
686}
687
688/*
689 * Code for generating ELF core dumps.
690 */
691
692typedef void (*segment_callback) __P((vm_map_entry_t, void *));
693
694/* Closure for cb_put_phdr(). */
695struct phdr_closure {
696	Elf_Phdr *phdr;		/* Program header to fill in */
697	Elf_Off offset;		/* Offset of segment in core file */
698};
699
700/* Closure for cb_size_segment(). */
701struct sseg_closure {
702	int count;		/* Count of writable segments. */
703	size_t size;		/* Total size of all writable segments. */
704};
705
706static void cb_put_phdr __P((vm_map_entry_t, void *));
707static void cb_size_segment __P((vm_map_entry_t, void *));
708static void each_writable_segment __P((struct proc *, segment_callback,
709    void *));
710static int elf_corehdr __P((struct proc *, struct vnode *, struct ucred *,
711    int, void *, size_t));
712static void elf_puthdr __P((struct proc *, void *, size_t *,
713    const prstatus_t *, const prfpregset_t *, const prpsinfo_t *, int));
714static void elf_putnote __P((void *, size_t *, const char *, int,
715    const void *, size_t));
716
717extern int osreldate;
718
719int
720elf_coredump(p, vp, limit)
721	register struct proc *p;
722	register struct vnode *vp;
723	off_t limit;
724{
725	register struct ucred *cred = p->p_ucred;
726	int error = 0;
727	struct sseg_closure seginfo;
728	void *hdr;
729	size_t hdrsize;
730
731	/* Size the program segments. */
732	seginfo.count = 0;
733	seginfo.size = 0;
734	each_writable_segment(p, cb_size_segment, &seginfo);
735
736	/*
737	 * Calculate the size of the core file header area by making
738	 * a dry run of generating it.  Nothing is written, but the
739	 * size is calculated.
740	 */
741	hdrsize = 0;
742	elf_puthdr((struct proc *)NULL, (void *)NULL, &hdrsize,
743	    (const prstatus_t *)NULL, (const prfpregset_t *)NULL,
744	    (const prpsinfo_t *)NULL, seginfo.count);
745
746	if (hdrsize + seginfo.size >= limit)
747		return (EFAULT);
748
749	/*
750	 * Allocate memory for building the header, fill it up,
751	 * and write it out.
752	 */
753	hdr = malloc(hdrsize, M_TEMP, M_WAITOK);
754	if (hdr == NULL) {
755		return EINVAL;
756	}
757	error = elf_corehdr(p, vp, cred, seginfo.count, hdr, hdrsize);
758
759	/* Write the contents of all of the writable segments. */
760	if (error == 0) {
761		Elf_Phdr *php;
762		off_t offset;
763		int i;
764
765		php = (Elf_Phdr *)((char *)hdr + sizeof(Elf_Ehdr)) + 1;
766		offset = hdrsize;
767		for (i = 0;  i < seginfo.count;  i++) {
768			error = vn_rdwr(UIO_WRITE, vp, (caddr_t)php->p_vaddr,
769			    php->p_filesz, offset, UIO_USERSPACE,
770			    IO_NODELOCKED|IO_UNIT, cred, (int *)NULL, p);
771			if (error != 0)
772				break;
773			offset += php->p_filesz;
774			php++;
775		}
776	}
777	free(hdr, M_TEMP);
778
779	return error;
780}
781
782/*
783 * A callback for each_writable_segment() to write out the segment's
784 * program header entry.
785 */
786static void
787cb_put_phdr(entry, closure)
788	vm_map_entry_t entry;
789	void *closure;
790{
791	struct phdr_closure *phc = (struct phdr_closure *)closure;
792	Elf_Phdr *phdr = phc->phdr;
793
794	phc->offset = round_page(phc->offset);
795
796	phdr->p_type = PT_LOAD;
797	phdr->p_offset = phc->offset;
798	phdr->p_vaddr = entry->start;
799	phdr->p_paddr = 0;
800	phdr->p_filesz = phdr->p_memsz = entry->end - entry->start;
801	phdr->p_align = PAGE_SIZE;
802	phdr->p_flags = 0;
803	if (entry->protection & VM_PROT_READ)
804		phdr->p_flags |= PF_R;
805	if (entry->protection & VM_PROT_WRITE)
806		phdr->p_flags |= PF_W;
807	if (entry->protection & VM_PROT_EXECUTE)
808		phdr->p_flags |= PF_X;
809
810	phc->offset += phdr->p_filesz;
811	phc->phdr++;
812}
813
814/*
815 * A callback for each_writable_segment() to gather information about
816 * the number of segments and their total size.
817 */
818static void
819cb_size_segment(entry, closure)
820	vm_map_entry_t entry;
821	void *closure;
822{
823	struct sseg_closure *ssc = (struct sseg_closure *)closure;
824
825	ssc->count++;
826	ssc->size += entry->end - entry->start;
827}
828
829/*
830 * For each writable segment in the process's memory map, call the given
831 * function with a pointer to the map entry and some arbitrary
832 * caller-supplied data.
833 */
834static void
835each_writable_segment(p, func, closure)
836	struct proc *p;
837	segment_callback func;
838	void *closure;
839{
840	vm_map_t map = &p->p_vmspace->vm_map;
841	vm_map_entry_t entry;
842
843	for (entry = map->header.next;  entry != &map->header;
844	    entry = entry->next) {
845		vm_object_t obj;
846
847		if ((entry->eflags & MAP_ENTRY_IS_SUB_MAP) ||
848		    (entry->protection & (VM_PROT_READ|VM_PROT_WRITE)) !=
849		    (VM_PROT_READ|VM_PROT_WRITE))
850			continue;
851
852		/*
853		** Dont include memory segment in the coredump if
854		** MAP_NOCORE is set in mmap(2) or MADV_NOCORE in
855		** madvise(2).
856		*/
857		if (entry->eflags & MAP_ENTRY_NOCOREDUMP)
858			continue;
859
860		if ((obj = entry->object.vm_object) == NULL)
861			continue;
862
863		/* Find the deepest backing object. */
864		while (obj->backing_object != NULL)
865			obj = obj->backing_object;
866
867		/* Ignore memory-mapped devices and such things. */
868		if (obj->type != OBJT_DEFAULT &&
869		    obj->type != OBJT_SWAP &&
870		    obj->type != OBJT_VNODE)
871			continue;
872
873		(*func)(entry, closure);
874	}
875}
876
877/*
878 * Write the core file header to the file, including padding up to
879 * the page boundary.
880 */
881static int
882elf_corehdr(p, vp, cred, numsegs, hdr, hdrsize)
883	struct proc *p;
884	struct vnode *vp;
885	struct ucred *cred;
886	int numsegs;
887	size_t hdrsize;
888	void *hdr;
889{
890	size_t off;
891	prstatus_t status;
892	prfpregset_t fpregset;
893	prpsinfo_t psinfo;
894
895	/* Gather the information for the header. */
896	bzero(&status, sizeof status);
897	status.pr_version = PRSTATUS_VERSION;
898	status.pr_statussz = sizeof(prstatus_t);
899	status.pr_gregsetsz = sizeof(gregset_t);
900	status.pr_fpregsetsz = sizeof(fpregset_t);
901	status.pr_osreldate = osreldate;
902	status.pr_cursig = p->p_sig;
903	status.pr_pid = p->p_pid;
904	fill_regs(p, &status.pr_reg);
905
906	fill_fpregs(p, &fpregset);
907
908	bzero(&psinfo, sizeof psinfo);
909	psinfo.pr_version = PRPSINFO_VERSION;
910	psinfo.pr_psinfosz = sizeof(prpsinfo_t);
911	strncpy(psinfo.pr_fname, p->p_comm, MAXCOMLEN);
912	/* XXX - We don't fill in the command line arguments properly yet. */
913	strncpy(psinfo.pr_psargs, p->p_comm, PRARGSZ);
914
915	/* Fill in the header. */
916	bzero(hdr, hdrsize);
917	off = 0;
918	elf_puthdr(p, hdr, &off, &status, &fpregset, &psinfo, numsegs);
919
920	/* Write it to the core file. */
921	return vn_rdwr(UIO_WRITE, vp, hdr, hdrsize, (off_t)0,
922	    UIO_SYSSPACE, IO_NODELOCKED|IO_UNIT, cred, NULL, p);
923}
924
925static void
926elf_puthdr(struct proc *p, void *dst, size_t *off, const prstatus_t *status,
927    const prfpregset_t *fpregset, const prpsinfo_t *psinfo, int numsegs)
928{
929	size_t ehoff;
930	size_t phoff;
931	size_t noteoff;
932	size_t notesz;
933
934	ehoff = *off;
935	*off += sizeof(Elf_Ehdr);
936
937	phoff = *off;
938	*off += (numsegs + 1) * sizeof(Elf_Phdr);
939
940	noteoff = *off;
941	elf_putnote(dst, off, "FreeBSD", NT_PRSTATUS, status,
942	    sizeof *status);
943	elf_putnote(dst, off, "FreeBSD", NT_FPREGSET, fpregset,
944	    sizeof *fpregset);
945	elf_putnote(dst, off, "FreeBSD", NT_PRPSINFO, psinfo,
946	    sizeof *psinfo);
947	notesz = *off - noteoff;
948
949	/* Align up to a page boundary for the program segments. */
950	*off = round_page(*off);
951
952	if (dst != NULL) {
953		Elf_Ehdr *ehdr;
954		Elf_Phdr *phdr;
955		struct phdr_closure phc;
956
957		/*
958		 * Fill in the ELF header.
959		 */
960		ehdr = (Elf_Ehdr *)((char *)dst + ehoff);
961		ehdr->e_ident[EI_MAG0] = ELFMAG0;
962		ehdr->e_ident[EI_MAG1] = ELFMAG1;
963		ehdr->e_ident[EI_MAG2] = ELFMAG2;
964		ehdr->e_ident[EI_MAG3] = ELFMAG3;
965		ehdr->e_ident[EI_CLASS] = ELF_CLASS;
966		ehdr->e_ident[EI_DATA] = ELF_DATA;
967		ehdr->e_ident[EI_VERSION] = EV_CURRENT;
968		ehdr->e_ident[EI_OSABI] = ELFOSABI_FREEBSD;
969		ehdr->e_ident[EI_ABIVERSION] = 0;
970		ehdr->e_ident[EI_PAD] = 0;
971		ehdr->e_type = ET_CORE;
972		ehdr->e_machine = ELF_ARCH;
973		ehdr->e_version = EV_CURRENT;
974		ehdr->e_entry = 0;
975		ehdr->e_phoff = phoff;
976		ehdr->e_flags = 0;
977		ehdr->e_ehsize = sizeof(Elf_Ehdr);
978		ehdr->e_phentsize = sizeof(Elf_Phdr);
979		ehdr->e_phnum = numsegs + 1;
980		ehdr->e_shentsize = sizeof(Elf_Shdr);
981		ehdr->e_shnum = 0;
982		ehdr->e_shstrndx = SHN_UNDEF;
983
984		/*
985		 * Fill in the program header entries.
986		 */
987		phdr = (Elf_Phdr *)((char *)dst + phoff);
988
989		/* The note segement. */
990		phdr->p_type = PT_NOTE;
991		phdr->p_offset = noteoff;
992		phdr->p_vaddr = 0;
993		phdr->p_paddr = 0;
994		phdr->p_filesz = notesz;
995		phdr->p_memsz = 0;
996		phdr->p_flags = 0;
997		phdr->p_align = 0;
998		phdr++;
999
1000		/* All the writable segments from the program. */
1001		phc.phdr = phdr;
1002		phc.offset = *off;
1003		each_writable_segment(p, cb_put_phdr, &phc);
1004	}
1005}
1006
1007static void
1008elf_putnote(void *dst, size_t *off, const char *name, int type,
1009    const void *desc, size_t descsz)
1010{
1011	Elf_Note note;
1012
1013	note.n_namesz = strlen(name) + 1;
1014	note.n_descsz = descsz;
1015	note.n_type = type;
1016	if (dst != NULL)
1017		bcopy(&note, (char *)dst + *off, sizeof note);
1018	*off += sizeof note;
1019	if (dst != NULL)
1020		bcopy(name, (char *)dst + *off, note.n_namesz);
1021	*off += roundup2(note.n_namesz, sizeof(Elf_Size));
1022	if (dst != NULL)
1023		bcopy(desc, (char *)dst + *off, note.n_descsz);
1024	*off += roundup2(note.n_descsz, sizeof(Elf_Size));
1025}
1026
1027/*
1028 * Tell kern_execve.c about it, with a little help from the linker.
1029 */
1030static struct execsw elf_execsw = {exec_elf_imgact, "ELF"};
1031EXEC_SET(elf, elf_execsw);
1032