imgact_elf.c revision 39311
1/*-
2 * Copyright (c) 1995-1996 Søren Schmidt
3 * Copyright (c) 1996 Peter Wemm
4 * All rights reserved.
5 *
6 * Redistribution and use in source and binary forms, with or without
7 * modification, are permitted provided that the following conditions
8 * are met:
9 * 1. Redistributions of source code must retain the above copyright
10 *    notice, this list of conditions and the following disclaimer
11 *    in this position and unchanged.
12 * 2. Redistributions in binary form must reproduce the above copyright
13 *    notice, this list of conditions and the following disclaimer in the
14 *    documentation and/or other materials provided with the distribution.
15 * 3. The name of the author may not be used to endorse or promote products
16 *    derived from this software withough specific prior written permission
17 *
18 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
19 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
20 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
21 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
22 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
23 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
24 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
25 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
26 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
27 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
28 *
29 *	$Id: imgact_elf.c,v 1.32 1998/09/15 21:46:34 jdp Exp $
30 */
31
32#include "opt_rlimit.h"
33
34#include <sys/param.h>
35#include <sys/acct.h>
36#include <sys/exec.h>
37#include <sys/fcntl.h>
38#include <sys/imgact.h>
39#include <sys/imgact_elf.h>
40#include <sys/kernel.h>
41#include <sys/malloc.h>
42#include <sys/mman.h>
43#include <sys/namei.h>
44#include <sys/pioctl.h>
45#include <sys/proc.h>
46#include <sys/procfs.h>
47#include <sys/resourcevar.h>
48#include <sys/signalvar.h>
49#include <sys/stat.h>
50#include <sys/syscall.h>
51#include <sys/sysctl.h>
52#include <sys/sysent.h>
53#include <sys/systm.h>
54#include <sys/vnode.h>
55
56#include <vm/vm.h>
57#include <vm/vm_kern.h>
58#include <vm/vm_param.h>
59#include <vm/pmap.h>
60#include <sys/lock.h>
61#include <vm/vm_map.h>
62#include <vm/vm_object.h>
63#include <vm/vm_prot.h>
64#include <vm/vm_extern.h>
65
66#include <machine/md_var.h>
67
/*
 * Largest program header count accepted from an image; both loaders in
 * this file refuse a file whose e_phnum is greater than this.
 */
#define MAX_PHDR	32	/* XXX enough ? */

/*
 * Class-neutral aliases.  The code below is written against the Elf_*
 * names, which resolve to the 32-bit structures when ELF_TARG_CLASS is
 * ELFCLASS32 and to the 64-bit structures in every other case.
 */
#if ELF_TARG_CLASS != ELFCLASS32

#define Elf_Ehdr	Elf64_Ehdr
#define Elf_Phdr	Elf64_Phdr
#define Elf_Auxargs	Elf64_Auxargs
#define Elf_Brandinfo	Elf64_Brandinfo

#else

#define Elf_Ehdr	Elf32_Ehdr
#define Elf_Phdr	Elf32_Phdr
#define Elf_Auxargs	Elf32_Auxargs
#define Elf_Brandinfo	Elf32_Brandinfo

#endif
85
86
87static int elf_check_header __P((const Elf_Ehdr *hdr, int type));
88static int elf_freebsd_fixup __P((long **stack_base,
89    struct image_params *imgp));
90static int elf_load_file __P((struct proc *p, char *file, u_long *addr,
91    u_long *entry));
92static int elf_load_section __P((struct vmspace *vmspace, struct vnode *vp,
93    vm_offset_t offset, caddr_t vmaddr, size_t memsz, size_t filsz,
94    vm_prot_t prot));
95static int exec_elf_imgact __P((struct image_params *imgp));
96
97static int elf_trace = 0;
98SYSCTL_INT(_debug, OID_AUTO, elf_trace, CTLFLAG_RW, &elf_trace, 0, "");
99#define UPRINTF if (elf_trace) uprintf
100
101static struct sysentvec elf_freebsd_sysvec = {
102        SYS_MAXSYSCALL,
103        sysent,
104        0,
105        0,
106        0,
107        0,
108        0,
109        0,
110        elf_freebsd_fixup,
111        sendsig,
112        sigcode,
113        &szsigcode,
114        0,
115	"FreeBSD ELF",
116	elf_coredump
117};
118
119static Elf_Brandinfo freebsd_brand_info = {
120						"FreeBSD",
121						"",
122						"/usr/libexec/ld-elf.so.1",
123						&elf_freebsd_sysvec
124					  };
125static Elf_Brandinfo *elf_brand_list[MAX_BRANDS] = {
126							&freebsd_brand_info,
127							NULL, NULL, NULL,
128							NULL, NULL, NULL, NULL
129						    };
130
131int
132elf_insert_brand_entry(Elf_Brandinfo *entry)
133{
134	int i;
135
136	for (i=1; i<MAX_BRANDS; i++) {
137		if (elf_brand_list[i] == NULL) {
138			elf_brand_list[i] = entry;
139			break;
140		}
141	}
142	if (i == MAX_BRANDS)
143		return -1;
144	return 0;
145}
146
147int
148elf_remove_brand_entry(Elf_Brandinfo *entry)
149{
150	int i;
151
152	for (i=1; i<MAX_BRANDS; i++) {
153		if (elf_brand_list[i] == entry) {
154			elf_brand_list[i] = NULL;
155			break;
156		}
157	}
158	if (i == MAX_BRANDS)
159		return -1;
160	return 0;
161}
162
163static int
164elf_check_header(const Elf_Ehdr *hdr, int type)
165{
166	if (!(hdr->e_ident[EI_MAG0] == ELFMAG0 &&
167	      hdr->e_ident[EI_MAG1] == ELFMAG1 &&
168	      hdr->e_ident[EI_MAG2] == ELFMAG2 &&
169	      hdr->e_ident[EI_MAG3] == ELFMAG3))
170		return ENOEXEC;
171
172#ifdef __i386__
173	if (hdr->e_machine != EM_386 && hdr->e_machine != EM_486)
174#endif
175#ifdef __alpha__
176	if (hdr->e_machine != EM_ALPHA)
177#endif
178		return ENOEXEC;
179
180
181	if (hdr->e_type != type)
182		return ENOEXEC;
183
184	return 0;
185}
186
187static int
188elf_load_section(struct vmspace *vmspace, struct vnode *vp, vm_offset_t offset, caddr_t vmaddr, size_t memsz, size_t filsz, vm_prot_t prot)
189{
190	size_t map_len;
191	vm_offset_t map_addr;
192	int error;
193	unsigned char *data_buf = 0;
194	size_t copy_len;
195
196	map_addr = trunc_page(vmaddr);
197
198	if (memsz > filsz)
199		map_len = trunc_page(offset+filsz) - trunc_page(offset);
200	else
201		map_len = round_page(offset+filsz) - trunc_page(offset);
202
203	if (error = vm_mmap (&vmspace->vm_map,
204			     &map_addr,
205			     map_len,
206			     prot,
207			     VM_PROT_ALL,
208			     MAP_PRIVATE | MAP_FIXED,
209			     (caddr_t)vp,
210			     trunc_page(offset)))
211		return error;
212
213	if (memsz == filsz)
214		return 0;
215
216	/*
217	 * We have to map the remaining bit of the file into the kernel's
218	 * memory map, allocate some anonymous memory, and copy that last
219	 * bit into it. The remaining space should be .bss...
220	 */
221	copy_len = (offset + filsz) - trunc_page(offset + filsz);
222	map_addr = trunc_page(vmaddr + filsz);
223	map_len = round_page(vmaddr + memsz) - map_addr;
224
225        if (map_len != 0) {
226		if (error = vm_map_find(&vmspace->vm_map, NULL, 0,
227					&map_addr, map_len, FALSE,
228					VM_PROT_ALL, VM_PROT_ALL,0))
229			return error;
230	}
231
232	if (error = vm_mmap(exec_map,
233			    (vm_offset_t *)&data_buf,
234			    PAGE_SIZE,
235			    VM_PROT_READ,
236			    VM_PROT_READ,
237			    0,
238			    (caddr_t)vp,
239			    trunc_page(offset + filsz)))
240		return error;
241
242	error = copyout(data_buf, (caddr_t)map_addr, copy_len);
243
244        vm_map_remove(exec_map, (vm_offset_t)data_buf,
245		      (vm_offset_t)data_buf + PAGE_SIZE);
246
247	/*
248	 * set it to the specified protection
249	 */
250	vm_map_protect(&vmspace->vm_map, map_addr, map_addr + map_len,  prot,
251		       FALSE);
252
253	UPRINTF("bss size %d (%x)\n", map_len-copy_len, map_len-copy_len);
254	return error;
255}
256
257static int
258elf_load_file(struct proc *p, char *file, u_long *addr, u_long *entry)
259{
260	Elf_Ehdr *hdr = NULL;
261	Elf_Phdr *phdr = NULL;
262	struct nameidata nd;
263	struct vmspace *vmspace = p->p_vmspace;
264	struct vattr attr;
265	struct image_params image_params, *imgp;
266	vm_prot_t prot = 0;
267	unsigned long text_size = 0, data_size = 0;
268	unsigned long text_addr = 0, data_addr = 0;
269	int header_size = 0;
270        int error, i;
271
272	imgp = &image_params;
273	/*
274	 * Initialize part of the common data
275	 */
276	imgp->proc = p;
277	imgp->uap = NULL;
278	imgp->attr = &attr;
279	imgp->firstpage = NULL;
280	imgp->image_header = (char *)kmem_alloc_wait(exec_map, PAGE_SIZE);
281
282	if (imgp->image_header == NULL) {
283		nd.ni_vp = NULL;
284		error = ENOMEM;
285		goto fail;
286	}
287
288        NDINIT(&nd, LOOKUP, LOCKLEAF|FOLLOW, UIO_SYSSPACE, file, p);
289
290	if (error = namei(&nd)) {
291		nd.ni_vp = NULL;
292		goto fail;
293	}
294
295	imgp->vp = nd.ni_vp;
296
297	/*
298	 * Check permissions, modes, uid, etc on the file, and "open" it.
299	 */
300	error = exec_check_permissions(imgp);
301	if (error) {
302		VOP_UNLOCK(nd.ni_vp, 0, p);
303		goto fail;
304	}
305
306	error = exec_map_first_page(imgp);
307	VOP_UNLOCK(nd.ni_vp, 0, p);
308	if (error)
309                goto fail;
310
311	hdr = (Elf_Ehdr *)imgp->image_header;
312	if (error = elf_check_header(hdr, ET_DYN))
313		goto fail;
314
315	/*
316	 * ouch, need to bounds check in case user gives us a corrupted
317	 * file with an insane header size
318	 */
319	if (hdr->e_phnum > MAX_PHDR) {	/* XXX: ever more than this? */
320		error = ENOEXEC;
321		goto fail;
322	}
323
324	header_size = hdr->e_phentsize * hdr->e_phnum;
325
326	/* Only support headers that fit within first page for now */
327	if (header_size + hdr->e_phoff > PAGE_SIZE) {
328		error = ENOEXEC;
329		goto fail;
330	}
331
332	phdr = (Elf_Phdr *)(imgp->image_header + hdr->e_phoff);
333
334	for (i = 0; i < hdr->e_phnum; i++) {
335		switch(phdr[i].p_type) {
336
337	   	case PT_NULL:	/* NULL section */
338	    		UPRINTF ("ELF(file) PT_NULL section\n");
339			break;
340		case PT_LOAD:	/* Loadable segment */
341		{
342	    		UPRINTF ("ELF(file) PT_LOAD section ");
343			if (phdr[i].p_flags & PF_X)
344  				prot |= VM_PROT_EXECUTE;
345			if (phdr[i].p_flags & PF_W)
346  				prot |= VM_PROT_WRITE;
347			if (phdr[i].p_flags & PF_R)
348  				prot |= VM_PROT_READ;
349
350			if (error = elf_load_section(vmspace, nd.ni_vp,
351  						     phdr[i].p_offset,
352  						     (caddr_t)phdr[i].p_vaddr +
353							(*addr),
354  						     phdr[i].p_memsz,
355  						     phdr[i].p_filesz, prot))
356				goto fail;
357
358			/*
359			 * Is this .text or .data ??
360			 *
361			 * We only handle one each of those yet XXX
362			 */
363			if (hdr->e_entry >= phdr[i].p_vaddr &&
364			hdr->e_entry <(phdr[i].p_vaddr+phdr[i].p_memsz)) {
365  				text_addr = trunc_page(phdr[i].p_vaddr+(*addr));
366  				text_size = round_page(phdr[i].p_memsz +
367						       phdr[i].p_vaddr -
368						       trunc_page(phdr[i].p_vaddr));
369				*entry=(unsigned long)hdr->e_entry+(*addr);
370	    			UPRINTF(".text <%08lx,%08lx> entry=%08lx\n",
371					text_addr, text_size, *entry);
372			} else {
373  				data_addr = trunc_page(phdr[i].p_vaddr+(*addr));
374  				data_size = round_page(phdr[i].p_memsz +
375						       phdr[i].p_vaddr -
376						       trunc_page(phdr[i].p_vaddr));
377	    			UPRINTF(".data <%08lx,%08lx>\n",
378					data_addr, data_size);
379			}
380		}
381		break;
382
383	   	case PT_DYNAMIC:/* Dynamic link information */
384	    		UPRINTF ("ELF(file) PT_DYNAMIC section\n");
385			break;
386	  	case PT_INTERP:	/* Path to interpreter */
387	    		UPRINTF ("ELF(file) PT_INTERP section\n");
388			break;
389	  	case PT_NOTE:	/* Note section */
390	    		UPRINTF ("ELF(file) PT_NOTE section\n");
391			break;
392	  	case PT_SHLIB:	/* Shared lib section  */
393	    		UPRINTF ("ELF(file) PT_SHLIB section\n");
394			break;
395		case PT_PHDR: 	/* Program header table info */
396	    		UPRINTF ("ELF(file) PT_PHDR section\n");
397			break;
398		default:
399	    		UPRINTF ("ELF(file) %d section ??\n", phdr[i].p_type );
400		}
401	}
402
403fail:
404	if (imgp->firstpage)
405		exec_unmap_first_page(imgp);
406	if (imgp->image_header)
407		kmem_free_wakeup(exec_map, (vm_offset_t)imgp->image_header,
408			PAGE_SIZE);
409	if (nd.ni_vp)
410		vrele(nd.ni_vp);
411
412	return error;
413}
414
415static int
416exec_elf_imgact(struct image_params *imgp)
417{
418	const Elf_Ehdr *hdr = (const Elf_Ehdr *) imgp->image_header;
419	const Elf_Phdr *phdr, *mapped_phdr = NULL;
420	Elf_Auxargs *elf_auxargs = NULL;
421	struct vmspace *vmspace;
422	vm_prot_t prot = 0;
423	u_long text_size = 0, data_size = 0;
424	u_long text_addr = 0, data_addr = 0;
425	u_long addr, entry = 0, proghdr = 0;
426	int error, i, header_size = 0;
427	const char *interp = NULL;
428	char *brand = NULL;
429	char path[MAXPATHLEN];
430
431	/*
432	 * Do we have a valid ELF header ?
433	 */
434	if (elf_check_header(hdr, ET_EXEC))
435		return -1;
436
437	/*
438	 * From here on down, we return an errno, not -1, as we've
439	 * detected an ELF file.
440	 */
441
442	/*
443	 * ouch, need to bounds check in case user gives us a corrupted
444	 * file with an insane header size
445	 */
446	if (hdr->e_phnum > MAX_PHDR) {	/* XXX: ever more than this? */
447		return ENOEXEC;
448	}
449
450	header_size = hdr->e_phentsize * hdr->e_phnum;
451
452	if ((hdr->e_phoff > PAGE_SIZE) ||
453	    (hdr->e_phoff + header_size) > PAGE_SIZE) {
454		/* Only support headers in first page for now */
455		return ENOEXEC;
456	} else {
457		phdr = (const Elf_Phdr*)
458		       ((const char *)imgp->image_header + hdr->e_phoff);
459	}
460
461	/*
462	 * From this point on, we may have resources that need to be freed.
463	 */
464	if (error = exec_extract_strings(imgp))
465		goto fail;
466
467	exec_new_vmspace(imgp);
468
469	vmspace = imgp->proc->p_vmspace;
470
471	for (i = 0; i < hdr->e_phnum; i++) {
472		switch(phdr[i].p_type) {
473
474	   	case PT_NULL:	/* NULL section */
475	    		UPRINTF ("ELF PT_NULL section\n");
476			break;
477		case PT_LOAD:	/* Loadable segment */
478		{
479	    		UPRINTF ("ELF PT_LOAD section ");
480			if (phdr[i].p_flags & PF_X)
481  				prot |= VM_PROT_EXECUTE;
482			if (phdr[i].p_flags & PF_W)
483  				prot |= VM_PROT_WRITE;
484			if (phdr[i].p_flags & PF_R)
485  				prot |= VM_PROT_READ;
486
487			if (error = elf_load_section(vmspace, imgp->vp,
488  						     phdr[i].p_offset,
489  						     (caddr_t)phdr[i].p_vaddr,
490  						     phdr[i].p_memsz,
491  						     phdr[i].p_filesz, prot))
492  				goto fail;
493
494			/*
495			 * Is this .text or .data ??
496			 *
497			 * We only handle one each of those yet XXX
498			 */
499			if (hdr->e_entry >= phdr[i].p_vaddr &&
500			hdr->e_entry <(phdr[i].p_vaddr+phdr[i].p_memsz)) {
501  				text_addr = trunc_page(phdr[i].p_vaddr);
502  				text_size = round_page(phdr[i].p_memsz +
503						       phdr[i].p_vaddr -
504						       text_addr);
505				entry = (u_long)hdr->e_entry;
506	    			UPRINTF(".text <%08lx,%08lx> entry=%08lx\n",
507					text_addr, text_size, entry);
508			} else {
509  				data_addr = trunc_page(phdr[i].p_vaddr);
510  				data_size = round_page(phdr[i].p_memsz +
511						       phdr[i].p_vaddr -
512						       data_addr);
513	    			UPRINTF(".data <%08lx,%08lx>\n",
514					data_addr, data_size);
515			}
516		}
517		break;
518
519	   	case PT_DYNAMIC:/* Dynamic link information */
520	    		UPRINTF ("ELF PT_DYNAMIC section ??\n");
521			break;
522	  	case PT_INTERP:	/* Path to interpreter */
523	    		UPRINTF ("ELF PT_INTERP section ");
524			if (phdr[i].p_filesz > MAXPATHLEN ||
525			    phdr[i].p_offset + phdr[i].p_filesz > PAGE_SIZE) {
526				error = ENOEXEC;
527				goto fail;
528			}
529			interp = imgp->image_header + phdr[i].p_offset;
530			UPRINTF("<%s>\n", interp);
531			break;
532	  	case PT_NOTE:	/* Note section */
533	    		UPRINTF ("ELF PT_NOTE section\n");
534			break;
535	  	case PT_SHLIB:	/* Shared lib section  */
536	    		UPRINTF ("ELF PT_SHLIB section\n");
537			break;
538		case PT_PHDR: 	/* Program header table info */
539	    		UPRINTF ("ELF PT_PHDR section <%x>\n", phdr[i].p_vaddr);
540			proghdr = phdr[i].p_vaddr;
541			break;
542		default:
543	    		UPRINTF ("ELF %d section ??\n", phdr[i].p_type);
544		}
545	}
546
547	vmspace->vm_tsize = text_size >> PAGE_SHIFT;
548	vmspace->vm_taddr = (caddr_t)(uintptr_t)text_addr;
549	vmspace->vm_dsize = data_size >> PAGE_SHIFT;
550	vmspace->vm_daddr = (caddr_t)(uintptr_t)data_addr;
551
552	addr = 2L*MAXDSIZ; /* May depend on OS type XXX */
553
554	imgp->entry_addr = entry;
555
556	/*
557	 * So which kind (brand) of ELF binary do we have at hand
558	 * FreeBSD, Linux, SVR4 or something else ??
559	 * If its has a interpreter section try that first
560	 */
561        if (interp) {
562                for (i=0; i<MAX_BRANDS; i++) {
563                        if (elf_brand_list[i] != NULL) {
564                                if (!strcmp(interp, elf_brand_list[i]->interp_path)) {
565                                        imgp->proc->p_sysent =
566                                                elf_brand_list[i]->sysvec;
567                                        strcpy(path, elf_brand_list[i]->emul_path);
568                                        strcat(path, elf_brand_list[i]->interp_path);
569                                        UPRINTF("interpreter=<%s> %s\n",
570                                                elf_brand_list[i]->interp_path,
571                                                elf_brand_list[i]->emul_path);
572                                        break;
573                                }
574                        }
575                }
576        }
577
578	/*
579	 * If there is no interpreter, or recognition of it
580	 * failed, se if the binary is branded.
581	 */
582	if (!interp || i == MAX_BRANDS) {
583		brand = (char *)&(hdr->e_ident[EI_BRAND]);
584		for (i=0; i<MAX_BRANDS; i++) {
585			if (elf_brand_list[i] != NULL) {
586				if (!strcmp(brand, elf_brand_list[i]->brand)) {
587					imgp->proc->p_sysent = elf_brand_list[i]->sysvec;
588					if (interp) {
589						strcpy(path, elf_brand_list[i]->emul_path);
590						strcat(path, elf_brand_list[i]->interp_path);
591						UPRINTF("interpreter=<%s> %s\n",
592						elf_brand_list[i]->interp_path,
593						elf_brand_list[i]->emul_path);
594					}
595					break;
596				}
597			}
598		}
599	}
600	if (i == MAX_BRANDS) {
601#ifndef __alpha__
602		uprintf("ELF binary type not known\n");
603		error = ENOEXEC;
604		goto fail;
605#else
606		i = 0;		/* assume freebsd */
607		imgp->proc->p_sysent = elf_brand_list[i]->sysvec;
608		if (interp) {
609			strcpy(path, elf_brand_list[i]->emul_path);
610			strcat(path, elf_brand_list[i]->interp_path);
611			UPRINTF("interpreter=<%s> %s\n",
612				elf_brand_list[i]->interp_path,
613				elf_brand_list[i]->emul_path);
614		}
615#endif
616	}
617	if (interp) {
618                if (error = elf_load_file(imgp->proc,
619                                          path,
620                                          &addr,        /* XXX */
621                                          &imgp->entry_addr)) {
622                        uprintf("ELF interpreter %s not found\n", path);
623                        goto fail;
624                }
625	}
626
627	UPRINTF("Executing %s binary\n", elf_brand_list[i]->brand);
628
629	/*
630	 * Construct auxargs table (used by the fixup routine)
631	 */
632	elf_auxargs = malloc(sizeof(Elf_Auxargs), M_TEMP, M_WAITOK);
633	elf_auxargs->execfd = -1;
634	elf_auxargs->phdr = proghdr;
635	elf_auxargs->phent = hdr->e_phentsize;
636	elf_auxargs->phnum = hdr->e_phnum;
637	elf_auxargs->pagesz = PAGE_SIZE;
638	elf_auxargs->base = addr;
639	elf_auxargs->flags = 0;
640	elf_auxargs->entry = entry;
641	elf_auxargs->trace = elf_trace;
642
643	imgp->auxargs = elf_auxargs;
644	imgp->interpreted = 0;
645
646	/* don't allow modifying the file while we run it */
647	imgp->vp->v_flag |= VTEXT;
648
649fail:
650	return error;
651}
652
653static int
654elf_freebsd_fixup(long **stack_base, struct image_params *imgp)
655{
656	Elf_Auxargs *args = (Elf_Auxargs *)imgp->auxargs;
657	long *pos;
658
659	pos = *stack_base + (imgp->argc + imgp->envc + 2);
660
661	if (args->trace) {
662		AUXARGS_ENTRY(pos, AT_DEBUG, 1);
663	}
664	if (args->execfd != -1) {
665		AUXARGS_ENTRY(pos, AT_EXECFD, args->execfd);
666	}
667	AUXARGS_ENTRY(pos, AT_PHDR, args->phdr);
668	AUXARGS_ENTRY(pos, AT_PHENT, args->phent);
669	AUXARGS_ENTRY(pos, AT_PHNUM, args->phnum);
670	AUXARGS_ENTRY(pos, AT_PAGESZ, args->pagesz);
671	AUXARGS_ENTRY(pos, AT_FLAGS, args->flags);
672	AUXARGS_ENTRY(pos, AT_ENTRY, args->entry);
673	AUXARGS_ENTRY(pos, AT_BASE, args->base);
674	AUXARGS_ENTRY(pos, AT_NULL, 0);
675
676	free(imgp->auxargs, M_TEMP);
677	imgp->auxargs = NULL;
678
679	(*stack_base)--;
680	**stack_base = (long)imgp->argc;
681	return 0;
682}
683
684/*
685 * Code for generating ELF core dumps.
686 */
687
688typedef void (*segment_callback) __P((vm_map_entry_t, void *));
689
690/* Closure for cb_put_phdr(). */
691struct phdr_closure {
692	Elf_Phdr *phdr;		/* Program header to fill in */
693	Elf_Off offset;		/* Offset of segment in core file */
694};
695
/*
 * Totals accumulated by cb_size_segment() over the segments visited.
 */
struct sseg_closure {
	int	count;		/* Number of segments seen */
	size_t	size;		/* Sum of their sizes, in bytes */
};
701
702/* Closure for cb_write_segment(). */
703struct wseg_closure {
704	struct proc *p;
705	struct vnode *vp;
706	struct ucred *cred;
707	off_t offset;		/* Position in file at which to write. */
708	int error;
709};
710
711static void cb_put_phdr __P((vm_map_entry_t, void *));
712static void cb_size_segment __P((vm_map_entry_t, void *));
713static void cb_write_segment __P((vm_map_entry_t, void *));
714static void each_writable_segment __P((struct proc *, segment_callback,
715    void *));
716static int elf_corehdr __P((struct proc *, struct vnode *, struct ucred *,
717    int, size_t));
718static void elf_puthdr __P((struct proc *, void *, size_t *,
719    const prstatus_t *, const prfpregset_t *, const prpsinfo_t *, int));
720static void elf_putnote __P((void *, size_t *, const char *, int,
721    const void *, size_t));
722
723extern int osreldate;
724
725int
726elf_coredump(p)
727	register struct proc *p;
728{
729	register struct vnode *vp;
730	register struct ucred *cred = p->p_cred->pc_ucred;
731	register struct vmspace *vm = p->p_vmspace;
732	struct nameidata nd;
733	struct vattr vattr;
734	int error, error1;
735	char *name;			/* name of corefile */
736	struct sseg_closure seginfo;
737	size_t hdrsize;
738
739	STOPEVENT(p, S_CORE, 0);
740
741	if (sugid_coredump == 0 && p->p_flag & P_SUGID)
742		return (EFAULT);
743
744	/* Size the program segments. */
745	seginfo.count = 0;
746	seginfo.size = 0;
747	each_writable_segment(p, cb_size_segment, &seginfo);
748
749	/*
750	 * Calculate the size of the core file header area by making
751	 * a dry run of generating it.  Nothing is written, but the
752	 * size is calculated.
753	 */
754	hdrsize = 0;
755	elf_puthdr((struct proc *)NULL, (void *)NULL, &hdrsize,
756	    (const prstatus_t *)NULL, (const prfpregset_t *)NULL,
757	    (const prpsinfo_t *)NULL, seginfo.count);
758
759	if (hdrsize + seginfo.size >= p->p_rlimit[RLIMIT_CORE].rlim_cur)
760		return (EFAULT);
761	name = expand_name(p->p_comm, p->p_ucred->cr_uid, p->p_pid);
762	if (name == NULL)
763		return (EFAULT);	/* XXX -- not the best error */
764
765	NDINIT(&nd, LOOKUP, FOLLOW, UIO_SYSSPACE, name, p);
766	error = vn_open(&nd, O_CREAT | FWRITE, S_IRUSR | S_IWUSR);
767	free(name, M_TEMP);
768	if (error)
769		return (error);
770	vp = nd.ni_vp;
771
772	/* Don't dump to non-regular files or files with links. */
773	if (vp->v_type != VREG ||
774	    VOP_GETATTR(vp, &vattr, cred, p) || vattr.va_nlink != 1) {
775		error = EFAULT;
776		goto out;
777	}
778	VATTR_NULL(&vattr);
779	vattr.va_size = 0;
780	VOP_LEASE(vp, p, cred, LEASE_WRITE);
781	VOP_SETATTR(vp, &vattr, cred, p);
782	p->p_acflag |= ACORE;
783	error = elf_corehdr(p, vp, cred, seginfo.count, hdrsize);
784	if (error == 0) {
785		struct wseg_closure wsc;
786
787		wsc.p = p;
788		wsc.vp = vp;
789		wsc.cred = cred;
790		wsc.offset = hdrsize;
791		wsc.error = 0;
792
793		each_writable_segment(p, cb_write_segment, &wsc);
794		error = wsc.error;
795	}
796
797out:
798	VOP_UNLOCK(vp, 0, p);
799	error1 = vn_close(vp, FWRITE, cred, p);
800	if (error == 0)
801		error = error1;
802	return (error);
803}
804
805/*
806 * A callback for each_writable_segment() to write out the segment's
807 * program header entry.
808 */
809static void
810cb_put_phdr(entry, closure)
811	vm_map_entry_t entry;
812	void *closure;
813{
814	struct phdr_closure *phc = (struct phdr_closure *)closure;
815	Elf_Phdr *phdr = phc->phdr;
816
817	phc->offset = round_page(phc->offset);
818
819	phdr->p_type = PT_LOAD;
820	phdr->p_offset = phc->offset;
821	phdr->p_vaddr = entry->start;
822	phdr->p_paddr = 0;
823	phdr->p_filesz = phdr->p_memsz = entry->end - entry->start;
824	phdr->p_align = PAGE_SIZE;
825	phdr->p_flags = 0;
826	if (entry->protection & VM_PROT_READ)
827		phdr->p_flags |= PF_R;
828	if (entry->protection & VM_PROT_WRITE)
829		phdr->p_flags |= PF_W;
830	if (entry->protection & VM_PROT_EXECUTE)
831		phdr->p_flags |= PF_X;
832
833	phc->offset += phdr->p_filesz;
834	phc->phdr++;
835}
836
837/*
838 * A callback for each_writable_segment() to gather information about
839 * the number of segments and their total size.
840 */
841static void
842cb_size_segment(entry, closure)
843	vm_map_entry_t entry;
844	void *closure;
845{
846	struct sseg_closure *ssc = (struct sseg_closure *)closure;
847
848	ssc->count++;
849	ssc->size += entry->end - entry->start;
850}
851
852/*
853 * A callback for each_writable_segment() to write out the segment contents.
854 */
855static void
856cb_write_segment(entry, closure)
857	vm_map_entry_t entry;
858	void *closure;
859{
860	struct wseg_closure *wsc = (struct wseg_closure *)closure;
861
862	if (wsc->error == 0) {
863		wsc->error = vn_rdwr(UIO_WRITE, wsc->vp, (caddr_t)entry->start,
864		    entry->end - entry->start, wsc->offset, UIO_USERSPACE,
865		    IO_NODELOCKED|IO_UNIT, wsc->cred, (int *)NULL, wsc->p);
866		if (wsc->error == 0)
867			wsc->offset += entry->end - entry->start;
868	}
869}
870
871/*
872 * For each writable segment in the process's memory map, call the given
873 * function with a pointer to the map entry and some arbitrary
874 * caller-supplied data.
875 */
876static void
877each_writable_segment(p, func, closure)
878	struct proc *p;
879	segment_callback func;
880	void *closure;
881{
882	vm_map_t map = &p->p_vmspace->vm_map;
883	vm_map_entry_t entry;
884
885	if (map != &curproc->p_vmspace->vm_map)
886		vm_map_lock_read(map);
887
888	for (entry = map->header.next;  entry != &map->header;
889	    entry = entry->next) {
890		vm_object_t obj;
891		vm_object_t backobj;
892
893		if (entry->eflags & (MAP_ENTRY_IS_A_MAP|MAP_ENTRY_IS_SUB_MAP) ||
894		    (entry->protection & (VM_PROT_READ|VM_PROT_WRITE)) !=
895		    (VM_PROT_READ|VM_PROT_WRITE))
896			continue;
897
898		/* Find the deepest backing object. */
899		backobj = obj = entry->object.vm_object;
900		if (backobj != NULL)
901			while (backobj->backing_object != NULL)
902				backobj = backobj->backing_object;
903
904		/* Ignore memory-mapped devices and such things. */
905		if (backobj->type != OBJT_DEFAULT &&
906		    backobj->type != OBJT_SWAP &&
907		    backobj->type != OBJT_VNODE)
908			continue;
909
910		(*func)(entry, closure);
911	}
912
913	if (map != &curproc->p_vmspace->vm_map)
914		vm_map_unlock_read(map);
915}
916
917/*
918 * Write the core file header to the file, including padding up to
919 * the page boundary.
920 */
921static int
922elf_corehdr(p, vp, cred, numsegs, hdrsize)
923	struct proc *p;
924	struct vnode *vp;
925	struct ucred *cred;
926	int numsegs;
927	size_t hdrsize;
928{
929	struct vmspace *vm = p->p_vmspace;
930	size_t off;
931	prstatus_t status;
932	prfpregset_t fpregset;
933	prpsinfo_t psinfo;
934	void *hdr;
935	int error;
936
937	/* Gather the information for the header. */
938	bzero(&status, sizeof status);
939	status.pr_version = PRSTATUS_VERSION;
940	status.pr_statussz = sizeof(prstatus_t);
941	status.pr_gregsetsz = sizeof(gregset_t);
942	status.pr_fpregsetsz = sizeof(fpregset_t);
943	status.pr_osreldate = osreldate;
944	status.pr_cursig = p->p_sigacts->ps_sig;
945	status.pr_pid = p->p_pid;
946	fill_regs(p, &status.pr_reg);
947
948	fill_fpregs(p, &fpregset);
949
950	bzero(&psinfo, sizeof psinfo);
951	psinfo.pr_version = PRPSINFO_VERSION;
952	psinfo.pr_psinfosz = sizeof(prpsinfo_t);
953	strncpy(psinfo.pr_fname, p->p_comm, MAXCOMLEN);
954	/* XXX - We don't fill in the command line arguments properly yet. */
955	strncpy(psinfo.pr_psargs, p->p_comm, PRARGSZ);
956
957	/* Allocate memory for building the header. */
958	hdr = malloc(hdrsize, M_TEMP, M_WAITOK);
959	if (hdr == NULL)
960		return EINVAL;
961	bzero(hdr, hdrsize);
962
963	/* Fill in the header. */
964	off = 0;
965	elf_puthdr(p, hdr, &off, &status, &fpregset, &psinfo, numsegs);
966
967	/* Write it to the core file. */
968	error = vn_rdwr(UIO_WRITE, vp, hdr, hdrsize, (off_t)0,
969	    UIO_SYSSPACE, IO_NODELOCKED|IO_UNIT, cred, NULL, p);
970
971	free(hdr, M_TEMP);
972	return error;
973}
974
975static void
976elf_puthdr(struct proc *p, void *dst, size_t *off, const prstatus_t *status,
977    const prfpregset_t *fpregset, const prpsinfo_t *psinfo, int numsegs)
978{
979	size_t ehoff;
980	size_t phoff;
981	size_t noteoff;
982	size_t notesz;
983
984	ehoff = *off;
985	*off += sizeof(Elf_Ehdr);
986
987	phoff = *off;
988	*off += (numsegs + 1) * sizeof(Elf_Phdr);
989
990	noteoff = *off;
991	elf_putnote(dst, off, "FreeBSD", NT_PRSTATUS, status,
992	    sizeof *status);
993	elf_putnote(dst, off, "FreeBSD", NT_FPREGSET, fpregset,
994	    sizeof *fpregset);
995	elf_putnote(dst, off, "FreeBSD", NT_PRPSINFO, psinfo,
996	    sizeof *psinfo);
997	notesz = *off - noteoff;
998
999	/* Align up to a page boundary for the program segments. */
1000	*off = round_page(*off);
1001
1002	if (dst != NULL) {
1003		Elf_Ehdr *ehdr;
1004		Elf_Phdr *phdr;
1005		struct phdr_closure phc;
1006
1007		/*
1008		 * Fill in the ELF header.
1009		 */
1010		ehdr = (Elf_Ehdr *)((char *)dst + ehoff);
1011		ehdr->e_ident[EI_MAG0] = ELFMAG0;
1012		ehdr->e_ident[EI_MAG1] = ELFMAG1;
1013		ehdr->e_ident[EI_MAG2] = ELFMAG2;
1014		ehdr->e_ident[EI_MAG3] = ELFMAG3;
1015		ehdr->e_ident[EI_CLASS] = ELF_CLASS;
1016		ehdr->e_ident[EI_DATA] = ELF_DATA;
1017		ehdr->e_ident[EI_VERSION] = EV_CURRENT;
1018		ehdr->e_ident[EI_PAD] = 0;
1019		strncpy(ehdr->e_ident + EI_BRAND, "FreeBSD",
1020		    EI_NIDENT - EI_BRAND);
1021		ehdr->e_type = ET_CORE;
1022		ehdr->e_machine = ELF_ARCH;
1023		ehdr->e_version = EV_CURRENT;
1024		ehdr->e_entry = 0;
1025		ehdr->e_phoff = phoff;
1026		ehdr->e_flags = 0;
1027		ehdr->e_ehsize = sizeof(Elf_Ehdr);
1028		ehdr->e_phentsize = sizeof(Elf_Phdr);
1029		ehdr->e_phnum = numsegs + 1;
1030		ehdr->e_shentsize = sizeof(Elf_Shdr);
1031		ehdr->e_shnum = 0;
1032		ehdr->e_shstrndx = SHN_UNDEF;
1033
1034		/*
1035		 * Fill in the program header entries.
1036		 */
1037		phdr = (Elf_Phdr *)((char *)dst + phoff);
1038
1039		/* The note segement. */
1040		phdr->p_type = PT_NOTE;
1041		phdr->p_offset = noteoff;
1042		phdr->p_vaddr = 0;
1043		phdr->p_paddr = 0;
1044		phdr->p_filesz = notesz;
1045		phdr->p_memsz = 0;
1046		phdr->p_flags = 0;
1047		phdr->p_align = 0;
1048		phdr++;
1049
1050		/* All the writable segments from the program. */
1051		phc.phdr = phdr;
1052		phc.offset = *off;
1053		each_writable_segment(p, cb_put_phdr, &phc);
1054	}
1055}
1056
1057static void
1058elf_putnote(void *dst, size_t *off, const char *name, int type,
1059    const void *desc, size_t descsz)
1060{
1061	Elf_Note note;
1062
1063	note.n_namesz = strlen(name) + 1;
1064	note.n_descsz = descsz;
1065	note.n_type = type;
1066	if (dst != NULL)
1067		bcopy(&note, (char *)dst + *off, sizeof note);
1068	*off += sizeof note;
1069	if (dst != NULL)
1070		bcopy(name, (char *)dst + *off, note.n_namesz);
1071	*off += roundup2(note.n_namesz, sizeof(Elf_Size));
1072	if (dst != NULL)
1073		bcopy(desc, (char *)dst + *off, note.n_descsz);
1074	*off += roundup2(note.n_descsz, sizeof(Elf_Size));
1075}
1076
1077/*
1078 * Tell kern_execve.c about it, with a little help from the linker.
1079 * Since `const' objects end up in the text segment, TEXT_SET is the
1080 * correct directive to use.
1081 */
1082static const struct execsw elf_execsw = {exec_elf_imgact, "ELF"};
1083TEXT_SET(execsw_set, elf_execsw);
1084
1085