imgact_elf.c revision 46803
1/*-
2 * Copyright (c) 1995-1996 Søren Schmidt
3 * Copyright (c) 1996 Peter Wemm
4 * All rights reserved.
5 *
6 * Redistribution and use in source and binary forms, with or without
7 * modification, are permitted provided that the following conditions
8 * are met:
9 * 1. Redistributions of source code must retain the above copyright
10 *    notice, this list of conditions and the following disclaimer
11 *    in this position and unchanged.
12 * 2. Redistributions in binary form must reproduce the above copyright
13 *    notice, this list of conditions and the following disclaimer in the
14 *    documentation and/or other materials provided with the distribution.
15 * 3. The name of the author may not be used to endorse or promote products
16 *    derived from this software without specific prior written permission
17 *
18 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
19 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
20 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
21 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
22 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
23 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
24 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
25 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
26 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
27 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
28 *
29 *	$Id: imgact_elf.c,v 1.55 1999/02/20 23:52:34 jdp Exp $
30 */
31
32#include "opt_rlimit.h"
33
34#include <sys/param.h>
35#include <sys/acct.h>
36#include <sys/exec.h>
37#include <sys/fcntl.h>
38#include <sys/imgact.h>
39#include <sys/imgact_elf.h>
40#include <sys/kernel.h>
41#include <sys/malloc.h>
42#include <sys/mman.h>
43#include <sys/namei.h>
44#include <sys/pioctl.h>
45#include <sys/proc.h>
46#include <sys/procfs.h>
47#include <sys/resourcevar.h>
48#include <sys/signalvar.h>
49#include <sys/stat.h>
50#include <sys/syscall.h>
51#include <sys/sysctl.h>
52#include <sys/sysent.h>
53#include <sys/systm.h>
54#include <sys/vnode.h>
55
56#include <vm/vm.h>
57#include <vm/vm_kern.h>
58#include <vm/vm_param.h>
59#include <vm/pmap.h>
60#include <sys/lock.h>
61#include <vm/vm_map.h>
62#include <vm/vm_object.h>
63#include <vm/vm_prot.h>
64#include <vm/vm_extern.h>
65
66#include <machine/elf.h>
67#include <machine/md_var.h>
68
69__ElfType(Brandinfo);
70__ElfType(Auxargs);
71
72static int elf_check_header __P((const Elf_Ehdr *hdr));
73static int elf_freebsd_fixup __P((long **stack_base,
74    struct image_params *imgp));
75static int elf_load_file __P((struct proc *p, char *file, u_long *addr,
76    u_long *entry));
77static int elf_load_section __P((struct proc *p,
78    struct vmspace *vmspace, struct vnode *vp,
79    vm_offset_t offset, caddr_t vmaddr, size_t memsz, size_t filsz,
80    vm_prot_t prot));
81static int exec_elf_imgact __P((struct image_params *imgp));
82
83static int elf_trace = 0;
84SYSCTL_INT(_debug, OID_AUTO, elf_trace, CTLFLAG_RW, &elf_trace, 0, "");
85
86/*
87 * XXX Maximum length of an ELF brand (sysctl wants a statically-allocated
88 * buffer).
89 */
90#define	MAXBRANDLEN	16
91
92static struct sysentvec elf_freebsd_sysvec = {
93        SYS_MAXSYSCALL,
94        sysent,
95        0,
96        0,
97        0,
98        0,
99        0,
100        0,
101        elf_freebsd_fixup,
102        sendsig,
103        sigcode,
104        &szsigcode,
105        0,
106	"FreeBSD ELF",
107	elf_coredump
108};
109
110static Elf_Brandinfo freebsd_brand_info = {
111						"FreeBSD",
112						"",
113						"/usr/libexec/ld-elf.so.1",
114						&elf_freebsd_sysvec
115					  };
116static Elf_Brandinfo *elf_brand_list[MAX_BRANDS] = {
117							&freebsd_brand_info,
118							NULL, NULL, NULL,
119							NULL, NULL, NULL, NULL
120						    };
121
122int
123elf_insert_brand_entry(Elf_Brandinfo *entry)
124{
125	int i;
126
127	for (i=1; i<MAX_BRANDS; i++) {
128		if (elf_brand_list[i] == NULL) {
129			elf_brand_list[i] = entry;
130			break;
131		}
132	}
133	if (i == MAX_BRANDS)
134		return -1;
135	return 0;
136}
137
138int
139elf_remove_brand_entry(Elf_Brandinfo *entry)
140{
141	int i;
142
143	for (i=1; i<MAX_BRANDS; i++) {
144		if (elf_brand_list[i] == entry) {
145			elf_brand_list[i] = NULL;
146			break;
147		}
148	}
149	if (i == MAX_BRANDS)
150		return -1;
151	return 0;
152}
153
154int
155elf_brand_inuse(Elf_Brandinfo *entry)
156{
157	struct proc *p;
158
159	for (p = allproc.lh_first; p != 0; p = p->p_list.le_next) {
160		if (p->p_sysent == entry->sysvec)
161			return TRUE;
162	}
163
164	return FALSE;
165}
166
167static int
168elf_check_header(const Elf_Ehdr *hdr)
169{
170	if (!IS_ELF(*hdr) ||
171	    hdr->e_ident[EI_CLASS] != ELF_TARG_CLASS ||
172	    hdr->e_ident[EI_DATA] != ELF_TARG_DATA ||
173	    hdr->e_ident[EI_VERSION] != EV_CURRENT)
174		return ENOEXEC;
175
176	if (!ELF_MACHINE_OK(hdr->e_machine))
177		return ENOEXEC;
178
179	if (hdr->e_version != ELF_TARG_VER)
180		return ENOEXEC;
181
182	return 0;
183}
184
185static int
186elf_load_section(struct proc *p, struct vmspace *vmspace, struct vnode *vp, vm_offset_t offset, caddr_t vmaddr, size_t memsz, size_t filsz, vm_prot_t prot)
187{
188	size_t map_len;
189	vm_offset_t map_addr;
190	int error, rv;
191	size_t copy_len;
192	vm_object_t object;
193	vm_offset_t file_addr;
194	vm_offset_t data_buf = 0;
195
196	object = vp->v_object;
197	error = 0;
198
199	map_addr = trunc_page((vm_offset_t)vmaddr);
200	file_addr = trunc_page(offset);
201
202	/*
203	 * We have two choices.  We can either clear the data in the last page
204	 * of an oversized mapping, or we can start the anon mapping a page
205	 * early and copy the initialized data into that first page.  We
206	 * choose the second..
207	 */
208	if (memsz > filsz)
209		map_len = trunc_page(offset+filsz) - file_addr;
210	else
211		map_len = round_page(offset+filsz) - file_addr;
212
213	if (map_len != 0) {
214		vm_object_reference(object);
215		vm_map_lock(&vmspace->vm_map);
216		rv = vm_map_insert(&vmspace->vm_map,
217				      object,
218				      file_addr,	/* file offset */
219				      map_addr,		/* virtual start */
220				      map_addr + map_len,/* virtual end */
221				      prot,
222				      VM_PROT_ALL,
223				      MAP_COPY_NEEDED | MAP_COPY_ON_WRITE);
224		vm_map_unlock(&vmspace->vm_map);
225		if (rv != KERN_SUCCESS)
226			return EINVAL;
227
228		/* prefault the page tables */
229		pmap_object_init_pt(vmspace_pmap(vmspace),
230				    map_addr,
231				    object,
232				    (vm_pindex_t) OFF_TO_IDX(file_addr),
233				    map_len,
234				    0);
235
236		/* we can stop now if we've covered it all */
237		if (memsz == filsz)
238			return 0;
239	}
240
241
242	/*
243	 * We have to get the remaining bit of the file into the first part
244	 * of the oversized map segment.  This is normally because the .data
245	 * segment in the file is extended to provide bss.  It's a neat idea
246	 * to try and save a page, but it's a pain in the behind to implement.
247	 */
248	copy_len = (offset + filsz) - trunc_page(offset + filsz);
249	map_addr = trunc_page((vm_offset_t)vmaddr + filsz);
250	map_len = round_page((vm_offset_t)vmaddr + memsz) - map_addr;
251
252	/* This had damn well better be true! */
253        if (map_len != 0) {
254		vm_map_lock(&vmspace->vm_map);
255		rv = vm_map_insert(&vmspace->vm_map, NULL, 0,
256					map_addr, map_addr + map_len,
257					VM_PROT_ALL, VM_PROT_ALL, 0);
258		vm_map_unlock(&vmspace->vm_map);
259		if (rv != KERN_SUCCESS)
260			return EINVAL;
261	}
262
263	if (copy_len != 0) {
264		vm_object_reference(object);
265		rv = vm_map_find(exec_map,
266				 object,
267				 trunc_page(offset + filsz),
268				 &data_buf,
269				 PAGE_SIZE,
270				 TRUE,
271				 VM_PROT_READ,
272				 VM_PROT_ALL,
273				 MAP_COPY_ON_WRITE | MAP_COPY_NEEDED);
274		if (rv != KERN_SUCCESS) {
275			vm_object_deallocate(object);
276			return EINVAL;
277		}
278		pmap_object_init_pt(exec_map->pmap, data_buf, object,
279			(vm_pindex_t) OFF_TO_IDX(trunc_page(offset + filsz)),
280			PAGE_SIZE, 1);
281
282		/* send the page fragment to user space */
283		error = copyout((caddr_t)data_buf, (caddr_t)map_addr, copy_len);
284		vm_map_remove(exec_map, data_buf, data_buf + PAGE_SIZE);
285		if (error)
286			return (error);
287	}
288
289	/*
290	 * set it to the specified protection
291	 */
292	vm_map_protect(&vmspace->vm_map, map_addr, map_addr + map_len,  prot,
293		       FALSE);
294
295	return error;
296}
297
298/*
299 * Load the file "file" into memory.  It may be either a shared object
300 * or an executable.
301 *
302 * The "addr" reference parameter is in/out.  On entry, it specifies
303 * the address where a shared object should be loaded.  If the file is
304 * an executable, this value is ignored.  On exit, "addr" specifies
305 * where the file was actually loaded.
306 *
307 * The "entry" reference parameter is out only.  On exit, it specifies
308 * the entry point for the loaded file.
309 */
310static int
311elf_load_file(struct proc *p, char *file, u_long *addr, u_long *entry)
312{
313	const Elf_Ehdr *hdr = NULL;
314	const Elf_Phdr *phdr = NULL;
315	struct nameidata nd;
316	struct vmspace *vmspace = p->p_vmspace;
317	struct vattr attr;
318	struct image_params image_params, *imgp;
319	vm_prot_t prot;
320	u_long rbase;
321	u_long base_addr = 0;
322	int error, i, numsegs;
323
324	imgp = &image_params;
325	/*
326	 * Initialize part of the common data
327	 */
328	imgp->proc = p;
329	imgp->uap = NULL;
330	imgp->attr = &attr;
331	imgp->firstpage = NULL;
332	imgp->image_header = (char *)kmem_alloc_wait(exec_map, PAGE_SIZE);
333
334	if (imgp->image_header == NULL) {
335		nd.ni_vp = NULL;
336		error = ENOMEM;
337		goto fail;
338	}
339
340        NDINIT(&nd, LOOKUP, LOCKLEAF|FOLLOW, UIO_SYSSPACE, file, p);
341
342	if ((error = namei(&nd)) != 0) {
343		nd.ni_vp = NULL;
344		goto fail;
345	}
346
347	imgp->vp = nd.ni_vp;
348
349	/*
350	 * Check permissions, modes, uid, etc on the file, and "open" it.
351	 */
352	error = exec_check_permissions(imgp);
353	if (error) {
354		VOP_UNLOCK(nd.ni_vp, 0, p);
355		goto fail;
356	}
357
358	error = exec_map_first_page(imgp);
359	VOP_UNLOCK(nd.ni_vp, 0, p);
360	if (error)
361                goto fail;
362
363	hdr = (const Elf_Ehdr *)imgp->image_header;
364	if ((error = elf_check_header(hdr)) != 0)
365		goto fail;
366	if (hdr->e_type == ET_DYN)
367		rbase = *addr;
368	else if (hdr->e_type == ET_EXEC)
369		rbase = 0;
370	else {
371		error = ENOEXEC;
372		goto fail;
373	}
374
375	/* Only support headers that fit within first page for now */
376	if ((hdr->e_phoff > PAGE_SIZE) ||
377	    (hdr->e_phoff + hdr->e_phentsize * hdr->e_phnum) > PAGE_SIZE) {
378		error = ENOEXEC;
379		goto fail;
380	}
381
382	phdr = (const Elf_Phdr *)(imgp->image_header + hdr->e_phoff);
383
384	for (i = 0, numsegs = 0; i < hdr->e_phnum; i++) {
385		if (phdr[i].p_type == PT_LOAD) {	/* Loadable segment */
386			prot = 0;
387			if (phdr[i].p_flags & PF_X)
388  				prot |= VM_PROT_EXECUTE;
389			if (phdr[i].p_flags & PF_W)
390  				prot |= VM_PROT_WRITE;
391			if (phdr[i].p_flags & PF_R)
392  				prot |= VM_PROT_READ;
393
394			if ((error = elf_load_section(p, vmspace, nd.ni_vp,
395  						     phdr[i].p_offset,
396  						     (caddr_t)phdr[i].p_vaddr +
397							rbase,
398  						     phdr[i].p_memsz,
399  						     phdr[i].p_filesz, prot)) != 0)
400				goto fail;
401			/*
402			 * Establish the base address if this is the
403			 * first segment.
404			 */
405			if (numsegs == 0)
406  				base_addr = trunc_page(phdr[i].p_vaddr + rbase);
407			numsegs++;
408		}
409	}
410	*addr = base_addr;
411	*entry=(unsigned long)hdr->e_entry + rbase;
412
413fail:
414	if (imgp->firstpage)
415		exec_unmap_first_page(imgp);
416	if (imgp->image_header)
417		kmem_free_wakeup(exec_map, (vm_offset_t)imgp->image_header,
418			PAGE_SIZE);
419	if (nd.ni_vp)
420		vrele(nd.ni_vp);
421
422	return error;
423}
424
425static char fallback_elf_brand[MAXBRANDLEN+1] = { "none" };
426SYSCTL_STRING(_kern, OID_AUTO, fallback_elf_brand, CTLFLAG_RW,
427		fallback_elf_brand, sizeof(fallback_elf_brand),
428		"ELF brand of last resort");
429
430static int
431exec_elf_imgact(struct image_params *imgp)
432{
433	const Elf_Ehdr *hdr = (const Elf_Ehdr *) imgp->image_header;
434	const Elf_Phdr *phdr;
435	Elf_Auxargs *elf_auxargs = NULL;
436	struct vmspace *vmspace;
437	vm_prot_t prot;
438	u_long text_size = 0, data_size = 0;
439	u_long text_addr = 0, data_addr = 0;
440	u_long addr, entry = 0, proghdr = 0;
441	int error, i;
442	const char *interp = NULL;
443	Elf_Brandinfo *brand_info;
444	const char *brand;
445	char path[MAXPATHLEN];
446
447	/*
448	 * Do we have a valid ELF header ?
449	 */
450	if (elf_check_header(hdr) != 0 || hdr->e_type != ET_EXEC)
451		return -1;
452
453	/*
454	 * From here on down, we return an errno, not -1, as we've
455	 * detected an ELF file.
456	 */
457
458	if ((hdr->e_phoff > PAGE_SIZE) ||
459	    (hdr->e_phoff + hdr->e_phentsize * hdr->e_phnum) > PAGE_SIZE) {
460		/* Only support headers in first page for now */
461		return ENOEXEC;
462	}
463	phdr = (const Elf_Phdr*)(imgp->image_header + hdr->e_phoff);
464
465	/*
466	 * From this point on, we may have resources that need to be freed.
467	 */
468	if ((error = exec_extract_strings(imgp)) != 0)
469		goto fail;
470
471	exec_new_vmspace(imgp);
472
473	vmspace = imgp->proc->p_vmspace;
474
475	for (i = 0; i < hdr->e_phnum; i++) {
476		switch(phdr[i].p_type) {
477
478		case PT_LOAD:	/* Loadable segment */
479			prot = 0;
480			if (phdr[i].p_flags & PF_X)
481  				prot |= VM_PROT_EXECUTE;
482			if (phdr[i].p_flags & PF_W)
483  				prot |= VM_PROT_WRITE;
484			if (phdr[i].p_flags & PF_R)
485  				prot |= VM_PROT_READ;
486
487			if ((error = elf_load_section(imgp->proc,
488						     vmspace, imgp->vp,
489  						     phdr[i].p_offset,
490  						     (caddr_t)phdr[i].p_vaddr,
491  						     phdr[i].p_memsz,
492  						     phdr[i].p_filesz, prot)) != 0)
493  				goto fail;
494
495			/*
496			 * Is this .text or .data ??
497			 *
498			 * We only handle one each of those yet XXX
499			 */
500			if (hdr->e_entry >= phdr[i].p_vaddr &&
501			hdr->e_entry <(phdr[i].p_vaddr+phdr[i].p_memsz)) {
502  				text_addr = trunc_page(phdr[i].p_vaddr);
503  				text_size = round_page(phdr[i].p_memsz +
504						       phdr[i].p_vaddr -
505						       text_addr);
506				entry = (u_long)hdr->e_entry;
507			} else {
508  				data_addr = trunc_page(phdr[i].p_vaddr);
509  				data_size = round_page(phdr[i].p_memsz +
510						       phdr[i].p_vaddr -
511						       data_addr);
512			}
513			break;
514	  	case PT_INTERP:	/* Path to interpreter */
515			if (phdr[i].p_filesz > MAXPATHLEN ||
516			    phdr[i].p_offset + phdr[i].p_filesz > PAGE_SIZE) {
517				error = ENOEXEC;
518				goto fail;
519			}
520			interp = imgp->image_header + phdr[i].p_offset;
521			break;
522		case PT_PHDR: 	/* Program header table info */
523			proghdr = phdr[i].p_vaddr;
524			break;
525		default:
526			break;
527		}
528	}
529
530	vmspace->vm_tsize = text_size >> PAGE_SHIFT;
531	vmspace->vm_taddr = (caddr_t)(uintptr_t)text_addr;
532	vmspace->vm_dsize = data_size >> PAGE_SHIFT;
533	vmspace->vm_daddr = (caddr_t)(uintptr_t)data_addr;
534
535	addr = ELF_RTLD_ADDR(vmspace);
536
537	imgp->entry_addr = entry;
538
539	/* If the executable has a brand, search for it in the brand list. */
540	brand_info = NULL;
541	brand = (const char *)&hdr->e_ident[EI_BRAND];
542	if (brand[0] != '\0') {
543		for (i = 0;  i < MAX_BRANDS;  i++) {
544			Elf_Brandinfo *bi = elf_brand_list[i];
545
546			if (bi != NULL && strcmp(brand, bi->brand) == 0) {
547				brand_info = bi;
548				break;
549			}
550		}
551	}
552
553	/* Lacking a known brand, search for a recognized interpreter. */
554	if (brand_info == NULL && interp != NULL) {
555		for (i = 0;  i < MAX_BRANDS;  i++) {
556			Elf_Brandinfo *bi = elf_brand_list[i];
557
558			if (bi != NULL &&
559			    strcmp(interp, bi->interp_path) == 0) {
560				brand_info = bi;
561				break;
562			}
563		}
564	}
565
566	/* Lacking a recognized interpreter, try the default brand */
567	if (brand_info == NULL && fallback_elf_brand[0] != '\0') {
568		for (i = 0; i < MAX_BRANDS; i++) {
569			Elf_Brandinfo *bi = elf_brand_list[i];
570
571			if (bi != NULL
572			    && strcmp(fallback_elf_brand, bi->brand) == 0) {
573				brand_info = bi;
574				break;
575			}
576		}
577	}
578
579#ifdef __alpha__
580	/* XXX - Assume FreeBSD on the alpha. */
581	if (brand_info == NULL)
582		brand_info = &freebsd_brand_info;
583#endif
584
585	if (brand_info == NULL) {
586		if (brand[0] == 0)
587			uprintf("ELF binary type not known."
588			    "  Use \"brandelf\" to brand it.\n");
589		else
590			uprintf("ELF binary type \"%.*s\" not known.\n",
591			    EI_NIDENT - EI_BRAND, brand);
592		error = ENOEXEC;
593		goto fail;
594	}
595
596	imgp->proc->p_sysent = brand_info->sysvec;
597	if (interp != NULL) {
598		snprintf(path, sizeof(path), "%s%s",
599		    brand_info->emul_path, interp);
600                if ((error = elf_load_file(imgp->proc, path, &addr,
601		    &imgp->entry_addr)) != 0) {
602                        uprintf("ELF interpreter %s not found\n", path);
603                        goto fail;
604                }
605	}
606
607	/*
608	 * Construct auxargs table (used by the fixup routine)
609	 */
610	elf_auxargs = malloc(sizeof(Elf_Auxargs), M_TEMP, M_WAITOK);
611	elf_auxargs->execfd = -1;
612	elf_auxargs->phdr = proghdr;
613	elf_auxargs->phent = hdr->e_phentsize;
614	elf_auxargs->phnum = hdr->e_phnum;
615	elf_auxargs->pagesz = PAGE_SIZE;
616	elf_auxargs->base = addr;
617	elf_auxargs->flags = 0;
618	elf_auxargs->entry = entry;
619	elf_auxargs->trace = elf_trace;
620
621	imgp->auxargs = elf_auxargs;
622	imgp->interpreted = 0;
623
624	/* don't allow modifying the file while we run it */
625	imgp->vp->v_flag |= VTEXT;
626
627fail:
628	return error;
629}
630
631static int
632elf_freebsd_fixup(long **stack_base, struct image_params *imgp)
633{
634	Elf_Auxargs *args = (Elf_Auxargs *)imgp->auxargs;
635	long *pos;
636
637	pos = *stack_base + (imgp->argc + imgp->envc + 2);
638
639	if (args->trace) {
640		AUXARGS_ENTRY(pos, AT_DEBUG, 1);
641	}
642	if (args->execfd != -1) {
643		AUXARGS_ENTRY(pos, AT_EXECFD, args->execfd);
644	}
645	AUXARGS_ENTRY(pos, AT_PHDR, args->phdr);
646	AUXARGS_ENTRY(pos, AT_PHENT, args->phent);
647	AUXARGS_ENTRY(pos, AT_PHNUM, args->phnum);
648	AUXARGS_ENTRY(pos, AT_PAGESZ, args->pagesz);
649	AUXARGS_ENTRY(pos, AT_FLAGS, args->flags);
650	AUXARGS_ENTRY(pos, AT_ENTRY, args->entry);
651	AUXARGS_ENTRY(pos, AT_BASE, args->base);
652	AUXARGS_ENTRY(pos, AT_NULL, 0);
653
654	free(imgp->auxargs, M_TEMP);
655	imgp->auxargs = NULL;
656
657	(*stack_base)--;
658	suword(*stack_base, (long) imgp->argc);
659	return 0;
660}
661
662/*
663 * Code for generating ELF core dumps.
664 */
665
666typedef void (*segment_callback) __P((vm_map_entry_t, void *));
667
668/* Closure for cb_put_phdr(). */
669struct phdr_closure {
670	Elf_Phdr *phdr;		/* Program header to fill in */
671	Elf_Off offset;		/* Offset of segment in core file */
672};
673
674/* Closure for cb_size_segment(). */
675struct sseg_closure {
676	int count;		/* Count of writable segments. */
677	size_t size;		/* Total size of all writable segments. */
678};
679
680static void cb_put_phdr __P((vm_map_entry_t, void *));
681static void cb_size_segment __P((vm_map_entry_t, void *));
682static void each_writable_segment __P((struct proc *, segment_callback,
683    void *));
684static int elf_corehdr __P((struct proc *, struct vnode *, struct ucred *,
685    int, void *, size_t));
686static void elf_puthdr __P((struct proc *, void *, size_t *,
687    const prstatus_t *, const prfpregset_t *, const prpsinfo_t *, int));
688static void elf_putnote __P((void *, size_t *, const char *, int,
689    const void *, size_t));
690
691extern int osreldate;
692
693int
694elf_coredump(p)
695	register struct proc *p;
696{
697	register struct vnode *vp;
698	register struct ucred *cred = p->p_cred->pc_ucred;
699	struct nameidata nd;
700	struct vattr vattr;
701	int error, error1;
702	char *name;			/* name of corefile */
703	struct sseg_closure seginfo;
704	void *hdr;
705	size_t hdrsize;
706
707	STOPEVENT(p, S_CORE, 0);
708
709	if (sugid_coredump == 0 && p->p_flag & P_SUGID)
710		return (EFAULT);
711
712	/* Size the program segments. */
713	seginfo.count = 0;
714	seginfo.size = 0;
715	each_writable_segment(p, cb_size_segment, &seginfo);
716
717	/*
718	 * Calculate the size of the core file header area by making
719	 * a dry run of generating it.  Nothing is written, but the
720	 * size is calculated.
721	 */
722	hdrsize = 0;
723	elf_puthdr((struct proc *)NULL, (void *)NULL, &hdrsize,
724	    (const prstatus_t *)NULL, (const prfpregset_t *)NULL,
725	    (const prpsinfo_t *)NULL, seginfo.count);
726
727	if (hdrsize + seginfo.size >= p->p_rlimit[RLIMIT_CORE].rlim_cur)
728		return (EFAULT);
729	name = expand_name(p->p_comm, p->p_ucred->cr_uid, p->p_pid);
730	if (name == NULL)
731		return (EFAULT);	/* XXX -- not the best error */
732
733	NDINIT(&nd, LOOKUP, FOLLOW, UIO_SYSSPACE, name, p);
734	error = vn_open(&nd, O_CREAT | FWRITE, S_IRUSR | S_IWUSR);
735	free(name, M_TEMP);
736	if (error)
737		return (error);
738	vp = nd.ni_vp;
739
740	/* Don't dump to non-regular files or files with links. */
741	if (vp->v_type != VREG ||
742	    VOP_GETATTR(vp, &vattr, cred, p) || vattr.va_nlink != 1) {
743		error = EFAULT;
744		goto out;
745	}
746	VATTR_NULL(&vattr);
747	vattr.va_size = 0;
748	VOP_LEASE(vp, p, cred, LEASE_WRITE);
749	VOP_SETATTR(vp, &vattr, cred, p);
750	p->p_acflag |= ACORE;
751
752
753	/*
754	 * Allocate memory for building the header, fill it up,
755	 * and write it out.
756	 */
757	hdr = malloc(hdrsize, M_TEMP, M_WAITOK);
758	if (hdr == NULL) {
759		error = EINVAL;
760		goto out;
761	}
762	error = elf_corehdr(p, vp, cred, seginfo.count, hdr, hdrsize);
763
764	/* Write the contents of all of the writable segments. */
765	if (error == 0) {
766		Elf_Phdr *php;
767		off_t offset;
768		int i;
769
770		php = (Elf_Phdr *)((char *)hdr + sizeof(Elf_Ehdr)) + 1;
771		offset = hdrsize;
772		for (i = 0;  i < seginfo.count;  i++) {
773			error = vn_rdwr(UIO_WRITE, vp, (caddr_t)php->p_vaddr,
774			    php->p_filesz, offset, UIO_USERSPACE,
775			    IO_NODELOCKED|IO_UNIT, cred, (int *)NULL, p);
776			if (error != 0)
777				break;
778			offset += php->p_filesz;
779			php++;
780		}
781	}
782	free(hdr, M_TEMP);
783
784out:
785	VOP_UNLOCK(vp, 0, p);
786	error1 = vn_close(vp, FWRITE, cred, p);
787	if (error == 0)
788		error = error1;
789	return (error);
790}
791
792/*
793 * A callback for each_writable_segment() to write out the segment's
794 * program header entry.
795 */
796static void
797cb_put_phdr(entry, closure)
798	vm_map_entry_t entry;
799	void *closure;
800{
801	struct phdr_closure *phc = (struct phdr_closure *)closure;
802	Elf_Phdr *phdr = phc->phdr;
803
804	phc->offset = round_page(phc->offset);
805
806	phdr->p_type = PT_LOAD;
807	phdr->p_offset = phc->offset;
808	phdr->p_vaddr = entry->start;
809	phdr->p_paddr = 0;
810	phdr->p_filesz = phdr->p_memsz = entry->end - entry->start;
811	phdr->p_align = PAGE_SIZE;
812	phdr->p_flags = 0;
813	if (entry->protection & VM_PROT_READ)
814		phdr->p_flags |= PF_R;
815	if (entry->protection & VM_PROT_WRITE)
816		phdr->p_flags |= PF_W;
817	if (entry->protection & VM_PROT_EXECUTE)
818		phdr->p_flags |= PF_X;
819
820	phc->offset += phdr->p_filesz;
821	phc->phdr++;
822}
823
824/*
825 * A callback for each_writable_segment() to gather information about
826 * the number of segments and their total size.
827 */
828static void
829cb_size_segment(entry, closure)
830	vm_map_entry_t entry;
831	void *closure;
832{
833	struct sseg_closure *ssc = (struct sseg_closure *)closure;
834
835	ssc->count++;
836	ssc->size += entry->end - entry->start;
837}
838
839/*
840 * For each writable segment in the process's memory map, call the given
841 * function with a pointer to the map entry and some arbitrary
842 * caller-supplied data.
843 */
844static void
845each_writable_segment(p, func, closure)
846	struct proc *p;
847	segment_callback func;
848	void *closure;
849{
850	vm_map_t map = &p->p_vmspace->vm_map;
851	vm_map_entry_t entry;
852
853	for (entry = map->header.next;  entry != &map->header;
854	    entry = entry->next) {
855		vm_object_t obj;
856
857		if ((entry->eflags & MAP_ENTRY_IS_SUB_MAP) ||
858		    (entry->protection & (VM_PROT_READ|VM_PROT_WRITE)) !=
859		    (VM_PROT_READ|VM_PROT_WRITE))
860			continue;
861
862		if ((obj = entry->object.vm_object) == NULL)
863			continue;
864
865		/* Find the deepest backing object. */
866		while (obj->backing_object != NULL)
867			obj = obj->backing_object;
868
869		/* Ignore memory-mapped devices and such things. */
870		if (obj->type != OBJT_DEFAULT &&
871		    obj->type != OBJT_SWAP &&
872		    obj->type != OBJT_VNODE)
873			continue;
874
875		(*func)(entry, closure);
876	}
877}
878
879/*
880 * Write the core file header to the file, including padding up to
881 * the page boundary.
882 */
883static int
884elf_corehdr(p, vp, cred, numsegs, hdr, hdrsize)
885	struct proc *p;
886	struct vnode *vp;
887	struct ucred *cred;
888	int numsegs;
889	size_t hdrsize;
890	void *hdr;
891{
892	size_t off;
893	prstatus_t status;
894	prfpregset_t fpregset;
895	prpsinfo_t psinfo;
896
897	/* Gather the information for the header. */
898	bzero(&status, sizeof status);
899	status.pr_version = PRSTATUS_VERSION;
900	status.pr_statussz = sizeof(prstatus_t);
901	status.pr_gregsetsz = sizeof(gregset_t);
902	status.pr_fpregsetsz = sizeof(fpregset_t);
903	status.pr_osreldate = osreldate;
904	status.pr_cursig = p->p_sig;
905	status.pr_pid = p->p_pid;
906	fill_regs(p, &status.pr_reg);
907
908	fill_fpregs(p, &fpregset);
909
910	bzero(&psinfo, sizeof psinfo);
911	psinfo.pr_version = PRPSINFO_VERSION;
912	psinfo.pr_psinfosz = sizeof(prpsinfo_t);
913	strncpy(psinfo.pr_fname, p->p_comm, MAXCOMLEN);
914	/* XXX - We don't fill in the command line arguments properly yet. */
915	strncpy(psinfo.pr_psargs, p->p_comm, PRARGSZ);
916
917	/* Fill in the header. */
918	bzero(hdr, hdrsize);
919	off = 0;
920	elf_puthdr(p, hdr, &off, &status, &fpregset, &psinfo, numsegs);
921
922	/* Write it to the core file. */
923	return vn_rdwr(UIO_WRITE, vp, hdr, hdrsize, (off_t)0,
924	    UIO_SYSSPACE, IO_NODELOCKED|IO_UNIT, cred, NULL, p);
925}
926
927static void
928elf_puthdr(struct proc *p, void *dst, size_t *off, const prstatus_t *status,
929    const prfpregset_t *fpregset, const prpsinfo_t *psinfo, int numsegs)
930{
931	size_t ehoff;
932	size_t phoff;
933	size_t noteoff;
934	size_t notesz;
935
936	ehoff = *off;
937	*off += sizeof(Elf_Ehdr);
938
939	phoff = *off;
940	*off += (numsegs + 1) * sizeof(Elf_Phdr);
941
942	noteoff = *off;
943	elf_putnote(dst, off, "FreeBSD", NT_PRSTATUS, status,
944	    sizeof *status);
945	elf_putnote(dst, off, "FreeBSD", NT_FPREGSET, fpregset,
946	    sizeof *fpregset);
947	elf_putnote(dst, off, "FreeBSD", NT_PRPSINFO, psinfo,
948	    sizeof *psinfo);
949	notesz = *off - noteoff;
950
951	/* Align up to a page boundary for the program segments. */
952	*off = round_page(*off);
953
954	if (dst != NULL) {
955		Elf_Ehdr *ehdr;
956		Elf_Phdr *phdr;
957		struct phdr_closure phc;
958
959		/*
960		 * Fill in the ELF header.
961		 */
962		ehdr = (Elf_Ehdr *)((char *)dst + ehoff);
963		ehdr->e_ident[EI_MAG0] = ELFMAG0;
964		ehdr->e_ident[EI_MAG1] = ELFMAG1;
965		ehdr->e_ident[EI_MAG2] = ELFMAG2;
966		ehdr->e_ident[EI_MAG3] = ELFMAG3;
967		ehdr->e_ident[EI_CLASS] = ELF_CLASS;
968		ehdr->e_ident[EI_DATA] = ELF_DATA;
969		ehdr->e_ident[EI_VERSION] = EV_CURRENT;
970		ehdr->e_ident[EI_PAD] = 0;
971		strncpy(ehdr->e_ident + EI_BRAND, "FreeBSD",
972		    EI_NIDENT - EI_BRAND);
973		ehdr->e_type = ET_CORE;
974		ehdr->e_machine = ELF_ARCH;
975		ehdr->e_version = EV_CURRENT;
976		ehdr->e_entry = 0;
977		ehdr->e_phoff = phoff;
978		ehdr->e_flags = 0;
979		ehdr->e_ehsize = sizeof(Elf_Ehdr);
980		ehdr->e_phentsize = sizeof(Elf_Phdr);
981		ehdr->e_phnum = numsegs + 1;
982		ehdr->e_shentsize = sizeof(Elf_Shdr);
983		ehdr->e_shnum = 0;
984		ehdr->e_shstrndx = SHN_UNDEF;
985
986		/*
987		 * Fill in the program header entries.
988		 */
989		phdr = (Elf_Phdr *)((char *)dst + phoff);
990
991		/* The note segement. */
992		phdr->p_type = PT_NOTE;
993		phdr->p_offset = noteoff;
994		phdr->p_vaddr = 0;
995		phdr->p_paddr = 0;
996		phdr->p_filesz = notesz;
997		phdr->p_memsz = 0;
998		phdr->p_flags = 0;
999		phdr->p_align = 0;
1000		phdr++;
1001
1002		/* All the writable segments from the program. */
1003		phc.phdr = phdr;
1004		phc.offset = *off;
1005		each_writable_segment(p, cb_put_phdr, &phc);
1006	}
1007}
1008
1009static void
1010elf_putnote(void *dst, size_t *off, const char *name, int type,
1011    const void *desc, size_t descsz)
1012{
1013	Elf_Note note;
1014
1015	note.n_namesz = strlen(name) + 1;
1016	note.n_descsz = descsz;
1017	note.n_type = type;
1018	if (dst != NULL)
1019		bcopy(&note, (char *)dst + *off, sizeof note);
1020	*off += sizeof note;
1021	if (dst != NULL)
1022		bcopy(name, (char *)dst + *off, note.n_namesz);
1023	*off += roundup2(note.n_namesz, sizeof(Elf_Size));
1024	if (dst != NULL)
1025		bcopy(desc, (char *)dst + *off, note.n_descsz);
1026	*off += roundup2(note.n_descsz, sizeof(Elf_Size));
1027}
1028
1029/*
1030 * Tell kern_execve.c about it, with a little help from the linker.
1031 */
1032static struct execsw elf_execsw = {exec_elf_imgact, "ELF"};
1033EXEC_SET(elf, elf_execsw);
1034