imgact_elf.c revision 43596
1/*-
2 * Copyright (c) 1995-1996 Søren Schmidt
3 * Copyright (c) 1996 Peter Wemm
4 * All rights reserved.
5 *
6 * Redistribution and use in source and binary forms, with or without
7 * modification, are permitted provided that the following conditions
8 * are met:
9 * 1. Redistributions of source code must retain the above copyright
10 *    notice, this list of conditions and the following disclaimer
11 *    in this position and unchanged.
12 * 2. Redistributions in binary form must reproduce the above copyright
13 *    notice, this list of conditions and the following disclaimer in the
14 *    documentation and/or other materials provided with the distribution.
15 * 3. The name of the author may not be used to endorse or promote products
16 *    derived from this software without specific prior written permission
17 *
18 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
19 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
20 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
21 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
22 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
23 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
24 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
25 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
26 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
27 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
28 *
29 *	$Id: imgact_elf.c,v 1.47 1999/01/29 22:59:43 dillon Exp $
30 */
31
32#include "opt_rlimit.h"
33
34#include <sys/param.h>
35#include <sys/acct.h>
36#include <sys/exec.h>
37#include <sys/fcntl.h>
38#include <sys/imgact.h>
39#include <sys/imgact_elf.h>
40#include <sys/kernel.h>
41#include <sys/malloc.h>
42#include <sys/mman.h>
43#include <sys/namei.h>
44#include <sys/pioctl.h>
45#include <sys/proc.h>
46#include <sys/procfs.h>
47#include <sys/resourcevar.h>
48#include <sys/signalvar.h>
49#include <sys/stat.h>
50#include <sys/syscall.h>
51#include <sys/sysctl.h>
52#include <sys/sysent.h>
53#include <sys/systm.h>
54#include <sys/vnode.h>
55
56#include <vm/vm.h>
57#include <vm/vm_kern.h>
58#include <vm/vm_param.h>
59#include <vm/pmap.h>
60#include <sys/lock.h>
61#include <vm/vm_map.h>
62#include <vm/vm_object.h>
63#include <vm/vm_prot.h>
64#include <vm/vm_extern.h>
65
66#include <machine/elf.h>
67#include <machine/md_var.h>
68
69__ElfType(Brandinfo);
70__ElfType(Auxargs);
71
72static int elf_check_header __P((const Elf_Ehdr *hdr, int type));
73static int elf_freebsd_fixup __P((long **stack_base,
74    struct image_params *imgp));
75static int elf_load_file __P((struct proc *p, char *file, u_long *addr,
76    u_long *entry));
77static int elf_load_section __P((struct proc *p,
78    struct vmspace *vmspace, struct vnode *vp,
79    vm_offset_t offset, caddr_t vmaddr, size_t memsz, size_t filsz,
80    vm_prot_t prot));
81static int exec_elf_imgact __P((struct image_params *imgp));
82
83static int elf_trace = 0;
84SYSCTL_INT(_debug, OID_AUTO, elf_trace, CTLFLAG_RW, &elf_trace, 0, "");
85
86static struct sysentvec elf_freebsd_sysvec = {
87        SYS_MAXSYSCALL,
88        sysent,
89        0,
90        0,
91        0,
92        0,
93        0,
94        0,
95        elf_freebsd_fixup,
96        sendsig,
97        sigcode,
98        &szsigcode,
99        0,
100	"FreeBSD ELF",
101	elf_coredump
102};
103
104static Elf_Brandinfo freebsd_brand_info = {
105						"FreeBSD",
106						"",
107						"/usr/libexec/ld-elf.so.1",
108						&elf_freebsd_sysvec
109					  };
110static Elf_Brandinfo *elf_brand_list[MAX_BRANDS] = {
111							&freebsd_brand_info,
112							NULL, NULL, NULL,
113							NULL, NULL, NULL, NULL
114						    };
115
116int
117elf_insert_brand_entry(Elf_Brandinfo *entry)
118{
119	int i;
120
121	for (i=1; i<MAX_BRANDS; i++) {
122		if (elf_brand_list[i] == NULL) {
123			elf_brand_list[i] = entry;
124			break;
125		}
126	}
127	if (i == MAX_BRANDS)
128		return -1;
129	return 0;
130}
131
132int
133elf_remove_brand_entry(Elf_Brandinfo *entry)
134{
135	int i;
136
137	for (i=1; i<MAX_BRANDS; i++) {
138		if (elf_brand_list[i] == entry) {
139			elf_brand_list[i] = NULL;
140			break;
141		}
142	}
143	if (i == MAX_BRANDS)
144		return -1;
145	return 0;
146}
147
148int
149elf_brand_inuse(Elf_Brandinfo *entry)
150{
151	struct proc *p;
152
153	for (p = allproc.lh_first; p != 0; p = p->p_list.le_next) {
154		if (p->p_sysent == entry->sysvec)
155			return TRUE;
156	}
157
158	return FALSE;
159}
160
161static int
162elf_check_header(const Elf_Ehdr *hdr, int type)
163{
164	if (!IS_ELF(*hdr) ||
165	    hdr->e_ident[EI_CLASS] != ELF_TARG_CLASS ||
166	    hdr->e_ident[EI_DATA] != ELF_TARG_DATA ||
167	    hdr->e_ident[EI_VERSION] != EV_CURRENT)
168		return ENOEXEC;
169
170	if (!ELF_MACHINE_OK(hdr->e_machine))
171		return ENOEXEC;
172
173	if (hdr->e_type != type || hdr->e_version != ELF_TARG_VER)
174		return ENOEXEC;
175
176	return 0;
177}
178
179static int
180elf_load_section(struct proc *p, struct vmspace *vmspace, struct vnode *vp, vm_offset_t offset, caddr_t vmaddr, size_t memsz, size_t filsz, vm_prot_t prot)
181{
182	size_t map_len;
183	vm_offset_t map_addr;
184	int error, rv;
185	size_t copy_len;
186	vm_object_t object;
187	vm_offset_t file_addr;
188	vm_offset_t data_buf = 0;
189
190	object = vp->v_object;
191	error = 0;
192
193	map_addr = trunc_page((vm_offset_t)vmaddr);
194	file_addr = trunc_page(offset);
195
196	/*
197	 * We have two choices.  We can either clear the data in the last page
198	 * of an oversized mapping, or we can start the anon mapping a page
199	 * early and copy the initialized data into that first page.  We
200	 * choose the second..
201	 */
202	if (memsz > filsz)
203		map_len = trunc_page(offset+filsz) - file_addr;
204	else
205		map_len = round_page(offset+filsz) - file_addr;
206
207	if (map_len != 0) {
208		vm_object_reference(object);
209		vm_map_lock(&vmspace->vm_map);
210		rv = vm_map_insert(&vmspace->vm_map,
211				      object,
212				      file_addr,	/* file offset */
213				      map_addr,		/* virtual start */
214				      map_addr + map_len,/* virtual end */
215				      prot,
216				      VM_PROT_ALL,
217				      MAP_COPY_NEEDED | MAP_COPY_ON_WRITE);
218		vm_map_unlock(&vmspace->vm_map);
219		if (rv != KERN_SUCCESS)
220			return EINVAL;
221
222		/* prefault the page tables */
223		pmap_object_init_pt(&vmspace->vm_pmap,
224				    map_addr,
225				    object,
226				    (vm_pindex_t) OFF_TO_IDX(file_addr),
227				    map_len,
228				    0);
229
230		/* we can stop now if we've covered it all */
231		if (memsz == filsz)
232			return 0;
233	}
234
235
236	/*
237	 * We have to get the remaining bit of the file into the first part
238	 * of the oversized map segment.  This is normally because the .data
239	 * segment in the file is extended to provide bss.  It's a neat idea
240	 * to try and save a page, but it's a pain in the behind to implement.
241	 */
242	copy_len = (offset + filsz) - trunc_page(offset + filsz);
243	map_addr = trunc_page((vm_offset_t)vmaddr + filsz);
244	map_len = round_page((vm_offset_t)vmaddr + memsz) - map_addr;
245
246	/* This had damn well better be true! */
247        if (map_len != 0) {
248		vm_map_lock(&vmspace->vm_map);
249		rv = vm_map_insert(&vmspace->vm_map, NULL, 0,
250					map_addr, map_addr + map_len,
251					VM_PROT_ALL, VM_PROT_ALL, 0);
252		vm_map_unlock(&vmspace->vm_map);
253		if (rv != KERN_SUCCESS)
254			return EINVAL;
255	}
256
257	if (copy_len != 0) {
258		vm_object_reference(object);
259		rv = vm_map_find(exec_map,
260				 object,
261				 trunc_page(offset + filsz),
262				 &data_buf,
263				 PAGE_SIZE,
264				 TRUE,
265				 VM_PROT_READ,
266				 VM_PROT_ALL,
267				 MAP_COPY_ON_WRITE | MAP_COPY_NEEDED);
268		if (rv != KERN_SUCCESS) {
269			vm_object_deallocate(object);
270			return EINVAL;
271		}
272		pmap_object_init_pt(exec_map->pmap, data_buf, object,
273			(vm_pindex_t) OFF_TO_IDX(trunc_page(offset + filsz)),
274			PAGE_SIZE, 1);
275
276		/* send the page fragment to user space */
277		error = copyout((caddr_t)data_buf, (caddr_t)map_addr, copy_len);
278		vm_map_remove(exec_map, data_buf, data_buf + PAGE_SIZE);
279		if (error)
280			return (error);
281	}
282
283	/*
284	 * set it to the specified protection
285	 */
286	vm_map_protect(&vmspace->vm_map, map_addr, map_addr + map_len,  prot,
287		       FALSE);
288
289	return error;
290}
291
292static int
293elf_load_file(struct proc *p, char *file, u_long *addr, u_long *entry)
294{
295	const Elf_Ehdr *hdr = NULL;
296	const Elf_Phdr *phdr = NULL;
297	struct nameidata nd;
298	struct vmspace *vmspace = p->p_vmspace;
299	struct vattr attr;
300	struct image_params image_params, *imgp;
301	vm_prot_t prot;
302	unsigned long text_size = 0, data_size = 0;
303	unsigned long text_addr = 0, data_addr = 0;
304        int error, i;
305
306	imgp = &image_params;
307	/*
308	 * Initialize part of the common data
309	 */
310	imgp->proc = p;
311	imgp->uap = NULL;
312	imgp->attr = &attr;
313	imgp->firstpage = NULL;
314	imgp->image_header = (char *)kmem_alloc_wait(exec_map, PAGE_SIZE);
315
316	if (imgp->image_header == NULL) {
317		nd.ni_vp = NULL;
318		error = ENOMEM;
319		goto fail;
320	}
321
322        NDINIT(&nd, LOOKUP, LOCKLEAF|FOLLOW, UIO_SYSSPACE, file, p);
323
324	if ((error = namei(&nd)) != 0) {
325		nd.ni_vp = NULL;
326		goto fail;
327	}
328
329	imgp->vp = nd.ni_vp;
330
331	/*
332	 * Check permissions, modes, uid, etc on the file, and "open" it.
333	 */
334	error = exec_check_permissions(imgp);
335	if (error) {
336		VOP_UNLOCK(nd.ni_vp, 0, p);
337		goto fail;
338	}
339
340	error = exec_map_first_page(imgp);
341	VOP_UNLOCK(nd.ni_vp, 0, p);
342	if (error)
343                goto fail;
344
345	hdr = (const Elf_Ehdr *)imgp->image_header;
346	if ((error = elf_check_header(hdr, ET_DYN)) != 0)
347		goto fail;
348
349	/* Only support headers that fit within first page for now */
350	if ((hdr->e_phoff > PAGE_SIZE) ||
351	    (hdr->e_phoff + hdr->e_phentsize * hdr->e_phnum) > PAGE_SIZE) {
352		error = ENOEXEC;
353		goto fail;
354	}
355
356	phdr = (const Elf_Phdr *)(imgp->image_header + hdr->e_phoff);
357
358	for (i = 0; i < hdr->e_phnum; i++) {
359		if (phdr[i].p_type == PT_LOAD) {	/* Loadable segment */
360			prot = 0;
361			if (phdr[i].p_flags & PF_X)
362  				prot |= VM_PROT_EXECUTE;
363			if (phdr[i].p_flags & PF_W)
364  				prot |= VM_PROT_WRITE;
365			if (phdr[i].p_flags & PF_R)
366  				prot |= VM_PROT_READ;
367
368			if ((error = elf_load_section(p, vmspace, nd.ni_vp,
369  						     phdr[i].p_offset,
370  						     (caddr_t)phdr[i].p_vaddr +
371							(*addr),
372  						     phdr[i].p_memsz,
373  						     phdr[i].p_filesz, prot)) != 0)
374				goto fail;
375
376			/*
377			 * Is this .text or .data ??
378			 *
379			 * We only handle one each of those yet XXX
380			 */
381			if (hdr->e_entry >= phdr[i].p_vaddr &&
382			hdr->e_entry <(phdr[i].p_vaddr+phdr[i].p_memsz)) {
383  				text_addr = trunc_page(phdr[i].p_vaddr+(*addr));
384  				text_size = round_page(phdr[i].p_memsz +
385						       phdr[i].p_vaddr -
386						       trunc_page(phdr[i].p_vaddr));
387				*entry=(unsigned long)hdr->e_entry+(*addr);
388			} else {
389  				data_addr = trunc_page(phdr[i].p_vaddr+(*addr));
390  				data_size = round_page(phdr[i].p_memsz +
391						       phdr[i].p_vaddr -
392						       trunc_page(phdr[i].p_vaddr));
393			}
394		}
395	}
396
397fail:
398	if (imgp->firstpage)
399		exec_unmap_first_page(imgp);
400	if (imgp->image_header)
401		kmem_free_wakeup(exec_map, (vm_offset_t)imgp->image_header,
402			PAGE_SIZE);
403	if (nd.ni_vp)
404		vrele(nd.ni_vp);
405
406	return error;
407}
408
409static int
410exec_elf_imgact(struct image_params *imgp)
411{
412	const Elf_Ehdr *hdr = (const Elf_Ehdr *) imgp->image_header;
413	const Elf_Phdr *phdr;
414	Elf_Auxargs *elf_auxargs = NULL;
415	struct vmspace *vmspace;
416	vm_prot_t prot;
417	u_long text_size = 0, data_size = 0;
418	u_long text_addr = 0, data_addr = 0;
419	u_long addr, entry = 0, proghdr = 0;
420	int error, i;
421	const char *interp = NULL;
422	Elf_Brandinfo *brand_info;
423	const char *brand;
424	char path[MAXPATHLEN];
425
426	/*
427	 * Do we have a valid ELF header ?
428	 */
429	if (elf_check_header(hdr, ET_EXEC))
430		return -1;
431
432	/*
433	 * From here on down, we return an errno, not -1, as we've
434	 * detected an ELF file.
435	 */
436
437	if ((hdr->e_phoff > PAGE_SIZE) ||
438	    (hdr->e_phoff + hdr->e_phentsize * hdr->e_phnum) > PAGE_SIZE) {
439		/* Only support headers in first page for now */
440		return ENOEXEC;
441	}
442	phdr = (const Elf_Phdr*)(imgp->image_header + hdr->e_phoff);
443
444	/*
445	 * From this point on, we may have resources that need to be freed.
446	 */
447	if ((error = exec_extract_strings(imgp)) != 0)
448		goto fail;
449
450	exec_new_vmspace(imgp);
451
452	vmspace = imgp->proc->p_vmspace;
453
454	for (i = 0; i < hdr->e_phnum; i++) {
455		switch(phdr[i].p_type) {
456
457		case PT_LOAD:	/* Loadable segment */
458			prot = 0;
459			if (phdr[i].p_flags & PF_X)
460  				prot |= VM_PROT_EXECUTE;
461			if (phdr[i].p_flags & PF_W)
462  				prot |= VM_PROT_WRITE;
463			if (phdr[i].p_flags & PF_R)
464  				prot |= VM_PROT_READ;
465
466			if ((error = elf_load_section(imgp->proc,
467						     vmspace, imgp->vp,
468  						     phdr[i].p_offset,
469  						     (caddr_t)phdr[i].p_vaddr,
470  						     phdr[i].p_memsz,
471  						     phdr[i].p_filesz, prot)) != 0)
472  				goto fail;
473
474			/*
475			 * Is this .text or .data ??
476			 *
477			 * We only handle one each of those yet XXX
478			 */
479			if (hdr->e_entry >= phdr[i].p_vaddr &&
480			hdr->e_entry <(phdr[i].p_vaddr+phdr[i].p_memsz)) {
481  				text_addr = trunc_page(phdr[i].p_vaddr);
482  				text_size = round_page(phdr[i].p_memsz +
483						       phdr[i].p_vaddr -
484						       text_addr);
485				entry = (u_long)hdr->e_entry;
486			} else {
487  				data_addr = trunc_page(phdr[i].p_vaddr);
488  				data_size = round_page(phdr[i].p_memsz +
489						       phdr[i].p_vaddr -
490						       data_addr);
491			}
492			break;
493	  	case PT_INTERP:	/* Path to interpreter */
494			if (phdr[i].p_filesz > MAXPATHLEN ||
495			    phdr[i].p_offset + phdr[i].p_filesz > PAGE_SIZE) {
496				error = ENOEXEC;
497				goto fail;
498			}
499			interp = imgp->image_header + phdr[i].p_offset;
500			break;
501		case PT_PHDR: 	/* Program header table info */
502			proghdr = phdr[i].p_vaddr;
503			break;
504		default:
505			break;
506		}
507	}
508
509	vmspace->vm_tsize = text_size >> PAGE_SHIFT;
510	vmspace->vm_taddr = (caddr_t)(uintptr_t)text_addr;
511	vmspace->vm_dsize = data_size >> PAGE_SHIFT;
512	vmspace->vm_daddr = (caddr_t)(uintptr_t)data_addr;
513
514	addr = 2L*MAXDSIZ; /* May depend on OS type XXX */
515
516	imgp->entry_addr = entry;
517
518	/* If the executable has a brand, search for it in the brand list. */
519	brand_info = NULL;
520	brand = (const char *)&hdr->e_ident[EI_BRAND];
521	if (brand[0] != '\0') {
522		for (i = 0;  i < MAX_BRANDS;  i++) {
523			Elf_Brandinfo *bi = elf_brand_list[i];
524
525			if (bi != NULL && strcmp(brand, bi->brand) == 0) {
526				brand_info = bi;
527				break;
528			}
529		}
530	}
531
532	/* Lacking a known brand, search for a recognized interpreter. */
533	if (brand_info == NULL && interp != NULL) {
534		for (i = 0;  i < MAX_BRANDS;  i++) {
535			Elf_Brandinfo *bi = elf_brand_list[i];
536
537			if (bi != NULL &&
538			    strcmp(interp, bi->interp_path) == 0) {
539				brand_info = bi;
540				break;
541			}
542		}
543	}
544
545#ifdef __alpha__
546	/* XXX - Assume FreeBSD on the alpha. */
547	if (brand_info == NULL)
548		brand_info = &freebsd_brand_info;
549#endif
550
551	if (brand_info == NULL) {
552		if (brand[0] == 0)
553			uprintf("ELF binary type not known."
554			    "  Use \"brandelf\" to brand it.\n");
555		else
556			uprintf("ELF binary type \"%.*s\" not known.\n",
557			    EI_NIDENT - EI_BRAND, brand);
558		error = ENOEXEC;
559		goto fail;
560	}
561
562	imgp->proc->p_sysent = brand_info->sysvec;
563	if (interp != NULL) {
564		snprintf(path, sizeof(path), "%s%s",
565		    brand_info->emul_path, interp);
566                if ((error = elf_load_file(imgp->proc, path, &addr,
567		    &imgp->entry_addr)) != 0) {
568                        uprintf("ELF interpreter %s not found\n", path);
569                        goto fail;
570                }
571	}
572
573	/*
574	 * Construct auxargs table (used by the fixup routine)
575	 */
576	elf_auxargs = malloc(sizeof(Elf_Auxargs), M_TEMP, M_WAITOK);
577	elf_auxargs->execfd = -1;
578	elf_auxargs->phdr = proghdr;
579	elf_auxargs->phent = hdr->e_phentsize;
580	elf_auxargs->phnum = hdr->e_phnum;
581	elf_auxargs->pagesz = PAGE_SIZE;
582	elf_auxargs->base = addr;
583	elf_auxargs->flags = 0;
584	elf_auxargs->entry = entry;
585	elf_auxargs->trace = elf_trace;
586
587	imgp->auxargs = elf_auxargs;
588	imgp->interpreted = 0;
589
590	/* don't allow modifying the file while we run it */
591	imgp->vp->v_flag |= VTEXT;
592
593fail:
594	return error;
595}
596
597static int
598elf_freebsd_fixup(long **stack_base, struct image_params *imgp)
599{
600	Elf_Auxargs *args = (Elf_Auxargs *)imgp->auxargs;
601	long *pos;
602
603	pos = *stack_base + (imgp->argc + imgp->envc + 2);
604
605	if (args->trace) {
606		AUXARGS_ENTRY(pos, AT_DEBUG, 1);
607	}
608	if (args->execfd != -1) {
609		AUXARGS_ENTRY(pos, AT_EXECFD, args->execfd);
610	}
611	AUXARGS_ENTRY(pos, AT_PHDR, args->phdr);
612	AUXARGS_ENTRY(pos, AT_PHENT, args->phent);
613	AUXARGS_ENTRY(pos, AT_PHNUM, args->phnum);
614	AUXARGS_ENTRY(pos, AT_PAGESZ, args->pagesz);
615	AUXARGS_ENTRY(pos, AT_FLAGS, args->flags);
616	AUXARGS_ENTRY(pos, AT_ENTRY, args->entry);
617	AUXARGS_ENTRY(pos, AT_BASE, args->base);
618	AUXARGS_ENTRY(pos, AT_NULL, 0);
619
620	free(imgp->auxargs, M_TEMP);
621	imgp->auxargs = NULL;
622
623	(*stack_base)--;
624	suword(*stack_base, (long) imgp->argc);
625	return 0;
626}
627
628/*
629 * Code for generating ELF core dumps.
630 */
631
632typedef void (*segment_callback) __P((vm_map_entry_t, void *));
633
634/* Closure for cb_put_phdr(). */
635struct phdr_closure {
636	Elf_Phdr *phdr;		/* Program header to fill in */
637	Elf_Off offset;		/* Offset of segment in core file */
638};
639
640/* Closure for cb_size_segment(). */
641struct sseg_closure {
642	int count;		/* Count of writable segments. */
643	size_t size;		/* Total size of all writable segments. */
644};
645
646static void cb_put_phdr __P((vm_map_entry_t, void *));
647static void cb_size_segment __P((vm_map_entry_t, void *));
648static void each_writable_segment __P((struct proc *, segment_callback,
649    void *));
650static int elf_corehdr __P((struct proc *, struct vnode *, struct ucred *,
651    int, void *, size_t));
652static void elf_puthdr __P((struct proc *, void *, size_t *,
653    const prstatus_t *, const prfpregset_t *, const prpsinfo_t *, int));
654static void elf_putnote __P((void *, size_t *, const char *, int,
655    const void *, size_t));
656
657extern int osreldate;
658
659int
660elf_coredump(p)
661	register struct proc *p;
662{
663	register struct vnode *vp;
664	register struct ucred *cred = p->p_cred->pc_ucred;
665	struct nameidata nd;
666	struct vattr vattr;
667	int error, error1;
668	char *name;			/* name of corefile */
669	struct sseg_closure seginfo;
670	void *hdr;
671	size_t hdrsize;
672
673	STOPEVENT(p, S_CORE, 0);
674
675	if (sugid_coredump == 0 && p->p_flag & P_SUGID)
676		return (EFAULT);
677
678	/* Size the program segments. */
679	seginfo.count = 0;
680	seginfo.size = 0;
681	each_writable_segment(p, cb_size_segment, &seginfo);
682
683	/*
684	 * Calculate the size of the core file header area by making
685	 * a dry run of generating it.  Nothing is written, but the
686	 * size is calculated.
687	 */
688	hdrsize = 0;
689	elf_puthdr((struct proc *)NULL, (void *)NULL, &hdrsize,
690	    (const prstatus_t *)NULL, (const prfpregset_t *)NULL,
691	    (const prpsinfo_t *)NULL, seginfo.count);
692
693	if (hdrsize + seginfo.size >= p->p_rlimit[RLIMIT_CORE].rlim_cur)
694		return (EFAULT);
695	name = expand_name(p->p_comm, p->p_ucred->cr_uid, p->p_pid);
696	if (name == NULL)
697		return (EFAULT);	/* XXX -- not the best error */
698
699	NDINIT(&nd, LOOKUP, FOLLOW, UIO_SYSSPACE, name, p);
700	error = vn_open(&nd, O_CREAT | FWRITE, S_IRUSR | S_IWUSR);
701	free(name, M_TEMP);
702	if (error)
703		return (error);
704	vp = nd.ni_vp;
705
706	/* Don't dump to non-regular files or files with links. */
707	if (vp->v_type != VREG ||
708	    VOP_GETATTR(vp, &vattr, cred, p) || vattr.va_nlink != 1) {
709		error = EFAULT;
710		goto out;
711	}
712	VATTR_NULL(&vattr);
713	vattr.va_size = 0;
714	VOP_LEASE(vp, p, cred, LEASE_WRITE);
715	VOP_SETATTR(vp, &vattr, cred, p);
716	p->p_acflag |= ACORE;
717
718
719	/*
720	 * Allocate memory for building the header, fill it up,
721	 * and write it out.
722	 */
723	hdr = malloc(hdrsize, M_TEMP, M_WAITOK);
724	if (hdr == NULL) {
725		error = EINVAL;
726		goto out;
727	}
728	error = elf_corehdr(p, vp, cred, seginfo.count, hdr, hdrsize);
729
730	/* Write the contents of all of the writable segments. */
731	if (error == 0) {
732		Elf_Phdr *php;
733		off_t offset;
734		int i;
735
736		php = (Elf_Phdr *)((char *)hdr + sizeof(Elf_Ehdr)) + 1;
737		offset = hdrsize;
738		for (i = 0;  i < seginfo.count;  i++) {
739			error = vn_rdwr(UIO_WRITE, vp, (caddr_t)php->p_vaddr,
740			    php->p_filesz, offset, UIO_USERSPACE,
741			    IO_NODELOCKED|IO_UNIT, cred, (int *)NULL, p);
742			if (error != 0)
743				break;
744			offset += php->p_filesz;
745			php++;
746		}
747	}
748	free(hdr, M_TEMP);
749
750out:
751	VOP_UNLOCK(vp, 0, p);
752	error1 = vn_close(vp, FWRITE, cred, p);
753	if (error == 0)
754		error = error1;
755	return (error);
756}
757
758/*
759 * A callback for each_writable_segment() to write out the segment's
760 * program header entry.
761 */
762static void
763cb_put_phdr(entry, closure)
764	vm_map_entry_t entry;
765	void *closure;
766{
767	struct phdr_closure *phc = (struct phdr_closure *)closure;
768	Elf_Phdr *phdr = phc->phdr;
769
770	phc->offset = round_page(phc->offset);
771
772	phdr->p_type = PT_LOAD;
773	phdr->p_offset = phc->offset;
774	phdr->p_vaddr = entry->start;
775	phdr->p_paddr = 0;
776	phdr->p_filesz = phdr->p_memsz = entry->end - entry->start;
777	phdr->p_align = PAGE_SIZE;
778	phdr->p_flags = 0;
779	if (entry->protection & VM_PROT_READ)
780		phdr->p_flags |= PF_R;
781	if (entry->protection & VM_PROT_WRITE)
782		phdr->p_flags |= PF_W;
783	if (entry->protection & VM_PROT_EXECUTE)
784		phdr->p_flags |= PF_X;
785
786	phc->offset += phdr->p_filesz;
787	phc->phdr++;
788}
789
790/*
791 * A callback for each_writable_segment() to gather information about
792 * the number of segments and their total size.
793 */
794static void
795cb_size_segment(entry, closure)
796	vm_map_entry_t entry;
797	void *closure;
798{
799	struct sseg_closure *ssc = (struct sseg_closure *)closure;
800
801	ssc->count++;
802	ssc->size += entry->end - entry->start;
803}
804
805/*
806 * For each writable segment in the process's memory map, call the given
807 * function with a pointer to the map entry and some arbitrary
808 * caller-supplied data.
809 */
810static void
811each_writable_segment(p, func, closure)
812	struct proc *p;
813	segment_callback func;
814	void *closure;
815{
816	vm_map_t map = &p->p_vmspace->vm_map;
817	vm_map_entry_t entry;
818
819	for (entry = map->header.next;  entry != &map->header;
820	    entry = entry->next) {
821		vm_object_t obj;
822
823		if (entry->eflags & (MAP_ENTRY_IS_A_MAP|MAP_ENTRY_IS_SUB_MAP) ||
824		    (entry->protection & (VM_PROT_READ|VM_PROT_WRITE)) !=
825		    (VM_PROT_READ|VM_PROT_WRITE))
826			continue;
827
828		if ((obj = entry->object.vm_object) == NULL)
829			continue;
830
831		/* Find the deepest backing object. */
832		while (obj->backing_object != NULL)
833			obj = obj->backing_object;
834
835		/* Ignore memory-mapped devices and such things. */
836		if (obj->type != OBJT_DEFAULT &&
837		    obj->type != OBJT_SWAP &&
838		    obj->type != OBJT_VNODE)
839			continue;
840
841		(*func)(entry, closure);
842	}
843}
844
845/*
846 * Write the core file header to the file, including padding up to
847 * the page boundary.
848 */
849static int
850elf_corehdr(p, vp, cred, numsegs, hdr, hdrsize)
851	struct proc *p;
852	struct vnode *vp;
853	struct ucred *cred;
854	int numsegs;
855	size_t hdrsize;
856	void *hdr;
857{
858	size_t off;
859	prstatus_t status;
860	prfpregset_t fpregset;
861	prpsinfo_t psinfo;
862
863	/* Gather the information for the header. */
864	bzero(&status, sizeof status);
865	status.pr_version = PRSTATUS_VERSION;
866	status.pr_statussz = sizeof(prstatus_t);
867	status.pr_gregsetsz = sizeof(gregset_t);
868	status.pr_fpregsetsz = sizeof(fpregset_t);
869	status.pr_osreldate = osreldate;
870	status.pr_cursig = p->p_sig;
871	status.pr_pid = p->p_pid;
872	fill_regs(p, &status.pr_reg);
873
874	fill_fpregs(p, &fpregset);
875
876	bzero(&psinfo, sizeof psinfo);
877	psinfo.pr_version = PRPSINFO_VERSION;
878	psinfo.pr_psinfosz = sizeof(prpsinfo_t);
879	strncpy(psinfo.pr_fname, p->p_comm, MAXCOMLEN);
880	/* XXX - We don't fill in the command line arguments properly yet. */
881	strncpy(psinfo.pr_psargs, p->p_comm, PRARGSZ);
882
883	/* Fill in the header. */
884	bzero(hdr, hdrsize);
885	off = 0;
886	elf_puthdr(p, hdr, &off, &status, &fpregset, &psinfo, numsegs);
887
888	/* Write it to the core file. */
889	return vn_rdwr(UIO_WRITE, vp, hdr, hdrsize, (off_t)0,
890	    UIO_SYSSPACE, IO_NODELOCKED|IO_UNIT, cred, NULL, p);
891}
892
893static void
894elf_puthdr(struct proc *p, void *dst, size_t *off, const prstatus_t *status,
895    const prfpregset_t *fpregset, const prpsinfo_t *psinfo, int numsegs)
896{
897	size_t ehoff;
898	size_t phoff;
899	size_t noteoff;
900	size_t notesz;
901
902	ehoff = *off;
903	*off += sizeof(Elf_Ehdr);
904
905	phoff = *off;
906	*off += (numsegs + 1) * sizeof(Elf_Phdr);
907
908	noteoff = *off;
909	elf_putnote(dst, off, "FreeBSD", NT_PRSTATUS, status,
910	    sizeof *status);
911	elf_putnote(dst, off, "FreeBSD", NT_FPREGSET, fpregset,
912	    sizeof *fpregset);
913	elf_putnote(dst, off, "FreeBSD", NT_PRPSINFO, psinfo,
914	    sizeof *psinfo);
915	notesz = *off - noteoff;
916
917	/* Align up to a page boundary for the program segments. */
918	*off = round_page(*off);
919
920	if (dst != NULL) {
921		Elf_Ehdr *ehdr;
922		Elf_Phdr *phdr;
923		struct phdr_closure phc;
924
925		/*
926		 * Fill in the ELF header.
927		 */
928		ehdr = (Elf_Ehdr *)((char *)dst + ehoff);
929		ehdr->e_ident[EI_MAG0] = ELFMAG0;
930		ehdr->e_ident[EI_MAG1] = ELFMAG1;
931		ehdr->e_ident[EI_MAG2] = ELFMAG2;
932		ehdr->e_ident[EI_MAG3] = ELFMAG3;
933		ehdr->e_ident[EI_CLASS] = ELF_CLASS;
934		ehdr->e_ident[EI_DATA] = ELF_DATA;
935		ehdr->e_ident[EI_VERSION] = EV_CURRENT;
936		ehdr->e_ident[EI_PAD] = 0;
937		strncpy(ehdr->e_ident + EI_BRAND, "FreeBSD",
938		    EI_NIDENT - EI_BRAND);
939		ehdr->e_type = ET_CORE;
940		ehdr->e_machine = ELF_ARCH;
941		ehdr->e_version = EV_CURRENT;
942		ehdr->e_entry = 0;
943		ehdr->e_phoff = phoff;
944		ehdr->e_flags = 0;
945		ehdr->e_ehsize = sizeof(Elf_Ehdr);
946		ehdr->e_phentsize = sizeof(Elf_Phdr);
947		ehdr->e_phnum = numsegs + 1;
948		ehdr->e_shentsize = sizeof(Elf_Shdr);
949		ehdr->e_shnum = 0;
950		ehdr->e_shstrndx = SHN_UNDEF;
951
952		/*
953		 * Fill in the program header entries.
954		 */
955		phdr = (Elf_Phdr *)((char *)dst + phoff);
956
957		/* The note segement. */
958		phdr->p_type = PT_NOTE;
959		phdr->p_offset = noteoff;
960		phdr->p_vaddr = 0;
961		phdr->p_paddr = 0;
962		phdr->p_filesz = notesz;
963		phdr->p_memsz = 0;
964		phdr->p_flags = 0;
965		phdr->p_align = 0;
966		phdr++;
967
968		/* All the writable segments from the program. */
969		phc.phdr = phdr;
970		phc.offset = *off;
971		each_writable_segment(p, cb_put_phdr, &phc);
972	}
973}
974
975static void
976elf_putnote(void *dst, size_t *off, const char *name, int type,
977    const void *desc, size_t descsz)
978{
979	Elf_Note note;
980
981	note.n_namesz = strlen(name) + 1;
982	note.n_descsz = descsz;
983	note.n_type = type;
984	if (dst != NULL)
985		bcopy(&note, (char *)dst + *off, sizeof note);
986	*off += sizeof note;
987	if (dst != NULL)
988		bcopy(name, (char *)dst + *off, note.n_namesz);
989	*off += roundup2(note.n_namesz, sizeof(Elf_Size));
990	if (dst != NULL)
991		bcopy(desc, (char *)dst + *off, note.n_descsz);
992	*off += roundup2(note.n_descsz, sizeof(Elf_Size));
993}
994
995/*
996 * Tell kern_execve.c about it, with a little help from the linker.
997 * Since `const' objects end up in the text segment, TEXT_SET is the
998 * correct directive to use.
999 */
1000static struct execsw elf_execsw = {exec_elf_imgact, "ELF"};
1001EXEC_SET(elf, elf_execsw);
1002