/*-
 * Copyright (c) 2000 David O'Brien
 * Copyright (c) 1995-1996 Søren Schmidt
 * Copyright (c) 1996 Peter Wemm
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer
 *    in this position and unchanged.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. The name of the author may not be used to endorse or promote products
 *    derived from this software without specific prior written permission
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */

#include <sys/cdefs.h>
__FBSDID("$FreeBSD: head/sys/kern/imgact_elf.c 215679 2010-11-22 14:42:13Z attilio $");

#include "opt_compat.h"
#include "opt_core.h"

#include <sys/param.h>
#include <sys/exec.h>
#include <sys/fcntl.h>
#include <sys/imgact.h>
#include <sys/imgact_elf.h>
#include <sys/kernel.h>
#include <sys/lock.h>
#include <sys/malloc.h>
#include <sys/mount.h>
#include <sys/mutex.h>
#include <sys/mman.h>
#include <sys/namei.h>
#include <sys/pioctl.h>
#include <sys/proc.h>
#include <sys/procfs.h>
#include <sys/resourcevar.h>
#include <sys/sf_buf.h>
#include <sys/smp.h>
#include <sys/systm.h>
#include <sys/signalvar.h>
#include <sys/stat.h>
#include <sys/sx.h>
#include <sys/syscall.h>
#include <sys/sysctl.h>
#include <sys/sysent.h>
#include <sys/vnode.h>
#include <sys/syslog.h>
#include <sys/eventhandler.h>

#include <net/zlib.h>

#include <vm/vm.h>
#include <vm/vm_kern.h>
#include <vm/vm_param.h>
#include <vm/pmap.h>
#include <vm/vm_map.h>
#include <vm/vm_object.h>
#include <vm/vm_extern.h>

#include <machine/elf.h>
#include <machine/md_var.h>

#define OLD_EI_BRAND	8

static int __elfN(check_header)(const Elf_Ehdr *hdr);
static Elf_Brandinfo *__elfN(get_brandinfo)(struct image_params *imgp,
    const char *interp, int32_t *osrel);
static int __elfN(load_file)(struct proc *p, const char *file, u_long *addr,
    u_long *entry, size_t pagesize);
static int __elfN(load_section)(struct vmspace *vmspace, vm_object_t object,
    vm_offset_t offset, caddr_t vmaddr, size_t memsz, size_t filsz,
    vm_prot_t prot, size_t pagesize);
static int __CONCAT(exec_, __elfN(imgact))(struct image_params *imgp);
static boolean_t __elfN(freebsd_trans_osrel)(const Elf_Note *note,
    int32_t *osrel);
static boolean_t kfreebsd_trans_osrel(const Elf_Note *note, int32_t *osrel);
static boolean_t __elfN(check_note)(struct image_params *imgp,
    Elf_Brandnote *checknote, int32_t *osrel);

SYSCTL_NODE(_kern, OID_AUTO, __CONCAT(elf, __ELF_WORD_SIZE), CTLFLAG_RW, 0,
    "");

#ifdef COMPRESS_USER_CORES
static int compress_core(gzFile, char *, char *, unsigned int,
    struct thread *td);
#define CORE_BUF_SIZE	(16 * 1024)
#endif

int __elfN(fallback_brand) = -1;
SYSCTL_INT(__CONCAT(_kern_elf, __ELF_WORD_SIZE), OID_AUTO,
    fallback_brand, CTLFLAG_RW, &__elfN(fallback_brand), 0,
    __XSTRING(__CONCAT(ELF, __ELF_WORD_SIZE)) " brand of last resort");
TUNABLE_INT("kern.elf" __XSTRING(__ELF_WORD_SIZE) ".fallback_brand",
    &__elfN(fallback_brand));
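
/*
 * Usage note (illustrative, not taken from this file): setting the
 * loader tunable kern.elf32.fallback_brand=3 (ELFOSABI_LINUX) would
 * make unbranded 32-bit binaries fall back to the Linux ABI when no
 * other branding check in get_brandinfo() below matches.
 */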

static int elf_legacy_coredump = 0;
SYSCTL_INT(_debug, OID_AUTO, __elfN(legacy_coredump), CTLFLAG_RW,
    &elf_legacy_coredump, 0, "");

static Elf_Brandinfo *elf_brand_list[MAX_BRANDS];

#define	trunc_page_ps(va, ps)	((va) & ~(ps - 1))
#define	round_page_ps(va, ps)	(((va) + (ps - 1)) & ~(ps - 1))
#define	aligned(a, t)	(trunc_page_ps((u_long)(a), sizeof(t)) == (u_long)(a))
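
/*
 * Illustrative examples of the helpers above, assuming a 4 KB page
 * size: trunc_page_ps(0x1234, 4096) == 0x1000 and
 * round_page_ps(0x1234, 4096) == 0x2000, while aligned(p, Elf_Addr)
 * is true only when p sits on a sizeof(Elf_Addr) boundary.
 */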

static const char FREEBSD_ABI_VENDOR[] = "FreeBSD";

Elf_Brandnote __elfN(freebsd_brandnote) = {
	.hdr.n_namesz	= sizeof(FREEBSD_ABI_VENDOR),
	.hdr.n_descsz	= sizeof(int32_t),
	.hdr.n_type	= 1,
	.vendor		= FREEBSD_ABI_VENDOR,
	.flags		= BN_TRANSLATE_OSREL,
	.trans_osrel	= __elfN(freebsd_trans_osrel)
};
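
/*
 * A matching ".note.ABI-tag" section, laid out as the descriptor above
 * requires: n_namesz = 8, n_descsz = 4, n_type = 1, the name
 * "FreeBSD\0", then a 32-bit desc word holding the __FreeBSD_version
 * the binary was built for, which freebsd_trans_osrel() extracts.
 */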

static boolean_t
__elfN(freebsd_trans_osrel)(const Elf_Note *note, int32_t *osrel)
{
	uintptr_t p;

	p = (uintptr_t)(note + 1);
	p += roundup2(note->n_namesz, sizeof(Elf32_Addr));
	*osrel = *(const int32_t *)(p);

	return (TRUE);
}

static const char GNU_ABI_VENDOR[] = "GNU";
static int GNU_KFREEBSD_ABI_DESC = 3;

Elf_Brandnote __elfN(kfreebsd_brandnote) = {
	.hdr.n_namesz	= sizeof(GNU_ABI_VENDOR),
	.hdr.n_descsz	= 16,	/* XXX at least 16 */
	.hdr.n_type	= 1,
	.vendor		= GNU_ABI_VENDOR,
	.flags		= BN_TRANSLATE_OSREL,
	.trans_osrel	= kfreebsd_trans_osrel
};

static boolean_t
kfreebsd_trans_osrel(const Elf_Note *note, int32_t *osrel)
{
	const Elf32_Word *desc;
	uintptr_t p;

	p = (uintptr_t)(note + 1);
	p += roundup2(note->n_namesz, sizeof(Elf32_Addr));

	desc = (const Elf32_Word *)p;
	if (desc[0] != GNU_KFREEBSD_ABI_DESC)
		return (FALSE);

	/*
	 * Debian GNU/kFreeBSD embeds the earliest compatible kernel version
	 * (__FreeBSD_version: <major><two digit minor>Rxx) in the LSB
	 * (Linux Standard Base) ABI-tag note format.
	 */
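	/* E.g. desc[1..3] = { 8, 2, 0 } translates to osrel 802000. */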
	*osrel = desc[1] * 100000 + desc[2] * 1000 + desc[3];

	return (TRUE);
}

int
__elfN(insert_brand_entry)(Elf_Brandinfo *entry)
{
	int i;

	for (i = 0; i < MAX_BRANDS; i++) {
		if (elf_brand_list[i] == NULL) {
			elf_brand_list[i] = entry;
			break;
		}
	}
	if (i == MAX_BRANDS) {
		printf("WARNING: %s: could not insert brandinfo entry: %p\n",
			__func__, entry);
		return (-1);
	}
	return (0);
}
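
/*
 * Brand entries are typically registered at boot from the per-ABI
 * machine-dependent ELF code (e.g. via SYSINIT(9)) and unregistered
 * when an ABI emulation module unloads, after brand_inuse() below
 * confirms that no process still runs under that sysentvec.
 */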

int
__elfN(remove_brand_entry)(Elf_Brandinfo *entry)
{
	int i;

	for (i = 0; i < MAX_BRANDS; i++) {
		if (elf_brand_list[i] == entry) {
			elf_brand_list[i] = NULL;
			break;
		}
	}
	if (i == MAX_BRANDS)
		return (-1);
	return (0);
}

int
__elfN(brand_inuse)(Elf_Brandinfo *entry)
{
	struct proc *p;
	int rval = FALSE;

	sx_slock(&allproc_lock);
	FOREACH_PROC_IN_SYSTEM(p) {
		if (p->p_sysent == entry->sysvec) {
			rval = TRUE;
			break;
		}
	}
	sx_sunlock(&allproc_lock);

	return (rval);
}

static Elf_Brandinfo *
__elfN(get_brandinfo)(struct image_params *imgp, const char *interp,
    int32_t *osrel)
{
	const Elf_Ehdr *hdr = (const Elf_Ehdr *)imgp->image_header;
	Elf_Brandinfo *bi;
	boolean_t ret;
	int i;

	/*
	 * We support four types of branding -- (1) the ELF EI_OSABI field
	 * that SCO added to the ELF spec, (2) FreeBSD 3.x's traditional string
	 * branding within the ELF header, (3) the path in the `interp_path'
	 * field, and (4) the ".note.ABI-tag" ELF section.
	 */

	/* Look for a ".note.ABI-tag" ELF section */
	for (i = 0; i < MAX_BRANDS; i++) {
		bi = elf_brand_list[i];
		if (bi == NULL)
			continue;
		if (hdr->e_machine == bi->machine && (bi->flags &
		    (BI_BRAND_NOTE|BI_BRAND_NOTE_MANDATORY)) != 0) {
			ret = __elfN(check_note)(imgp, bi->brand_note, osrel);
			if (ret)
				return (bi);
		}
	}

	/* If the executable has a brand, search for it in the brand list. */
	for (i = 0; i < MAX_BRANDS; i++) {
		bi = elf_brand_list[i];
		if (bi == NULL || bi->flags & BI_BRAND_NOTE_MANDATORY)
			continue;
		if (hdr->e_machine == bi->machine &&
		    (hdr->e_ident[EI_OSABI] == bi->brand ||
		    strncmp((const char *)&hdr->e_ident[OLD_EI_BRAND],
		    bi->compat_3_brand, strlen(bi->compat_3_brand)) == 0))
			return (bi);
	}

	/* Lacking a known brand, search for a recognized interpreter. */
	if (interp != NULL) {
		for (i = 0; i < MAX_BRANDS; i++) {
			bi = elf_brand_list[i];
			if (bi == NULL || bi->flags & BI_BRAND_NOTE_MANDATORY)
				continue;
			if (hdr->e_machine == bi->machine &&
			    strcmp(interp, bi->interp_path) == 0)
				return (bi);
		}
	}

	/* Lacking a recognized interpreter, try the default brand. */
	for (i = 0; i < MAX_BRANDS; i++) {
		bi = elf_brand_list[i];
		if (bi == NULL || bi->flags & BI_BRAND_NOTE_MANDATORY)
			continue;
		if (hdr->e_machine == bi->machine &&
		    __elfN(fallback_brand) == bi->brand)
			return (bi);
	}
	return (NULL);
}

static int
__elfN(check_header)(const Elf_Ehdr *hdr)
{
	Elf_Brandinfo *bi;
	int i;

	if (!IS_ELF(*hdr) ||
	    hdr->e_ident[EI_CLASS] != ELF_TARG_CLASS ||
	    hdr->e_ident[EI_DATA] != ELF_TARG_DATA ||
	    hdr->e_ident[EI_VERSION] != EV_CURRENT ||
	    hdr->e_phentsize != sizeof(Elf_Phdr) ||
	    hdr->e_version != ELF_TARG_VER)
		return (ENOEXEC);

	/*
	 * Make sure we have at least one brand for this machine.
	 */

	for (i = 0; i < MAX_BRANDS; i++) {
		bi = elf_brand_list[i];
		if (bi != NULL && bi->machine == hdr->e_machine)
			break;
	}
	if (i == MAX_BRANDS)
		return (ENOEXEC);

	return (0);
}

static int
__elfN(map_partial)(vm_map_t map, vm_object_t object, vm_ooffset_t offset,
    vm_offset_t start, vm_offset_t end, vm_prot_t prot)
{
	struct sf_buf *sf;
	int error;
	vm_offset_t off;

	/*
	 * Create the page if it doesn't exist yet. Ignore errors.
	 */
	vm_map_lock(map);
	vm_map_insert(map, NULL, 0, trunc_page(start), round_page(end),
	    VM_PROT_ALL, VM_PROT_ALL, 0);
	vm_map_unlock(map);

	/*
	 * Find the page from the underlying object.
	 */
	if (object) {
		sf = vm_imgact_map_page(object, offset);
		if (sf == NULL)
			return (KERN_FAILURE);
		off = offset - trunc_page(offset);
		error = copyout((caddr_t)sf_buf_kva(sf) + off, (caddr_t)start,
		    end - start);
		vm_imgact_unmap_page(sf);
		if (error) {
			return (KERN_FAILURE);
		}
	}

	return (KERN_SUCCESS);
}

static int
__elfN(map_insert)(vm_map_t map, vm_object_t object, vm_ooffset_t offset,
    vm_offset_t start, vm_offset_t end, vm_prot_t prot, int cow)
{
	struct sf_buf *sf;
	vm_offset_t off;
	vm_size_t sz;
	int error, rv;

	if (start != trunc_page(start)) {
		rv = __elfN(map_partial)(map, object, offset, start,
		    round_page(start), prot);
		if (rv)
			return (rv);
		offset += round_page(start) - start;
		start = round_page(start);
	}
	if (end != round_page(end)) {
		rv = __elfN(map_partial)(map, object, offset +
		    trunc_page(end) - start, trunc_page(end), end, prot);
		if (rv)
			return (rv);
		end = trunc_page(end);
	}
	if (end > start) {
		if (offset & PAGE_MASK) {
			/*
			 * The mapping is not page aligned. This means we have
			 * to copy the data. Sigh.
			 */
			rv = vm_map_find(map, NULL, 0, &start, end - start,
			    FALSE, prot | VM_PROT_WRITE, VM_PROT_ALL, 0);
			if (rv)
				return (rv);
			if (object == NULL)
				return (KERN_SUCCESS);
			for (; start < end; start += sz) {
				sf = vm_imgact_map_page(object, offset);
				if (sf == NULL)
					return (KERN_FAILURE);
				off = offset - trunc_page(offset);
				sz = end - start;
				if (sz > PAGE_SIZE - off)
					sz = PAGE_SIZE - off;
				error = copyout((caddr_t)sf_buf_kva(sf) + off,
				    (caddr_t)start, sz);
				vm_imgact_unmap_page(sf);
				if (error) {
					return (KERN_FAILURE);
				}
				offset += sz;
			}
			rv = KERN_SUCCESS;
		} else {
			vm_object_reference(object);
			vm_map_lock(map);
			rv = vm_map_insert(map, object, offset, start, end,
			    prot, VM_PROT_ALL, cow);
			vm_map_unlock(map);
			if (rv != KERN_SUCCESS)
				vm_object_deallocate(object);
		}
		return (rv);
	} else {
		return (KERN_SUCCESS);
	}
}

static int
__elfN(load_section)(struct vmspace *vmspace,
	vm_object_t object, vm_offset_t offset,
	caddr_t vmaddr, size_t memsz, size_t filsz, vm_prot_t prot,
	size_t pagesize)
{
	struct sf_buf *sf;
	size_t map_len;
	vm_offset_t map_addr;
	int error, rv, cow;
	size_t copy_len;
	vm_offset_t file_addr;

	/*
	 * It's necessary to fail if the filsz + offset taken from the
	 * header is greater than the actual file pager object's size.
	 * If we were to allow this, then the vm_map_find() below would
	 * walk right off the end of the file object and into the ether.
	 *
	 * While I'm here, might as well check for something else that
	 * is invalid: filsz cannot be greater than memsz.
	 */
	if ((off_t)filsz + offset > object->un_pager.vnp.vnp_size ||
	    filsz > memsz) {
		uprintf("elf_load_section: truncated ELF file\n");
		return (ENOEXEC);
	}

	map_addr = trunc_page_ps((vm_offset_t)vmaddr, pagesize);
	file_addr = trunc_page_ps(offset, pagesize);

	/*
	 * We have two choices.  We can either clear the data in the last page
	 * of an oversized mapping, or we can start the anon mapping a page
	 * early and copy the initialized data into that first page.  We
	 * choose the second.
	 */
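	/*
	 * Worked example (4 KB pages, offset 0, vmaddr 0): with
	 * filsz = 0x1800 and memsz = 0x4000, the file-backed mapping
	 * below covers [0, 0x1000), the anonymous mapping covers
	 * [0x1000, 0x4000), and the 0x800 initialized bytes beyond
	 * 0x1000 are copied into the anonymous pages further down.
	 */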
	if (memsz > filsz)
		map_len = trunc_page_ps(offset + filsz, pagesize) - file_addr;
	else
		map_len = round_page_ps(offset + filsz, pagesize) - file_addr;

	if (map_len != 0) {
		/* cow flags: don't dump readonly sections in core */
		cow = MAP_COPY_ON_WRITE | MAP_PREFAULT |
		    (prot & VM_PROT_WRITE ? 0 : MAP_DISABLE_COREDUMP);

		rv = __elfN(map_insert)(&vmspace->vm_map,
				      object,
				      file_addr,	/* file offset */
				      map_addr,		/* virtual start */
				      map_addr + map_len,/* virtual end */
				      prot,
				      cow);
		if (rv != KERN_SUCCESS)
			return (EINVAL);

		/* we can stop now if we've covered it all */
		if (memsz == filsz) {
			return (0);
		}
	}

	/*
	 * We have to get the remaining bit of the file into the first part
	 * of the oversized map segment.  This is normally because the .data
	 * segment in the file is extended to provide bss.  It's a neat idea
	 * to try and save a page, but it's a pain in the behind to implement.
	 */
	copy_len = (offset + filsz) - trunc_page_ps(offset + filsz, pagesize);
	map_addr = trunc_page_ps((vm_offset_t)vmaddr + filsz, pagesize);
	map_len = round_page_ps((vm_offset_t)vmaddr + memsz, pagesize) -
	    map_addr;

	/* This had damn well better be true! */
	if (map_len != 0) {
		rv = __elfN(map_insert)(&vmspace->vm_map, NULL, 0, map_addr,
		    map_addr + map_len, VM_PROT_ALL, 0);
		if (rv != KERN_SUCCESS) {
			return (EINVAL);
		}
	}

	if (copy_len != 0) {
		vm_offset_t off;

		sf = vm_imgact_map_page(object, offset + filsz);
		if (sf == NULL)
			return (EIO);

		/* send the page fragment to user space */
		off = trunc_page_ps(offset + filsz, pagesize) -
		    trunc_page(offset + filsz);
		error = copyout((caddr_t)sf_buf_kva(sf) + off,
		    (caddr_t)map_addr, copy_len);
		vm_imgact_unmap_page(sf);
		if (error) {
			return (error);
		}
	}

	/*
	 * Set it to the specified protection.
	 * XXX had better undo the damage from pasting over the cracks here!
	 */
	vm_map_protect(&vmspace->vm_map, trunc_page(map_addr),
	    round_page(map_addr + map_len), prot, FALSE);

	return (0);
}

/*
 * Load the file "file" into memory.  It may be either a shared object
 * or an executable.
 *
 * The "addr" reference parameter is in/out.  On entry, it specifies
 * the address where a shared object should be loaded.  If the file is
 * an executable, this value is ignored.  On exit, "addr" specifies
 * where the file was actually loaded.
 *
 * The "entry" reference parameter is out only.  On exit, it specifies
 * the entry point for the loaded file.
 */
static int
__elfN(load_file)(struct proc *p, const char *file, u_long *addr,
	u_long *entry, size_t pagesize)
{
	struct {
		struct nameidata nd;
		struct vattr attr;
		struct image_params image_params;
	} *tempdata;
	const Elf_Ehdr *hdr = NULL;
	const Elf_Phdr *phdr = NULL;
	struct nameidata *nd;
	struct vmspace *vmspace = p->p_vmspace;
	struct vattr *attr;
	struct image_params *imgp;
	vm_prot_t prot;
	u_long rbase;
	u_long base_addr = 0;
	int vfslocked, error, i, numsegs;

	tempdata = malloc(sizeof(*tempdata), M_TEMP, M_WAITOK);
	nd = &tempdata->nd;
	attr = &tempdata->attr;
	imgp = &tempdata->image_params;

	/*
	 * Initialize part of the common data
	 */
	imgp->proc = p;
	imgp->attr = attr;
	imgp->firstpage = NULL;
	imgp->image_header = NULL;
	imgp->object = NULL;
	imgp->execlabel = NULL;

	NDINIT(nd, LOOKUP, MPSAFE|LOCKLEAF|FOLLOW, UIO_SYSSPACE, file,
	    curthread);
	vfslocked = 0;
	if ((error = namei(nd)) != 0) {
		nd->ni_vp = NULL;
		goto fail;
	}
	vfslocked = NDHASGIANT(nd);
	NDFREE(nd, NDF_ONLY_PNBUF);
	imgp->vp = nd->ni_vp;

	/*
	 * Check permissions, modes, uid, etc. on the file, and "open" it.
	 */
	error = exec_check_permissions(imgp);
	if (error)
		goto fail;

	error = exec_map_first_page(imgp);
	if (error)
		goto fail;

	/*
	 * Also make certain that the interpreter stays the same, so set
	 * its VV_TEXT flag, too.
	 */
	nd->ni_vp->v_vflag |= VV_TEXT;

	imgp->object = nd->ni_vp->v_object;

	hdr = (const Elf_Ehdr *)imgp->image_header;
	if ((error = __elfN(check_header)(hdr)) != 0)
		goto fail;
	if (hdr->e_type == ET_DYN)
		rbase = *addr;
	else if (hdr->e_type == ET_EXEC)
		rbase = 0;
	else {
		error = ENOEXEC;
		goto fail;
	}

	/*
	 * Only support headers that fit within the first page for now
	 * (multiplication of two Elf_Half fields will not overflow).
	 */
	if ((hdr->e_phoff > PAGE_SIZE) ||
	    (hdr->e_phentsize * hdr->e_phnum) > PAGE_SIZE - hdr->e_phoff) {
		error = ENOEXEC;
		goto fail;
	}

	phdr = (const Elf_Phdr *)(imgp->image_header + hdr->e_phoff);
	if (!aligned(phdr, Elf_Addr)) {
		error = ENOEXEC;
		goto fail;
	}

	for (i = 0, numsegs = 0; i < hdr->e_phnum; i++) {
		if (phdr[i].p_type == PT_LOAD && phdr[i].p_memsz != 0) {
			/* Loadable segment */
			prot = 0;
			if (phdr[i].p_flags & PF_X)
				prot |= VM_PROT_EXECUTE;
			if (phdr[i].p_flags & PF_W)
				prot |= VM_PROT_WRITE;
			if (phdr[i].p_flags & PF_R)
				prot |= VM_PROT_READ;

			if ((error = __elfN(load_section)(vmspace,
			    imgp->object, phdr[i].p_offset,
			    (caddr_t)(uintptr_t)phdr[i].p_vaddr + rbase,
			    phdr[i].p_memsz, phdr[i].p_filesz, prot,
			    pagesize)) != 0)
				goto fail;
			/*
			 * Establish the base address if this is the
			 * first segment.
			 */
			if (numsegs == 0)
				base_addr = trunc_page(phdr[i].p_vaddr +
				    rbase);
			numsegs++;
		}
	}
	*addr = base_addr;
	*entry = (unsigned long)hdr->e_entry + rbase;

fail:
	if (imgp->firstpage)
		exec_unmap_first_page(imgp);

	if (nd->ni_vp)
		vput(nd->ni_vp);

	VFS_UNLOCK_GIANT(vfslocked);
	free(tempdata, M_TEMP);

	return (error);
}

static int
__CONCAT(exec_, __elfN(imgact))(struct image_params *imgp)
{
	const Elf_Ehdr *hdr = (const Elf_Ehdr *)imgp->image_header;
	const Elf_Phdr *phdr;
	Elf_Auxargs *elf_auxargs;
	struct vmspace *vmspace;
	vm_prot_t prot;
	u_long text_size = 0, data_size = 0, total_size = 0;
	u_long text_addr = 0, data_addr = 0;
	u_long seg_size, seg_addr;
	u_long addr, baddr, et_dyn_addr, entry = 0, proghdr = 0;
	int32_t osrel = 0;
	int error = 0, i, n;
	const char *interp = NULL, *newinterp = NULL;
	Elf_Brandinfo *brand_info;
	char *path;
	struct sysentvec *sv;

	/*
	 * Do we have a valid ELF header?
	 *
	 * Only allow ET_EXEC & ET_DYN here, reject ET_DYN later
	 * if a particular brand doesn't support it.
	 */
	if (__elfN(check_header)(hdr) != 0 ||
	    (hdr->e_type != ET_EXEC && hdr->e_type != ET_DYN))
		return (-1);

	/*
	 * From here on down, we return an errno, not -1, as we've
	 * detected an ELF file.
	 */

	if ((hdr->e_phoff > PAGE_SIZE) ||
	    (hdr->e_phoff + hdr->e_phentsize * hdr->e_phnum) > PAGE_SIZE) {
		/* Only support headers in first page for now */
		return (ENOEXEC);
	}
	phdr = (const Elf_Phdr *)(imgp->image_header + hdr->e_phoff);
	if (!aligned(phdr, Elf_Addr))
		return (ENOEXEC);
	n = 0;
	baddr = 0;
	for (i = 0; i < hdr->e_phnum; i++) {
		if (phdr[i].p_type == PT_LOAD) {
			if (n == 0)
				baddr = phdr[i].p_vaddr;
			n++;
			continue;
		}
		if (phdr[i].p_type == PT_INTERP) {
			/* Path to interpreter */
			if (phdr[i].p_filesz > MAXPATHLEN ||
			    phdr[i].p_offset + phdr[i].p_filesz > PAGE_SIZE)
				return (ENOEXEC);
			interp = imgp->image_header + phdr[i].p_offset;
			continue;
		}
	}

	brand_info = __elfN(get_brandinfo)(imgp, interp, &osrel);
	if (brand_info == NULL) {
		uprintf("ELF binary type \"%u\" not known.\n",
		    hdr->e_ident[EI_OSABI]);
		return (ENOEXEC);
	}
	if (hdr->e_type == ET_DYN) {
		if ((brand_info->flags & BI_CAN_EXEC_DYN) == 0)
			return (ENOEXEC);
		/*
		 * Honour the base load address from the dso if it is
		 * non-zero for some reason.
		 */
		if (baddr == 0)
			et_dyn_addr = ET_DYN_LOAD_ADDR;
		else
			et_dyn_addr = 0;
	} else
		et_dyn_addr = 0;
	sv = brand_info->sysvec;
	if (interp != NULL && brand_info->interp_newpath != NULL)
		newinterp = brand_info->interp_newpath;

	/*
	 * Avoid a possible deadlock if the current address space is destroyed
	 * and that address space maps the locked vnode.  In the common case,
	 * the locked vnode's v_usecount is decremented but remains greater
	 * than zero.  Consequently, the vnode lock is not needed by vrele().
	 * However, in cases where the vnode lock is external, such as nullfs,
	 * v_usecount may become zero.
	 */
	VOP_UNLOCK(imgp->vp, 0);

	error = exec_new_vmspace(imgp, sv);
	imgp->proc->p_sysent = sv;

	vn_lock(imgp->vp, LK_EXCLUSIVE | LK_RETRY);
	if (error)
		return (error);

	vmspace = imgp->proc->p_vmspace;

	for (i = 0; i < hdr->e_phnum; i++) {
		switch (phdr[i].p_type) {
		case PT_LOAD:	/* Loadable segment */
			if (phdr[i].p_memsz == 0)
				break;
			prot = 0;
			if (phdr[i].p_flags & PF_X)
				prot |= VM_PROT_EXECUTE;
			if (phdr[i].p_flags & PF_W)
				prot |= VM_PROT_WRITE;
			if (phdr[i].p_flags & PF_R)
				prot |= VM_PROT_READ;

#if defined(__ia64__) && __ELF_WORD_SIZE == 32 && defined(IA32_ME_HARDER)
			/*
			 * Some x86 binaries assume read == executable,
			 * notably the M3 runtime and therefore cvsup
			 */
			if (prot & VM_PROT_READ)
				prot |= VM_PROT_EXECUTE;
#endif

			if ((error = __elfN(load_section)(vmspace,
			    imgp->object, phdr[i].p_offset,
			    (caddr_t)(uintptr_t)phdr[i].p_vaddr + et_dyn_addr,
			    phdr[i].p_memsz, phdr[i].p_filesz, prot,
			    sv->sv_pagesize)) != 0)
				return (error);

			/*
			 * If this segment contains the program headers,
			 * remember their virtual address for the AT_PHDR
			 * aux entry. Static binaries don't usually include
			 * a PT_PHDR entry.
			 */
			if (phdr[i].p_offset == 0 &&
			    hdr->e_phoff + hdr->e_phnum * hdr->e_phentsize
				<= phdr[i].p_filesz)
				proghdr = phdr[i].p_vaddr + hdr->e_phoff +
				    et_dyn_addr;

			seg_addr = trunc_page(phdr[i].p_vaddr + et_dyn_addr);
			seg_size = round_page(phdr[i].p_memsz +
			    phdr[i].p_vaddr + et_dyn_addr - seg_addr);

			/*
			 * Make the largest executable segment the official
			 * text segment and all others data.
			 *
			 * Note that obreak() assumes that data_addr +
			 * data_size == end of data load area, and the ELF
			 * file format expects segments to be sorted by
			 * address.  If multiple data segments exist, the
			 * last one will be used.
			 */

			if (phdr[i].p_flags & PF_X && text_size < seg_size) {
				text_size = seg_size;
				text_addr = seg_addr;
			} else {
				data_size = seg_size;
				data_addr = seg_addr;
			}
			total_size += seg_size;
			break;
		case PT_PHDR:	/* Program header table info */
			proghdr = phdr[i].p_vaddr + et_dyn_addr;
			break;
		default:
			break;
		}
	}

	if (data_addr == 0 && data_size == 0) {
		data_addr = text_addr;
		data_size = text_size;
	}

	entry = (u_long)hdr->e_entry + et_dyn_addr;

	/*
	 * Check limits.  It should be safe to check the
	 * limits after loading the segments since we do
	 * not actually fault in all the segments pages.
	 */
	PROC_LOCK(imgp->proc);
	if (data_size > lim_cur(imgp->proc, RLIMIT_DATA) ||
	    text_size > maxtsiz ||
	    total_size > lim_cur(imgp->proc, RLIMIT_VMEM)) {
		PROC_UNLOCK(imgp->proc);
		return (ENOMEM);
	}

	vmspace->vm_tsize = text_size >> PAGE_SHIFT;
	vmspace->vm_taddr = (caddr_t)(uintptr_t)text_addr;
	vmspace->vm_dsize = data_size >> PAGE_SHIFT;
	vmspace->vm_daddr = (caddr_t)(uintptr_t)data_addr;

	/*
	 * We load the dynamic linker where a userland call
	 * to mmap(0, ...) would put it.  The rationale behind this
	 * calculation is that it leaves room for the heap to grow to
	 * its maximum allowed size.
	 */
	addr = round_page((vm_offset_t)imgp->proc->p_vmspace->vm_daddr +
	    lim_max(imgp->proc, RLIMIT_DATA));
	PROC_UNLOCK(imgp->proc);

	imgp->entry_addr = entry;

	if (interp != NULL) {
		int have_interp = FALSE;

		VOP_UNLOCK(imgp->vp, 0);
		if (brand_info->emul_path != NULL &&
		    brand_info->emul_path[0] != '\0') {
			path = malloc(MAXPATHLEN, M_TEMP, M_WAITOK);
			snprintf(path, MAXPATHLEN, "%s%s",
			    brand_info->emul_path, interp);
			error = __elfN(load_file)(imgp->proc, path, &addr,
			    &imgp->entry_addr, sv->sv_pagesize);
			free(path, M_TEMP);
			if (error == 0)
				have_interp = TRUE;
		}
		if (!have_interp && newinterp != NULL) {
			error = __elfN(load_file)(imgp->proc, newinterp, &addr,
			    &imgp->entry_addr, sv->sv_pagesize);
			if (error == 0)
				have_interp = TRUE;
		}
		if (!have_interp) {
			error = __elfN(load_file)(imgp->proc, interp, &addr,
			    &imgp->entry_addr, sv->sv_pagesize);
		}
		vn_lock(imgp->vp, LK_EXCLUSIVE | LK_RETRY);
		if (error != 0) {
			uprintf("ELF interpreter %s not found\n", interp);
			return (error);
		}
	} else
		addr = et_dyn_addr;

	/*
	 * Construct auxargs table (used by the fixup routine)
	 */
	elf_auxargs = malloc(sizeof(Elf_Auxargs), M_TEMP, M_WAITOK);
	elf_auxargs->execfd = -1;
	elf_auxargs->phdr = proghdr;
	elf_auxargs->phent = hdr->e_phentsize;
	elf_auxargs->phnum = hdr->e_phnum;
	elf_auxargs->pagesz = PAGE_SIZE;
	elf_auxargs->base = addr;
	elf_auxargs->flags = 0;
	elf_auxargs->entry = entry;

	imgp->auxargs = elf_auxargs;
	imgp->interpreted = 0;
	imgp->reloc_base = addr;
	imgp->proc->p_osrel = osrel;

	return (error);
}

#define	suword __CONCAT(suword, __ELF_WORD_SIZE)

int
__elfN(freebsd_fixup)(register_t **stack_base, struct image_params *imgp)
{
	Elf_Auxargs *args = (Elf_Auxargs *)imgp->auxargs;
	Elf_Addr *base;
	Elf_Addr *pos;

	base = (Elf_Addr *)*stack_base;
	pos = base + (imgp->args->argc + imgp->args->envc + 2);
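	/*
	 * At this point *stack_base points at the argv[] vector; the
	 * argc + envc + 2 slots skip over the argv[] and envp[] arrays
	 * and their two terminating NULL entries, so "pos" is where the
	 * auxiliary vector begins.  argc itself is stored just below
	 * argv[] at the end of this function.
	 */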

	if (args->execfd != -1)
		AUXARGS_ENTRY(pos, AT_EXECFD, args->execfd);
	AUXARGS_ENTRY(pos, AT_PHDR, args->phdr);
	AUXARGS_ENTRY(pos, AT_PHENT, args->phent);
	AUXARGS_ENTRY(pos, AT_PHNUM, args->phnum);
	AUXARGS_ENTRY(pos, AT_PAGESZ, args->pagesz);
	AUXARGS_ENTRY(pos, AT_FLAGS, args->flags);
	AUXARGS_ENTRY(pos, AT_ENTRY, args->entry);
	AUXARGS_ENTRY(pos, AT_BASE, args->base);
	if (imgp->execpathp != 0)
		AUXARGS_ENTRY(pos, AT_EXECPATH, imgp->execpathp);
	AUXARGS_ENTRY(pos, AT_OSRELDATE, osreldate);
	if (imgp->canary != 0) {
		AUXARGS_ENTRY(pos, AT_CANARY, imgp->canary);
		AUXARGS_ENTRY(pos, AT_CANARYLEN, imgp->canarylen);
	}
	AUXARGS_ENTRY(pos, AT_NCPUS, mp_ncpus);
	if (imgp->pagesizes != 0) {
		AUXARGS_ENTRY(pos, AT_PAGESIZES, imgp->pagesizes);
		AUXARGS_ENTRY(pos, AT_PAGESIZESLEN, imgp->pagesizeslen);
	}
	AUXARGS_ENTRY(pos, AT_NULL, 0);

	free(imgp->auxargs, M_TEMP);
	imgp->auxargs = NULL;

	base--;
	suword(base, (long)imgp->args->argc);
	*stack_base = (register_t *)base;
	return (0);
}

/*
 * Code for generating ELF core dumps.
 */

typedef void (*segment_callback)(vm_map_entry_t, void *);

/* Closure for cb_put_phdr(). */
struct phdr_closure {
	Elf_Phdr *phdr;		/* Program header to fill in */
	Elf_Off offset;		/* Offset of segment in core file */
};

/* Closure for cb_size_segment(). */
struct sseg_closure {
	int count;		/* Count of writable segments. */
	size_t size;		/* Total size of all writable segments. */
};

static void cb_put_phdr(vm_map_entry_t, void *);
static void cb_size_segment(vm_map_entry_t, void *);
static void each_writable_segment(struct thread *, segment_callback, void *);
static int __elfN(corehdr)(struct thread *, struct vnode *, struct ucred *,
    int, void *, size_t, gzFile);
static void __elfN(puthdr)(struct thread *, void *, size_t *, int);
static void __elfN(putnote)(void *, size_t *, const char *, int,
    const void *, size_t);

#ifdef COMPRESS_USER_CORES
extern int compress_user_cores;
extern int compress_user_cores_gzlevel;
#endif

static int
core_output(struct vnode *vp, void *base, size_t len, off_t offset,
    struct ucred *active_cred, struct ucred *file_cred,
    struct thread *td, char *core_buf, gzFile gzfile)
{
	int error;

	if (gzfile) {
#ifdef COMPRESS_USER_CORES
		error = compress_core(gzfile, base, core_buf, len, td);
#else
		panic("shouldn't be here");
#endif
	} else {
		error = vn_rdwr_inchunks(UIO_WRITE, vp, base, len, offset,
		    UIO_USERSPACE, IO_UNIT | IO_DIRECT, active_cred, file_cred,
		    NULL, td);
	}
	return (error);
}

int
__elfN(coredump)(struct thread *td, struct vnode *vp, off_t limit, int flags)
{
	struct ucred *cred = td->td_ucred;
	int error = 0;
	struct sseg_closure seginfo;
	void *hdr;
	size_t hdrsize;

	gzFile gzfile = Z_NULL;
	char *core_buf = NULL;
#ifdef COMPRESS_USER_CORES
	char gzopen_flags[8];
	char *p;
	int doing_compress = flags & IMGACT_CORE_COMPRESS;
#endif

	hdr = NULL;

#ifdef COMPRESS_USER_CORES
	if (doing_compress) {
		p = gzopen_flags;
		*p++ = 'w';
		if (compress_user_cores_gzlevel >= 0 &&
		    compress_user_cores_gzlevel <= 9)
			*p++ = '0' + compress_user_cores_gzlevel;
		*p = 0;
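		/* E.g. compress_user_cores_gzlevel == 6 yields "w6". */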
		gzfile = gz_open("", gzopen_flags, vp);
		if (gzfile == Z_NULL) {
			error = EFAULT;
			goto done;
		}
		core_buf = malloc(CORE_BUF_SIZE, M_TEMP, M_WAITOK | M_ZERO);
		if (!core_buf) {
			error = ENOMEM;
			goto done;
		}
	}
#endif

	/* Size the program segments. */
	seginfo.count = 0;
	seginfo.size = 0;
	each_writable_segment(td, cb_size_segment, &seginfo);

	/*
	 * Calculate the size of the core file header area by making
	 * a dry run of generating it.  Nothing is written, but the
	 * size is calculated.
	 */
	hdrsize = 0;
	__elfN(puthdr)(td, (void *)NULL, &hdrsize, seginfo.count);

	if (hdrsize + seginfo.size >= limit) {
		error = EFAULT;
		goto done;
	}

	/*
	 * Allocate memory for building the header, fill it up,
	 * and write it out.
	 */
	hdr = malloc(hdrsize, M_TEMP, M_WAITOK);
	if (hdr == NULL) {
		error = EINVAL;
		goto done;
	}
	error = __elfN(corehdr)(td, vp, cred, seginfo.count, hdr, hdrsize,
	    gzfile);

	/* Write the contents of all of the writable segments. */
	if (error == 0) {
		Elf_Phdr *php;
		off_t offset;
		int i;

		php = (Elf_Phdr *)((char *)hdr + sizeof(Elf_Ehdr)) + 1;
		offset = hdrsize;
		for (i = 0; i < seginfo.count; i++) {
			error = core_output(vp,
			    (caddr_t)(uintptr_t)php->p_vaddr, php->p_filesz,
			    offset, cred, NOCRED, curthread, core_buf, gzfile);
			if (error != 0)
				break;
			offset += php->p_filesz;
			php++;
		}
	}
	if (error) {
		log(LOG_WARNING,
		    "Failed to write core file for process %s (error %d)\n",
		    curproc->p_comm, error);
	}

done:
#ifdef COMPRESS_USER_CORES
	if (core_buf)
		free(core_buf, M_TEMP);
	if (gzfile)
		gzclose(gzfile);
#endif

	free(hdr, M_TEMP);

	return (error);
}

/*
 * A callback for each_writable_segment() to write out the segment's
 * program header entry.
 */
static void
cb_put_phdr(entry, closure)
	vm_map_entry_t entry;
	void *closure;
{
	struct phdr_closure *phc = (struct phdr_closure *)closure;
	Elf_Phdr *phdr = phc->phdr;

	phc->offset = round_page(phc->offset);

	phdr->p_type = PT_LOAD;
	phdr->p_offset = phc->offset;
	phdr->p_vaddr = entry->start;
	phdr->p_paddr = 0;
	phdr->p_filesz = phdr->p_memsz = entry->end - entry->start;
	phdr->p_align = PAGE_SIZE;
	phdr->p_flags = 0;
	if (entry->protection & VM_PROT_READ)
		phdr->p_flags |= PF_R;
	if (entry->protection & VM_PROT_WRITE)
		phdr->p_flags |= PF_W;
	if (entry->protection & VM_PROT_EXECUTE)
		phdr->p_flags |= PF_X;

	phc->offset += phdr->p_filesz;
	phc->phdr++;
}

/*
 * A callback for each_writable_segment() to gather information about
 * the number of segments and their total size.
 */
static void
cb_size_segment(entry, closure)
	vm_map_entry_t entry;
	void *closure;
{
	struct sseg_closure *ssc = (struct sseg_closure *)closure;

	ssc->count++;
	ssc->size += entry->end - entry->start;
}

/*
 * For each writable segment in the process's memory map, call the given
 * function with a pointer to the map entry and some arbitrary
 * caller-supplied data.
 */
static void
each_writable_segment(td, func, closure)
	struct thread *td;
	segment_callback func;
	void *closure;
{
	struct proc *p = td->td_proc;
	vm_map_t map = &p->p_vmspace->vm_map;
	vm_map_entry_t entry;
	vm_object_t backing_object, object;
	boolean_t ignore_entry;

	vm_map_lock_read(map);
	for (entry = map->header.next; entry != &map->header;
	    entry = entry->next) {
		/*
		 * Don't dump inaccessible mappings, deal with legacy
		 * coredump mode.
		 *
		 * Note that read-only segments related to the ELF binary
		 * are marked MAP_ENTRY_NOCOREDUMP now so we no longer
		 * need to arbitrarily ignore such segments.
		 */
		if (elf_legacy_coredump) {
			if ((entry->protection & VM_PROT_RW) != VM_PROT_RW)
				continue;
		} else {
			if ((entry->protection & VM_PROT_ALL) == 0)
				continue;
		}

		/*
		 * Don't include a memory segment in the coredump if
		 * MAP_NOCORE is set in mmap(2) or MADV_NOCORE in
		 * madvise(2).  Do not dump submaps (i.e. parts of the
		 * kernel map).
		 */
		if (entry->eflags & (MAP_ENTRY_NOCOREDUMP|MAP_ENTRY_IS_SUB_MAP))
			continue;

		if ((object = entry->object.vm_object) == NULL)
			continue;

		/* Ignore memory-mapped devices and such things. */
		VM_OBJECT_LOCK(object);
		while ((backing_object = object->backing_object) != NULL) {
			VM_OBJECT_LOCK(backing_object);
			VM_OBJECT_UNLOCK(object);
			object = backing_object;
		}
		ignore_entry = object->type != OBJT_DEFAULT &&
		    object->type != OBJT_SWAP && object->type != OBJT_VNODE;
		VM_OBJECT_UNLOCK(object);
		if (ignore_entry)
			continue;

		(*func)(entry, closure);
	}
	vm_map_unlock_read(map);
}

/*
 * Write the core file header to the file, including padding up to
 * the page boundary.
 */
static int
__elfN(corehdr)(td, vp, cred, numsegs, hdr, hdrsize, gzfile)
	struct thread *td;
	struct vnode *vp;
	struct ucred *cred;
	int numsegs;
	size_t hdrsize;
	void *hdr;
	gzFile gzfile;
{
	size_t off;

	/* Fill in the header. */
	bzero(hdr, hdrsize);
	off = 0;
	__elfN(puthdr)(td, hdr, &off, numsegs);

	if (!gzfile) {
		/* Write it to the core file. */
		return (vn_rdwr_inchunks(UIO_WRITE, vp, hdr, hdrsize, (off_t)0,
			UIO_SYSSPACE, IO_UNIT | IO_DIRECT, cred, NOCRED, NULL,
			td));
	} else {
#ifdef COMPRESS_USER_CORES
		if (gzwrite(gzfile, hdr, hdrsize) != hdrsize) {
			log(LOG_WARNING,
			    "Failed to compress core file header for process"
			    " %s.\n", curproc->p_comm);
			return (EFAULT);
		} else {
			return (0);
		}
#else
		panic("shouldn't be here");
#endif
	}
}

#if defined(COMPAT_FREEBSD32) && __ELF_WORD_SIZE == 32
#include <compat/freebsd32/freebsd32.h>

typedef struct prstatus32 elf_prstatus_t;
typedef struct prpsinfo32 elf_prpsinfo_t;
typedef struct fpreg32 elf_prfpregset_t;
typedef struct fpreg32 elf_fpregset_t;
typedef struct reg32 elf_gregset_t;
typedef struct thrmisc32 elf_thrmisc_t;
#else
typedef prstatus_t elf_prstatus_t;
typedef prpsinfo_t elf_prpsinfo_t;
typedef prfpregset_t elf_prfpregset_t;
typedef prfpregset_t elf_fpregset_t;
typedef gregset_t elf_gregset_t;
typedef thrmisc_t elf_thrmisc_t;
#endif

static void
__elfN(puthdr)(struct thread *td, void *dst, size_t *off, int numsegs)
{
	struct {
		elf_prstatus_t status;
		elf_prfpregset_t fpregset;
		elf_prpsinfo_t psinfo;
		elf_thrmisc_t thrmisc;
	} *tempdata;
	elf_prstatus_t *status;
	elf_prfpregset_t *fpregset;
	elf_prpsinfo_t *psinfo;
	elf_thrmisc_t *thrmisc;
	struct proc *p;
	struct thread *thr;
	size_t ehoff, noteoff, notesz, phoff;

	p = td->td_proc;

	ehoff = *off;
	*off += sizeof(Elf_Ehdr);

	phoff = *off;
	*off += (numsegs + 1) * sizeof(Elf_Phdr);

	noteoff = *off;
	/*
	 * Don't allocate space for the notes if we're just calculating
	 * the size of the header. We also don't collect the data.
	 */
	if (dst != NULL) {
		tempdata = malloc(sizeof(*tempdata), M_TEMP, M_ZERO|M_WAITOK);
		status = &tempdata->status;
		fpregset = &tempdata->fpregset;
		psinfo = &tempdata->psinfo;
		thrmisc = &tempdata->thrmisc;
	} else {
		tempdata = NULL;
		status = NULL;
		fpregset = NULL;
		psinfo = NULL;
		thrmisc = NULL;
	}

	if (dst != NULL) {
		psinfo->pr_version = PRPSINFO_VERSION;
		psinfo->pr_psinfosz = sizeof(elf_prpsinfo_t);
		strlcpy(psinfo->pr_fname, p->p_comm, sizeof(psinfo->pr_fname));
		/*
		 * XXX - We don't fill in the command line arguments properly
		 * yet.
		 */
		strlcpy(psinfo->pr_psargs, p->p_comm,
		    sizeof(psinfo->pr_psargs));
	}
	__elfN(putnote)(dst, off, "FreeBSD", NT_PRPSINFO, psinfo,
	    sizeof *psinfo);

	/*
	 * To have the debugger select the right thread (LWP) as the initial
	 * thread, we dump the state of the thread passed to us in td first.
	 * This is the thread that caused the core dump and is thus likely
	 * the one the user wants selected in the debugger.
	 */
	thr = td;
	while (thr != NULL) {
		if (dst != NULL) {
			status->pr_version = PRSTATUS_VERSION;
			status->pr_statussz = sizeof(elf_prstatus_t);
			status->pr_gregsetsz = sizeof(elf_gregset_t);
			status->pr_fpregsetsz = sizeof(elf_fpregset_t);
			status->pr_osreldate = osreldate;
			status->pr_cursig = p->p_sig;
			status->pr_pid = thr->td_tid;
#if defined(COMPAT_FREEBSD32) && __ELF_WORD_SIZE == 32
			fill_regs32(thr, &status->pr_reg);
			fill_fpregs32(thr, fpregset);
#else
			fill_regs(thr, &status->pr_reg);
			fill_fpregs(thr, fpregset);
#endif
			memset(&thrmisc->_pad, 0, sizeof (thrmisc->_pad));
			strcpy(thrmisc->pr_tname, thr->td_name);
		}
		__elfN(putnote)(dst, off, "FreeBSD", NT_PRSTATUS, status,
		    sizeof *status);
		__elfN(putnote)(dst, off, "FreeBSD", NT_FPREGSET, fpregset,
		    sizeof *fpregset);
		__elfN(putnote)(dst, off, "FreeBSD", NT_THRMISC, thrmisc,
		    sizeof *thrmisc);
		/*
		 * Allow for MD specific notes, as well as any MD
		 * specific preparations for writing MI notes.
		 */
		__elfN(dump_thread)(thr, dst, off);

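		/*
		 * Advance to the next thread: after td (dumped first),
		 * walk p_threads in list order, skipping td when it
		 * comes up again.
		 */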
		thr = (thr == td) ? TAILQ_FIRST(&p->p_threads) :
		    TAILQ_NEXT(thr, td_plist);
		if (thr == td)
			thr = TAILQ_NEXT(thr, td_plist);
	}

	notesz = *off - noteoff;

	if (dst != NULL)
		free(tempdata, M_TEMP);

	/* Align up to a page boundary for the program segments. */
	*off = round_page(*off);

	if (dst != NULL) {
		Elf_Ehdr *ehdr;
		Elf_Phdr *phdr;
		struct phdr_closure phc;

		/*
		 * Fill in the ELF header.
		 */
		ehdr = (Elf_Ehdr *)((char *)dst + ehoff);
		ehdr->e_ident[EI_MAG0] = ELFMAG0;
		ehdr->e_ident[EI_MAG1] = ELFMAG1;
		ehdr->e_ident[EI_MAG2] = ELFMAG2;
		ehdr->e_ident[EI_MAG3] = ELFMAG3;
		ehdr->e_ident[EI_CLASS] = ELF_CLASS;
		ehdr->e_ident[EI_DATA] = ELF_DATA;
		ehdr->e_ident[EI_VERSION] = EV_CURRENT;
		ehdr->e_ident[EI_OSABI] = ELFOSABI_FREEBSD;
		ehdr->e_ident[EI_ABIVERSION] = 0;
		ehdr->e_ident[EI_PAD] = 0;
		ehdr->e_type = ET_CORE;
#if defined(COMPAT_FREEBSD32) && __ELF_WORD_SIZE == 32
		ehdr->e_machine = ELF_ARCH32;
#else
		ehdr->e_machine = ELF_ARCH;
#endif
		ehdr->e_version = EV_CURRENT;
		ehdr->e_entry = 0;
		ehdr->e_phoff = phoff;
		ehdr->e_flags = 0;
		ehdr->e_ehsize = sizeof(Elf_Ehdr);
		ehdr->e_phentsize = sizeof(Elf_Phdr);
		ehdr->e_phnum = numsegs + 1;
		ehdr->e_shentsize = sizeof(Elf_Shdr);
		ehdr->e_shnum = 0;
		ehdr->e_shstrndx = SHN_UNDEF;

		/*
		 * Fill in the program header entries.
		 */
		phdr = (Elf_Phdr *)((char *)dst + phoff);

		/* The note segment. */
		phdr->p_type = PT_NOTE;
		phdr->p_offset = noteoff;
		phdr->p_vaddr = 0;
		phdr->p_paddr = 0;
		phdr->p_filesz = notesz;
		phdr->p_memsz = 0;
		phdr->p_flags = 0;
		phdr->p_align = 0;
		phdr++;

		/* All the writable segments from the program. */
		phc.phdr = phdr;
		phc.offset = *off;
		each_writable_segment(td, cb_put_phdr, &phc);
	}
}

static void
__elfN(putnote)(void *dst, size_t *off, const char *name, int type,
    const void *desc, size_t descsz)
{
	Elf_Note note;

	note.n_namesz = strlen(name) + 1;
	note.n_descsz = descsz;
	note.n_type = type;
	if (dst != NULL)
		bcopy(&note, (char *)dst + *off, sizeof note);
	*off += sizeof note;
	if (dst != NULL)
		bcopy(name, (char *)dst + *off, note.n_namesz);
	*off += roundup2(note.n_namesz, sizeof(Elf_Size));
	if (dst != NULL)
		bcopy(desc, (char *)dst + *off, note.n_descsz);
	*off += roundup2(note.n_descsz, sizeof(Elf_Size));
}
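
/*
 * Resulting record layout, with the name and desc each padded to an
 * Elf_Size boundary: n_namesz, n_descsz, n_type, the name bytes, then
 * the desc bytes.  E.g. putnote(dst, off, "FreeBSD", NT_PRSTATUS, ...)
 * emits the three header words, the 8-byte "FreeBSD\0" name, then the
 * padded prstatus payload.
 */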

/*
 * Try to find the appropriate ABI-note section for checknote, and
 * fetch the osreldate for the binary from the ELF OSABI-note.  Only
 * the first page of the image is searched, the same as for headers.
 */
static boolean_t
__elfN(check_note)(struct image_params *imgp, Elf_Brandnote *checknote,
    int32_t *osrel)
{
	const Elf_Note *note, *note0, *note_end;
	const Elf_Phdr *phdr, *pnote;
	const Elf_Ehdr *hdr;
	const char *note_name;
	int i;

	pnote = NULL;
	hdr = (const Elf_Ehdr *)imgp->image_header;
	phdr = (const Elf_Phdr *)(imgp->image_header + hdr->e_phoff);

	for (i = 0; i < hdr->e_phnum; i++) {
		if (phdr[i].p_type == PT_NOTE) {
			pnote = &phdr[i];
			break;
		}
	}

	if (pnote == NULL || pnote->p_offset >= PAGE_SIZE ||
	    pnote->p_offset + pnote->p_filesz >= PAGE_SIZE)
		return (FALSE);

	note = note0 = (const Elf_Note *)(imgp->image_header + pnote->p_offset);
	note_end = (const Elf_Note *)(imgp->image_header +
	    pnote->p_offset + pnote->p_filesz);
	for (i = 0; i < 100 && note >= note0 && note < note_end; i++) {
		if (!aligned(note, Elf32_Addr))
			return (FALSE);
		if (note->n_namesz != checknote->hdr.n_namesz ||
		    note->n_descsz != checknote->hdr.n_descsz ||
		    note->n_type != checknote->hdr.n_type)
			goto nextnote;
		note_name = (const char *)(note + 1);
		if (strncmp(checknote->vendor, note_name,
		    checknote->hdr.n_namesz) != 0)
			goto nextnote;

		/*
		 * Fetch the osreldate for the binary
		 * from the ELF OSABI-note if necessary.
		 */
		if ((checknote->flags & BN_TRANSLATE_OSREL) != 0 &&
		    checknote->trans_osrel != NULL)
			return (checknote->trans_osrel(note, osrel));
		return (TRUE);

nextnote:
		note = (const Elf_Note *)((const char *)(note + 1) +
		    roundup2(note->n_namesz, sizeof(Elf32_Addr)) +
		    roundup2(note->n_descsz, sizeof(Elf32_Addr)));
	}

	return (FALSE);
}

/*
 * Tell kern_execve.c about it, with a little help from the linker.
 */
static struct execsw __elfN(execsw) = {
	__CONCAT(exec_, __elfN(imgact)),
	__XSTRING(__CONCAT(ELF, __ELF_WORD_SIZE))
};
EXEC_SET(__CONCAT(elf, __ELF_WORD_SIZE), __elfN(execsw));

#ifdef COMPRESS_USER_CORES
/*
 * Compress and write out a core segment for a user process.
 *
 * 'inbuf' is the starting address of a VM segment in the process' address
 * space that is to be compressed and written out to the core file.  'dest_buf'
 * is a buffer in the kernel's address space.  The segment is copied from
 * 'inbuf' to 'dest_buf' first before being processed by the compression
 * routine gzwrite().  This copying is necessary because the content of the VM
 * segment may change between the compression pass and the crc-computation pass
 * in gzwrite().  This is because realtime threads may preempt the UNIX kernel.
 */
static int
compress_core(gzFile file, char *inbuf, char *dest_buf, unsigned int len,
    struct thread *td)
{
	int len_compressed;
	int error = 0;
	unsigned int chunk_len;

	while (len) {
		chunk_len = (len > CORE_BUF_SIZE) ? CORE_BUF_SIZE : len;
		/* Fail the dump if the user pages cannot be copied in. */
		error = copyin(inbuf, dest_buf, chunk_len);
		if (error != 0)
			break;
		len_compressed = gzwrite(file, dest_buf, chunk_len);

		EVENTHANDLER_INVOKE(app_coredump_progress, td, len_compressed);

		if ((unsigned int)len_compressed != chunk_len) {
			log(LOG_WARNING,
			    "compress_core: length mismatch (0x%x returned, "
			    "0x%x expected)\n", len_compressed, chunk_len);
			EVENTHANDLER_INVOKE(app_coredump_error, td,
			    "compress_core: length mismatch %x -> %x",
			    chunk_len, len_compressed);
			error = EFAULT;
			break;
		}
		inbuf += chunk_len;
		len -= chunk_len;
		if (ticks - PCPU_GET(switchticks) >= hogticks)
			uio_yield();
	}

	return (error);
}
#endif /* COMPRESS_USER_CORES */