imgact_elf.c revision 253939
1/*-
2 * Copyright (c) 2000 David O'Brien
 * Copyright (c) 1995-1996 Søren Schmidt
4 * Copyright (c) 1996 Peter Wemm
5 * All rights reserved.
6 *
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following conditions
9 * are met:
10 * 1. Redistributions of source code must retain the above copyright
11 *    notice, this list of conditions and the following disclaimer
12 *    in this position and unchanged.
13 * 2. Redistributions in binary form must reproduce the above copyright
14 *    notice, this list of conditions and the following disclaimer in the
15 *    documentation and/or other materials provided with the distribution.
16 * 3. The name of the author may not be used to endorse or promote products
17 *    derived from this software without specific prior written permission
18 *
19 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
20 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
21 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
22 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
23 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
24 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
25 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
26 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
27 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
28 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
29 */
30
31#include <sys/cdefs.h>
32__FBSDID("$FreeBSD: head/sys/kern/imgact_elf.c 253939 2013-08-04 21:07:24Z attilio $");
33
34#include "opt_capsicum.h"
35#include "opt_compat.h"
36#include "opt_core.h"
37
38#include <sys/param.h>
39#include <sys/capability.h>
40#include <sys/exec.h>
41#include <sys/fcntl.h>
42#include <sys/imgact.h>
43#include <sys/imgact_elf.h>
44#include <sys/kernel.h>
45#include <sys/lock.h>
46#include <sys/malloc.h>
47#include <sys/mount.h>
48#include <sys/mman.h>
49#include <sys/namei.h>
50#include <sys/pioctl.h>
51#include <sys/proc.h>
52#include <sys/procfs.h>
53#include <sys/racct.h>
54#include <sys/resourcevar.h>
55#include <sys/rwlock.h>
56#include <sys/sbuf.h>
57#include <sys/sf_buf.h>
58#include <sys/smp.h>
59#include <sys/systm.h>
60#include <sys/signalvar.h>
61#include <sys/stat.h>
62#include <sys/sx.h>
63#include <sys/syscall.h>
64#include <sys/sysctl.h>
65#include <sys/sysent.h>
66#include <sys/vnode.h>
67#include <sys/syslog.h>
68#include <sys/eventhandler.h>
69#include <sys/user.h>
70
71#include <net/zlib.h>
72
73#include <vm/vm.h>
74#include <vm/vm_kern.h>
75#include <vm/vm_param.h>
76#include <vm/pmap.h>
77#include <vm/vm_map.h>
78#include <vm/vm_object.h>
79#include <vm/vm_extern.h>
80
81#include <machine/elf.h>
82#include <machine/md_var.h>
83
/* Alignment used when stepping over the name field of an ELF note. */
#define ELF_NOTE_ROUNDSIZE	4
/* Index into e_ident[] of the old FreeBSD 3.x style brand string. */
#define OLD_EI_BRAND	8

/*
 * Forward declarations of the file-local helpers.
 */
static int __elfN(check_header)(const Elf_Ehdr *hdr);
static Elf_Brandinfo *__elfN(get_brandinfo)(struct image_params *imgp,
    const char *interp, int interp_name_len, int32_t *osrel);
static int __elfN(load_file)(struct proc *p, const char *file, u_long *addr,
    u_long *entry, size_t pagesize);
static int __elfN(load_section)(struct image_params *imgp, vm_offset_t offset,
    caddr_t vmaddr, size_t memsz, size_t filsz, vm_prot_t prot,
    size_t pagesize);
static int __CONCAT(exec_, __elfN(imgact))(struct image_params *imgp);
static boolean_t __elfN(freebsd_trans_osrel)(const Elf_Note *note,
    int32_t *osrel);
static boolean_t kfreebsd_trans_osrel(const Elf_Note *note, int32_t *osrel);
static boolean_t __elfN(check_note)(struct image_params *imgp,
    Elf_Brandnote *checknote, int32_t *osrel);
/* Conversions between ELF segment flags and VM protection bits. */
static vm_prot_t __elfN(trans_prot)(Elf_Word);
static Elf_Word __elfN(untrans_prot)(vm_prot_t);
103
/* Parent sysctl node for the knobs below: kern.elf<word size>. */
SYSCTL_NODE(_kern, OID_AUTO, __CONCAT(elf, __ELF_WORD_SIZE), CTLFLAG_RW, 0,
    "");

#ifdef COMPRESS_USER_CORES
static int compress_core(gzFile, char *, char *, unsigned int,
    struct thread * td);
#endif /* COMPRESS_USER_CORES */
/* NOTE(review): presumably the core dump staging buffer size; confirm. */
#define CORE_BUF_SIZE	(16 * 1024)

/*
 * Brand number tried by get_brandinfo() when no other branding method
 * identifies the image.  Settable both as a sysctl and a loader tunable.
 */
int __elfN(fallback_brand) = -1;
SYSCTL_INT(__CONCAT(_kern_elf, __ELF_WORD_SIZE), OID_AUTO,
    fallback_brand, CTLFLAG_RW, &__elfN(fallback_brand), 0,
    __XSTRING(__CONCAT(ELF, __ELF_WORD_SIZE)) " brand of last resort");
TUNABLE_INT("kern.elf" __XSTRING(__ELF_WORD_SIZE) ".fallback_brand",
    &__elfN(fallback_brand));

/*
 * debug.* knob with no description string.
 * NOTE(review): consumed by the core dump code outside this section;
 * exact semantics to be confirmed there.
 */
static int elf_legacy_coredump = 0;
SYSCTL_INT(_debug, OID_AUTO, __elfN(legacy_coredump), CTLFLAG_RW,
    &elf_legacy_coredump, 0, "");

/*
 * When non-zero, the image activator takes the stack protection from the
 * PT_GNU_STACK program header.  Enabled by default on amd64 and powerpc64.
 */
int __elfN(nxstack) =
#if defined(__amd64__) || defined(__powerpc64__) /* both 64 and 32 bit */
	1;
#else
	0;
#endif
SYSCTL_INT(__CONCAT(_kern_elf, __ELF_WORD_SIZE), OID_AUTO,
    nxstack, CTLFLAG_RW, &__elfN(nxstack), 0,
    __XSTRING(__CONCAT(ELF, __ELF_WORD_SIZE)) ": enable non-executable stack");

/* Only exists for 32-bit ELF support built on amd64 or ia64. */
#if __ELF_WORD_SIZE == 32
#if defined(__amd64__) || defined(__ia64__)
int i386_read_exec = 0;
SYSCTL_INT(_kern_elf32, OID_AUTO, read_exec, CTLFLAG_RW, &i386_read_exec, 0,
    "enable execution from readable segments");
#endif
#endif

/* Table of registered brands; a NULL slot is free. */
static Elf_Brandinfo *elf_brand_list[MAX_BRANDS];
143
/*
 * Page rounding helpers for a caller-supplied power-of-two size "ps".
 *
 * trunc_page_ps() rounds "va" down and round_page_ps() rounds it up to a
 * multiple of "ps".  aligned() is true when address "a" is a multiple of
 * sizeof(t).  Every macro argument is parenthesised so that expression
 * arguments such as "1 << shift" expand with the intended precedence.
 */
#define	trunc_page_ps(va, ps)	((va) & ~((ps) - 1))
#define	round_page_ps(va, ps)	(((va) + ((ps) - 1)) & ~((ps) - 1))
#define	aligned(a, t)	(trunc_page_ps((u_long)(a), sizeof(t)) == (u_long)(a))
147
/* Vendor name carried by the FreeBSD ABI note. */
static const char FREEBSD_ABI_VENDOR[] = "FreeBSD";

/*
 * Brand note template for native FreeBSD binaries: a type 1 note from
 * vendor "FreeBSD" whose descriptor is a single int32_t, which
 * freebsd_trans_osrel() returns as the osrel value.
 */
Elf_Brandnote __elfN(freebsd_brandnote) = {
	.hdr.n_namesz	= sizeof(FREEBSD_ABI_VENDOR),	/* includes the NUL */
	.hdr.n_descsz	= sizeof(int32_t),
	.hdr.n_type	= 1,
	.vendor		= FREEBSD_ABI_VENDOR,
	.flags		= BN_TRANSLATE_OSREL,
	.trans_osrel	= __elfN(freebsd_trans_osrel)
};
158
159static boolean_t
160__elfN(freebsd_trans_osrel)(const Elf_Note *note, int32_t *osrel)
161{
162	uintptr_t p;
163
164	p = (uintptr_t)(note + 1);
165	p += roundup2(note->n_namesz, ELF_NOTE_ROUNDSIZE);
166	*osrel = *(const int32_t *)(p);
167
168	return (TRUE);
169}
170
/* Vendor name carried by the GNU ABI note. */
static const char GNU_ABI_VENDOR[] = "GNU";
/* Value kfreebsd_trans_osrel() requires in the first descriptor word. */
static int GNU_KFREEBSD_ABI_DESC = 3;

/*
 * Brand note template for GNU/kFreeBSD binaries: a type 1 note from
 * vendor "GNU" that is translated by kfreebsd_trans_osrel().
 */
Elf_Brandnote __elfN(kfreebsd_brandnote) = {
	.hdr.n_namesz	= sizeof(GNU_ABI_VENDOR),	/* includes the NUL */
	.hdr.n_descsz	= 16,	/* XXX at least 16 */
	.hdr.n_type	= 1,
	.vendor		= GNU_ABI_VENDOR,
	.flags		= BN_TRANSLATE_OSREL,
	.trans_osrel	= kfreebsd_trans_osrel
};
182
183static boolean_t
184kfreebsd_trans_osrel(const Elf_Note *note, int32_t *osrel)
185{
186	const Elf32_Word *desc;
187	uintptr_t p;
188
189	p = (uintptr_t)(note + 1);
190	p += roundup2(note->n_namesz, ELF_NOTE_ROUNDSIZE);
191
192	desc = (const Elf32_Word *)p;
193	if (desc[0] != GNU_KFREEBSD_ABI_DESC)
194		return (FALSE);
195
196	/*
197	 * Debian GNU/kFreeBSD embed the earliest compatible kernel version
198	 * (__FreeBSD_version: <major><two digit minor>Rxx) in the LSB way.
199	 */
200	*osrel = desc[1] * 100000 + desc[2] * 1000 + desc[3];
201
202	return (TRUE);
203}
204
205int
206__elfN(insert_brand_entry)(Elf_Brandinfo *entry)
207{
208	int i;
209
210	for (i = 0; i < MAX_BRANDS; i++) {
211		if (elf_brand_list[i] == NULL) {
212			elf_brand_list[i] = entry;
213			break;
214		}
215	}
216	if (i == MAX_BRANDS) {
217		printf("WARNING: %s: could not insert brandinfo entry: %p\n",
218			__func__, entry);
219		return (-1);
220	}
221	return (0);
222}
223
224int
225__elfN(remove_brand_entry)(Elf_Brandinfo *entry)
226{
227	int i;
228
229	for (i = 0; i < MAX_BRANDS; i++) {
230		if (elf_brand_list[i] == entry) {
231			elf_brand_list[i] = NULL;
232			break;
233		}
234	}
235	if (i == MAX_BRANDS)
236		return (-1);
237	return (0);
238}
239
240int
241__elfN(brand_inuse)(Elf_Brandinfo *entry)
242{
243	struct proc *p;
244	int rval = FALSE;
245
246	sx_slock(&allproc_lock);
247	FOREACH_PROC_IN_SYSTEM(p) {
248		if (p->p_sysent == entry->sysvec) {
249			rval = TRUE;
250			break;
251		}
252	}
253	sx_sunlock(&allproc_lock);
254
255	return (rval);
256}
257
258static Elf_Brandinfo *
259__elfN(get_brandinfo)(struct image_params *imgp, const char *interp,
260    int interp_name_len, int32_t *osrel)
261{
262	const Elf_Ehdr *hdr = (const Elf_Ehdr *)imgp->image_header;
263	Elf_Brandinfo *bi;
264	boolean_t ret;
265	int i;
266
267	/*
268	 * We support four types of branding -- (1) the ELF EI_OSABI field
269	 * that SCO added to the ELF spec, (2) FreeBSD 3.x's traditional string
270	 * branding w/in the ELF header, (3) path of the `interp_path'
271	 * field, and (4) the ".note.ABI-tag" ELF section.
272	 */
273
274	/* Look for an ".note.ABI-tag" ELF section */
275	for (i = 0; i < MAX_BRANDS; i++) {
276		bi = elf_brand_list[i];
277		if (bi == NULL)
278			continue;
279		if (hdr->e_machine == bi->machine && (bi->flags &
280		    (BI_BRAND_NOTE|BI_BRAND_NOTE_MANDATORY)) != 0) {
281			ret = __elfN(check_note)(imgp, bi->brand_note, osrel);
282			if (ret)
283				return (bi);
284		}
285	}
286
287	/* If the executable has a brand, search for it in the brand list. */
288	for (i = 0; i < MAX_BRANDS; i++) {
289		bi = elf_brand_list[i];
290		if (bi == NULL || bi->flags & BI_BRAND_NOTE_MANDATORY)
291			continue;
292		if (hdr->e_machine == bi->machine &&
293		    (hdr->e_ident[EI_OSABI] == bi->brand ||
294		    strncmp((const char *)&hdr->e_ident[OLD_EI_BRAND],
295		    bi->compat_3_brand, strlen(bi->compat_3_brand)) == 0))
296			return (bi);
297	}
298
299	/* Lacking a known brand, search for a recognized interpreter. */
300	if (interp != NULL) {
301		for (i = 0; i < MAX_BRANDS; i++) {
302			bi = elf_brand_list[i];
303			if (bi == NULL || bi->flags & BI_BRAND_NOTE_MANDATORY)
304				continue;
305			if (hdr->e_machine == bi->machine &&
306			    /* ELF image p_filesz includes terminating zero */
307			    strlen(bi->interp_path) + 1 == interp_name_len &&
308			    strncmp(interp, bi->interp_path, interp_name_len)
309			    == 0)
310				return (bi);
311		}
312	}
313
314	/* Lacking a recognized interpreter, try the default brand */
315	for (i = 0; i < MAX_BRANDS; i++) {
316		bi = elf_brand_list[i];
317		if (bi == NULL || bi->flags & BI_BRAND_NOTE_MANDATORY)
318			continue;
319		if (hdr->e_machine == bi->machine &&
320		    __elfN(fallback_brand) == bi->brand)
321			return (bi);
322	}
323	return (NULL);
324}
325
326static int
327__elfN(check_header)(const Elf_Ehdr *hdr)
328{
329	Elf_Brandinfo *bi;
330	int i;
331
332	if (!IS_ELF(*hdr) ||
333	    hdr->e_ident[EI_CLASS] != ELF_TARG_CLASS ||
334	    hdr->e_ident[EI_DATA] != ELF_TARG_DATA ||
335	    hdr->e_ident[EI_VERSION] != EV_CURRENT ||
336	    hdr->e_phentsize != sizeof(Elf_Phdr) ||
337	    hdr->e_version != ELF_TARG_VER)
338		return (ENOEXEC);
339
340	/*
341	 * Make sure we have at least one brand for this machine.
342	 */
343
344	for (i = 0; i < MAX_BRANDS; i++) {
345		bi = elf_brand_list[i];
346		if (bi != NULL && bi->machine == hdr->e_machine)
347			break;
348	}
349	if (i == MAX_BRANDS)
350		return (ENOEXEC);
351
352	return (0);
353}
354
355static int
356__elfN(map_partial)(vm_map_t map, vm_object_t object, vm_ooffset_t offset,
357    vm_offset_t start, vm_offset_t end, vm_prot_t prot)
358{
359	struct sf_buf *sf;
360	int error;
361	vm_offset_t off;
362
363	/*
364	 * Create the page if it doesn't exist yet. Ignore errors.
365	 */
366	vm_map_lock(map);
367	vm_map_insert(map, NULL, 0, trunc_page(start), round_page(end),
368	    VM_PROT_ALL, VM_PROT_ALL, 0);
369	vm_map_unlock(map);
370
371	/*
372	 * Find the page from the underlying object.
373	 */
374	if (object) {
375		sf = vm_imgact_map_page(object, offset);
376		if (sf == NULL)
377			return (KERN_FAILURE);
378		off = offset - trunc_page(offset);
379		error = copyout((caddr_t)sf_buf_kva(sf) + off, (caddr_t)start,
380		    end - start);
381		vm_imgact_unmap_page(object, sf);
382		if (error) {
383			return (KERN_FAILURE);
384		}
385	}
386
387	return (KERN_SUCCESS);
388}
389
/*
 * Establish the mapping [start, end) in "map", backed by "object" at
 * "offset" (or anonymous memory when object is NULL).
 *
 * Unaligned leading and trailing fragments are handled by map_partial().
 * The remaining whole pages are either mapped directly from the object
 * or, when the file offset is not page aligned, allocated anonymously
 * and filled by copying.
 *
 * Returns KERN_SUCCESS or a KERN_* error code.
 */
static int
__elfN(map_insert)(vm_map_t map, vm_object_t object, vm_ooffset_t offset,
    vm_offset_t start, vm_offset_t end, vm_prot_t prot, int cow)
{
	struct sf_buf *sf;
	vm_offset_t off;
	vm_size_t sz;
	int error, rv;

	/* Leading fragment: fill up to the next page boundary, then skip it. */
	if (start != trunc_page(start)) {
		rv = __elfN(map_partial)(map, object, offset, start,
		    round_page(start), prot);
		if (rv)
			return (rv);
		offset += round_page(start) - start;
		start = round_page(start);
	}
	/* Trailing fragment: fill the tail page, then trim it off "end". */
	if (end != round_page(end)) {
		rv = __elfN(map_partial)(map, object, offset +
		    trunc_page(end) - start, trunc_page(end), end, prot);
		if (rv)
			return (rv);
		end = trunc_page(end);
	}
	/* Whatever is left consists of whole pages only. */
	if (end > start) {
		if (offset & PAGE_MASK) {
			/*
			 * The mapping is not page aligned. This means we have
			 * to copy the data. Sigh.
			 */
			rv = vm_map_find(map, NULL, 0, &start, end - start,
			    FALSE, prot | VM_PROT_WRITE, VM_PROT_ALL, 0);
			if (rv)
				return (rv);
			if (object == NULL)
				return (KERN_SUCCESS);
			/*
			 * Copy piecewise; each step is limited to what is
			 * left of the current source page.
			 */
			for (; start < end; start += sz) {
				sf = vm_imgact_map_page(object, offset);
				if (sf == NULL)
					return (KERN_FAILURE);
				off = offset - trunc_page(offset);
				sz = end - start;
				if (sz > PAGE_SIZE - off)
					sz = PAGE_SIZE - off;
				error = copyout((caddr_t)sf_buf_kva(sf) + off,
				    (caddr_t)start, sz);
				vm_imgact_unmap_page(object, sf);
				if (error) {
					return (KERN_FAILURE);
				}
				offset += sz;
			}
			rv = KERN_SUCCESS;
		} else {
			/*
			 * Aligned case: map the object directly.  The
			 * reference taken here is dropped again if the
			 * insertion fails.
			 */
			vm_object_reference(object);
			vm_map_lock(map);
			rv = vm_map_insert(map, object, offset, start, end,
			    prot, VM_PROT_ALL, cow);
			vm_map_unlock(map);
			if (rv != KERN_SUCCESS)
				vm_object_deallocate(object);
		}
		return (rv);
	} else {
		return (KERN_SUCCESS);
	}
}
457
/*
 * Map one loadable segment of the image "imgp" into the address space of
 * imgp->proc.
 *
 *   offset    file offset of the segment data
 *   vmaddr    user virtual address the segment is loaded at
 *   memsz     size of the segment in memory
 *   filsz     number of bytes backed by the file (must not exceed memsz)
 *   prot      final protection of the segment
 *   pagesize  page size used for rounding the mapping boundaries
 *
 * The file-backed part is mapped copy-on-write; the part beyond filsz
 * (normally bss) gets anonymous memory, and the bytes of the last,
 * partially file-backed page are copied into it.
 *
 * Returns 0 on success, ENOEXEC for a truncated or inconsistent segment,
 * EINVAL if a mapping could not be established, EIO if the file page
 * could not be mapped, or the copyout() error.
 */
static int
__elfN(load_section)(struct image_params *imgp, vm_offset_t offset,
    caddr_t vmaddr, size_t memsz, size_t filsz, vm_prot_t prot,
    size_t pagesize)
{
	struct sf_buf *sf;
	size_t map_len;
	vm_map_t map;
	vm_object_t object;
	vm_offset_t map_addr;
	int error, rv, cow;
	size_t copy_len;
	vm_offset_t file_addr;

	/*
	 * It's necessary to fail if the filsz + offset taken from the
	 * header is greater than the actual file pager object's size.
	 * If we were to allow this, then the vm_map_find() below would
	 * walk right off the end of the file object and into the ether.
	 *
	 * While I'm here, might as well check for something else that
	 * is invalid: filsz cannot be greater than memsz.
	 */
	if ((off_t)filsz + offset > imgp->attr->va_size || filsz > memsz) {
		uprintf("elf_load_section: truncated ELF file\n");
		return (ENOEXEC);
	}

	object = imgp->object;
	map = &imgp->proc->p_vmspace->vm_map;
	map_addr = trunc_page_ps((vm_offset_t)vmaddr, pagesize);
	file_addr = trunc_page_ps(offset, pagesize);

	/*
	 * We have two choices.  We can either clear the data in the last page
	 * of an oversized mapping, or we can start the anon mapping a page
	 * early and copy the initialized data into that first page.  We
	 * choose the second..
	 */
	if (memsz > filsz)
		map_len = trunc_page_ps(offset + filsz, pagesize) - file_addr;
	else
		map_len = round_page_ps(offset + filsz, pagesize) - file_addr;

	if (map_len != 0) {
		/* cow flags: don't dump readonly sections in core */
		cow = MAP_COPY_ON_WRITE | MAP_PREFAULT |
		    (prot & VM_PROT_WRITE ? 0 : MAP_DISABLE_COREDUMP);

		rv = __elfN(map_insert)(map,
				      object,
				      file_addr,	/* file offset */
				      map_addr,		/* virtual start */
				      map_addr + map_len,/* virtual end */
				      prot,
				      cow);
		if (rv != KERN_SUCCESS)
			return (EINVAL);

		/* we can stop now if we've covered it all */
		if (memsz == filsz) {
			return (0);
		}
	}


	/*
	 * We have to get the remaining bit of the file into the first part
	 * of the oversized map segment.  This is normally because the .data
	 * segment in the file is extended to provide bss.  It's a neat idea
	 * to try and save a page, but it's a pain in the behind to implement.
	 */
	/* Bytes of file data that spill into the last, partial page. */
	copy_len = (offset + filsz) - trunc_page_ps(offset + filsz, pagesize);
	/* Anonymous region: from that page up to the end of the segment. */
	map_addr = trunc_page_ps((vm_offset_t)vmaddr + filsz, pagesize);
	map_len = round_page_ps((vm_offset_t)vmaddr + memsz, pagesize) -
	    map_addr;

	/* This had damn well better be true! */
	if (map_len != 0) {
		rv = __elfN(map_insert)(map, NULL, 0, map_addr, map_addr +
		    map_len, VM_PROT_ALL, 0);
		if (rv != KERN_SUCCESS) {
			return (EINVAL);
		}
	}

	if (copy_len != 0) {
		vm_offset_t off;

		sf = vm_imgact_map_page(object, offset + filsz);
		if (sf == NULL)
			return (EIO);

		/* send the page fragment to user space */
		off = trunc_page_ps(offset + filsz, pagesize) -
		    trunc_page(offset + filsz);
		error = copyout((caddr_t)sf_buf_kva(sf) + off,
		    (caddr_t)map_addr, copy_len);
		vm_imgact_unmap_page(object, sf);
		if (error) {
			return (error);
		}
	}

	/*
	 * set it to the specified protection.
	 * XXX had better undo the damage from pasting over the cracks here!
	 */
	/* NOTE(review): the result of vm_map_protect() is not checked. */
	vm_map_protect(map, trunc_page(map_addr), round_page(map_addr +
	    map_len), prot, FALSE);

	return (0);
}
571
/*
 * Load the file "file" into memory.  It may be either a shared object
 * or an executable.
 *
 * The "addr" reference parameter is in/out.  On entry, it specifies
 * the address where a shared object should be loaded.  If the file is
 * an executable, this value is ignored.  On exit, "addr" specifies
 * where the file was actually loaded.
 *
 * The "entry" reference parameter is out only.  On exit, it specifies
 * the entry point for the loaded file.
 *
 * "p" is the process whose address space receives the mappings and
 * "pagesize" is passed on to load_section() for every PT_LOAD segment.
 *
 * Returns 0 on success or an errno value; "addr" and "entry" are only
 * written on success.
 */
static int
__elfN(load_file)(struct proc *p, const char *file, u_long *addr,
	u_long *entry, size_t pagesize)
{
	/* Scratch state, heap allocated rather than placed on the stack. */
	struct {
		struct nameidata nd;
		struct vattr attr;
		struct image_params image_params;
	} *tempdata;
	const Elf_Ehdr *hdr = NULL;
	const Elf_Phdr *phdr = NULL;
	struct nameidata *nd;
	struct vattr *attr;
	struct image_params *imgp;
	vm_prot_t prot;
	u_long rbase;
	u_long base_addr = 0;
	int error, i, numsegs;

#ifdef CAPABILITY_MODE
	/*
	 * XXXJA: This check can go away once we are sufficiently confident
	 * that the checks in namei() are correct.
	 */
	if (IN_CAPABILITY_MODE(curthread))
		return (ECAPMODE);
#endif

	tempdata = malloc(sizeof(*tempdata), M_TEMP, M_WAITOK);
	nd = &tempdata->nd;
	attr = &tempdata->attr;
	imgp = &tempdata->image_params;

	/*
	 * Initialize part of the common data
	 */
	imgp->proc = p;
	imgp->attr = attr;
	imgp->firstpage = NULL;
	imgp->image_header = NULL;
	imgp->object = NULL;
	imgp->execlabel = NULL;

	NDINIT(nd, LOOKUP, LOCKLEAF | FOLLOW, UIO_SYSSPACE, file, curthread);
	if ((error = namei(nd)) != 0) {
		/* Make sure the cleanup code below does not vput() anything. */
		nd->ni_vp = NULL;
		goto fail;
	}
	NDFREE(nd, NDF_ONLY_PNBUF);
	imgp->vp = nd->ni_vp;

	/*
	 * Check permissions, modes, uid, etc on the file, and "open" it.
	 */
	error = exec_check_permissions(imgp);
	if (error)
		goto fail;

	error = exec_map_first_page(imgp);
	if (error)
		goto fail;

	/*
	 * Also make certain that the interpreter stays the same, so set
	 * its VV_TEXT flag, too.
	 */
	VOP_SET_TEXT(nd->ni_vp);

	imgp->object = nd->ni_vp->v_object;

	hdr = (const Elf_Ehdr *)imgp->image_header;
	if ((error = __elfN(check_header)(hdr)) != 0)
		goto fail;
	/* Shared objects are relocated to *addr; executables are not. */
	if (hdr->e_type == ET_DYN)
		rbase = *addr;
	else if (hdr->e_type == ET_EXEC)
		rbase = 0;
	else {
		error = ENOEXEC;
		goto fail;
	}

	/* Only support headers that fit within first page for now      */
	if ((hdr->e_phoff > PAGE_SIZE) ||
	    (u_int)hdr->e_phentsize * hdr->e_phnum > PAGE_SIZE - hdr->e_phoff) {
		error = ENOEXEC;
		goto fail;
	}

	phdr = (const Elf_Phdr *)(imgp->image_header + hdr->e_phoff);
	if (!aligned(phdr, Elf_Addr)) {
		error = ENOEXEC;
		goto fail;
	}

	/* Map every non-empty PT_LOAD segment. */
	for (i = 0, numsegs = 0; i < hdr->e_phnum; i++) {
		if (phdr[i].p_type == PT_LOAD && phdr[i].p_memsz != 0) {
			/* Loadable segment */
			prot = __elfN(trans_prot)(phdr[i].p_flags);
			error = __elfN(load_section)(imgp, phdr[i].p_offset,
			    (caddr_t)(uintptr_t)phdr[i].p_vaddr + rbase,
			    phdr[i].p_memsz, phdr[i].p_filesz, prot, pagesize);
			if (error != 0)
				goto fail;
			/*
			 * Establish the base address if this is the
			 * first segment.
			 */
			if (numsegs == 0)
				base_addr = trunc_page(phdr[i].p_vaddr +
				    rbase);
			numsegs++;
		}
	}
	*addr = base_addr;
	*entry = (unsigned long)hdr->e_entry + rbase;

	/* Common exit path; also reached on success with error == 0. */
fail:
	if (imgp->firstpage)
		exec_unmap_first_page(imgp);

	if (nd->ni_vp)
		vput(nd->ni_vp);

	free(tempdata, M_TEMP);

	return (error);
}
712
713static int
714__CONCAT(exec_, __elfN(imgact))(struct image_params *imgp)
715{
716	const Elf_Ehdr *hdr = (const Elf_Ehdr *)imgp->image_header;
717	const Elf_Phdr *phdr;
718	Elf_Auxargs *elf_auxargs;
719	struct vmspace *vmspace;
720	vm_prot_t prot;
721	u_long text_size = 0, data_size = 0, total_size = 0;
722	u_long text_addr = 0, data_addr = 0;
723	u_long seg_size, seg_addr;
724	u_long addr, baddr, et_dyn_addr, entry = 0, proghdr = 0;
725	int32_t osrel = 0;
726	int error = 0, i, n, interp_name_len = 0;
727	const char *interp = NULL, *newinterp = NULL;
728	Elf_Brandinfo *brand_info;
729	char *path;
730	struct sysentvec *sv;
731
732	/*
733	 * Do we have a valid ELF header ?
734	 *
735	 * Only allow ET_EXEC & ET_DYN here, reject ET_DYN later
736	 * if particular brand doesn't support it.
737	 */
738	if (__elfN(check_header)(hdr) != 0 ||
739	    (hdr->e_type != ET_EXEC && hdr->e_type != ET_DYN))
740		return (-1);
741
742	/*
743	 * From here on down, we return an errno, not -1, as we've
744	 * detected an ELF file.
745	 */
746
747	if ((hdr->e_phoff > PAGE_SIZE) ||
748	    (u_int)hdr->e_phentsize * hdr->e_phnum > PAGE_SIZE - hdr->e_phoff) {
749		/* Only support headers in first page for now */
750		return (ENOEXEC);
751	}
752	phdr = (const Elf_Phdr *)(imgp->image_header + hdr->e_phoff);
753	if (!aligned(phdr, Elf_Addr))
754		return (ENOEXEC);
755	n = 0;
756	baddr = 0;
757	for (i = 0; i < hdr->e_phnum; i++) {
758		switch (phdr[i].p_type) {
759		case PT_LOAD:
760			if (n == 0)
761				baddr = phdr[i].p_vaddr;
762			n++;
763			break;
764		case PT_INTERP:
765			/* Path to interpreter */
766			if (phdr[i].p_filesz > MAXPATHLEN ||
767			    phdr[i].p_offset > PAGE_SIZE ||
768			    phdr[i].p_filesz > PAGE_SIZE - phdr[i].p_offset)
769				return (ENOEXEC);
770			interp = imgp->image_header + phdr[i].p_offset;
771			interp_name_len = phdr[i].p_filesz;
772			break;
773		case PT_GNU_STACK:
774			if (__elfN(nxstack))
775				imgp->stack_prot =
776				    __elfN(trans_prot)(phdr[i].p_flags);
777			break;
778		}
779	}
780
781	brand_info = __elfN(get_brandinfo)(imgp, interp, interp_name_len,
782	    &osrel);
783	if (brand_info == NULL) {
784		uprintf("ELF binary type \"%u\" not known.\n",
785		    hdr->e_ident[EI_OSABI]);
786		return (ENOEXEC);
787	}
788	if (hdr->e_type == ET_DYN) {
789		if ((brand_info->flags & BI_CAN_EXEC_DYN) == 0)
790			return (ENOEXEC);
791		/*
792		 * Honour the base load address from the dso if it is
793		 * non-zero for some reason.
794		 */
795		if (baddr == 0)
796			et_dyn_addr = ET_DYN_LOAD_ADDR;
797		else
798			et_dyn_addr = 0;
799	} else
800		et_dyn_addr = 0;
801	sv = brand_info->sysvec;
802	if (interp != NULL && brand_info->interp_newpath != NULL)
803		newinterp = brand_info->interp_newpath;
804
805	/*
806	 * Avoid a possible deadlock if the current address space is destroyed
807	 * and that address space maps the locked vnode.  In the common case,
808	 * the locked vnode's v_usecount is decremented but remains greater
809	 * than zero.  Consequently, the vnode lock is not needed by vrele().
810	 * However, in cases where the vnode lock is external, such as nullfs,
811	 * v_usecount may become zero.
812	 *
813	 * The VV_TEXT flag prevents modifications to the executable while
814	 * the vnode is unlocked.
815	 */
816	VOP_UNLOCK(imgp->vp, 0);
817
818	error = exec_new_vmspace(imgp, sv);
819	imgp->proc->p_sysent = sv;
820
821	vn_lock(imgp->vp, LK_EXCLUSIVE | LK_RETRY);
822	if (error)
823		return (error);
824
825	for (i = 0; i < hdr->e_phnum; i++) {
826		switch (phdr[i].p_type) {
827		case PT_LOAD:	/* Loadable segment */
828			if (phdr[i].p_memsz == 0)
829				break;
830			prot = __elfN(trans_prot)(phdr[i].p_flags);
831			error = __elfN(load_section)(imgp, phdr[i].p_offset,
832			    (caddr_t)(uintptr_t)phdr[i].p_vaddr + et_dyn_addr,
833			    phdr[i].p_memsz, phdr[i].p_filesz, prot,
834			    sv->sv_pagesize);
835			if (error != 0)
836				return (error);
837
838			/*
839			 * If this segment contains the program headers,
840			 * remember their virtual address for the AT_PHDR
841			 * aux entry. Static binaries don't usually include
842			 * a PT_PHDR entry.
843			 */
844			if (phdr[i].p_offset == 0 &&
845			    hdr->e_phoff + hdr->e_phnum * hdr->e_phentsize
846				<= phdr[i].p_filesz)
847				proghdr = phdr[i].p_vaddr + hdr->e_phoff +
848				    et_dyn_addr;
849
850			seg_addr = trunc_page(phdr[i].p_vaddr + et_dyn_addr);
851			seg_size = round_page(phdr[i].p_memsz +
852			    phdr[i].p_vaddr + et_dyn_addr - seg_addr);
853
854			/*
855			 * Make the largest executable segment the official
856			 * text segment and all others data.
857			 *
858			 * Note that obreak() assumes that data_addr +
859			 * data_size == end of data load area, and the ELF
860			 * file format expects segments to be sorted by
861			 * address.  If multiple data segments exist, the
862			 * last one will be used.
863			 */
864
865			if (phdr[i].p_flags & PF_X && text_size < seg_size) {
866				text_size = seg_size;
867				text_addr = seg_addr;
868			} else {
869				data_size = seg_size;
870				data_addr = seg_addr;
871			}
872			total_size += seg_size;
873			break;
874		case PT_PHDR: 	/* Program header table info */
875			proghdr = phdr[i].p_vaddr + et_dyn_addr;
876			break;
877		default:
878			break;
879		}
880	}
881
882	if (data_addr == 0 && data_size == 0) {
883		data_addr = text_addr;
884		data_size = text_size;
885	}
886
887	entry = (u_long)hdr->e_entry + et_dyn_addr;
888
889	/*
890	 * Check limits.  It should be safe to check the
891	 * limits after loading the segments since we do
892	 * not actually fault in all the segments pages.
893	 */
894	PROC_LOCK(imgp->proc);
895	if (data_size > lim_cur(imgp->proc, RLIMIT_DATA) ||
896	    text_size > maxtsiz ||
897	    total_size > lim_cur(imgp->proc, RLIMIT_VMEM) ||
898	    racct_set(imgp->proc, RACCT_DATA, data_size) != 0 ||
899	    racct_set(imgp->proc, RACCT_VMEM, total_size) != 0) {
900		PROC_UNLOCK(imgp->proc);
901		return (ENOMEM);
902	}
903
904	vmspace = imgp->proc->p_vmspace;
905	vmspace->vm_tsize = text_size >> PAGE_SHIFT;
906	vmspace->vm_taddr = (caddr_t)(uintptr_t)text_addr;
907	vmspace->vm_dsize = data_size >> PAGE_SHIFT;
908	vmspace->vm_daddr = (caddr_t)(uintptr_t)data_addr;
909
910	/*
911	 * We load the dynamic linker where a userland call
912	 * to mmap(0, ...) would put it.  The rationale behind this
913	 * calculation is that it leaves room for the heap to grow to
914	 * its maximum allowed size.
915	 */
916	addr = round_page((vm_offset_t)vmspace->vm_daddr + lim_max(imgp->proc,
917	    RLIMIT_DATA));
918	PROC_UNLOCK(imgp->proc);
919
920	imgp->entry_addr = entry;
921
922	if (interp != NULL) {
923		int have_interp = FALSE;
924		VOP_UNLOCK(imgp->vp, 0);
925		if (brand_info->emul_path != NULL &&
926		    brand_info->emul_path[0] != '\0') {
927			path = malloc(MAXPATHLEN, M_TEMP, M_WAITOK);
928			snprintf(path, MAXPATHLEN, "%s%s",
929			    brand_info->emul_path, interp);
930			error = __elfN(load_file)(imgp->proc, path, &addr,
931			    &imgp->entry_addr, sv->sv_pagesize);
932			free(path, M_TEMP);
933			if (error == 0)
934				have_interp = TRUE;
935		}
936		if (!have_interp && newinterp != NULL) {
937			error = __elfN(load_file)(imgp->proc, newinterp, &addr,
938			    &imgp->entry_addr, sv->sv_pagesize);
939			if (error == 0)
940				have_interp = TRUE;
941		}
942		if (!have_interp) {
943			error = __elfN(load_file)(imgp->proc, interp, &addr,
944			    &imgp->entry_addr, sv->sv_pagesize);
945		}
946		vn_lock(imgp->vp, LK_EXCLUSIVE | LK_RETRY);
947		if (error != 0) {
948			uprintf("ELF interpreter %s not found\n", interp);
949			return (error);
950		}
951	} else
952		addr = et_dyn_addr;
953
954	/*
955	 * Construct auxargs table (used by the fixup routine)
956	 */
957	elf_auxargs = malloc(sizeof(Elf_Auxargs), M_TEMP, M_WAITOK);
958	elf_auxargs->execfd = -1;
959	elf_auxargs->phdr = proghdr;
960	elf_auxargs->phent = hdr->e_phentsize;
961	elf_auxargs->phnum = hdr->e_phnum;
962	elf_auxargs->pagesz = PAGE_SIZE;
963	elf_auxargs->base = addr;
964	elf_auxargs->flags = 0;
965	elf_auxargs->entry = entry;
966
967	imgp->auxargs = elf_auxargs;
968	imgp->interpreted = 0;
969	imgp->reloc_base = addr;
970	imgp->proc->p_osrel = osrel;
971
972	return (error);
973}
974
975#define	suword __CONCAT(suword, __ELF_WORD_SIZE)
976
977int
978__elfN(freebsd_fixup)(register_t **stack_base, struct image_params *imgp)
979{
980	Elf_Auxargs *args = (Elf_Auxargs *)imgp->auxargs;
981	Elf_Addr *base;
982	Elf_Addr *pos;
983
984	base = (Elf_Addr *)*stack_base;
985	pos = base + (imgp->args->argc + imgp->args->envc + 2);
986
987	if (args->execfd != -1)
988		AUXARGS_ENTRY(pos, AT_EXECFD, args->execfd);
989	AUXARGS_ENTRY(pos, AT_PHDR, args->phdr);
990	AUXARGS_ENTRY(pos, AT_PHENT, args->phent);
991	AUXARGS_ENTRY(pos, AT_PHNUM, args->phnum);
992	AUXARGS_ENTRY(pos, AT_PAGESZ, args->pagesz);
993	AUXARGS_ENTRY(pos, AT_FLAGS, args->flags);
994	AUXARGS_ENTRY(pos, AT_ENTRY, args->entry);
995	AUXARGS_ENTRY(pos, AT_BASE, args->base);
996	if (imgp->execpathp != 0)
997		AUXARGS_ENTRY(pos, AT_EXECPATH, imgp->execpathp);
998	AUXARGS_ENTRY(pos, AT_OSRELDATE, osreldate);
999	if (imgp->canary != 0) {
1000		AUXARGS_ENTRY(pos, AT_CANARY, imgp->canary);
1001		AUXARGS_ENTRY(pos, AT_CANARYLEN, imgp->canarylen);
1002	}
1003	AUXARGS_ENTRY(pos, AT_NCPUS, mp_ncpus);
1004	if (imgp->pagesizes != 0) {
1005		AUXARGS_ENTRY(pos, AT_PAGESIZES, imgp->pagesizes);
1006		AUXARGS_ENTRY(pos, AT_PAGESIZESLEN, imgp->pagesizeslen);
1007	}
1008	if (imgp->sysent->sv_timekeep_base != 0) {
1009		AUXARGS_ENTRY(pos, AT_TIMEKEEP,
1010		    imgp->sysent->sv_timekeep_base);
1011	}
1012	AUXARGS_ENTRY(pos, AT_STACKPROT, imgp->sysent->sv_shared_page_obj
1013	    != NULL && imgp->stack_prot != 0 ? imgp->stack_prot :
1014	    imgp->sysent->sv_stackprot);
1015	AUXARGS_ENTRY(pos, AT_NULL, 0);
1016
1017	free(imgp->auxargs, M_TEMP);
1018	imgp->auxargs = NULL;
1019
1020	base--;
1021	suword(base, (long)imgp->args->argc);
1022	*stack_base = (register_t *)base;
1023	return (0);
1024}
1025
1026/*
1027 * Code for generating ELF core dumps.
1028 */
1029
/*
 * Callback type taken by each_writable_segment(): receives a map entry
 * and a caller-supplied closure pointer.
 */
typedef void (*segment_callback)(vm_map_entry_t, void *);

/* Closure for cb_put_phdr(). */
struct phdr_closure {
	Elf_Phdr *phdr;		/* Program header to fill in */
	Elf_Off offset;		/* Offset of segment in core file */
};

/* Closure for cb_size_segment(). */
struct sseg_closure {
	int count;		/* Count of writable segments. */
	size_t size;		/* Total size of all writable segments. */
};

/*
 * Note output function: takes the note's argument, an sbuf and a size
 * pointer.
 * NOTE(review): the exact contract of the size pointer is defined by the
 * implementations, which are outside this section; confirm there.
 */
typedef void (*outfunc_t)(void *, struct sbuf *, size_t *);

/* Description of one note; entries are chained on a note_info_list. */
struct note_info {
	int		type;		/* Note type. */
	outfunc_t 	outfunc; 	/* Output function. */
	void		*outarg;	/* Argument for the output function. */
	size_t		outsize;	/* Output size. */
	TAILQ_ENTRY(note_info) link;	/* Link to the next note info. */
};

/* Tail queue head type for lists of struct note_info. */
TAILQ_HEAD(note_info_list, note_info);
1055
1056static void cb_put_phdr(vm_map_entry_t, void *);
1057static void cb_size_segment(vm_map_entry_t, void *);
1058static void each_writable_segment(struct thread *, segment_callback, void *);
1059static int __elfN(corehdr)(struct thread *, struct vnode *, struct ucred *,
1060    int, void *, size_t, struct note_info_list *, size_t, gzFile);
1061static void __elfN(prepare_notes)(struct thread *, struct note_info_list *,
1062    size_t *);
1063static void __elfN(puthdr)(struct thread *, void *, size_t, int, size_t);
1064static void __elfN(putnote)(struct note_info *, struct sbuf *);
1065static size_t register_note(struct note_info_list *, int, outfunc_t, void *);
1066static int sbuf_drain_core_output(void *, const char *, int);
1067static int sbuf_drain_count(void *arg, const char *data, int len);
1068
1069static void __elfN(note_fpregset)(void *, struct sbuf *, size_t *);
1070static void __elfN(note_prpsinfo)(void *, struct sbuf *, size_t *);
1071static void __elfN(note_prstatus)(void *, struct sbuf *, size_t *);
1072static void __elfN(note_threadmd)(void *, struct sbuf *, size_t *);
1073static void __elfN(note_thrmisc)(void *, struct sbuf *, size_t *);
1074static void __elfN(note_procstat_auxv)(void *, struct sbuf *, size_t *);
1075static void __elfN(note_procstat_proc)(void *, struct sbuf *, size_t *);
1076static void __elfN(note_procstat_psstrings)(void *, struct sbuf *, size_t *);
1077static void note_procstat_files(void *, struct sbuf *, size_t *);
1078static void note_procstat_groups(void *, struct sbuf *, size_t *);
1079static void note_procstat_osrel(void *, struct sbuf *, size_t *);
1080static void note_procstat_rlimit(void *, struct sbuf *, size_t *);
1081static void note_procstat_umask(void *, struct sbuf *, size_t *);
1082static void note_procstat_vmmap(void *, struct sbuf *, size_t *);
1083
1084#ifdef COMPRESS_USER_CORES
1085extern int compress_user_cores;
1086extern int compress_user_cores_gzlevel;
1087#endif
1088
1089static int
1090core_output(struct vnode *vp, void *base, size_t len, off_t offset,
1091    struct ucred *active_cred, struct ucred *file_cred,
1092    struct thread *td, char *core_buf, gzFile gzfile) {
1093
1094	int error;
1095	if (gzfile) {
1096#ifdef COMPRESS_USER_CORES
1097		error = compress_core(gzfile, base, core_buf, len, td);
1098#else
1099		panic("shouldn't be here");
1100#endif
1101	} else {
1102		error = vn_rdwr_inchunks(UIO_WRITE, vp, base, len, offset,
1103		    UIO_USERSPACE, IO_UNIT | IO_DIRECT, active_cred, file_cred,
1104		    NULL, td);
1105	}
1106	return (error);
1107}
1108
1109/* Coredump output parameters for sbuf drain routine. */
1110struct sbuf_drain_core_params {
1111	off_t		offset;
1112	struct ucred	*active_cred;
1113	struct ucred	*file_cred;
1114	struct thread	*td;
1115	struct vnode	*vp;
1116#ifdef COMPRESS_USER_CORES
1117	gzFile		gzfile;
1118#endif
1119};
1120
1121/*
1122 * Drain into a core file.
1123 */
1124static int
1125sbuf_drain_core_output(void *arg, const char *data, int len)
1126{
1127	struct sbuf_drain_core_params *p;
1128	int error, locked;
1129
1130	p = (struct sbuf_drain_core_params *)arg;
1131
1132	/*
1133	 * Some kern_proc out routines that print to this sbuf may
1134	 * call us with the process lock held. Draining with the
1135	 * non-sleepable lock held is unsafe. The lock is needed for
1136	 * those routines when dumping a live process. In our case we
1137	 * can safely release the lock before draining and acquire
1138	 * again after.
1139	 */
1140	locked = PROC_LOCKED(p->td->td_proc);
1141	if (locked)
1142		PROC_UNLOCK(p->td->td_proc);
1143#ifdef COMPRESS_USER_CORES
1144	if (p->gzfile != Z_NULL)
1145		error = compress_core(p->gzfile, NULL, __DECONST(char *, data),
1146		    len, p->td);
1147	else
1148#endif
1149		error = vn_rdwr_inchunks(UIO_WRITE, p->vp,
1150		    __DECONST(void *, data), len, p->offset, UIO_SYSSPACE,
1151		    IO_UNIT | IO_DIRECT, p->active_cred, p->file_cred, NULL,
1152		    p->td);
1153	if (locked)
1154		PROC_LOCK(p->td->td_proc);
1155	if (error != 0)
1156		return (-error);
1157	p->offset += len;
1158	return (len);
1159}
1160
1161/*
1162 * Drain into a counter.
1163 */
1164static int
1165sbuf_drain_count(void *arg, const char *data __unused, int len)
1166{
1167	size_t *sizep;
1168
1169	sizep = (size_t *)arg;
1170	*sizep += len;
1171	return (len);
1172}
1173
1174int
1175__elfN(coredump)(struct thread *td, struct vnode *vp, off_t limit, int flags)
1176{
1177	struct ucred *cred = td->td_ucred;
1178	int error = 0;
1179	struct sseg_closure seginfo;
1180	struct note_info_list notelst;
1181	struct note_info *ninfo;
1182	void *hdr;
1183	size_t hdrsize, notesz, coresize;
1184
1185	gzFile gzfile = Z_NULL;
1186	char *core_buf = NULL;
1187#ifdef COMPRESS_USER_CORES
1188	char gzopen_flags[8];
1189	char *p;
1190	int doing_compress = flags & IMGACT_CORE_COMPRESS;
1191#endif
1192
1193	hdr = NULL;
1194	TAILQ_INIT(&notelst);
1195
1196#ifdef COMPRESS_USER_CORES
1197        if (doing_compress) {
1198                p = gzopen_flags;
1199                *p++ = 'w';
1200                if (compress_user_cores_gzlevel >= 0 &&
1201                    compress_user_cores_gzlevel <= 9)
1202                        *p++ = '0' + compress_user_cores_gzlevel;
1203                *p = 0;
1204                gzfile = gz_open("", gzopen_flags, vp);
1205                if (gzfile == Z_NULL) {
1206                        error = EFAULT;
1207                        goto done;
1208                }
1209                core_buf = malloc(CORE_BUF_SIZE, M_TEMP, M_WAITOK | M_ZERO);
1210                if (!core_buf) {
1211                        error = ENOMEM;
1212                        goto done;
1213                }
1214        }
1215#endif
1216
1217	/* Size the program segments. */
1218	seginfo.count = 0;
1219	seginfo.size = 0;
1220	each_writable_segment(td, cb_size_segment, &seginfo);
1221
1222	/*
1223	 * Collect info about the core file header area.
1224	 */
1225	hdrsize = sizeof(Elf_Ehdr) + sizeof(Elf_Phdr) * (1 + seginfo.count);
1226	__elfN(prepare_notes)(td, &notelst, &notesz);
1227	coresize = round_page(hdrsize + notesz) + seginfo.size;
1228
1229#ifdef RACCT
1230	PROC_LOCK(td->td_proc);
1231	error = racct_add(td->td_proc, RACCT_CORE, coresize);
1232	PROC_UNLOCK(td->td_proc);
1233	if (error != 0) {
1234		error = EFAULT;
1235		goto done;
1236	}
1237#endif
1238	if (coresize >= limit) {
1239		error = EFAULT;
1240		goto done;
1241	}
1242
1243	/*
1244	 * Allocate memory for building the header, fill it up,
1245	 * and write it out following the notes.
1246	 */
1247	hdr = malloc(hdrsize, M_TEMP, M_WAITOK);
1248	if (hdr == NULL) {
1249		error = EINVAL;
1250		goto done;
1251	}
1252	error = __elfN(corehdr)(td, vp, cred, seginfo.count, hdr, hdrsize,
1253	    &notelst, notesz, gzfile);
1254
1255	/* Write the contents of all of the writable segments. */
1256	if (error == 0) {
1257		Elf_Phdr *php;
1258		off_t offset;
1259		int i;
1260
1261		php = (Elf_Phdr *)((char *)hdr + sizeof(Elf_Ehdr)) + 1;
1262		offset = round_page(hdrsize + notesz);
1263		for (i = 0; i < seginfo.count; i++) {
1264			error = core_output(vp, (caddr_t)(uintptr_t)php->p_vaddr,
1265			    php->p_filesz, offset, cred, NOCRED, curthread, core_buf, gzfile);
1266			if (error != 0)
1267				break;
1268			offset += php->p_filesz;
1269			php++;
1270		}
1271	}
1272	if (error) {
1273		log(LOG_WARNING,
1274		    "Failed to write core file for process %s (error %d)\n",
1275		    curproc->p_comm, error);
1276	}
1277
1278done:
1279#ifdef COMPRESS_USER_CORES
1280	if (core_buf)
1281		free(core_buf, M_TEMP);
1282	if (gzfile)
1283		gzclose(gzfile);
1284#endif
1285	while ((ninfo = TAILQ_FIRST(&notelst)) != NULL) {
1286		TAILQ_REMOVE(&notelst, ninfo, link);
1287		free(ninfo, M_TEMP);
1288	}
1289	if (hdr != NULL)
1290		free(hdr, M_TEMP);
1291
1292	return (error);
1293}
1294
1295/*
1296 * A callback for each_writable_segment() to write out the segment's
1297 * program header entry.
1298 */
1299static void
1300cb_put_phdr(entry, closure)
1301	vm_map_entry_t entry;
1302	void *closure;
1303{
1304	struct phdr_closure *phc = (struct phdr_closure *)closure;
1305	Elf_Phdr *phdr = phc->phdr;
1306
1307	phc->offset = round_page(phc->offset);
1308
1309	phdr->p_type = PT_LOAD;
1310	phdr->p_offset = phc->offset;
1311	phdr->p_vaddr = entry->start;
1312	phdr->p_paddr = 0;
1313	phdr->p_filesz = phdr->p_memsz = entry->end - entry->start;
1314	phdr->p_align = PAGE_SIZE;
1315	phdr->p_flags = __elfN(untrans_prot)(entry->protection);
1316
1317	phc->offset += phdr->p_filesz;
1318	phc->phdr++;
1319}
1320
1321/*
1322 * A callback for each_writable_segment() to gather information about
1323 * the number of segments and their total size.
1324 */
1325static void
1326cb_size_segment(entry, closure)
1327	vm_map_entry_t entry;
1328	void *closure;
1329{
1330	struct sseg_closure *ssc = (struct sseg_closure *)closure;
1331
1332	ssc->count++;
1333	ssc->size += entry->end - entry->start;
1334}
1335
1336/*
1337 * For each writable segment in the process's memory map, call the given
1338 * function with a pointer to the map entry and some arbitrary
1339 * caller-supplied data.
1340 */
1341static void
1342each_writable_segment(td, func, closure)
1343	struct thread *td;
1344	segment_callback func;
1345	void *closure;
1346{
1347	struct proc *p = td->td_proc;
1348	vm_map_t map = &p->p_vmspace->vm_map;
1349	vm_map_entry_t entry;
1350	vm_object_t backing_object, object;
1351	boolean_t ignore_entry;
1352
1353	vm_map_lock_read(map);
1354	for (entry = map->header.next; entry != &map->header;
1355	    entry = entry->next) {
1356		/*
1357		 * Don't dump inaccessible mappings, deal with legacy
1358		 * coredump mode.
1359		 *
1360		 * Note that read-only segments related to the elf binary
1361		 * are marked MAP_ENTRY_NOCOREDUMP now so we no longer
1362		 * need to arbitrarily ignore such segments.
1363		 */
1364		if (elf_legacy_coredump) {
1365			if ((entry->protection & VM_PROT_RW) != VM_PROT_RW)
1366				continue;
1367		} else {
1368			if ((entry->protection & VM_PROT_ALL) == 0)
1369				continue;
1370		}
1371
1372		/*
1373		 * Dont include memory segment in the coredump if
1374		 * MAP_NOCORE is set in mmap(2) or MADV_NOCORE in
1375		 * madvise(2).  Do not dump submaps (i.e. parts of the
1376		 * kernel map).
1377		 */
1378		if (entry->eflags & (MAP_ENTRY_NOCOREDUMP|MAP_ENTRY_IS_SUB_MAP))
1379			continue;
1380
1381		if ((object = entry->object.vm_object) == NULL)
1382			continue;
1383
1384		/* Ignore memory-mapped devices and such things. */
1385		VM_OBJECT_RLOCK(object);
1386		while ((backing_object = object->backing_object) != NULL) {
1387			VM_OBJECT_RLOCK(backing_object);
1388			VM_OBJECT_RUNLOCK(object);
1389			object = backing_object;
1390		}
1391		ignore_entry = object->type != OBJT_DEFAULT &&
1392		    object->type != OBJT_SWAP && object->type != OBJT_VNODE;
1393		VM_OBJECT_RUNLOCK(object);
1394		if (ignore_entry)
1395			continue;
1396
1397		(*func)(entry, closure);
1398	}
1399	vm_map_unlock_read(map);
1400}
1401
1402/*
1403 * Write the core file header to the file, including padding up to
1404 * the page boundary.
1405 */
1406static int
1407__elfN(corehdr)(struct thread *td, struct vnode *vp, struct ucred *cred,
1408    int numsegs, void *hdr, size_t hdrsize, struct note_info_list *notelst,
1409    size_t notesz, gzFile gzfile)
1410{
1411	struct sbuf_drain_core_params params;
1412	struct note_info *ninfo;
1413	struct sbuf *sb;
1414	int error;
1415
1416	/* Fill in the header. */
1417	bzero(hdr, hdrsize);
1418	__elfN(puthdr)(td, hdr, hdrsize, numsegs, notesz);
1419
1420	params.offset = 0;
1421	params.active_cred = cred;
1422	params.file_cred = NOCRED;
1423	params.td = td;
1424	params.vp = vp;
1425#ifdef COMPRESS_USER_CORES
1426	params.gzfile = gzfile;
1427#endif
1428	sb = sbuf_new(NULL, NULL, CORE_BUF_SIZE, SBUF_FIXEDLEN);
1429	sbuf_set_drain(sb, sbuf_drain_core_output, &params);
1430	sbuf_start_section(sb, NULL);
1431	sbuf_bcat(sb, hdr, hdrsize);
1432	TAILQ_FOREACH(ninfo, notelst, link)
1433	    __elfN(putnote)(ninfo, sb);
1434	/* Align up to a page boundary for the program segments. */
1435	sbuf_end_section(sb, -1, PAGE_SIZE, 0);
1436	error = sbuf_finish(sb);
1437	sbuf_delete(sb);
1438
1439	return (error);
1440}
1441
1442static void
1443__elfN(prepare_notes)(struct thread *td, struct note_info_list *list,
1444    size_t *sizep)
1445{
1446	struct proc *p;
1447	struct thread *thr;
1448	size_t size;
1449
1450	p = td->td_proc;
1451	size = 0;
1452
1453	size += register_note(list, NT_PRPSINFO, __elfN(note_prpsinfo), p);
1454
1455	/*
1456	 * To have the debugger select the right thread (LWP) as the initial
1457	 * thread, we dump the state of the thread passed to us in td first.
1458	 * This is the thread that causes the core dump and thus likely to
1459	 * be the right thread one wants to have selected in the debugger.
1460	 */
1461	thr = td;
1462	while (thr != NULL) {
1463		size += register_note(list, NT_PRSTATUS,
1464		    __elfN(note_prstatus), thr);
1465		size += register_note(list, NT_FPREGSET,
1466		    __elfN(note_fpregset), thr);
1467		size += register_note(list, NT_THRMISC,
1468		    __elfN(note_thrmisc), thr);
1469		size += register_note(list, -1,
1470		    __elfN(note_threadmd), thr);
1471
1472		thr = (thr == td) ? TAILQ_FIRST(&p->p_threads) :
1473		    TAILQ_NEXT(thr, td_plist);
1474		if (thr == td)
1475			thr = TAILQ_NEXT(thr, td_plist);
1476	}
1477
1478	size += register_note(list, NT_PROCSTAT_PROC,
1479	    __elfN(note_procstat_proc), p);
1480	size += register_note(list, NT_PROCSTAT_FILES,
1481	    note_procstat_files, p);
1482	size += register_note(list, NT_PROCSTAT_VMMAP,
1483	    note_procstat_vmmap, p);
1484	size += register_note(list, NT_PROCSTAT_GROUPS,
1485	    note_procstat_groups, p);
1486	size += register_note(list, NT_PROCSTAT_UMASK,
1487	    note_procstat_umask, p);
1488	size += register_note(list, NT_PROCSTAT_RLIMIT,
1489	    note_procstat_rlimit, p);
1490	size += register_note(list, NT_PROCSTAT_OSREL,
1491	    note_procstat_osrel, p);
1492	size += register_note(list, NT_PROCSTAT_PSSTRINGS,
1493	    __elfN(note_procstat_psstrings), p);
1494	size += register_note(list, NT_PROCSTAT_AUXV,
1495	    __elfN(note_procstat_auxv), p);
1496
1497	*sizep = size;
1498}
1499
1500static void
1501__elfN(puthdr)(struct thread *td, void *hdr, size_t hdrsize, int numsegs,
1502    size_t notesz)
1503{
1504	Elf_Ehdr *ehdr;
1505	Elf_Phdr *phdr;
1506	struct phdr_closure phc;
1507
1508	ehdr = (Elf_Ehdr *)hdr;
1509	phdr = (Elf_Phdr *)((char *)hdr + sizeof(Elf_Ehdr));
1510
1511	ehdr->e_ident[EI_MAG0] = ELFMAG0;
1512	ehdr->e_ident[EI_MAG1] = ELFMAG1;
1513	ehdr->e_ident[EI_MAG2] = ELFMAG2;
1514	ehdr->e_ident[EI_MAG3] = ELFMAG3;
1515	ehdr->e_ident[EI_CLASS] = ELF_CLASS;
1516	ehdr->e_ident[EI_DATA] = ELF_DATA;
1517	ehdr->e_ident[EI_VERSION] = EV_CURRENT;
1518	ehdr->e_ident[EI_OSABI] = ELFOSABI_FREEBSD;
1519	ehdr->e_ident[EI_ABIVERSION] = 0;
1520	ehdr->e_ident[EI_PAD] = 0;
1521	ehdr->e_type = ET_CORE;
1522#if defined(COMPAT_FREEBSD32) && __ELF_WORD_SIZE == 32
1523	ehdr->e_machine = ELF_ARCH32;
1524#else
1525	ehdr->e_machine = ELF_ARCH;
1526#endif
1527	ehdr->e_version = EV_CURRENT;
1528	ehdr->e_entry = 0;
1529	ehdr->e_phoff = sizeof(Elf_Ehdr);
1530	ehdr->e_flags = 0;
1531	ehdr->e_ehsize = sizeof(Elf_Ehdr);
1532	ehdr->e_phentsize = sizeof(Elf_Phdr);
1533	ehdr->e_phnum = numsegs + 1;
1534	ehdr->e_shentsize = sizeof(Elf_Shdr);
1535	ehdr->e_shnum = 0;
1536	ehdr->e_shstrndx = SHN_UNDEF;
1537
1538	/*
1539	 * Fill in the program header entries.
1540	 */
1541
1542	/* The note segement. */
1543	phdr->p_type = PT_NOTE;
1544	phdr->p_offset = hdrsize;
1545	phdr->p_vaddr = 0;
1546	phdr->p_paddr = 0;
1547	phdr->p_filesz = notesz;
1548	phdr->p_memsz = 0;
1549	phdr->p_flags = PF_R;
1550	phdr->p_align = ELF_NOTE_ROUNDSIZE;
1551	phdr++;
1552
1553	/* All the writable segments from the program. */
1554	phc.phdr = phdr;
1555	phc.offset = round_page(hdrsize + notesz);
1556	each_writable_segment(td, cb_put_phdr, &phc);
1557}
1558
1559static size_t
1560register_note(struct note_info_list *list, int type, outfunc_t out, void *arg)
1561{
1562	struct note_info *ninfo;
1563	size_t size, notesize;
1564
1565	size = 0;
1566	out(arg, NULL, &size);
1567	ninfo = malloc(sizeof(*ninfo), M_TEMP, M_ZERO | M_WAITOK);
1568	ninfo->type = type;
1569	ninfo->outfunc = out;
1570	ninfo->outarg = arg;
1571	ninfo->outsize = size;
1572	TAILQ_INSERT_TAIL(list, ninfo, link);
1573
1574	if (type == -1)
1575		return (size);
1576
1577	notesize = sizeof(Elf_Note) +		/* note header */
1578	    roundup2(8, ELF_NOTE_ROUNDSIZE) +	/* note name ("FreeBSD") */
1579	    roundup2(size, ELF_NOTE_ROUNDSIZE);	/* note description */
1580
1581	return (notesize);
1582}
1583
1584static void
1585__elfN(putnote)(struct note_info *ninfo, struct sbuf *sb)
1586{
1587	Elf_Note note;
1588	ssize_t old_len;
1589
1590	if (ninfo->type == -1) {
1591		ninfo->outfunc(ninfo->outarg, sb, &ninfo->outsize);
1592		return;
1593	}
1594
1595	note.n_namesz = 8; /* strlen("FreeBSD") + 1 */
1596	note.n_descsz = ninfo->outsize;
1597	note.n_type = ninfo->type;
1598
1599	sbuf_bcat(sb, &note, sizeof(note));
1600	sbuf_start_section(sb, &old_len);
1601	sbuf_bcat(sb, "FreeBSD", note.n_namesz);
1602	sbuf_end_section(sb, old_len, ELF_NOTE_ROUNDSIZE, 0);
1603	if (note.n_descsz == 0)
1604		return;
1605	sbuf_start_section(sb, &old_len);
1606	ninfo->outfunc(ninfo->outarg, sb, &ninfo->outsize);
1607	sbuf_end_section(sb, old_len, ELF_NOTE_ROUNDSIZE, 0);
1608}
1609
1610/*
1611 * Miscellaneous note out functions.
1612 */
1613
1614#if defined(COMPAT_FREEBSD32) && __ELF_WORD_SIZE == 32
1615#include <compat/freebsd32/freebsd32.h>
1616
1617typedef struct prstatus32 elf_prstatus_t;
1618typedef struct prpsinfo32 elf_prpsinfo_t;
1619typedef struct fpreg32 elf_prfpregset_t;
1620typedef struct fpreg32 elf_fpregset_t;
1621typedef struct reg32 elf_gregset_t;
1622typedef struct thrmisc32 elf_thrmisc_t;
1623#define ELF_KERN_PROC_MASK	KERN_PROC_MASK32
1624typedef struct kinfo_proc32 elf_kinfo_proc_t;
1625typedef uint32_t elf_ps_strings_t;
1626#else
1627typedef prstatus_t elf_prstatus_t;
1628typedef prpsinfo_t elf_prpsinfo_t;
1629typedef prfpregset_t elf_prfpregset_t;
1630typedef prfpregset_t elf_fpregset_t;
1631typedef gregset_t elf_gregset_t;
1632typedef thrmisc_t elf_thrmisc_t;
1633#define ELF_KERN_PROC_MASK	0
1634typedef struct kinfo_proc elf_kinfo_proc_t;
1635typedef vm_offset_t elf_ps_strings_t;
1636#endif
1637
1638static void
1639__elfN(note_prpsinfo)(void *arg, struct sbuf *sb, size_t *sizep)
1640{
1641	struct proc *p;
1642	elf_prpsinfo_t *psinfo;
1643
1644	p = (struct proc *)arg;
1645	if (sb != NULL) {
1646		KASSERT(*sizep == sizeof(*psinfo), ("invalid size"));
1647		psinfo = malloc(sizeof(*psinfo), M_TEMP, M_ZERO | M_WAITOK);
1648		psinfo->pr_version = PRPSINFO_VERSION;
1649		psinfo->pr_psinfosz = sizeof(elf_prpsinfo_t);
1650		strlcpy(psinfo->pr_fname, p->p_comm, sizeof(psinfo->pr_fname));
1651		/*
1652		 * XXX - We don't fill in the command line arguments properly
1653		 * yet.
1654		 */
1655		strlcpy(psinfo->pr_psargs, p->p_comm,
1656		    sizeof(psinfo->pr_psargs));
1657
1658		sbuf_bcat(sb, psinfo, sizeof(*psinfo));
1659		free(psinfo, M_TEMP);
1660	}
1661	*sizep = sizeof(*psinfo);
1662}
1663
1664static void
1665__elfN(note_prstatus)(void *arg, struct sbuf *sb, size_t *sizep)
1666{
1667	struct thread *td;
1668	elf_prstatus_t *status;
1669
1670	td = (struct thread *)arg;
1671	if (sb != NULL) {
1672		KASSERT(*sizep == sizeof(*status), ("invalid size"));
1673		status = malloc(sizeof(*status), M_TEMP, M_ZERO | M_WAITOK);
1674		status->pr_version = PRSTATUS_VERSION;
1675		status->pr_statussz = sizeof(elf_prstatus_t);
1676		status->pr_gregsetsz = sizeof(elf_gregset_t);
1677		status->pr_fpregsetsz = sizeof(elf_fpregset_t);
1678		status->pr_osreldate = osreldate;
1679		status->pr_cursig = td->td_proc->p_sig;
1680		status->pr_pid = td->td_tid;
1681#if defined(COMPAT_FREEBSD32) && __ELF_WORD_SIZE == 32
1682		fill_regs32(td, &status->pr_reg);
1683#else
1684		fill_regs(td, &status->pr_reg);
1685#endif
1686		sbuf_bcat(sb, status, sizeof(*status));
1687		free(status, M_TEMP);
1688	}
1689	*sizep = sizeof(*status);
1690}
1691
1692static void
1693__elfN(note_fpregset)(void *arg, struct sbuf *sb, size_t *sizep)
1694{
1695	struct thread *td;
1696	elf_prfpregset_t *fpregset;
1697
1698	td = (struct thread *)arg;
1699	if (sb != NULL) {
1700		KASSERT(*sizep == sizeof(*fpregset), ("invalid size"));
1701		fpregset = malloc(sizeof(*fpregset), M_TEMP, M_ZERO | M_WAITOK);
1702#if defined(COMPAT_FREEBSD32) && __ELF_WORD_SIZE == 32
1703		fill_fpregs32(td, fpregset);
1704#else
1705		fill_fpregs(td, fpregset);
1706#endif
1707		sbuf_bcat(sb, fpregset, sizeof(*fpregset));
1708		free(fpregset, M_TEMP);
1709	}
1710	*sizep = sizeof(*fpregset);
1711}
1712
1713static void
1714__elfN(note_thrmisc)(void *arg, struct sbuf *sb, size_t *sizep)
1715{
1716	struct thread *td;
1717	elf_thrmisc_t thrmisc;
1718
1719	td = (struct thread *)arg;
1720	if (sb != NULL) {
1721		KASSERT(*sizep == sizeof(thrmisc), ("invalid size"));
1722		bzero(&thrmisc._pad, sizeof(thrmisc._pad));
1723		strcpy(thrmisc.pr_tname, td->td_name);
1724		sbuf_bcat(sb, &thrmisc, sizeof(thrmisc));
1725	}
1726	*sizep = sizeof(thrmisc);
1727}
1728
1729/*
1730 * Allow for MD specific notes, as well as any MD
1731 * specific preparations for writing MI notes.
1732 */
1733static void
1734__elfN(note_threadmd)(void *arg, struct sbuf *sb, size_t *sizep)
1735{
1736	struct thread *td;
1737	void *buf;
1738	size_t size;
1739
1740	td = (struct thread *)arg;
1741	size = *sizep;
1742	buf = NULL;
1743	if (size != 0 && sb != NULL)
1744		buf = malloc(size, M_TEMP, M_ZERO | M_WAITOK);
1745	size = 0;
1746	__elfN(dump_thread)(td, buf, &size);
1747	KASSERT(*sizep == size, ("invalid size"));
1748	if (size != 0 && sb != NULL)
1749		sbuf_bcat(sb, buf, size);
1750	*sizep = size;
1751}
1752
1753#ifdef KINFO_PROC_SIZE
1754CTASSERT(sizeof(struct kinfo_proc) == KINFO_PROC_SIZE);
1755#endif
1756
1757static void
1758__elfN(note_procstat_proc)(void *arg, struct sbuf *sb, size_t *sizep)
1759{
1760	struct proc *p;
1761	size_t size;
1762	int structsize;
1763
1764	p = (struct proc *)arg;
1765	size = sizeof(structsize) + p->p_numthreads *
1766	    sizeof(elf_kinfo_proc_t);
1767
1768	if (sb != NULL) {
1769		KASSERT(*sizep == size, ("invalid size"));
1770		structsize = sizeof(elf_kinfo_proc_t);
1771		sbuf_bcat(sb, &structsize, sizeof(structsize));
1772		PROC_LOCK(p);
1773		kern_proc_out(p, sb, ELF_KERN_PROC_MASK);
1774	}
1775	*sizep = size;
1776}
1777
1778#ifdef KINFO_FILE_SIZE
1779CTASSERT(sizeof(struct kinfo_file) == KINFO_FILE_SIZE);
1780#endif
1781
1782static void
1783note_procstat_files(void *arg, struct sbuf *sb, size_t *sizep)
1784{
1785	struct proc *p;
1786	size_t size;
1787	int structsize;
1788
1789	p = (struct proc *)arg;
1790	if (sb == NULL) {
1791		size = 0;
1792		sb = sbuf_new(NULL, NULL, 128, SBUF_FIXEDLEN);
1793		sbuf_set_drain(sb, sbuf_drain_count, &size);
1794		sbuf_bcat(sb, &structsize, sizeof(structsize));
1795		PROC_LOCK(p);
1796		kern_proc_filedesc_out(p, sb, -1);
1797		sbuf_finish(sb);
1798		sbuf_delete(sb);
1799		*sizep = size;
1800	} else {
1801		structsize = sizeof(struct kinfo_file);
1802		sbuf_bcat(sb, &structsize, sizeof(structsize));
1803		PROC_LOCK(p);
1804		kern_proc_filedesc_out(p, sb, -1);
1805	}
1806}
1807
1808#ifdef KINFO_VMENTRY_SIZE
1809CTASSERT(sizeof(struct kinfo_vmentry) == KINFO_VMENTRY_SIZE);
1810#endif
1811
1812static void
1813note_procstat_vmmap(void *arg, struct sbuf *sb, size_t *sizep)
1814{
1815	struct proc *p;
1816	size_t size;
1817	int structsize;
1818
1819	p = (struct proc *)arg;
1820	if (sb == NULL) {
1821		size = 0;
1822		sb = sbuf_new(NULL, NULL, 128, SBUF_FIXEDLEN);
1823		sbuf_set_drain(sb, sbuf_drain_count, &size);
1824		sbuf_bcat(sb, &structsize, sizeof(structsize));
1825		PROC_LOCK(p);
1826		kern_proc_vmmap_out(p, sb);
1827		sbuf_finish(sb);
1828		sbuf_delete(sb);
1829		*sizep = size;
1830	} else {
1831		structsize = sizeof(struct kinfo_vmentry);
1832		sbuf_bcat(sb, &structsize, sizeof(structsize));
1833		PROC_LOCK(p);
1834		kern_proc_vmmap_out(p, sb);
1835	}
1836}
1837
1838static void
1839note_procstat_groups(void *arg, struct sbuf *sb, size_t *sizep)
1840{
1841	struct proc *p;
1842	size_t size;
1843	int structsize;
1844
1845	p = (struct proc *)arg;
1846	size = sizeof(structsize) + p->p_ucred->cr_ngroups * sizeof(gid_t);
1847	if (sb != NULL) {
1848		KASSERT(*sizep == size, ("invalid size"));
1849		structsize = sizeof(gid_t);
1850		sbuf_bcat(sb, &structsize, sizeof(structsize));
1851		sbuf_bcat(sb, p->p_ucred->cr_groups, p->p_ucred->cr_ngroups *
1852		    sizeof(gid_t));
1853	}
1854	*sizep = size;
1855}
1856
1857static void
1858note_procstat_umask(void *arg, struct sbuf *sb, size_t *sizep)
1859{
1860	struct proc *p;
1861	size_t size;
1862	int structsize;
1863
1864	p = (struct proc *)arg;
1865	size = sizeof(structsize) + sizeof(p->p_fd->fd_cmask);
1866	if (sb != NULL) {
1867		KASSERT(*sizep == size, ("invalid size"));
1868		structsize = sizeof(p->p_fd->fd_cmask);
1869		sbuf_bcat(sb, &structsize, sizeof(structsize));
1870		sbuf_bcat(sb, &p->p_fd->fd_cmask, sizeof(p->p_fd->fd_cmask));
1871	}
1872	*sizep = size;
1873}
1874
1875static void
1876note_procstat_rlimit(void *arg, struct sbuf *sb, size_t *sizep)
1877{
1878	struct proc *p;
1879	struct rlimit rlim[RLIM_NLIMITS];
1880	size_t size;
1881	int structsize, i;
1882
1883	p = (struct proc *)arg;
1884	size = sizeof(structsize) + sizeof(rlim);
1885	if (sb != NULL) {
1886		KASSERT(*sizep == size, ("invalid size"));
1887		structsize = sizeof(rlim);
1888		sbuf_bcat(sb, &structsize, sizeof(structsize));
1889		PROC_LOCK(p);
1890		for (i = 0; i < RLIM_NLIMITS; i++)
1891			lim_rlimit(p, i, &rlim[i]);
1892		PROC_UNLOCK(p);
1893		sbuf_bcat(sb, rlim, sizeof(rlim));
1894	}
1895	*sizep = size;
1896}
1897
1898static void
1899note_procstat_osrel(void *arg, struct sbuf *sb, size_t *sizep)
1900{
1901	struct proc *p;
1902	size_t size;
1903	int structsize;
1904
1905	p = (struct proc *)arg;
1906	size = sizeof(structsize) + sizeof(p->p_osrel);
1907	if (sb != NULL) {
1908		KASSERT(*sizep == size, ("invalid size"));
1909		structsize = sizeof(p->p_osrel);
1910		sbuf_bcat(sb, &structsize, sizeof(structsize));
1911		sbuf_bcat(sb, &p->p_osrel, sizeof(p->p_osrel));
1912	}
1913	*sizep = size;
1914}
1915
1916static void
1917__elfN(note_procstat_psstrings)(void *arg, struct sbuf *sb, size_t *sizep)
1918{
1919	struct proc *p;
1920	elf_ps_strings_t ps_strings;
1921	size_t size;
1922	int structsize;
1923
1924	p = (struct proc *)arg;
1925	size = sizeof(structsize) + sizeof(ps_strings);
1926	if (sb != NULL) {
1927		KASSERT(*sizep == size, ("invalid size"));
1928		structsize = sizeof(ps_strings);
1929#if defined(COMPAT_FREEBSD32) && __ELF_WORD_SIZE == 32
1930		ps_strings = PTROUT(p->p_sysent->sv_psstrings);
1931#else
1932		ps_strings = p->p_sysent->sv_psstrings;
1933#endif
1934		sbuf_bcat(sb, &structsize, sizeof(structsize));
1935		sbuf_bcat(sb, &ps_strings, sizeof(ps_strings));
1936	}
1937	*sizep = size;
1938}
1939
1940static void
1941__elfN(note_procstat_auxv)(void *arg, struct sbuf *sb, size_t *sizep)
1942{
1943	struct proc *p;
1944	size_t size;
1945	int structsize;
1946
1947	p = (struct proc *)arg;
1948	if (sb == NULL) {
1949		size = 0;
1950		sb = sbuf_new(NULL, NULL, 128, SBUF_FIXEDLEN);
1951		sbuf_set_drain(sb, sbuf_drain_count, &size);
1952		sbuf_bcat(sb, &structsize, sizeof(structsize));
1953		PHOLD(p);
1954		proc_getauxv(curthread, p, sb);
1955		PRELE(p);
1956		sbuf_finish(sb);
1957		sbuf_delete(sb);
1958		*sizep = size;
1959	} else {
1960		structsize = sizeof(Elf_Auxinfo);
1961		sbuf_bcat(sb, &structsize, sizeof(structsize));
1962		PHOLD(p);
1963		proc_getauxv(curthread, p, sb);
1964		PRELE(p);
1965	}
1966}
1967
1968static boolean_t
1969__elfN(parse_notes)(struct image_params *imgp, Elf_Brandnote *checknote,
1970    int32_t *osrel, const Elf_Phdr *pnote)
1971{
1972	const Elf_Note *note, *note0, *note_end;
1973	const char *note_name;
1974	int i;
1975
1976	if (pnote == NULL || pnote->p_offset > PAGE_SIZE ||
1977	    pnote->p_filesz > PAGE_SIZE - pnote->p_offset)
1978		return (FALSE);
1979
1980	note = note0 = (const Elf_Note *)(imgp->image_header + pnote->p_offset);
1981	note_end = (const Elf_Note *)(imgp->image_header +
1982	    pnote->p_offset + pnote->p_filesz);
1983	for (i = 0; i < 100 && note >= note0 && note < note_end; i++) {
1984		if (!aligned(note, Elf32_Addr) || (const char *)note_end -
1985		    (const char *)note < sizeof(Elf_Note))
1986			return (FALSE);
1987		if (note->n_namesz != checknote->hdr.n_namesz ||
1988		    note->n_descsz != checknote->hdr.n_descsz ||
1989		    note->n_type != checknote->hdr.n_type)
1990			goto nextnote;
1991		note_name = (const char *)(note + 1);
1992		if (note_name + checknote->hdr.n_namesz >=
1993		    (const char *)note_end || strncmp(checknote->vendor,
1994		    note_name, checknote->hdr.n_namesz) != 0)
1995			goto nextnote;
1996
1997		/*
1998		 * Fetch the osreldate for binary
1999		 * from the ELF OSABI-note if necessary.
2000		 */
2001		if ((checknote->flags & BN_TRANSLATE_OSREL) != 0 &&
2002		    checknote->trans_osrel != NULL)
2003			return (checknote->trans_osrel(note, osrel));
2004		return (TRUE);
2005
2006nextnote:
2007		note = (const Elf_Note *)((const char *)(note + 1) +
2008		    roundup2(note->n_namesz, ELF_NOTE_ROUNDSIZE) +
2009		    roundup2(note->n_descsz, ELF_NOTE_ROUNDSIZE));
2010	}
2011
2012	return (FALSE);
2013}
2014
2015/*
2016 * Try to find the appropriate ABI-note section for checknote,
2017 * fetch the osreldate for binary from the ELF OSABI-note. Only the
2018 * first page of the image is searched, the same as for headers.
2019 */
2020static boolean_t
2021__elfN(check_note)(struct image_params *imgp, Elf_Brandnote *checknote,
2022    int32_t *osrel)
2023{
2024	const Elf_Phdr *phdr;
2025	const Elf_Ehdr *hdr;
2026	int i;
2027
2028	hdr = (const Elf_Ehdr *)imgp->image_header;
2029	phdr = (const Elf_Phdr *)(imgp->image_header + hdr->e_phoff);
2030
2031	for (i = 0; i < hdr->e_phnum; i++) {
2032		if (phdr[i].p_type == PT_NOTE &&
2033		    __elfN(parse_notes)(imgp, checknote, osrel, &phdr[i]))
2034			return (TRUE);
2035	}
2036	return (FALSE);
2037
2038}
2039
2040/*
2041 * Tell kern_execve.c about it, with a little help from the linker.
2042 */
2043static struct execsw __elfN(execsw) = {
2044	__CONCAT(exec_, __elfN(imgact)),
2045	__XSTRING(__CONCAT(ELF, __ELF_WORD_SIZE))
2046};
2047EXEC_SET(__CONCAT(elf, __ELF_WORD_SIZE), __elfN(execsw));
2048
#ifdef COMPRESS_USER_CORES
/*
 * Compress and write out a core segment for a user process.
 *
 * 'inbuf' is the starting address of a VM segment in the process' address
 * space that is to be compressed and written out to the core file.  'dest_buf'
 * is a buffer in the kernel's address space.  The segment is copied from
 * 'inbuf' to 'dest_buf' first before being processed by the compression
 * routine gzwrite().  This copying is necessary because the content of the VM
 * segment may change between the compression pass and the crc-computation pass
 * in gzwrite().  This is because realtime threads may preempt the UNIX kernel.
 *
 * If inbuf is NULL it is assumed that data is already copied to 'dest_buf'.
 *
 * Returns 0 on success, or EFAULT when gzwrite() does not accept a whole
 * chunk.  A chunk that cannot be copied in is written as zeroes.
 */
static int
compress_core (gzFile file, char *inbuf, char *dest_buf, unsigned int len,
    struct thread *td)
{
	int len_compressed;
	int error = 0;
	unsigned int chunk_len;

	while (len) {
		if (inbuf != NULL) {
			chunk_len = (len > CORE_BUF_SIZE) ? CORE_BUF_SIZE : len;
			/*
			 * copyin() can fail part way through a segment.
			 * Zero the chunk in that case so that the data
			 * left in 'dest_buf' by an earlier chunk is not
			 * written in its place, and keep going so the
			 * segment keeps the length announced in its
			 * program header.
			 */
			if (copyin(inbuf, dest_buf, chunk_len) != 0)
				bzero(dest_buf, chunk_len);
			inbuf += chunk_len;
		} else {
			chunk_len = len;
		}
		len_compressed = gzwrite(file, dest_buf, chunk_len);

		EVENTHANDLER_INVOKE(app_coredump_progress, td, len_compressed);

		if ((unsigned int)len_compressed != chunk_len) {
			log(LOG_WARNING,
			    "compress_core: length mismatch (0x%x returned, "
			    "0x%x expected)\n", len_compressed, chunk_len);
			EVENTHANDLER_INVOKE(app_coredump_error, td,
			    "compress_core: length mismatch %x -> %x",
			    chunk_len, len_compressed);
			error = EFAULT;
			break;
		}
		len -= chunk_len;
		maybe_yield();
	}

	return (error);
}
#endif /* COMPRESS_USER_CORES */
2100
2101static vm_prot_t
2102__elfN(trans_prot)(Elf_Word flags)
2103{
2104	vm_prot_t prot;
2105
2106	prot = 0;
2107	if (flags & PF_X)
2108		prot |= VM_PROT_EXECUTE;
2109	if (flags & PF_W)
2110		prot |= VM_PROT_WRITE;
2111	if (flags & PF_R)
2112		prot |= VM_PROT_READ;
2113#if __ELF_WORD_SIZE == 32
2114#if defined(__amd64__) || defined(__ia64__)
2115	if (i386_read_exec && (flags & PF_R))
2116		prot |= VM_PROT_EXECUTE;
2117#endif
2118#endif
2119	return (prot);
2120}
2121
2122static Elf_Word
2123__elfN(untrans_prot)(vm_prot_t prot)
2124{
2125	Elf_Word flags;
2126
2127	flags = 0;
2128	if (prot & VM_PROT_EXECUTE)
2129		flags |= PF_X;
2130	if (prot & VM_PROT_READ)
2131		flags |= PF_R;
2132	if (prot & VM_PROT_WRITE)
2133		flags |= PF_W;
2134	return (flags);
2135}
2136