1/*	$OpenBSD: exec_elf.c,v 1.186 2024/04/02 08:39:16 deraadt Exp $	*/
2
3/*
4 * Copyright (c) 1996 Per Fogelstrom
5 * All rights reserved.
6 *
7 * Copyright (c) 1994 Christos Zoulas
8 * All rights reserved.
9 *
10 * Redistribution and use in source and binary forms, with or without
11 * modification, are permitted provided that the following conditions
12 * are met:
13 * 1. Redistributions of source code must retain the above copyright
14 *    notice, this list of conditions and the following disclaimer.
15 * 2. Redistributions in binary form must reproduce the above copyright
16 *    notice, this list of conditions and the following disclaimer in the
17 *    documentation and/or other materials provided with the distribution.
18 * 3. The name of the author may not be used to endorse or promote products
19 *    derived from this software without specific prior written permission
20 *
21 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
22 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
23 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
24 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
25 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
26 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
27 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
28 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
29 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
30 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
31 *
32 */
33
34/*
35 * Copyright (c) 2001 Wasabi Systems, Inc.
36 * All rights reserved.
37 *
38 * Written by Jason R. Thorpe for Wasabi Systems, Inc.
39 *
40 * Redistribution and use in source and binary forms, with or without
41 * modification, are permitted provided that the following conditions
42 * are met:
43 * 1. Redistributions of source code must retain the above copyright
44 *    notice, this list of conditions and the following disclaimer.
45 * 2. Redistributions in binary form must reproduce the above copyright
46 *    notice, this list of conditions and the following disclaimer in the
47 *    documentation and/or other materials provided with the distribution.
48 * 3. All advertising materials mentioning features or use of this software
49 *    must display the following acknowledgement:
50 *	This product includes software developed for the NetBSD Project by
51 *	Wasabi Systems, Inc.
52 * 4. The name of Wasabi Systems, Inc. may not be used to endorse
53 *    or promote products derived from this software without specific prior
54 *    written permission.
55 *
56 * THIS SOFTWARE IS PROVIDED BY WASABI SYSTEMS, INC. ``AS IS'' AND
57 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
58 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
59 * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL WASABI SYSTEMS, INC
60 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
61 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
62 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
63 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
64 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
65 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
66 * POSSIBILITY OF SUCH DAMAGE.
67 */
68
69#include <sys/param.h>
70#include <sys/systm.h>
71#include <sys/proc.h>
72#include <sys/malloc.h>
73#include <sys/pool.h>
74#include <sys/mount.h>
75#include <sys/namei.h>
76#include <sys/vnode.h>
77#include <sys/core.h>
78#include <sys/exec.h>
79#include <sys/exec_elf.h>
80#include <sys/fcntl.h>
81#include <sys/ptrace.h>
82#include <sys/signalvar.h>
83#include <sys/pledge.h>
84#include <sys/syscall.h>
85
86#include <sys/mman.h>
87
88#include <uvm/uvm_extern.h>
89
90#include <machine/reg.h>
91#include <machine/exec.h>
92
/*
 * Prototypes for the ELF loading helpers defined further down in this file.
 */
int	elf_load_file(struct proc *, char *, struct exec_package *,
	    struct elf_args *);
int	elf_check_header(Elf_Ehdr *);
int	elf_read_from(struct proc *, struct vnode *, u_long, void *, int);
void	elf_load_psection(struct exec_vmcmd_set *, struct vnode *,
	    Elf_Phdr *, Elf_Addr *, Elf_Addr *, int *, int);
int	elf_os_pt_note_name(Elf_Note *);
int	elf_os_pt_note(struct proc *, struct exec_package *, Elf_Ehdr *, int *);
int	elf_read_pintable(struct proc *p, struct vnode *vp, Elf_Phdr *pp,
	    u_int **pinp, int is_ldso, size_t len);
103
/*
 * Round a up (ELF_ROUND) or down (ELF_TRUNC) to a multiple of b.
 * Both rely on mask arithmetic, so b must be a power of two; they are
 * used for page sizes as well as for p_align values.
 */
#define ELF_ROUND(a, b)		(((a) + (b) - 1) & ~((b) - 1))
#define ELF_TRUNC(a, b)		((a) & ~((b) - 1))

/*
 * We limit the number of program headers to 32, this should
 * be a reasonable limit for ELF, the most we have seen so far is 12
 */
#define ELF_MAX_VALID_PHDR 32

/* Bit reported for a note whose owner name is "OpenBSD". */
#define ELF_NOTE_NAME_OPENBSD	0x01

/*
 * Table of recognised note owner names and the ELF_NOTE_NAME_* bit
 * that elf_os_pt_note_name() returns for each of them.
 */
struct elf_note_name {
	char *name;
	int id;
} elf_note_names[] = {
	{ "OpenBSD",	ELF_NOTE_NAME_OPENBSD },
};

/* Note name and descriptor fields are padded to a multiple of this size. */
#define	ELFROUNDSIZE	sizeof(Elf_Word)
#define	elfround(x)	roundup((x), ELFROUNDSIZE)
125
126
127/*
128 * Check header for validity; return 0 for ok, ENOEXEC if error
129 */
130int
131elf_check_header(Elf_Ehdr *ehdr)
132{
133	/*
134	 * We need to check magic, class size, endianness, and version before
135	 * we look at the rest of the Elf_Ehdr structure. These few elements
136	 * are represented in a machine independent fashion.
137	 */
138	if (!IS_ELF(*ehdr) ||
139	    ehdr->e_ident[EI_CLASS] != ELF_TARG_CLASS ||
140	    ehdr->e_ident[EI_DATA] != ELF_TARG_DATA ||
141	    ehdr->e_ident[EI_VERSION] != ELF_TARG_VER)
142		return (ENOEXEC);
143
144	/* Now check the machine dependent header */
145	if (ehdr->e_machine != ELF_TARG_MACH ||
146	    ehdr->e_version != ELF_TARG_VER)
147		return (ENOEXEC);
148
149	/* Don't allow an insane amount of sections. */
150	if (ehdr->e_phnum > ELF_MAX_VALID_PHDR)
151		return (ENOEXEC);
152
153	return (0);
154}
155
/*
 * Load a psection at the appropriate address
 *
 * Queues on vcset the vmcmds needed to map the PT_LOAD program header ph
 * from vnode vp.
 *
 *	addr	in:  requested load address, or ELF_NO_ADDR to use p_vaddr.
 *		out: that address truncated down to p_align (if p_align > 1).
 *	size	out: p_memsz plus the offset of p_vaddr inside its
 *		     p_align-sized unit (the offset is 0 if p_align <= 1).
 *	prot	in/out: PROT_* bits derived from p_flags are OR'ed in.
 *	flags	VMCMD_* flags handed to every queued command;
 *		VMCMD_IMMUTABLE may be added to them here.
 */
void
elf_load_psection(struct exec_vmcmd_set *vcset, struct vnode *vp,
    Elf_Phdr *ph, Elf_Addr *addr, Elf_Addr *size, int *prot, int flags)
{
	u_long msize, lsize, psize, rm, rf;
	long diff, offset, bdiff;
	Elf_Addr base;

	/*
	 * If the user specified an address, then we load there.
	 *
	 * In every branch:
	 *	diff = distance from the aligned segment start to p_vaddr
	 *	base = page-truncated address at which the file mapping starts
	 */
	if (*addr != ELF_NO_ADDR) {
		if (ph->p_align > 1) {
			*addr = ELF_TRUNC(*addr, ph->p_align);
			diff = ph->p_vaddr - ELF_TRUNC(ph->p_vaddr, ph->p_align);
			/* page align vaddr */
			base = *addr + trunc_page(ph->p_vaddr)
			    - ELF_TRUNC(ph->p_vaddr, ph->p_align);
		} else {
			diff = 0;
			base = *addr + trunc_page(ph->p_vaddr) - ph->p_vaddr;
		}
	} else {
		/* No address requested: load at the linked address. */
		*addr = ph->p_vaddr;
		if (ph->p_align > 1)
			*addr = ELF_TRUNC(*addr, ph->p_align);
		base = trunc_page(ph->p_vaddr);
		diff = ph->p_vaddr - *addr;
	}
	/* bdiff: offset of p_vaddr within its page. */
	bdiff = ph->p_vaddr - trunc_page(ph->p_vaddr);

	/*
	 * Enforce W^X and map W|X segments without X permission
	 * initially.  The dynamic linker will make these read-only
	 * and add back X permission after relocation processing.
	 * Static executables with W|X segments will probably crash.
	 */
	*prot |= (ph->p_flags & PF_R) ? PROT_READ : 0;
	*prot |= (ph->p_flags & PF_W) ? PROT_WRITE : 0;
	if ((ph->p_flags & PF_W) == 0)
		*prot |= (ph->p_flags & PF_X) ? PROT_EXEC : 0;

	/*
	 * Apply immutability as much as possible, but not text/rodata
	 * segments of textrel binaries, or RELRO or PT_OPENBSD_MUTABLE
	 * sections, or LOADS marked PF_OPENBSD_MUTABLE, or LOADS which
	 * violate W^X.
	 * Userland (meaning crt0 or ld.so) will repair those regions.
	 */
	if ((ph->p_flags & (PF_X | PF_W)) != (PF_X | PF_W) &&
	    ((ph->p_flags & PF_OPENBSD_MUTABLE) == 0))
		flags |= VMCMD_IMMUTABLE;
	if ((flags & VMCMD_TEXTREL) && (ph->p_flags & PF_W) == 0)
		flags &= ~VMCMD_IMMUTABLE;

	/*
	 * msize:  size reported back to the caller through *size
	 * offset: file offset that corresponds to base
	 * lsize:  file-backed bytes, counted from base
	 * psize:  lsize rounded up to whole pages
	 */
	msize = ph->p_memsz + diff;
	offset = ph->p_offset - bdiff;
	lsize = ph->p_filesz + bdiff;
	psize = round_page(lsize);

	/*
	 * Because the pagedvn pager can't handle zero fill of the last
	 * data page if it's not page aligned we map the last page readvn.
	 */
	if (ph->p_flags & PF_W) {
		/* Whole pages go through pagedvn, the partial tail via readvn. */
		psize = trunc_page(lsize);
		if (psize > 0)
			NEW_VMCMD2(vcset, vmcmd_map_pagedvn, psize, base, vp,
			    offset, *prot, flags);
		if (psize != lsize) {
			NEW_VMCMD2(vcset, vmcmd_map_readvn, lsize - psize,
			    base + psize, vp, offset + psize, *prot, flags);
		}
	} else {
		NEW_VMCMD2(vcset, vmcmd_map_pagedvn, psize, base, vp, offset,
		    *prot, flags);
	}

	/*
	 * Check if we need to extend the size of the segment
	 *
	 * rm and rf are the page-rounded ends of the in-memory and the
	 * file-backed parts; any pages in between are mapped zero-filled.
	 */
	rm = round_page(*addr + ph->p_memsz + diff);
	rf = round_page(*addr + ph->p_filesz + diff);

	if (rm != rf) {
		NEW_VMCMD2(vcset, vmcmd_map_zero, rm - rf, rf, NULLVP, 0,
		    *prot, flags);
	}
	*size = msize;
}
249
250/*
251 * Read from vnode into buffer at offset.
252 */
253int
254elf_read_from(struct proc *p, struct vnode *vp, u_long off, void *buf,
255    int size)
256{
257	int error;
258	size_t resid;
259
260	if ((error = vn_rdwr(UIO_READ, vp, buf, size, off, UIO_SYSSPACE,
261	    0, p->p_ucred, &resid, p)) != 0)
262		return error;
263	/*
264	 * See if we got all of it
265	 */
266	if (resid != 0)
267		return (ENOEXEC);
268	return (0);
269}
270
271/*
272 * rebase the pin offsets inside a base,len window for the text segment only.
273 */
274void
275elf_adjustpins(vaddr_t *basep, size_t *lenp, u_int *pins, int npins, u_int offset)
276{
277	int i;
278
279	/* Adjust offsets, base, len */
280	for (i = 0; i < npins; i++) {
281		if (pins[i] == -1 || pins[i] == 0)
282			continue;
283		pins[i] -= offset;
284	}
285	*basep += offset;
286	*lenp -= offset;
287}
288
289int
290elf_read_pintable(struct proc *p, struct vnode *vp, Elf_Phdr *pp,
291    u_int **pinp, int is_ldso, size_t len)
292{
293	struct pinsyscalls {
294		u_int offset;
295		u_int sysno;
296	} *syscalls = NULL;
297	int i, nsyscalls = 0, npins = 0;
298	u_int *pins = NULL;
299
300	if (pp->p_filesz > SYS_MAXSYSCALL * 2 * sizeof(*syscalls) ||
301	    pp->p_filesz % sizeof(*syscalls) != 0)
302		goto bad;
303	nsyscalls = pp->p_filesz / sizeof(*syscalls);
304	syscalls = malloc(pp->p_filesz, M_PINSYSCALL, M_WAITOK);
305	if (elf_read_from(p, vp, pp->p_offset, syscalls,
306	    pp->p_filesz) != 0)
307		goto bad;
308
309	/* Validate, and calculate pintable size */
310	for (i = 0; i < nsyscalls; i++) {
311		if (syscalls[i].sysno <= 0 ||
312		    syscalls[i].sysno >= SYS_MAXSYSCALL ||
313		    syscalls[i].offset > len)
314			goto bad;
315		npins = MAX(npins, syscalls[i].sysno);
316	}
317	if (is_ldso)
318		npins = MAX(npins, SYS_kbind);	/* XXX see ld.so/loader.c */
319	npins++;
320
321	/* Fill pintable: 0 = invalid, -1 = allowed, else offset from base */
322	pins = mallocarray(npins, sizeof(u_int), M_PINSYSCALL, M_WAITOK|M_ZERO);
323	for (i = 0; i < nsyscalls; i++) {
324		if (pins[syscalls[i].sysno])
325			pins[syscalls[i].sysno] = -1;	/* duplicated */
326		else
327			pins[syscalls[i].sysno] = syscalls[i].offset;
328	}
329	if (is_ldso)
330		pins[SYS_kbind] = -1;	/* XXX see ld.so/loader.c */
331	*pinp = pins;
332	pins = NULL;
333bad:
334	free(syscalls, M_PINSYSCALL, nsyscalls * sizeof(*syscalls));
335	free(pins, M_PINSYSCALL, npins * sizeof(u_int));
336	return npins;
337}
338
339/*
340 * Load a file (interpreter/library) pointed to by path [stolen from
341 * coff_load_shlib()]. Made slightly generic so it might be used externally.
342 */
343int
344elf_load_file(struct proc *p, char *path, struct exec_package *epp,
345    struct elf_args *ap)
346{
347	int error, i;
348	struct nameidata nd;
349	Elf_Ehdr eh;
350	Elf_Phdr *ph = NULL, *syscall_ph = NULL;
351	u_long phsize = 0;
352	Elf_Addr addr;
353	struct vnode *vp;
354	Elf_Phdr *base_ph = NULL;
355	struct interp_ld_sec {
356		Elf_Addr vaddr;
357		u_long memsz;
358	} loadmap[ELF_MAX_VALID_PHDR];
359	int nload, idx = 0;
360	Elf_Addr pos;
361	int file_align;
362	int loop;
363	size_t randomizequota = ELF_RANDOMIZE_LIMIT;
364	vaddr_t text_start = -1, text_end = 0;
365
366	NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF, UIO_SYSSPACE, path, p);
367	nd.ni_pledge = PLEDGE_RPATH;
368	nd.ni_unveil = UNVEIL_READ;
369	if ((error = namei(&nd)) != 0) {
370		return (error);
371	}
372	vp = nd.ni_vp;
373	if (vp->v_type != VREG) {
374		error = EACCES;
375		goto bad;
376	}
377	if ((error = VOP_GETATTR(vp, epp->ep_vap, p->p_ucred, p)) != 0)
378		goto bad;
379	if (vp->v_mount->mnt_flag & MNT_NOEXEC) {
380		error = EACCES;
381		goto bad;
382	}
383	if ((error = VOP_ACCESS(vp, VREAD, p->p_ucred, p)) != 0)
384		goto bad1;
385	if ((error = elf_read_from(p, nd.ni_vp, 0, &eh, sizeof(eh))) != 0)
386		goto bad1;
387
388	if (elf_check_header(&eh) || eh.e_type != ET_DYN) {
389		error = ENOEXEC;
390		goto bad1;
391	}
392
393	ph = mallocarray(eh.e_phnum, sizeof(Elf_Phdr), M_TEMP, M_WAITOK);
394	phsize = eh.e_phnum * sizeof(Elf_Phdr);
395
396	if ((error = elf_read_from(p, nd.ni_vp, eh.e_phoff, ph, phsize)) != 0)
397		goto bad1;
398
399	for (i = 0; i < eh.e_phnum; i++) {
400		if ((ph[i].p_align > 1) && !powerof2(ph[i].p_align)) {
401			error = EINVAL;
402			goto bad1;
403		}
404
405		if (ph[i].p_type == PT_LOAD) {
406			if (ph[i].p_filesz > ph[i].p_memsz ||
407			    ph[i].p_memsz == 0) {
408				error = EINVAL;
409				goto bad1;
410			}
411			loadmap[idx].vaddr = trunc_page(ph[i].p_vaddr);
412			loadmap[idx].memsz = round_page (ph[i].p_vaddr +
413			    ph[i].p_memsz - loadmap[idx].vaddr);
414			file_align = ph[i].p_align;
415			idx++;
416		}
417	}
418	nload = idx;
419
420	/*
421	 * Load the interpreter where a non-fixed mmap(NULL, ...)
422	 * would (i.e. something safely out of the way).
423	 */
424	pos = uvm_map_hint(p->p_vmspace, PROT_EXEC, VM_MIN_ADDRESS,
425	    VM_MAXUSER_ADDRESS);
426	pos = ELF_ROUND(pos, file_align);
427
428	loop = 0;
429	for (i = 0; i < nload;/**/) {
430		vaddr_t	addr;
431		struct	uvm_object *uobj;
432		off_t	uoff;
433		size_t	size;
434
435#ifdef this_needs_fixing
436		if (i == 0) {
437			uobj = &vp->v_uvm.u_obj;
438			/* need to fix uoff */
439		} else {
440#endif
441			uobj = NULL;
442			uoff = 0;
443#ifdef this_needs_fixing
444		}
445#endif
446
447		addr = trunc_page(pos + loadmap[i].vaddr);
448		size =  round_page(addr + loadmap[i].memsz) - addr;
449
450		/* CRAP - map_findspace does not avoid daddr+BRKSIZ */
451		if ((addr + size > (vaddr_t)p->p_vmspace->vm_daddr) &&
452		    (addr < (vaddr_t)p->p_vmspace->vm_daddr + BRKSIZ))
453			addr = round_page((vaddr_t)p->p_vmspace->vm_daddr +
454			    BRKSIZ);
455
456		if (uvm_map_mquery(&p->p_vmspace->vm_map, &addr, size,
457		    (i == 0 ? uoff : UVM_UNKNOWN_OFFSET), 0) != 0) {
458			if (loop == 0) {
459				loop = 1;
460				i = 0;
461				pos = 0;
462				continue;
463			}
464			error = ENOMEM;
465			goto bad1;
466		}
467		if (addr != pos + loadmap[i].vaddr) {
468			/* base changed. */
469			pos = addr - trunc_page(loadmap[i].vaddr);
470			pos = ELF_ROUND(pos,file_align);
471			i = 0;
472			continue;
473		}
474
475		i++;
476	}
477
478	/*
479	 * Load all the necessary sections
480	 */
481	for (i = 0; i < eh.e_phnum; i++) {
482		Elf_Addr size = 0;
483		int prot = 0;
484		int flags;
485
486		switch (ph[i].p_type) {
487		case PT_LOAD:
488			if (base_ph == NULL) {
489				flags = VMCMD_BASE;
490				addr = pos;
491				base_ph = &ph[i];
492			} else {
493				flags = VMCMD_RELATIVE;
494				addr = ph[i].p_vaddr - base_ph->p_vaddr;
495			}
496			elf_load_psection(&epp->ep_vmcmds, nd.ni_vp,
497			    &ph[i], &addr, &size, &prot, flags);
498			/* If entry is within this section it must be text */
499			if (eh.e_entry >= ph[i].p_vaddr &&
500			    eh.e_entry < (ph[i].p_vaddr + size)) {
501				/* LOAD containing e_entry may not be writable */
502				if (prot & PROT_WRITE) {
503					error = ENOEXEC;
504					goto bad1;
505				}
506 				epp->ep_entry = addr + eh.e_entry -
507				    ELF_TRUNC(ph[i].p_vaddr,ph[i].p_align);
508				if (flags == VMCMD_RELATIVE)
509					epp->ep_entry += pos;
510				ap->arg_interp = pos;
511			}
512			if (prot & PROT_EXEC) {
513				if (addr < text_start)
514					text_start = addr;
515				if (addr+size >= text_end)
516					text_end = addr + size;
517			}
518			addr += size;
519			break;
520
521		case PT_PHDR:
522		case PT_NOTE:
523			break;
524
525		case PT_OPENBSD_RANDOMIZE:
526			if (ph[i].p_memsz > randomizequota) {
527				error = ENOMEM;
528				goto bad1;
529			}
530			randomizequota -= ph[i].p_memsz;
531			NEW_VMCMD(&epp->ep_vmcmds, vmcmd_randomize,
532			    ph[i].p_memsz, ph[i].p_vaddr + pos, NULLVP, 0, 0);
533			break;
534
535		case PT_DYNAMIC:
536#if defined (__mips__)
537			/* DT_DEBUG is not ready on mips */
538			NEW_VMCMD(&epp->ep_vmcmds, vmcmd_mutable,
539			    ph[i].p_memsz, ph[i].p_vaddr + pos, NULLVP, 0, 0);
540#endif
541			break;
542		case PT_GNU_RELRO:
543		case PT_OPENBSD_MUTABLE:
544			NEW_VMCMD(&epp->ep_vmcmds, vmcmd_mutable,
545			    ph[i].p_memsz, ph[i].p_vaddr + pos, NULLVP, 0, 0);
546			break;
547		case PT_OPENBSD_SYSCALLS:
548			syscall_ph = &ph[i];
549			break;
550		default:
551			break;
552		}
553	}
554
555	if (syscall_ph) {
556		struct process *pr = p->p_p;
557		vaddr_t base = pos;
558		size_t len = text_end;
559		u_int *pins;
560		int npins;
561
562		npins = elf_read_pintable(p, nd.ni_vp, syscall_ph,
563		    &pins, 1, len);
564		if (npins) {
565			elf_adjustpins(&base, &len, pins, npins,
566			    text_start);
567			pr->ps_pin.pn_start = base;
568			pr->ps_pin.pn_end = base + len;
569			pr->ps_pin.pn_pins = pins;
570			pr->ps_pin.pn_npins = npins;
571			pr->ps_flags |= PS_PIN;
572		}
573	}
574
575	vn_marktext(nd.ni_vp);
576
577bad1:
578	VOP_CLOSE(nd.ni_vp, FREAD, p->p_ucred, p);
579bad:
580	free(ph, M_TEMP, phsize);
581
582	vput(nd.ni_vp);
583	return (error);
584}
585
/*
 * Prepare an Elf binary's exec package
 *
 * First, set up the various offsets/lengths in the exec package.
 *
 * Then, mark the text image busy (so it can be demand paged) or error out if
 * this is not possible.  Finally, set up vmcmds for the text, data, bss, and
 * stack segments.
 *
 * Returns the result of exec_setup_stack() on success.  On failure the
 * queued vmcmds are discarded and an errno value is returned; failures
 * that did not set a specific error are reported as ENOEXEC.
 */
int
exec_elf_makecmds(struct proc *p, struct exec_package *epp)
{
	Elf_Ehdr *eh = epp->ep_hdr;
	Elf_Phdr *ph, *pp, *base_ph = NULL, *syscall_ph = NULL;
	Elf_Addr phdr = 0, exe_base = 0, exe_end = 0;
	int error, i, has_phdr = 0, names = 0, textrel = 0;
	char *interp = NULL;
	u_long phsize;
	size_t randomizequota = ELF_RANDOMIZE_LIMIT;

	/* The header buffer must hold at least a complete ELF header. */
	if (epp->ep_hdrvalid < sizeof(Elf_Ehdr))
		return (ENOEXEC);

	if (elf_check_header(eh) ||
	   (eh->e_type != ET_EXEC && eh->e_type != ET_DYN))
		return (ENOEXEC);

	/*
	 * check if vnode is in open for writing, because we want to demand-
	 * page out of it.  if it is, don't do it, for various reasons.
	 */
	if (epp->ep_vp->v_writecount != 0) {
#ifdef DIAGNOSTIC
		if (epp->ep_vp->v_flag & VTEXT)
			panic("exec: a VTEXT vnode has writecount != 0");
#endif
		return (ETXTBSY);
	}
	/*
	 * Allocate space to hold all the program headers, and read them
	 * from the file
	 */
	ph = mallocarray(eh->e_phnum, sizeof(Elf_Phdr), M_TEMP, M_WAITOK);
	phsize = eh->e_phnum * sizeof(Elf_Phdr);

	if ((error = elf_read_from(p, epp->ep_vp, eh->e_phoff, ph,
	    phsize)) != 0)
		goto bad;

	/* ELF_NO_ADDR marks "no text/data segment seen yet". */
	epp->ep_tsize = ELF_NO_ADDR;
	epp->ep_dsize = ELF_NO_ADDR;

	/*
	 * First pass: validate the headers, read the interpreter path
	 * from the first PT_INTERP, remember the first PT_LOAD and note
	 * whether a PT_PHDR exists.
	 */
	for (i = 0, pp = ph; i < eh->e_phnum; i++, pp++) {
		if ((pp->p_align > 1) && !powerof2(pp->p_align)) {
			error = EINVAL;
			goto bad;
		}

		if (pp->p_type == PT_INTERP && !interp) {
			/*
			 * error is 0 on these paths, so a malformed
			 * PT_INTERP is reported as ENOEXEC at bad:.
			 */
			if (pp->p_filesz < 2 || pp->p_filesz > MAXPATHLEN)
				goto bad;
			interp = pool_get(&namei_pool, PR_WAITOK);
			if ((error = elf_read_from(p, epp->ep_vp,
			    pp->p_offset, interp, pp->p_filesz)) != 0) {
				goto bad;
			}
			/* The path has to be NUL terminated in the file. */
			if (interp[pp->p_filesz - 1] != '\0')
				goto bad;
		} else if (pp->p_type == PT_LOAD) {
			if (pp->p_filesz > pp->p_memsz ||
			    pp->p_memsz == 0) {
				error = EINVAL;
				goto bad;
			}
			if (base_ph == NULL)
				base_ph = pp;
		} else if (pp->p_type == PT_PHDR) {
			has_phdr = 1;
		}
	}

	/*
	 * Verify this is an OpenBSD executable.  If it's marked that way
	 * via a PT_NOTE then also check for a PT_OPENBSD_WXNEEDED segment.
	 *
	 * elf_os_pt_note() fails with ENOEXEC when no "OpenBSD" note is
	 * found, so the EI_OSABI test below is only reached for binaries
	 * that already carry the note.
	 */
	if ((error = elf_os_pt_note(p, epp, epp->ep_hdr, &names)) != 0)
		goto bad;
	if (eh->e_ident[EI_OSABI] == ELFOSABI_OPENBSD)
		names |= ELF_NOTE_NAME_OPENBSD;

	if (eh->e_type == ET_DYN) {
		/* need phdr and load sections for PIE */
		if (!has_phdr || base_ph == NULL || base_ph->p_vaddr != 0) {
			error = EINVAL;
			goto bad;
		}
		/* randomize exe_base for PIE */
		exe_base = uvm_map_pie(base_ph->p_align);

		/*
		 * Check if DYNAMIC contains DT_TEXTREL
		 *
		 * PT_DYNAMIC segments larger than 64KB, or ones that
		 * cannot be read, are skipped rather than treated as
		 * fatal.
		 */
		for (i = 0, pp = ph; i < eh->e_phnum; i++, pp++) {
			Elf_Dyn *dt;
			int j;

			switch (pp->p_type) {
			case PT_DYNAMIC:
				if (pp->p_filesz > 64*1024)
					break;
				dt = malloc(pp->p_filesz, M_TEMP, M_WAITOK);
				error = vn_rdwr(UIO_READ, epp->ep_vp,
				    (caddr_t)dt, pp->p_filesz, pp->p_offset,
				    UIO_SYSSPACE, IO_UNIT, p->p_ucred, NULL, p);
				if (error) {
					free(dt, M_TEMP, pp->p_filesz);
					break;
				}
				for (j = 0; j < pp->p_filesz / sizeof(*dt); j++) {
					if (dt[j].d_tag == DT_TEXTREL) {
						textrel = VMCMD_TEXTREL;
						break;
					}
				}
				free(dt, M_TEMP, pp->p_filesz);
				break;
			default:
				break;
			}
		}
	}

	/*
	 * Load all the necessary sections
	 */
	for (i = 0, pp = ph; i < eh->e_phnum; i++, pp++) {
		Elf_Addr addr, size = 0;
		int prot = 0;
		int flags = 0;

		switch (pp->p_type) {
		case PT_LOAD:
			/*
			 * With a non-zero exe_base the first PT_LOAD goes
			 * at exe_base and the rest relative to it;
			 * otherwise segments load at their linked address.
			 */
			if (exe_base != 0) {
				if (pp == base_ph) {
					flags = VMCMD_BASE;
					addr = exe_base;
				} else {
					flags = VMCMD_RELATIVE;
					addr = pp->p_vaddr - base_ph->p_vaddr;
				}
			} else
				addr = ELF_NO_ADDR;

			/* Static binaries may not call pinsyscalls() */
			if (interp == NULL)
				p->p_vmspace->vm_map.flags |= VM_MAP_PINSYSCALL_ONCE;

			/*
			 * Calculates size of text and data segments
			 * by starting at first and going to end of last.
			 * 'rwx' sections are treated as data.
			 * this is correct for BSS_PLT, but may not be
			 * for DATA_PLT, is fine for TEXT_PLT.
			 */
			elf_load_psection(&epp->ep_vmcmds, epp->ep_vp,
			    pp, &addr, &size, &prot, flags | textrel);

			/*
			 * Update exe_base in case alignment was off.
			 * For PIE, addr is relative to exe_base so
			 * adjust it (non PIE exe_base is 0 so no change).
			 */
			if (flags == VMCMD_BASE)
				exe_base = addr;
			else
				addr += exe_base;

			/*
			 * Decide whether it's text or data by looking
			 * at the protection of the section
			 *
			 * In both cases the recorded range is grown so
			 * that it covers every segment of that kind seen
			 * so far.
			 */
			if (prot & PROT_WRITE) {
				/* data section */
				if (epp->ep_dsize == ELF_NO_ADDR) {
					epp->ep_daddr = addr;
					epp->ep_dsize = size;
				} else {
					if (addr < epp->ep_daddr) {
						epp->ep_dsize =
						    epp->ep_dsize +
						    epp->ep_daddr -
						    addr;
						epp->ep_daddr = addr;
					} else
						epp->ep_dsize = addr+size -
						    epp->ep_daddr;
				}
			} else if (prot & PROT_EXEC) {
				/* text section */
				if (epp->ep_tsize == ELF_NO_ADDR) {
					epp->ep_taddr = addr;
					epp->ep_tsize = size;
				} else {
					if (addr < epp->ep_taddr) {
						epp->ep_tsize =
						    epp->ep_tsize +
						    epp->ep_taddr -
						    addr;
						epp->ep_taddr = addr;
					} else
						epp->ep_tsize = addr+size -
						    epp->ep_taddr;
				}
				if (interp == NULL)
					exe_end = epp->ep_taddr +
					    epp->ep_tsize;	/* end of TEXT */
			}
			break;

		case PT_SHLIB:
			error = ENOEXEC;
			goto bad;

		case PT_INTERP:
			/* Already did this one */
		case PT_NOTE:
			break;

		case PT_PHDR:
			/* Note address of program headers (in text segment) */
			phdr = pp->p_vaddr;
			break;

		case PT_OPENBSD_RANDOMIZE:
			/* All such segments share one ELF_RANDOMIZE_LIMIT. */
			if (ph[i].p_memsz > randomizequota) {
				error = ENOMEM;
				goto bad;
			}
			randomizequota -= ph[i].p_memsz;
			NEW_VMCMD(&epp->ep_vmcmds, vmcmd_randomize,
			    ph[i].p_memsz, ph[i].p_vaddr + exe_base, NULLVP, 0, 0);
			break;

		case PT_DYNAMIC:
#if defined (__mips__)
			/* DT_DEBUG is not ready on mips */
			NEW_VMCMD(&epp->ep_vmcmds, vmcmd_mutable,
			    ph[i].p_memsz, ph[i].p_vaddr + exe_base, NULLVP, 0, 0);
#endif
			break;
		case PT_GNU_RELRO:
		case PT_OPENBSD_MUTABLE:
			NEW_VMCMD(&epp->ep_vmcmds, vmcmd_mutable,
			    ph[i].p_memsz, ph[i].p_vaddr + exe_base, NULLVP, 0, 0);
			break;
		case PT_OPENBSD_SYSCALLS:
			/* Only honoured for binaries without an interpreter. */
			if (interp == NULL)
				syscall_ph = &ph[i];
			break;
		default:
			/*
			 * Not fatal, we don't need to understand everything
			 * :-)
			 */
			break;
		}
	}

	/*
	 * Build the pin table and record it, together with the window it
	 * applies to, in the exec package.
	 */
	if (syscall_ph) {
		vaddr_t base = exe_base;
		size_t len = exe_end - exe_base;
		u_int *pins;
		int npins;

		npins = elf_read_pintable(p, epp->ep_vp, syscall_ph,
		    &pins, 0, len);
		if (npins) {
			elf_adjustpins(&base, &len, pins, npins,
			    epp->ep_taddr - exe_base);
			epp->ep_pinstart = base;
			epp->ep_pinend = base + len;
			epp->ep_pins = pins;
			epp->ep_npins = npins;
			p->p_p->ps_flags |= PS_PIN;
		}
	}

	/* Relocate the program header address by the load base. */
	phdr += exe_base;

	/*
	 * Strangely some linux programs may have all load sections marked
	 * writeable, in this case, textsize is not -1, but rather 0;
	 */
	if (epp->ep_tsize == ELF_NO_ADDR)
		epp->ep_tsize = 0;
	/*
	 * Another possibility is that it has all load sections marked
	 * read-only.  Fake a zero-sized data segment right after the
	 * text segment.
	 */
	if (epp->ep_dsize == ELF_NO_ADDR) {
		epp->ep_daddr = round_page(epp->ep_taddr + epp->ep_tsize);
		epp->ep_dsize = 0;
	}

	/* From here on the interpreter path belongs to the exec package. */
	epp->ep_interp = interp;
	epp->ep_entry = eh->e_entry + exe_base;

	/*
	 * Check if we found a dynamically linked binary and arrange to load
	 * its interpreter when the exec file is released.
	 */
	if (interp || eh->e_type == ET_DYN) {
		struct elf_args *ap;

		ap = malloc(sizeof(*ap), M_TEMP, M_WAITOK);

		ap->arg_phaddr = phdr;
		ap->arg_phentsize = eh->e_phentsize;
		ap->arg_phnum = eh->e_phnum;
		ap->arg_entry = eh->e_entry + exe_base;
		ap->arg_interp = exe_base;

		epp->ep_args = ap;
	}

	free(ph, M_TEMP, phsize);
	vn_marktext(epp->ep_vp);
	return (exec_setup_stack(p, epp));

bad:
	if (interp)
		pool_put(&namei_pool, interp);
	free(ph, M_TEMP, phsize);
	kill_vmcmds(&epp->ep_vmcmds);
	/* Failure paths that set no specific error report ENOEXEC. */
	if (error == 0)
		return (ENOEXEC);
	return (error);
}
925
926/*
927 * Phase II of load. It is now safe to load the interpreter. Info collected
928 * when loading the program is available for setup of the interpreter.
929 */
930int
931exec_elf_fixup(struct proc *p, struct exec_package *epp)
932{
933	char	*interp;
934	int	error = 0;
935	struct	elf_args *ap;
936	AuxInfo ai[ELF_AUX_ENTRIES], *a;
937
938	ap = epp->ep_args;
939	if (ap == NULL) {
940		return (0);
941	}
942
943	interp = epp->ep_interp;
944
945	/* disable kbind in programs that don't use ld.so */
946	if (interp == NULL)
947		p->p_p->ps_kbind_addr = BOGO_PC;
948
949	if (interp &&
950	    (error = elf_load_file(p, interp, epp, ap)) != 0) {
951		uprintf("execve: cannot load %s\n", interp);
952		free(ap, M_TEMP, sizeof *ap);
953		pool_put(&namei_pool, interp);
954		kill_vmcmds(&epp->ep_vmcmds);
955		return (error);
956	}
957	/*
958	 * We have to do this ourselves...
959	 */
960	error = exec_process_vmcmds(p, epp);
961
962	/*
963	 * Push extra arguments on the stack needed by dynamically
964	 * linked binaries
965	 */
966	if (error == 0) {
967		memset(&ai, 0, sizeof ai);
968		a = ai;
969
970		a->au_id = AUX_phdr;
971		a->au_v = ap->arg_phaddr;
972		a++;
973
974		a->au_id = AUX_phent;
975		a->au_v = ap->arg_phentsize;
976		a++;
977
978		a->au_id = AUX_phnum;
979		a->au_v = ap->arg_phnum;
980		a++;
981
982		a->au_id = AUX_pagesz;
983		a->au_v = PAGE_SIZE;
984		a++;
985
986		a->au_id = AUX_base;
987		a->au_v = ap->arg_interp;
988		a++;
989
990		a->au_id = AUX_flags;
991		a->au_v = 0;
992		a++;
993
994		a->au_id = AUX_entry;
995		a->au_v = ap->arg_entry;
996		a++;
997
998		a->au_id = AUX_openbsd_timekeep;
999		a->au_v = p->p_p->ps_timekeep;
1000		a++;
1001
1002		a->au_id = AUX_null;
1003		a->au_v = 0;
1004		a++;
1005
1006		error = copyout(ai, epp->ep_auxinfo, sizeof ai);
1007	}
1008	free(ap, M_TEMP, sizeof *ap);
1009	if (interp)
1010		pool_put(&namei_pool, interp);
1011	return (error);
1012}
1013
1014int
1015elf_os_pt_note_name(Elf_Note *np)
1016{
1017	int i, j;
1018
1019	for (i = 0; i < nitems(elf_note_names); i++) {
1020		size_t namlen = strlen(elf_note_names[i].name);
1021		if (np->namesz < namlen)
1022			continue;
1023		/* verify name padding (after the NUL) is NUL */
1024		for (j = namlen + 1; j < elfround(np->namesz); j++)
1025			if (((char *)(np + 1))[j] != '\0')
1026				continue;
1027		/* verify desc padding is NUL */
1028		for (j = np->descsz; j < elfround(np->descsz); j++)
1029			if (((char *)(np + 1))[j] != '\0')
1030				continue;
1031		if (strcmp((char *)(np + 1), elf_note_names[i].name) == 0)
1032			return elf_note_names[i].id;
1033	}
1034	return (0);
1035}
1036
1037int
1038elf_os_pt_note(struct proc *p, struct exec_package *epp, Elf_Ehdr *eh, int *namesp)
1039{
1040	Elf_Phdr *hph, *ph;
1041	Elf_Note *np = NULL;
1042	size_t phsize, offset, pfilesz = 0, total;
1043	int error, names = 0;
1044
1045	hph = mallocarray(eh->e_phnum, sizeof(Elf_Phdr), M_TEMP, M_WAITOK);
1046	phsize = eh->e_phnum * sizeof(Elf_Phdr);
1047	if ((error = elf_read_from(p, epp->ep_vp, eh->e_phoff,
1048	    hph, phsize)) != 0)
1049		goto out1;
1050
1051	for (ph = hph;  ph < &hph[eh->e_phnum]; ph++) {
1052		if (ph->p_type == PT_OPENBSD_WXNEEDED) {
1053			epp->ep_flags |= EXEC_WXNEEDED;
1054			continue;
1055		}
1056		if (ph->p_type == PT_OPENBSD_NOBTCFI) {
1057			epp->ep_flags |= EXEC_NOBTCFI;
1058			continue;
1059		}
1060
1061		if (ph->p_type != PT_NOTE || ph->p_filesz > 1024)
1062			continue;
1063
1064		if (np && ph->p_filesz != pfilesz) {
1065			free(np, M_TEMP, pfilesz);
1066			np = NULL;
1067		}
1068		if (!np)
1069			np = malloc(ph->p_filesz, M_TEMP, M_WAITOK);
1070		pfilesz = ph->p_filesz;
1071		if ((error = elf_read_from(p, epp->ep_vp, ph->p_offset,
1072		    np, ph->p_filesz)) != 0)
1073			goto out2;
1074
1075		for (offset = 0; offset < ph->p_filesz; offset += total) {
1076			Elf_Note *np2 = (Elf_Note *)((char *)np + offset);
1077
1078			if (offset + sizeof(Elf_Note) > ph->p_filesz)
1079				break;
1080			total = sizeof(Elf_Note) + elfround(np2->namesz) +
1081			    elfround(np2->descsz);
1082			if (offset + total > ph->p_filesz)
1083				break;
1084			names |= elf_os_pt_note_name(np2);
1085		}
1086	}
1087
1088out2:
1089	free(np, M_TEMP, pfilesz);
1090out1:
1091	free(hph, M_TEMP, phsize);
1092	*namesp = names;
1093	return ((names & ELF_NOTE_NAME_OPENBSD) ? 0 : ENOEXEC);
1094}
1095
1096/*
1097 * Start of routines related to dumping core
1098 */
1099
1100#ifdef SMALL_KERNEL
/*
 * SMALL_KERNEL builds carry no core dump support: always refuse.
 */
int
coredump_elf(struct proc *p, void *cookie)
{
	int refused = EPERM;

	return (refused);
}
1106#else /* !SMALL_KERNEL */
1107
/*
 * State shared between coredump_elf() and the callbacks it hands to
 * uvm_coredump_walkmap().
 * NOTE(review): the callbacks are not visible here; fields that
 * coredump_elf() only reads are presumably filled in by them -- confirm.
 */
struct writesegs_state {
	off_t	notestart;	/* file offset of the notes; notestart + note
				 * size is expected to equal secstart */
	off_t	secstart;	/* file offset expected right after the notes */
	off_t	secoff;		/* not used by coredump_elf() itself;
				 * presumably a running offset -- confirm */
	struct	proc *p;	/* process being dumped */
	void	*iocookie;	/* opaque cookie passed to coredump_write() */
	Elf_Phdr *psections;	/* program header array written to the file */
	size_t	psectionslen;	/* size in bytes of the psections write */
	size_t	notesize;	/* note size computed up front; checked against
				 * the size actually written (DIAGNOSTIC) */
	int	npsections;	/* number of entries in psections */
};
1119
1120uvm_coredump_setup_cb	coredump_setup_elf;
1121uvm_coredump_walk_cb	coredump_walk_elf;
1122
1123int	coredump_notes_elf(struct proc *, void *, size_t *);
1124int	coredump_note_elf(struct proc *, void *, size_t *);
1125int	coredump_writenote_elf(struct proc *, void *, Elf_Note *,
1126	    const char *, void *);
1127
1128extern vaddr_t sigcode_va;
1129extern vsize_t sigcode_sz;
1130
1131int
1132coredump_elf(struct proc *p, void *cookie)
1133{
1134#ifdef DIAGNOSTIC
1135	off_t offset;
1136#endif
1137	struct writesegs_state ws;
1138	size_t notesize;
1139	int error, i;
1140
1141	ws.p = p;
1142	ws.iocookie = cookie;
1143	ws.psections = NULL;
1144
1145	/*
1146	 * Walk the map to get all the segment offsets and lengths,
1147	 * write out the ELF header.
1148	 */
1149	error = uvm_coredump_walkmap(p, coredump_setup_elf,
1150	    coredump_walk_elf, &ws);
1151	if (error)
1152		goto out;
1153
1154	error = coredump_write(cookie, UIO_SYSSPACE, ws.psections,
1155	    ws.psectionslen, 0);
1156	if (error)
1157		goto out;
1158
1159	/* Write out the notes. */
1160	error = coredump_notes_elf(p, cookie, &notesize);
1161	if (error)
1162		goto out;
1163
1164#ifdef DIAGNOSTIC
1165	if (notesize != ws.notesize)
1166		panic("coredump: notesize changed: %zu != %zu",
1167		    ws.notesize, notesize);
1168	offset = ws.notestart + notesize;
1169	if (offset != ws.secstart)
1170		panic("coredump: offset %lld != secstart %lld",
1171		    (long long) offset, (long long) ws.secstart);
1172#endif
1173
1174	/* Pass 3: finally, write the sections themselves. */
1175	for (i = 0; i < ws.npsections - 1; i++) {
1176		Elf_Phdr *pent = &ws.psections[i];
1177		if (pent->p_filesz == 0)
1178			continue;
1179
1180#ifdef DIAGNOSTIC
1181		if (offset != pent->p_offset)
1182			panic("coredump: offset %lld != p_offset[%d] %lld",
1183			    (long long) offset, i,
1184			    (long long) pent->p_filesz);
1185#endif
1186
1187		/*
1188		 * Since the sigcode is mapped execute-only, we can't
1189		 * read it.  So use the kernel mapping for it instead.
1190		 */
1191		if (pent->p_vaddr == p->p_p->ps_sigcode &&
1192		    pent->p_filesz == sigcode_sz) {
1193			error = coredump_write(cookie, UIO_SYSSPACE,
1194			    (void *)sigcode_va, sigcode_sz, 0);
1195		} else {
1196			error = coredump_write(cookie, UIO_USERSPACE,
1197			    (void *)(vaddr_t)pent->p_vaddr, pent->p_filesz,
1198			    (pent->p_flags & PF_ISVNODE));
1199		}
1200		if (error)
1201			goto out;
1202
1203		coredump_unmap(cookie, (vaddr_t)pent->p_vaddr,
1204		    (vaddr_t)pent->p_vaddr + pent->p_filesz);
1205
1206#ifdef DIAGNOSTIC
1207		offset += ws.psections[i].p_filesz;
1208#endif
1209	}
1210
1211out:
1212	free(ws.psections, M_TEMP, ws.psectionslen);
1213	return (error);
1214}
1215
1216
1217/*
1218 * Normally we lay out core files like this:
1219 *	[ELF Header] [Program headers] [Notes] [data for PT_LOAD segments]
1220 *
 * However, if there are >= 65535 segments then the count overflows
 * the field in the ELF header, so the standard specifies putting a
 * magic number there and saving the real count in the .sh_info field
 * of the first *section* header...which requires generating a section
1225 * header.  To avoid confusing tools, we include an .shstrtab section
1226 * as well so all the indexes look valid.  So in this case we lay
1227 * out the core file like this:
1228 *	[ELF Header] [Section Headers] [.shstrtab] [Program headers] \
1229 *	[Notes] [data for PT_LOAD segments]
1230 *
1231 * The 'shstrtab' structure below is data for the second of the two
1232 * section headers, plus the .shstrtab itself, in one const buffer.
1233 */
1234static const struct {
1235    Elf_Shdr	shdr;
1236    char	shstrtab[sizeof(ELF_SHSTRTAB) + 1];
1237} shstrtab = {
1238    .shdr = {
1239	.sh_name = 1,			/* offset in .shstrtab below */
1240	.sh_type = SHT_STRTAB,
1241	.sh_offset = sizeof(Elf_Ehdr) + 2*sizeof(Elf_Shdr),
1242	.sh_size = sizeof(ELF_SHSTRTAB) + 1,
1243	.sh_addralign = 1,
1244    },
1245    .shstrtab = "\0" ELF_SHSTRTAB,
1246};
1247
1248int
1249coredump_setup_elf(int segment_count, void *cookie)
1250{
1251	Elf_Ehdr ehdr;
1252	struct writesegs_state *ws = cookie;
1253	Elf_Phdr *note;
1254	int error;
1255
1256	/* Get the count of segments, plus one for the PT_NOTE */
1257	ws->npsections = segment_count + 1;
1258
1259	/* Get the size of the notes. */
1260	error = coredump_notes_elf(ws->p, NULL, &ws->notesize);
1261	if (error)
1262		return error;
1263
1264	/* Setup the ELF header */
1265	memset(&ehdr, 0, sizeof(ehdr));
1266	memcpy(ehdr.e_ident, ELFMAG, SELFMAG);
1267	ehdr.e_ident[EI_CLASS] = ELF_TARG_CLASS;
1268	ehdr.e_ident[EI_DATA] = ELF_TARG_DATA;
1269	ehdr.e_ident[EI_VERSION] = EV_CURRENT;
1270	/* XXX Should be the OSABI/ABI version of the executable. */
1271	ehdr.e_ident[EI_OSABI] = ELFOSABI_SYSV;
1272	ehdr.e_ident[EI_ABIVERSION] = 0;
1273	ehdr.e_type = ET_CORE;
1274	/* XXX This should be the e_machine of the executable. */
1275	ehdr.e_machine = ELF_TARG_MACH;
1276	ehdr.e_version = EV_CURRENT;
1277	ehdr.e_entry = 0;
1278	ehdr.e_flags = 0;
1279	ehdr.e_ehsize = sizeof(ehdr);
1280	ehdr.e_phentsize = sizeof(Elf_Phdr);
1281
1282	if (ws->npsections < PN_XNUM) {
1283		ehdr.e_phoff = sizeof(ehdr);
1284		ehdr.e_shoff = 0;
1285		ehdr.e_phnum = ws->npsections;
1286		ehdr.e_shentsize = 0;
1287		ehdr.e_shnum = 0;
1288		ehdr.e_shstrndx = 0;
1289	} else {
1290		/* too many segments, use extension setup */
1291		ehdr.e_shoff = sizeof(ehdr);
1292		ehdr.e_phnum = PN_XNUM;
1293		ehdr.e_shentsize = sizeof(Elf_Shdr);
1294		ehdr.e_shnum = 2;
1295		ehdr.e_shstrndx = 1;
1296		ehdr.e_phoff = shstrtab.shdr.sh_offset + shstrtab.shdr.sh_size;
1297	}
1298
1299	/* Write out the ELF header. */
1300	error = coredump_write(ws->iocookie, UIO_SYSSPACE, &ehdr, sizeof(ehdr), 0);
1301	if (error)
1302		return error;
1303
1304	/*
1305	 * If an section header is needed to store extension info, write
1306	 * it out after the ELF header and before the program header.
1307	 */
1308	if (ehdr.e_shnum != 0) {
1309		Elf_Shdr shdr = { .sh_info = ws->npsections };
1310		error = coredump_write(ws->iocookie, UIO_SYSSPACE, &shdr,
1311		    sizeof shdr, 0);
1312		if (error)
1313			return error;
1314		error = coredump_write(ws->iocookie, UIO_SYSSPACE, &shstrtab,
1315		    sizeof(shstrtab.shdr) + sizeof(shstrtab.shstrtab), 0);
1316		if (error)
1317			return error;
1318	}
1319
1320	/*
1321	 * Allocate the segment header array and setup to collect
1322	 * the section sizes and offsets
1323	 */
1324	ws->psections = mallocarray(ws->npsections, sizeof(Elf_Phdr),
1325	    M_TEMP, M_WAITOK|M_CANFAIL|M_ZERO);
1326	if (ws->psections == NULL)
1327		return ENOMEM;
1328	ws->psectionslen = ws->npsections * sizeof(Elf_Phdr);
1329
1330	ws->notestart = ehdr.e_phoff + ws->psectionslen;
1331	ws->secstart = ws->notestart + ws->notesize;
1332	ws->secoff = ws->secstart;
1333
1334	/* Fill in the PT_NOTE segment header in the last slot */
1335	note = &ws->psections[ws->npsections - 1];
1336	note->p_type = PT_NOTE;
1337	note->p_offset = ws->notestart;
1338	note->p_vaddr = 0;
1339	note->p_paddr = 0;
1340	note->p_filesz = ws->notesize;
1341	note->p_memsz = 0;
1342	note->p_flags = PF_R;
1343	note->p_align = ELFROUNDSIZE;
1344
1345	return (0);
1346}
1347
1348int
1349coredump_walk_elf(vaddr_t start, vaddr_t realend, vaddr_t end, vm_prot_t prot,
1350    int isvnode, int nsegment, void *cookie)
1351{
1352	struct writesegs_state *ws = cookie;
1353	Elf_Phdr phdr;
1354	vsize_t size, realsize;
1355
1356	size = end - start;
1357	realsize = realend - start;
1358
1359	phdr.p_type = PT_LOAD;
1360	phdr.p_offset = ws->secoff;
1361	phdr.p_vaddr = start;
1362	phdr.p_paddr = 0;
1363	phdr.p_filesz = realsize;
1364	phdr.p_memsz = size;
1365	phdr.p_flags = 0;
1366	if (prot & PROT_READ)
1367		phdr.p_flags |= PF_R;
1368	if (prot & PROT_WRITE)
1369		phdr.p_flags |= PF_W;
1370	if (prot & PROT_EXEC)
1371		phdr.p_flags |= PF_X;
1372	if (isvnode)
1373		phdr.p_flags |= PF_ISVNODE;
1374	phdr.p_align = PAGE_SIZE;
1375
1376	ws->secoff += phdr.p_filesz;
1377	ws->psections[nsegment] = phdr;
1378
1379	return (0);
1380}
1381
1382int
1383coredump_notes_elf(struct proc *p, void *iocookie, size_t *sizep)
1384{
1385	struct elfcore_procinfo cpi;
1386	Elf_Note nhdr;
1387	struct process *pr = p->p_p;
1388	struct proc *q;
1389	size_t size, notesize;
1390	int error;
1391
1392	KASSERT(!P_HASSIBLING(p) || pr->ps_single != NULL);
1393	size = 0;
1394
1395	/* First, write an elfcore_procinfo. */
1396	notesize = sizeof(nhdr) + elfround(sizeof("OpenBSD")) +
1397	    elfround(sizeof(cpi));
1398	if (iocookie) {
1399		memset(&cpi, 0, sizeof(cpi));
1400
1401		cpi.cpi_version = ELFCORE_PROCINFO_VERSION;
1402		cpi.cpi_cpisize = sizeof(cpi);
1403		cpi.cpi_signo = p->p_sisig;
1404		cpi.cpi_sigcode = p->p_sicode;
1405
1406		cpi.cpi_sigpend = p->p_siglist | pr->ps_siglist;
1407		cpi.cpi_sigmask = p->p_sigmask;
1408		cpi.cpi_sigignore = pr->ps_sigacts->ps_sigignore;
1409		cpi.cpi_sigcatch = pr->ps_sigacts->ps_sigcatch;
1410
1411		cpi.cpi_pid = pr->ps_pid;
1412		cpi.cpi_ppid = pr->ps_ppid;
1413		cpi.cpi_pgrp = pr->ps_pgid;
1414		if (pr->ps_session->s_leader)
1415			cpi.cpi_sid = pr->ps_session->s_leader->ps_pid;
1416		else
1417			cpi.cpi_sid = 0;
1418
1419		cpi.cpi_ruid = p->p_ucred->cr_ruid;
1420		cpi.cpi_euid = p->p_ucred->cr_uid;
1421		cpi.cpi_svuid = p->p_ucred->cr_svuid;
1422
1423		cpi.cpi_rgid = p->p_ucred->cr_rgid;
1424		cpi.cpi_egid = p->p_ucred->cr_gid;
1425		cpi.cpi_svgid = p->p_ucred->cr_svgid;
1426
1427		(void)strlcpy(cpi.cpi_name, pr->ps_comm, sizeof(cpi.cpi_name));
1428
1429		nhdr.namesz = sizeof("OpenBSD");
1430		nhdr.descsz = sizeof(cpi);
1431		nhdr.type = NT_OPENBSD_PROCINFO;
1432
1433		error = coredump_writenote_elf(p, iocookie, &nhdr,
1434		    "OpenBSD", &cpi);
1435		if (error)
1436			return (error);
1437	}
1438	size += notesize;
1439
1440	/* Second, write an NT_OPENBSD_AUXV note. */
1441	notesize = sizeof(nhdr) + elfround(sizeof("OpenBSD")) +
1442	    elfround(ELF_AUX_WORDS * sizeof(char *));
1443	if (iocookie && pr->ps_auxinfo) {
1444
1445		nhdr.namesz = sizeof("OpenBSD");
1446		nhdr.descsz = ELF_AUX_WORDS * sizeof(char *);
1447		nhdr.type = NT_OPENBSD_AUXV;
1448
1449		error = coredump_write(iocookie, UIO_SYSSPACE,
1450		    &nhdr, sizeof(nhdr), 0);
1451		if (error)
1452			return (error);
1453
1454		error = coredump_write(iocookie, UIO_SYSSPACE,
1455		    "OpenBSD", elfround(nhdr.namesz), 0);
1456		if (error)
1457			return (error);
1458
1459		error = coredump_write(iocookie, UIO_USERSPACE,
1460		    (caddr_t)pr->ps_auxinfo, nhdr.descsz, 0);
1461		if (error)
1462			return (error);
1463	}
1464	size += notesize;
1465
1466#ifdef PT_WCOOKIE
1467	notesize = sizeof(nhdr) + elfround(sizeof("OpenBSD")) +
1468	    elfround(sizeof(register_t));
1469	if (iocookie) {
1470		register_t wcookie;
1471
1472		nhdr.namesz = sizeof("OpenBSD");
1473		nhdr.descsz = sizeof(register_t);
1474		nhdr.type = NT_OPENBSD_WCOOKIE;
1475
1476		wcookie = process_get_wcookie(p);
1477		error = coredump_writenote_elf(p, iocookie, &nhdr,
1478		    "OpenBSD", &wcookie);
1479		if (error)
1480			return (error);
1481	}
1482	size += notesize;
1483#endif
1484
1485	/*
1486	 * Now write the register info for the thread that caused the
1487	 * coredump.
1488	 */
1489	error = coredump_note_elf(p, iocookie, &notesize);
1490	if (error)
1491		return (error);
1492	size += notesize;
1493
1494	/*
1495	 * Now, for each thread, write the register info and any other
1496	 * per-thread notes.  Since we're dumping core, all the other
1497	 * threads in the process have been stopped and the list can't
1498	 * change.
1499	 */
1500	TAILQ_FOREACH(q, &pr->ps_threads, p_thr_link) {
1501		if (q == p)		/* we've taken care of this thread */
1502			continue;
1503		error = coredump_note_elf(q, iocookie, &notesize);
1504		if (error)
1505			return (error);
1506		size += notesize;
1507	}
1508
1509	*sizep = size;
1510	return (0);
1511}
1512
1513int
1514coredump_note_elf(struct proc *p, void *iocookie, size_t *sizep)
1515{
1516	Elf_Note nhdr;
1517	int size, notesize, error;
1518	int namesize;
1519	char name[64+ELFROUNDSIZE];
1520	struct reg intreg;
1521#ifdef PT_GETFPREGS
1522	struct fpreg freg;
1523#endif
1524#ifdef PT_PACMASK
1525	register_t pacmask[2];
1526#endif
1527
1528	size = 0;
1529
1530	snprintf(name, sizeof(name)-ELFROUNDSIZE, "%s@%d",
1531	    "OpenBSD", p->p_tid + THREAD_PID_OFFSET);
1532	namesize = strlen(name) + 1;
1533	memset(name + namesize, 0, elfround(namesize) - namesize);
1534
1535	notesize = sizeof(nhdr) + elfround(namesize) + elfround(sizeof(intreg));
1536	if (iocookie) {
1537		error = process_read_regs(p, &intreg);
1538		if (error)
1539			return (error);
1540
1541		nhdr.namesz = namesize;
1542		nhdr.descsz = sizeof(intreg);
1543		nhdr.type = NT_OPENBSD_REGS;
1544
1545		error = coredump_writenote_elf(p, iocookie, &nhdr,
1546		    name, &intreg);
1547		if (error)
1548			return (error);
1549
1550	}
1551	size += notesize;
1552
1553#ifdef PT_GETFPREGS
1554	notesize = sizeof(nhdr) + elfround(namesize) + elfround(sizeof(freg));
1555	if (iocookie) {
1556		error = process_read_fpregs(p, &freg);
1557		if (error)
1558			return (error);
1559
1560		nhdr.namesz = namesize;
1561		nhdr.descsz = sizeof(freg);
1562		nhdr.type = NT_OPENBSD_FPREGS;
1563
1564		error = coredump_writenote_elf(p, iocookie, &nhdr, name, &freg);
1565		if (error)
1566			return (error);
1567	}
1568	size += notesize;
1569#endif
1570
1571#ifdef PT_PACMASK
1572	notesize = sizeof(nhdr) + elfround(namesize) +
1573	    elfround(sizeof(pacmask));
1574	if (iocookie) {
1575		pacmask[0] = pacmask[1] = process_get_pacmask(p);
1576
1577		nhdr.namesz = namesize;
1578		nhdr.descsz = sizeof(pacmask);
1579		nhdr.type = NT_OPENBSD_PACMASK;
1580
1581		error = coredump_writenote_elf(p, iocookie, &nhdr,
1582		    name, &pacmask);
1583		if (error)
1584			return (error);
1585	}
1586	size += notesize;
1587#endif
1588
1589	*sizep = size;
1590	/* XXX Add hook for machdep per-LWP notes. */
1591	return (0);
1592}
1593
1594int
1595coredump_writenote_elf(struct proc *p, void *cookie, Elf_Note *nhdr,
1596    const char *name, void *data)
1597{
1598	int error;
1599
1600	error = coredump_write(cookie, UIO_SYSSPACE, nhdr, sizeof(*nhdr), 0);
1601	if (error)
1602		return error;
1603
1604	error = coredump_write(cookie, UIO_SYSSPACE, name,
1605	    elfround(nhdr->namesz), 0);
1606	if (error)
1607		return error;
1608
1609	return coredump_write(cookie, UIO_SYSSPACE, data, nhdr->descsz, 0);
1610}
1611#endif /* !SMALL_KERNEL */
1612