elfcore.c revision 210063
1/*-
2 * Copyright (c) 2007 Sandvine Incorporated
3 * Copyright (c) 1998 John D. Polstra
4 * All rights reserved.
5 *
6 * Redistribution and use in source and binary forms, with or without
7 * modification, are permitted provided that the following conditions
8 * are met:
9 * 1. Redistributions of source code must retain the above copyright
10 *    notice, this list of conditions and the following disclaimer.
11 * 2. Redistributions in binary form must reproduce the above copyright
12 *    notice, this list of conditions and the following disclaimer in the
13 *    documentation and/or other materials provided with the distribution.
14 *
15 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
16 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
17 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
18 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
19 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
20 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
21 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
22 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
23 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
24 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
25 * SUCH DAMAGE.
26 */
27
28#include <sys/cdefs.h>
29__FBSDID("$FreeBSD: head/usr.bin/gcore/elfcore.c 210063 2010-07-14 17:16:25Z attilio $");
30
31#include <sys/param.h>
32#include <sys/procfs.h>
33#include <sys/ptrace.h>
34#include <sys/queue.h>
35#include <sys/linker_set.h>
36#include <sys/sysctl.h>
37#include <sys/user.h>
38#include <sys/wait.h>
39#include <machine/elf.h>
40#include <vm/vm_param.h>
41#include <vm/vm.h>
42#include <vm/pmap.h>
43#include <vm/vm_map.h>
44#include <err.h>
45#include <errno.h>
46#include <fcntl.h>
47#include <stdint.h>
48#include <stdio.h>
49#include <stdlib.h>
50#include <string.h>
51#include <unistd.h>
52#include <libutil.h>
53
54#include "extern.h"
55
56/*
57 * Code for generating ELF core dumps.
58 */
59
60typedef void (*segment_callback)(vm_map_entry_t, void *);
61
62/* Closure for cb_put_phdr(). */
63struct phdr_closure {
64	Elf_Phdr *phdr;		/* Program header to fill in */
65	Elf_Off offset;		/* Offset of segment in core file */
66};
67
/* Totals accumulated by cb_size_segment() over the memory map. */
struct sseg_closure {
	int count;	/* Number of map entries visited so far. */
	size_t size;	/* Sum of (end - start) over the visited entries. */
};
73
74static void cb_put_phdr(vm_map_entry_t, void *);
75static void cb_size_segment(vm_map_entry_t, void *);
76static void each_writable_segment(vm_map_entry_t, segment_callback,
77    void *closure);
78static void elf_detach(void);	/* atexit() handler. */
79static void elf_puthdr(pid_t, vm_map_entry_t, void *, size_t *, int numsegs);
80static void elf_putnote(void *dst, size_t *off, const char *name, int type,
81    const void *desc, size_t descsz);
82static void freemap(vm_map_entry_t);
83static vm_map_entry_t readmap(pid_t);
84
/*
 * Pid of the process being dumped.  Kept at file scope because the
 * atexit() handler elf_detach() takes no arguments; while it is still 0
 * elf_detach() does nothing.
 */
static pid_t g_pid;
86
87static int
88elf_ident(int efd, pid_t pid __unused, char *binfile __unused)
89{
90	Elf_Ehdr hdr;
91	int cnt;
92
93	cnt = read(efd, &hdr, sizeof(hdr));
94	if (cnt != sizeof(hdr))
95		return (0);
96	if (IS_ELF(hdr))
97		return (1);
98	return (0);
99}
100
101static void
102elf_detach(void)
103{
104
105	if (g_pid != 0)
106		ptrace(PT_DETACH, g_pid, (caddr_t)1, 0);
107}
108
109/*
110 * Write an ELF coredump for the given pid to the given fd.
111 */
112static void
113elf_coredump(int efd __unused, int fd, pid_t pid)
114{
115	vm_map_entry_t map;
116	struct sseg_closure seginfo;
117	void *hdr;
118	size_t hdrsize;
119	Elf_Phdr *php;
120	int i;
121
122	/* Attach to process to dump. */
123	g_pid = pid;
124	if (atexit(elf_detach) != 0)
125		err(1, "atexit");
126	errno = 0;
127	ptrace(PT_ATTACH, pid, NULL, 0);
128	if (errno)
129		err(1, "PT_ATTACH");
130	if (waitpid(pid, NULL, 0) == -1)
131		err(1, "waitpid");
132
133	/* Get the program's memory map. */
134	map = readmap(pid);
135
136	/* Size the program segments. */
137	seginfo.count = 0;
138	seginfo.size = 0;
139	each_writable_segment(map, cb_size_segment, &seginfo);
140
141	/*
142	 * Calculate the size of the core file header area by making
143	 * a dry run of generating it.  Nothing is written, but the
144	 * size is calculated.
145	 */
146	hdrsize = 0;
147	elf_puthdr(pid, map, NULL, &hdrsize, seginfo.count);
148
149	/*
150	 * Allocate memory for building the header, fill it up,
151	 * and write it out.
152	 */
153	if ((hdr = calloc(1, hdrsize)) == NULL)
154		errx(1, "out of memory");
155
156	/* Fill in the header. */
157	hdrsize = 0;
158	elf_puthdr(pid, map, hdr, &hdrsize, seginfo.count);
159
160	/* Write it to the core file. */
161	if (write(fd, hdr, hdrsize) == -1)
162		err(1, "write");
163
164	/* Write the contents of all of the writable segments. */
165	php = (Elf_Phdr *)((char *)hdr + sizeof(Elf_Ehdr)) + 1;
166	for (i = 0;  i < seginfo.count;  i++) {
167		struct ptrace_io_desc iorequest;
168		uintmax_t nleft = php->p_filesz;
169
170		iorequest.piod_op = PIOD_READ_D;
171		iorequest.piod_offs = (caddr_t)php->p_vaddr;
172		while (nleft > 0) {
173			char buf[8*1024];
174			size_t nwant;
175			ssize_t ngot;
176
177			if (nleft > sizeof(buf))
178				nwant = sizeof buf;
179			else
180				nwant = nleft;
181			iorequest.piod_addr = buf;
182			iorequest.piod_len = nwant;
183			ptrace(PT_IO, pid, (caddr_t)&iorequest, 0);
184			ngot = iorequest.piod_len;
185			if ((size_t)ngot < nwant)
186				errx(1, "short read wanted %d, got %d",
187				    nwant, ngot);
188			ngot = write(fd, buf, nwant);
189			if (ngot == -1)
190				err(1, "write of segment %d failed", i);
191			if ((size_t)ngot != nwant)
192				errx(1, "short write");
193			nleft -= nwant;
194			iorequest.piod_offs += ngot;
195		}
196		php++;
197	}
198	free(hdr);
199	freemap(map);
200}
201
202/*
203 * A callback for each_writable_segment() to write out the segment's
204 * program header entry.
205 */
206static void
207cb_put_phdr(vm_map_entry_t entry, void *closure)
208{
209	struct phdr_closure *phc = (struct phdr_closure *)closure;
210	Elf_Phdr *phdr = phc->phdr;
211
212	phc->offset = round_page(phc->offset);
213
214	phdr->p_type = PT_LOAD;
215	phdr->p_offset = phc->offset;
216	phdr->p_vaddr = entry->start;
217	phdr->p_paddr = 0;
218	phdr->p_filesz = phdr->p_memsz = entry->end - entry->start;
219	phdr->p_align = PAGE_SIZE;
220	phdr->p_flags = 0;
221	if (entry->protection & VM_PROT_READ)
222		phdr->p_flags |= PF_R;
223	if (entry->protection & VM_PROT_WRITE)
224		phdr->p_flags |= PF_W;
225	if (entry->protection & VM_PROT_EXECUTE)
226		phdr->p_flags |= PF_X;
227
228	phc->offset += phdr->p_filesz;
229	phc->phdr++;
230}
231
232/*
233 * A callback for each_writable_segment() to gather information about
234 * the number of segments and their total size.
235 */
236static void
237cb_size_segment(vm_map_entry_t entry, void *closure)
238{
239	struct sseg_closure *ssc = (struct sseg_closure *)closure;
240
241	ssc->count++;
242	ssc->size += entry->end - entry->start;
243}
244
245/*
246 * For each segment in the given memory map, call the given function
247 * with a pointer to the map entry and some arbitrary caller-supplied
248 * data.
249 */
250static void
251each_writable_segment(vm_map_entry_t map, segment_callback func, void *closure)
252{
253	vm_map_entry_t entry;
254
255	for (entry = map;  entry != NULL;  entry = entry->next)
256		(*func)(entry, closure);
257}
258
259static void
260elf_getstatus(pid_t pid, prpsinfo_t *psinfo)
261{
262	struct kinfo_proc kobj;
263	int name[4];
264	size_t len;
265
266	name[0] = CTL_KERN;
267	name[1] = KERN_PROC;
268	name[2] = KERN_PROC_PID;
269	name[3] = pid;
270
271	len = sizeof(kobj);
272	if (sysctl(name, 4, &kobj, &len, NULL, 0) == -1)
273		err(1, "error accessing kern.proc.pid.%u sysctl", pid);
274	if (kobj.ki_pid != pid)
275		err(1, "error accessing kern.proc.pid.%u sysctl datas", pid);
276	strncpy(psinfo->pr_fname, kobj.ki_comm, MAXCOMLEN);
277	strncpy(psinfo->pr_psargs, psinfo->pr_fname, PRARGSZ);
278}
279
280/*
281 * Generate the ELF coredump header into the buffer at "dst".  "dst" may
282 * be NULL, in which case the header is sized but not actually generated.
283 */
284static void
285elf_puthdr(pid_t pid, vm_map_entry_t map, void *dst, size_t *off, int numsegs)
286{
287	struct {
288		prstatus_t status;
289		prfpregset_t fpregset;
290		prpsinfo_t psinfo;
291	} *tempdata;
292	size_t ehoff;
293	size_t phoff;
294	size_t noteoff;
295	size_t notesz;
296	size_t threads;
297	lwpid_t *tids;
298	int i;
299
300	prstatus_t *status;
301	prfpregset_t *fpregset;
302	prpsinfo_t *psinfo;
303
304	ehoff = *off;
305	*off += sizeof(Elf_Ehdr);
306
307	phoff = *off;
308	*off += (numsegs + 1) * sizeof(Elf_Phdr);
309
310	noteoff = *off;
311
312	if (dst != NULL) {
313		if ((tempdata = calloc(1, sizeof(*tempdata))) == NULL)
314			errx(1, "out of memory");
315		status = &tempdata->status;
316		fpregset = &tempdata->fpregset;
317		psinfo = &tempdata->psinfo;
318	} else {
319		tempdata = NULL;
320		status = NULL;
321		fpregset = NULL;
322		psinfo = NULL;
323	}
324
325	errno = 0;
326	threads = ptrace(PT_GETNUMLWPS, pid, NULL, 0);
327	if (errno)
328		err(1, "PT_GETNUMLWPS");
329
330	if (dst != NULL) {
331		psinfo->pr_version = PRPSINFO_VERSION;
332		psinfo->pr_psinfosz = sizeof(prpsinfo_t);
333		elf_getstatus(pid, psinfo);
334
335	}
336	elf_putnote(dst, off, "FreeBSD", NT_PRPSINFO, psinfo,
337	    sizeof *psinfo);
338
339	if (dst != NULL) {
340		tids = malloc(threads * sizeof(*tids));
341		if (tids == NULL)
342			errx(1, "out of memory");
343		errno = 0;
344		ptrace(PT_GETLWPLIST, pid, (void *)tids, threads);
345		if (errno)
346			err(1, "PT_GETLWPLIST");
347	}
348	for (i = 0; i < threads; ++i) {
349		if (dst != NULL) {
350			status->pr_version = PRSTATUS_VERSION;
351			status->pr_statussz = sizeof(prstatus_t);
352			status->pr_gregsetsz = sizeof(gregset_t);
353			status->pr_fpregsetsz = sizeof(fpregset_t);
354			status->pr_osreldate = __FreeBSD_version;
355			status->pr_pid = tids[i];
356
357			ptrace(PT_GETREGS, tids[i], (void *)&status->pr_reg, 0);
358			ptrace(PT_GETFPREGS, tids[i], (void *)fpregset, 0);
359		}
360		elf_putnote(dst, off, "FreeBSD", NT_PRSTATUS, status,
361		    sizeof *status);
362		elf_putnote(dst, off, "FreeBSD", NT_FPREGSET, fpregset,
363		    sizeof *fpregset);
364	}
365
366	notesz = *off - noteoff;
367
368	if (dst != NULL) {
369		free(tids);
370		free(tempdata);
371	}
372
373	/* Align up to a page boundary for the program segments. */
374	*off = round_page(*off);
375
376	if (dst != NULL) {
377		Elf_Ehdr *ehdr;
378		Elf_Phdr *phdr;
379		struct phdr_closure phc;
380
381		/*
382		 * Fill in the ELF header.
383		 */
384		ehdr = (Elf_Ehdr *)((char *)dst + ehoff);
385		ehdr->e_ident[EI_MAG0] = ELFMAG0;
386		ehdr->e_ident[EI_MAG1] = ELFMAG1;
387		ehdr->e_ident[EI_MAG2] = ELFMAG2;
388		ehdr->e_ident[EI_MAG3] = ELFMAG3;
389		ehdr->e_ident[EI_CLASS] = ELF_CLASS;
390		ehdr->e_ident[EI_DATA] = ELF_DATA;
391		ehdr->e_ident[EI_VERSION] = EV_CURRENT;
392		ehdr->e_ident[EI_OSABI] = ELFOSABI_FREEBSD;
393		ehdr->e_ident[EI_ABIVERSION] = 0;
394		ehdr->e_ident[EI_PAD] = 0;
395		ehdr->e_type = ET_CORE;
396		ehdr->e_machine = ELF_ARCH;
397		ehdr->e_version = EV_CURRENT;
398		ehdr->e_entry = 0;
399		ehdr->e_phoff = phoff;
400		ehdr->e_flags = 0;
401		ehdr->e_ehsize = sizeof(Elf_Ehdr);
402		ehdr->e_phentsize = sizeof(Elf_Phdr);
403		ehdr->e_phnum = numsegs + 1;
404		ehdr->e_shentsize = sizeof(Elf_Shdr);
405		ehdr->e_shnum = 0;
406		ehdr->e_shstrndx = SHN_UNDEF;
407
408		/*
409		 * Fill in the program header entries.
410		 */
411		phdr = (Elf_Phdr *)((char *)dst + phoff);
412
413		/* The note segment. */
414		phdr->p_type = PT_NOTE;
415		phdr->p_offset = noteoff;
416		phdr->p_vaddr = 0;
417		phdr->p_paddr = 0;
418		phdr->p_filesz = notesz;
419		phdr->p_memsz = 0;
420		phdr->p_flags = 0;
421		phdr->p_align = 0;
422		phdr++;
423
424		/* All the writable segments from the program. */
425		phc.phdr = phdr;
426		phc.offset = *off;
427		each_writable_segment(map, cb_put_phdr, &phc);
428	}
429}
430
431/*
432 * Emit one note section to "dst", or just size it if "dst" is NULL.
433 */
434static void
435elf_putnote(void *dst, size_t *off, const char *name, int type,
436    const void *desc, size_t descsz)
437{
438	Elf_Note note;
439
440	note.n_namesz = strlen(name) + 1;
441	note.n_descsz = descsz;
442	note.n_type = type;
443	if (dst != NULL)
444		bcopy(&note, (char *)dst + *off, sizeof note);
445	*off += sizeof note;
446	if (dst != NULL)
447		bcopy(name, (char *)dst + *off, note.n_namesz);
448	*off += roundup2(note.n_namesz, sizeof(Elf_Size));
449	if (dst != NULL)
450		bcopy(desc, (char *)dst + *off, note.n_descsz);
451	*off += roundup2(note.n_descsz, sizeof(Elf_Size));
452}
453
454/*
455 * Free the memory map.
456 */
457static void
458freemap(vm_map_entry_t map)
459{
460
461	while (map != NULL) {
462		vm_map_entry_t next = map->next;
463		free(map);
464		map = next;
465	}
466}
467
468/*
469 * Read the process's memory map using kinfo_getvmmap(), and return a list of
470 * VM map entries.  Only the non-device read/writable segments are
471 * returned.  The map entries in the list aren't fully filled in; only
472 * the items we need are present.
473 */
474static vm_map_entry_t
475readmap(pid_t pid)
476{
477	vm_map_entry_t ent, *linkp, map;
478	struct kinfo_vmentry *vmentl, *kve;
479	int i, nitems;
480
481	vmentl = kinfo_getvmmap(pid, &nitems);
482	if (vmentl == NULL)
483		err(1, "cannot retrieve mappings for %u process", pid);
484
485	map = NULL;
486	linkp = &map;
487	for (i = 0; i < nitems; i++) {
488		kve = &vmentl[i];
489
490		/*
491		 * Ignore 'malformed' segments or ones representing memory
492		 * mapping with MAP_NOCORE on.
493		 * If the 'full' support is disabled, just dump the most
494		 * meaningful data segments.
495		 */
496		if ((kve->kve_protection & KVME_PROT_READ) == 0 ||
497		    (kve->kve_flags & KVME_FLAG_NOCOREDUMP) != 0 ||
498		    kve->kve_type == KVME_TYPE_DEAD ||
499		    kve->kve_type == KVME_TYPE_UNKNOWN ||
500		    ((pflags & PFLAGS_FULL) == 0 &&
501		    kve->kve_type != KVME_TYPE_DEFAULT &&
502		    kve->kve_type != KVME_TYPE_VNODE &&
503		    kve->kve_type != KVME_TYPE_SWAP))
504			continue;
505
506		ent = calloc(1, sizeof(*ent));
507		if (ent == NULL)
508			errx(1, "out of memory");
509		ent->start = (vm_offset_t)kve->kve_start;
510		ent->end = (vm_offset_t)kve->kve_end;
511		ent->protection = VM_PROT_READ | VM_PROT_WRITE;
512		if ((kve->kve_protection & KVME_PROT_EXEC) != 0)
513			ent->protection |= VM_PROT_EXECUTE;
514
515		*linkp = ent;
516		linkp = &ent->next;
517	}
518	free(vmentl);
519	return (map);
520}
521
522struct dumpers elfdump = { elf_ident, elf_coredump };
523TEXT_SET(dumpset, elfdump);
524