1/*
2 * Copyright (c) 2000-2006 Apple Computer, Inc. All rights reserved.
3 *
4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5 *
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. The rights granted to you under the License
10 * may not be used to create, or enable the creation or redistribution of,
11 * unlawful or unlicensed copies of an Apple operating system, or to
12 * circumvent, violate, or enable the circumvention or violation of, any
13 * terms of an Apple operating system software license agreement.
14 *
15 * Please obtain a copy of the License at
16 * http://www.opensource.apple.com/apsl/ and read it before using this file.
17 *
18 * The Original Code and all software distributed under the License are
19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23 * Please see the License for the specific language governing rights and
24 * limitations under the License.
25 *
26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27 */
28/* Copyright (c) 1991 NeXT Computer, Inc.  All rights reserved.
29 *
30 *	File:	bsd/kern/kern_core.c
31 *
32 *	This file contains machine independent code for performing core dumps.
33 *
34 */
35
36#include <mach/vm_param.h>
37#include <mach/thread_status.h>
38
39#include <sys/param.h>
40#include <sys/systm.h>
41#include <sys/signalvar.h>
42#include <sys/resourcevar.h>
43#include <sys/namei.h>
44#include <sys/vnode_internal.h>
45#include <sys/proc_internal.h>
46#include <sys/kauth.h>
47#include <sys/timeb.h>
48#include <sys/times.h>
49#include <sys/acct.h>
50#include <sys/file_internal.h>
51#include <sys/uio.h>
52#include <sys/kernel.h>
53#include <sys/stat.h>
54
55#include <mach-o/loader.h>
56#include <mach/vm_region.h>
57#include <mach/vm_statistics.h>
58
59#include <vm/vm_kern.h>
60#include <vm/vm_protos.h> /* last */
61#include <vm/vm_map.h>		/* current_map() */
62#include <mach/mach_vm.h>	/* mach_vm_region_recurse() */
63#include <mach/task.h>		/* task_suspend() */
64#include <kern/task.h>		/* get_task_numacts() */
65
66#include <security/audit/audit.h>
67
68typedef struct {
69	int	flavor;			/* the number for this flavor */
70	mach_msg_type_number_t	count;	/* count of ints in this flavor */
71} mythread_state_flavor_t;
72
73#if defined (__i386__) || defined (__x86_64__)
74mythread_state_flavor_t thread_flavor_array [] = {
75		{x86_THREAD_STATE, x86_THREAD_STATE_COUNT},
76		{x86_FLOAT_STATE, x86_FLOAT_STATE_COUNT},
77		{x86_EXCEPTION_STATE, x86_EXCEPTION_STATE_COUNT},
78		};
79int mynum_flavors=3;
80#else
81#error architecture not supported
82#endif
83
84
85typedef struct {
86	vm_offset_t header;
87	int  hoffset;
88	mythread_state_flavor_t *flavors;
89	int tstate_size;
90	int flavor_count;
91} tir_t;
92
93/* XXX should be static */
94void collectth_state(thread_t th_act, void *tirp);
95
96extern int freespace_mb(vnode_t vp);
97
98/* XXX not in a Mach header anywhere */
99kern_return_t thread_getstatus(register thread_t act, int flavor,
100	thread_state_t tstate, mach_msg_type_number_t *count);
101void task_act_iterate_wth_args(task_t, void(*)(thread_t, void *), void *);
102extern kern_return_t task_suspend_internal(task_t);
103
104static cpu_type_t process_cpu_type(proc_t proc);
105static cpu_type_t process_cpu_subtype(proc_t proc);
106
107#ifdef SECURE_KERNEL
108__XNU_PRIVATE_EXTERN int do_coredump = 0;	/* default: don't dump cores */
109#else
110__XNU_PRIVATE_EXTERN int do_coredump = 1;	/* default: dump cores */
111#endif
112__XNU_PRIVATE_EXTERN int sugid_coredump = 0; /* default: but not SGUID binaries */
113
114
115/* cpu_type returns only the most generic indication of the current CPU. */
116/* in a core we want to know the kind of process. */
117
118static cpu_type_t
119process_cpu_type(proc_t core_proc)
120{
121	cpu_type_t what_we_think;
122#if defined (__i386__) || defined (__x86_64__)
123    if (IS_64BIT_PROCESS(core_proc)) {
124		what_we_think = CPU_TYPE_X86_64;
125	} else {
126		what_we_think = CPU_TYPE_I386;
127	}
128#endif
129	return what_we_think;
130}
131
132static cpu_type_t
133process_cpu_subtype(proc_t core_proc)
134{
135	cpu_type_t what_we_think;
136#if defined (__i386__) || defined (__x86_64__)
137    if (IS_64BIT_PROCESS(core_proc)) {
138		what_we_think = CPU_SUBTYPE_X86_64_ALL;
139	} else {
140		what_we_think = CPU_SUBTYPE_I386_ALL;
141	}
142#endif
143	return what_we_think;
144}
145
146void
147collectth_state(thread_t th_act, void *tirp)
148{
149	vm_offset_t	header;
150	int  hoffset, i ;
151	mythread_state_flavor_t *flavors;
152	struct thread_command	*tc;
153	tir_t *t = (tir_t *)tirp;
154
155		/*
156		 *	Fill in thread command structure.
157		 */
158		header = t->header;
159		hoffset = t->hoffset;
160		flavors = t->flavors;
161
162		tc = (struct thread_command *) (header + hoffset);
163		tc->cmd = LC_THREAD;
164		tc->cmdsize = sizeof(struct thread_command)
165				+ t->tstate_size;
166		hoffset += sizeof(struct thread_command);
167		/*
168		 * Follow with a struct thread_state_flavor and
169		 * the appropriate thread state struct for each
170		 * thread state flavor.
171		 */
172		for (i = 0; i < t->flavor_count; i++) {
173			*(mythread_state_flavor_t *)(header+hoffset) =
174			  flavors[i];
175			hoffset += sizeof(mythread_state_flavor_t);
176			thread_getstatus(th_act, flavors[i].flavor,
177					(thread_state_t)(header+hoffset),
178					&flavors[i].count);
179			hoffset += flavors[i].count*sizeof(int);
180		}
181
182		t->hoffset = hoffset;
183}
184
185
186/*
187 * coredump
188 *
189 * Description:	Create a core image on the file "core" for the process
190 *		indicated
191 *
192 * Parameters:	core_proc			Process to dump core [*]
193 *				reserve_mb			If non-zero, leave filesystem with
194 *									at least this much free space.
195 *				ignore_ulimit		If set, ignore the process's core file ulimit.
196 *
197 * Returns:	0				Success
198 *		EFAULT				Failed
199 *
200 * IMPORTANT:	This function can only be called on the current process, due
201 *		to assumptions below; see variable declaration section for
202 *		details.
203 */
204#define	MAX_TSTATE_FLAVORS	10
205int
206coredump(proc_t core_proc, uint32_t reserve_mb, int ignore_ulimit)
207{
208/* Begin assumptions that limit us to only the current process */
209	vfs_context_t ctx = vfs_context_current();
210	vm_map_t	map = current_map();
211	task_t		task = current_task();
212/* End assumptions */
213	kauth_cred_t cred = vfs_context_ucred(ctx);
214	int error = 0;
215	struct vnode_attr va;
216	int		thread_count, segment_count;
217	int		command_size, header_size, tstate_size;
218	int		hoffset;
219	off_t		foffset;
220	mach_vm_offset_t vmoffset;
221	vm_offset_t	header;
222	mach_vm_size_t	vmsize;
223	vm_prot_t	prot;
224	vm_prot_t	maxprot;
225	vm_inherit_t	inherit;
226	int		error1 = 0;
227	char		stack_name[MAXCOMLEN+6];
228	char		*alloced_name = NULL;
229	char		*name;
230	mythread_state_flavor_t flavors[MAX_TSTATE_FLAVORS];
231	vm_size_t	mapsize;
232	int		i;
233	uint32_t nesting_depth = 0;
234	kern_return_t	kret;
235	struct vm_region_submap_info_64 vbr;
236	mach_msg_type_number_t vbrcount = 0;
237	tir_t tir1;
238	struct vnode * vp;
239	struct mach_header	*mh = NULL;	/* protected by is_64 */
240	struct mach_header_64	*mh64 = NULL;	/* protected by is_64 */
241	int		is_64 = 0;
242	size_t		mach_header_sz = sizeof(struct mach_header);
243	size_t		segment_command_sz = sizeof(struct segment_command);
244
245	if (current_proc() != core_proc) {
246		panic("coredump() called against proc that is not current_proc: %p", core_proc);
247	}
248
249	if (do_coredump == 0 ||		/* Not dumping at all */
250	    ( (sugid_coredump == 0) &&	/* Not dumping SUID/SGID binaries */
251	      ( (kauth_cred_getsvuid(cred) != kauth_cred_getruid(cred)) ||
252	        (kauth_cred_getsvgid(cred) != kauth_cred_getrgid(cred))))) {
253
254#if CONFIG_AUDIT
255		audit_proc_coredump(core_proc, NULL, EFAULT);
256#endif
257		return (EFAULT);
258	}
259
260	if (IS_64BIT_PROCESS(core_proc)) {
261		is_64 = 1;
262		mach_header_sz = sizeof(struct mach_header_64);
263		segment_command_sz = sizeof(struct segment_command_64);
264	}
265
266	mapsize = get_vmmap_size(map);
267
268	if ((mapsize >=  core_proc->p_rlimit[RLIMIT_CORE].rlim_cur) && (ignore_ulimit == 0))
269		return (EFAULT);
270	(void) task_suspend_internal(task);
271
272	MALLOC(alloced_name, char *, MAXPATHLEN, M_TEMP, M_NOWAIT | M_ZERO);
273
274	/* create name according to sysctl'able format string */
275	/* if name creation fails, fall back to historical behaviour... */
276	if (alloced_name == NULL ||
277	    proc_core_name(core_proc->p_comm, kauth_cred_getuid(cred),
278			   core_proc->p_pid, alloced_name, MAXPATHLEN)) {
279		snprintf(stack_name, sizeof(stack_name),
280			 "/cores/core.%d", core_proc->p_pid);
281		name = stack_name;
282	} else
283		name = alloced_name;
284
285	if ((error = vnode_open(name, (O_CREAT | FWRITE | O_NOFOLLOW), S_IRUSR, VNODE_LOOKUP_NOFOLLOW, &vp, ctx)))
286		goto out2;
287
288	VATTR_INIT(&va);
289	VATTR_WANTED(&va, va_nlink);
290	/* Don't dump to non-regular files or files with links. */
291	if (vp->v_type != VREG ||
292	    vnode_getattr(vp, &va, ctx) || va.va_nlink != 1) {
293		error = EFAULT;
294		goto out;
295	}
296
297	VATTR_INIT(&va);	/* better to do it here than waste more stack in vnode_setsize */
298	VATTR_SET(&va, va_data_size, 0);
299	vnode_setattr(vp, &va, ctx);
300	core_proc->p_acflag |= ACORE;
301
302	if ((reserve_mb > 0) &&
303	    ((freespace_mb(vp) - (mapsize >> 20)) < reserve_mb)) {
304		error = ENOSPC;
305		goto out;
306	}
307
308	/*
309	 *	If the task is modified while dumping the file
310	 *	(e.g., changes in threads or VM, the resulting
311	 *	file will not necessarily be correct.
312	 */
313
314	thread_count = get_task_numacts(task);
315	segment_count = get_vmmap_entries(map);	/* XXX */
316	tir1.flavor_count = sizeof(thread_flavor_array)/sizeof(mythread_state_flavor_t);
317	bcopy(thread_flavor_array, flavors,sizeof(thread_flavor_array));
318	tstate_size = 0;
319	for (i = 0; i < tir1.flavor_count; i++)
320		tstate_size += sizeof(mythread_state_flavor_t) +
321		  (flavors[i].count * sizeof(int));
322	command_size = segment_count * segment_command_sz +
323	  thread_count*sizeof(struct thread_command) +
324	  tstate_size*thread_count;
325
326	header_size = command_size + mach_header_sz;
327
328	if (kmem_alloc(kernel_map, &header, (vm_size_t)header_size) != KERN_SUCCESS) {
329		error = ENOMEM;
330		goto out;
331	}
332
333	/*
334	 *	Set up Mach-O header.
335	 */
336	if (is_64) {
337		mh64 = (struct mach_header_64 *)header;
338		mh64->magic = MH_MAGIC_64;
339		mh64->cputype = process_cpu_type(core_proc);
340		mh64->cpusubtype = process_cpu_subtype(core_proc);
341		mh64->filetype = MH_CORE;
342		mh64->ncmds = segment_count + thread_count;
343		mh64->sizeofcmds = command_size;
344		mh64->reserved = 0;		/* 8 byte alignment */
345	} else {
346		mh = (struct mach_header *)header;
347		mh->magic = MH_MAGIC;
348		mh->cputype = process_cpu_type(core_proc);
349		mh->cpusubtype = process_cpu_subtype(core_proc);
350		mh->filetype = MH_CORE;
351		mh->ncmds = segment_count + thread_count;
352		mh->sizeofcmds = command_size;
353	}
354
355	hoffset = mach_header_sz;	/* offset into header */
356	foffset = round_page(header_size);	/* offset into file */
357	vmoffset = MACH_VM_MIN_ADDRESS;		/* offset into VM */
358
359	/*
360	 * We use to check for an error, here, now we try and get
361	 * as much as we can
362	 */
363	while (segment_count > 0) {
364		struct segment_command		*sc;
365		struct segment_command_64	*sc64;
366
367		/*
368		 *	Get region information for next region.
369		 */
370
371		while (1) {
372			vbrcount = VM_REGION_SUBMAP_INFO_COUNT_64;
373			if((kret = mach_vm_region_recurse(map,
374					&vmoffset, &vmsize, &nesting_depth,
375					(vm_region_recurse_info_t)&vbr,
376					&vbrcount)) != KERN_SUCCESS) {
377				break;
378			}
379			/*
380			 * If we get a valid mapping back, but we're dumping
381			 * a 32 bit process,  and it's over the allowable
382			 * address space of a 32 bit process, it's the same
383			 * as if mach_vm_region_recurse() failed.
384			 */
385			if (!(is_64) &&
386			    (vmoffset + vmsize > VM_MAX_ADDRESS)) {
387			    	kret = KERN_INVALID_ADDRESS;
388				break;
389			}
390			if(vbr.is_submap) {
391				nesting_depth++;
392				continue;
393			} else {
394				break;
395			}
396		}
397		if(kret != KERN_SUCCESS)
398			break;
399
400		prot = vbr.protection;
401		maxprot = vbr.max_protection;
402		inherit = vbr.inheritance;
403		/*
404		 *	Fill in segment command structure.
405		 */
406		if (is_64) {
407			sc64 = (struct segment_command_64 *)(header + hoffset);
408			sc64->cmd = LC_SEGMENT_64;
409			sc64->cmdsize = sizeof(struct segment_command_64);
410			/* segment name is zeroed by kmem_alloc */
411			sc64->segname[0] = 0;
412			sc64->vmaddr = vmoffset;
413			sc64->vmsize = vmsize;
414			sc64->fileoff = foffset;
415			sc64->filesize = vmsize;
416			sc64->maxprot = maxprot;
417			sc64->initprot = prot;
418			sc64->nsects = 0;
419		} else  {
420			sc = (struct segment_command *) (header + hoffset);
421			sc->cmd = LC_SEGMENT;
422			sc->cmdsize = sizeof(struct segment_command);
423			/* segment name is zeroed by kmem_alloc */
424			sc->segname[0] = 0;
425			sc->vmaddr = CAST_DOWN_EXPLICIT(vm_offset_t,vmoffset);
426			sc->vmsize = CAST_DOWN_EXPLICIT(vm_size_t,vmsize);
427			sc->fileoff = CAST_DOWN_EXPLICIT(uint32_t,foffset); /* will never truncate */
428			sc->filesize = CAST_DOWN_EXPLICIT(uint32_t,vmsize); /* will never truncate */
429			sc->maxprot = maxprot;
430			sc->initprot = prot;
431			sc->nsects = 0;
432		}
433
434		/*
435		 *	Write segment out.  Try as hard as possible to
436		 *	get read access to the data.
437		 */
438		if ((prot & VM_PROT_READ) == 0) {
439			mach_vm_protect(map, vmoffset, vmsize, FALSE,
440					   prot|VM_PROT_READ);
441		}
442		/*
443		 *	Only actually perform write if we can read.
444		 *	Note: if we can't read, then we end up with
445		 *	a hole in the file.
446		 */
447		if ((maxprot & VM_PROT_READ) == VM_PROT_READ
448			&& vbr.user_tag != VM_MEMORY_IOKIT
449			&& coredumpok(map,vmoffset)) {
450
451			error = vn_rdwr_64(UIO_WRITE, vp, vmoffset, vmsize, foffset,
452					(IS_64BIT_PROCESS(core_proc) ? UIO_USERSPACE64 : UIO_USERSPACE32),
453					IO_NOCACHE|IO_NODELOCKED|IO_UNIT, cred, (int64_t *) 0, core_proc);
454
455		}
456
457		hoffset += segment_command_sz;
458		foffset += vmsize;
459		vmoffset += vmsize;
460		segment_count--;
461	}
462
463	/*
464	 * If there are remaining segments which have not been written
465	 * out because break in the loop above, then they were not counted
466	 * because they exceed the real address space of the executable
467	 * type: remove them from the header's count.  This is OK, since
468	 * we are allowed to have a sparse area following the segments.
469	 */
470	if (is_64) {
471		mh64->ncmds -= segment_count;
472		mh64->sizeofcmds -= segment_count * segment_command_sz;
473	} else {
474		mh->ncmds -= segment_count;
475		mh->sizeofcmds -= segment_count * segment_command_sz;
476	}
477
478	tir1.header = header;
479	tir1.hoffset = hoffset;
480	tir1.flavors = flavors;
481	tir1.tstate_size = tstate_size;
482	task_act_iterate_wth_args(task, collectth_state,&tir1);
483
484	/*
485	 *	Write out the Mach header at the beginning of the
486	 *	file.  OK to use a 32 bit write for this.
487	 */
488	error = vn_rdwr(UIO_WRITE, vp, (caddr_t)header, header_size, (off_t)0,
489			UIO_SYSSPACE, IO_NOCACHE|IO_NODELOCKED|IO_UNIT, cred, (int *) 0, core_proc);
490	kmem_free(kernel_map, header, header_size);
491out:
492	error1 = vnode_close(vp, FWRITE, ctx);
493out2:
494#if CONFIG_AUDIT
495	audit_proc_coredump(core_proc, name, error);
496#endif
497	if (alloced_name != NULL)
498		FREE(alloced_name, M_TEMP);
499	if (error == 0)
500		error = error1;
501
502	return (error);
503}
504