1/*
2 * Copyright (c) 2000-2013 Apple Inc. All rights reserved.
3 *
4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5 *
6 * The contents of this file constitute Original Code as defined in and
7 * are subject to the Apple Public Source License Version 1.1 (the
8 * "License").  You may not use this file except in compliance with the
9 * License.  Please obtain a copy of the License at
10 * http://www.apple.com/publicsource and read it before using this file.
11 *
12 * This Original Code and all software distributed under the License are
13 * distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY KIND, EITHER
14 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
15 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
16 * FITNESS FOR A PARTICULAR PURPOSE OR NON-INFRINGEMENT.  Please see the
17 * License for the specific language governing rights and limitations
18 * under the License.
19 *
20 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
21 */
22
23
24#include <machine/spl.h>
25
26#include <sys/errno.h>
27#include <sys/param.h>
28#include <sys/systm.h>
29#include <sys/proc_internal.h>
30#include <sys/vm.h>
31#include <sys/sysctl.h>
32#include <sys/kdebug.h>
33#include <sys/sysproto.h>
34#include <sys/bsdtask_info.h>
35#include <sys/random.h>
36
37#define HZ      100
38#include <mach/clock_types.h>
39#include <mach/mach_types.h>
40#include <mach/mach_time.h>
41#include <machine/machine_routines.h>
42
43#if defined(__i386__) || defined(__x86_64__)
44#include <i386/rtclock_protos.h>
45#include <i386/mp.h>
46#include <i386/machine_routines.h>
47#endif
48
49#include <kern/clock.h>
50
51#include <kern/thread.h>
52#include <kern/task.h>
53#include <kern/debug.h>
54#include <kern/kalloc.h>
55#include <kern/cpu_data.h>
56#include <kern/assert.h>
57#include <kern/telemetry.h>
58#include <vm/vm_kern.h>
59#include <sys/lock.h>
60
61#include <sys/malloc.h>
62#include <sys/mcache.h>
63#include <sys/kauth.h>
64
65#include <sys/vnode.h>
66#include <sys/vnode_internal.h>
67#include <sys/fcntl.h>
68#include <sys/file_internal.h>
69#include <sys/ubc.h>
70#include <sys/param.h>			/* for isset() */
71
72#include <mach/mach_host.h>		/* for host_info() */
73#include <libkern/OSAtomic.h>
74
75#include <machine/pal_routines.h>
76
77/*
78 * IOP(s)
79 *
80 * https://coreoswiki.apple.com/wiki/pages/U6z3i0q9/Consistent_Logging_Implementers_Guide.html
81 *
82 * IOP(s) are auxiliary cores that want to participate in kdebug event logging.
83 * They are registered dynamically. Each is assigned a cpu_id at registration.
84 *
85 * NOTE: IOP trace events may not use the same clock hardware as "normal"
86 * cpus. There is an effort made to synchronize the IOP timebase with the
87 * AP, but it should be understood that there may be discrepancies.
88 *
89 * Once registered, an IOP is permanent; it cannot be unloaded or unregistered.
90 * The current implementation depends on this for thread safety.
91 *
92 * New registrations occur by allocating a kd_iop struct and assigning
93 * a provisional cpu_id of list_head->cpu_id + 1. Then a CAS to claim the
94 * list_head pointer resolves any races.
95 *
96 * You may safely walk the kd_iops list at any time, without holding locks.
97 *
98 * When allocating buffers, the current kd_iops head is captured. Any operations
99 * that depend on the buffer state (such as flushing IOP traces on reads,
100 * etc.) should use the captured list head. This will allow registrations to
101 * take place while trace is in use.
102 */
103
104typedef struct kd_iop {
105	kd_callback_t	callback;
106	uint32_t	cpu_id;
107	uint64_t	last_timestamp; /* Prevent timer rollback */
108	struct kd_iop*	next;
109} kd_iop_t;
110
111static kd_iop_t* kd_iops = NULL;
112
113/* XXX should have prototypes, but Mach does not provide one */
114void task_act_iterate_wth_args(task_t, void(*)(thread_t, void *), void *);
115int cpu_number(void);	/* XXX <machine/...> include path broken */
116
117/* XXX should probably be static, but it's debugging code... */
118int kdbg_read(user_addr_t, size_t *, vnode_t, vfs_context_t);
119void kdbg_control_chud(int, void *);
120int kdbg_control(int *, u_int, user_addr_t, size_t *);
121int kdbg_readcpumap(user_addr_t, size_t *);
122int kdbg_readcurcpumap(user_addr_t, size_t *);
123int kdbg_readthrmap(user_addr_t, size_t *, vnode_t, vfs_context_t);
124int kdbg_readcurthrmap(user_addr_t, size_t *);
125int kdbg_getreg(kd_regtype *);
126int kdbg_setreg(kd_regtype *);
127int kdbg_setrtcdec(kd_regtype *);
128int kdbg_setpidex(kd_regtype *);
129int kdbg_setpid(kd_regtype *);
130void kdbg_thrmap_init(void);
131int kdbg_reinit(boolean_t);
132int kdbg_bootstrap(boolean_t);
133
134int kdbg_cpumap_init_internal(kd_iop_t* iops, uint32_t cpu_count, uint8_t** cpumap, uint32_t* cpumap_size);
135kd_threadmap* kdbg_thrmap_init_internal(unsigned int count, unsigned int *mapsize, unsigned int *mapcount);
136
137static int kdbg_enable_typefilter(void);
138static int kdbg_disable_typefilter(void);
139
140static int create_buffers(boolean_t);
141static void delete_buffers(void);
142
143extern void IOSleep(int);
144
145/* trace enable status */
146unsigned int kdebug_enable = 0;
147
148/* A static buffer to record events prior to the start of regular logging */
149#define	KD_EARLY_BUFFER_MAX	 64
150static kd_buf		kd_early_buffer[KD_EARLY_BUFFER_MAX];
151static int		kd_early_index = 0;
152static boolean_t	kd_early_overflow = FALSE;
153
154#define SLOW_NOLOG	0x01
155#define SLOW_CHECKS	0x02
156#define SLOW_ENTROPY	0x04			/* Obsolescent */
157#define SLOW_CHUD	0x08
158
159#define EVENTS_PER_STORAGE_UNIT		2048
160#define MIN_STORAGE_UNITS_PER_CPU	4
161
162#define POINTER_FROM_KDS_PTR(x) (&kd_bufs[x.buffer_index].kdsb_addr[x.offset])
163
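/*
 * Storage units are referenced by a compact 32-bit kds_ptr rather than by a
 * raw pointer: buffer_index selects an entry in kd_bufs[] and offset selects
 * a kd_storage element within that buffer. The all-ones raw value
 * (KDS_PTR_NULL) terminates a list, and POINTER_FROM_KDS_PTR() converts a
 * kds_ptr back into a real kd_storage pointer.
 */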
164union kds_ptr {
165	struct {
166		uint32_t buffer_index:21;
167		uint16_t offset:11;
168	};
169	uint32_t raw;
170};
171
172struct kd_storage {
173	union	kds_ptr kds_next;
174	uint32_t kds_bufindx;
175	uint32_t kds_bufcnt;
176	uint32_t kds_readlast;
177	boolean_t kds_lostevents;
178	uint64_t  kds_timestamp;
179
180	kd_buf	kds_records[EVENTS_PER_STORAGE_UNIT];
181};
182
183#define MAX_BUFFER_SIZE			(1024 * 1024 * 128)
184#define N_STORAGE_UNITS_PER_BUFFER	(MAX_BUFFER_SIZE / sizeof(struct kd_storage))
185
186struct kd_storage_buffers {
187	struct	kd_storage	*kdsb_addr;
188	uint32_t		kdsb_size;
189};
190
191#define KDS_PTR_NULL 0xffffffff
192struct kd_storage_buffers *kd_bufs = NULL;
193int	n_storage_units = 0;
194int	n_storage_buffers = 0;
195int	n_storage_threshold = 0;
196int	kds_waiter = 0;
197
198#pragma pack(0)
199struct kd_bufinfo {
200	union  kds_ptr kd_list_head;
201	union  kds_ptr kd_list_tail;
202	boolean_t kd_lostevents;
203	uint32_t _pad;
204	uint64_t kd_prev_timebase;
205	uint32_t num_bufs;
206} __attribute__(( aligned(MAX_CPU_CACHE_LINE_SIZE) ));
207
208struct kd_ctrl_page_t {
209	union kds_ptr kds_free_list;
210	uint32_t enabled	:1;
211	uint32_t _pad0		:31;
212	int			kds_inuse_count;
213	uint32_t kdebug_flags;
214	uint32_t kdebug_slowcheck;
215	/*
216	 * The number of kd_bufinfo structs allocated may not match the current
217	 * number of active cpus. We capture the iops list head at initialization
218	 * which we could use to calculate the number of cpus we allocated data for,
219	 * unless it happens to be null. To avoid that case, we explicitly also
220	 * capture a cpu count.
221	 */
222	kd_iop_t* kdebug_iops;
223	uint32_t kdebug_cpus;
224} kd_ctrl_page = { .kds_free_list = {.raw = KDS_PTR_NULL}, .kdebug_slowcheck = SLOW_NOLOG };
225
226#pragma pack()
227
228struct kd_bufinfo *kdbip = NULL;
229
230#define KDCOPYBUF_COUNT	8192
231#define KDCOPYBUF_SIZE	(KDCOPYBUF_COUNT * sizeof(kd_buf))
232kd_buf *kdcopybuf = NULL;
233
234boolean_t kdlog_bg_trace = FALSE;
235boolean_t kdlog_bg_trace_running = FALSE;
236unsigned int bg_nkdbufs = 0;
237
238unsigned int nkdbufs = 0;
239unsigned int kdlog_beg=0;
240unsigned int kdlog_end=0;
241unsigned int kdlog_value1=0;
242unsigned int kdlog_value2=0;
243unsigned int kdlog_value3=0;
244unsigned int kdlog_value4=0;
245
246static lck_spin_t * kdw_spin_lock;
247static lck_spin_t * kds_spin_lock;
248static lck_mtx_t  * kd_trace_mtx_sysctl;
249static lck_grp_t  * kd_trace_mtx_sysctl_grp;
250static lck_attr_t * kd_trace_mtx_sysctl_attr;
251static lck_grp_attr_t   *kd_trace_mtx_sysctl_grp_attr;
252
253static lck_grp_t       *stackshot_subsys_lck_grp;
254static lck_grp_attr_t  *stackshot_subsys_lck_grp_attr;
255static lck_attr_t      *stackshot_subsys_lck_attr;
256static lck_mtx_t        stackshot_subsys_mutex;
257
258void *stackshot_snapbuf = NULL;
259
260int
261stack_snapshot2(pid_t pid, user_addr_t tracebuf, uint32_t tracebuf_size, uint32_t flags, uint32_t dispatch_offset, int32_t *retval);
262
263int
264stack_snapshot_from_kernel(pid_t pid, void *buf, uint32_t size, uint32_t flags, unsigned *bytesTraced);
265extern void
266kdp_snapshot_preflight(int pid, void  *tracebuf, uint32_t tracebuf_size, uint32_t flags, uint32_t dispatch_offset);
267
268extern int
269kdp_stack_snapshot_geterror(void);
270extern unsigned int
271kdp_stack_snapshot_bytes_traced(void);
272
273kd_threadmap *kd_mapptr = 0;
274unsigned int kd_mapsize = 0;
275unsigned int kd_mapcount = 0;
276
277off_t	RAW_file_offset = 0;
278int	RAW_file_written = 0;
279
280#define	RAW_FLUSH_SIZE	(2 * 1024 * 1024)
281
282pid_t global_state_pid = -1;       /* Used to control exclusive use of kd_buffer */
283
284#define DBG_FUNC_MASK	0xfffffffc
285
286/*  TODO: move to kdebug.h */
287#define CLASS_MASK      0xff000000
288#define CLASS_OFFSET    24
289#define SUBCLASS_MASK   0x00ff0000
290#define SUBCLASS_OFFSET 16
291#define CSC_MASK        0xffff0000	/*  class and subclass mask */
292#define CSC_OFFSET      SUBCLASS_OFFSET
293
294#define EXTRACT_CLASS(debugid)          ( (uint8_t) ( ((debugid) & CLASS_MASK   ) >> CLASS_OFFSET    ) )
295#define EXTRACT_SUBCLASS(debugid)       ( (uint8_t) ( ((debugid) & SUBCLASS_MASK) >> SUBCLASS_OFFSET ) )
296#define EXTRACT_CSC(debugid)            ( (uint16_t)( ((debugid) & CSC_MASK     ) >> CSC_OFFSET      ) )
297
298#define INTERRUPT	0x01050000
299#define MACH_vmfault	0x01300008
300#define BSC_SysCall	0x040c0000
301#define MACH_SysCall	0x010c0000
302#define DBG_SCALL_MASK	0xffff0000
303
304
305/* task to string structure */
306struct tts
307{
308  task_t    task;            /* from procs task */
309  pid_t     pid;             /* from procs p_pid  */
310  char      task_comm[20];   /* from procs p_comm */
311};
312
313typedef struct tts tts_t;
314
315struct krt
316{
317	kd_threadmap *map;    /* pointer to the map buffer */
318	int count;
319	int maxcount;
320	struct tts *atts;
321};
322
323typedef struct krt krt_t;
324
325/* This is for the CHUD toolkit call */
326typedef void (*kd_chudhook_fn) (uint32_t debugid, uintptr_t arg1,
327				uintptr_t arg2, uintptr_t arg3,
328				uintptr_t arg4, uintptr_t arg5);
329
330volatile kd_chudhook_fn kdebug_chudhook = 0;   /* pointer to CHUD toolkit function */
331
332__private_extern__ void stackshot_lock_init( void );
333
334static uint8_t *type_filter_bitmap;
335
336/*
337 * This allows kperf to swap out the global state pid when kperf ownership is
338 * passed from one process to another. It checks the old global state pid so
339 * that kperf can't accidentally steal control of trace when a non-kperf trace user has
340 * control of trace.
341 */
342void
343kdbg_swap_global_state_pid(pid_t old_pid, pid_t new_pid);
344
345void
346kdbg_swap_global_state_pid(pid_t old_pid, pid_t new_pid)
347{
348	if (!(kd_ctrl_page.kdebug_flags & KDBG_LOCKINIT))
349		return;
350
351	lck_mtx_lock(kd_trace_mtx_sysctl);
352
353	if (old_pid == global_state_pid)
354		global_state_pid = new_pid;
355
356	lck_mtx_unlock(kd_trace_mtx_sysctl);
357}
358
359static uint32_t
360kdbg_cpu_count(boolean_t early_trace)
361{
362	if (early_trace) {
363		/*
364		 * we've started tracing before the IOKit has even
365		 * started running... just use the static max value
366		 */
367		return max_ncpus;
368	}
369
370	host_basic_info_data_t hinfo;
371	mach_msg_type_number_t count = HOST_BASIC_INFO_COUNT;
372	host_info((host_t)1 /* BSD_HOST */, HOST_BASIC_INFO, (host_info_t)&hinfo, &count);
373	assert(hinfo.logical_cpu_max > 0);
374	return hinfo.logical_cpu_max;
375}
376
377#if MACH_ASSERT
378#endif /* MACH_ASSERT */
379
380static void
381kdbg_iop_list_callback(kd_iop_t* iop, kd_callback_type type, void* arg)
382{
383	while (iop) {
384		iop->callback.func(iop->callback.context, type, arg);
385		iop = iop->next;
386	}
387}
388
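/*
 * Toggle the global trace enable state. The enable bits and slowcheck flags
 * are updated with the kds spin lock held and interrupts disabled; registered
 * IOPs are then notified of the change (and sync-flushed on disable) outside
 * the lock.
 */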
389static void
390kdbg_set_tracing_enabled(boolean_t enabled, uint32_t trace_type)
391{
392	int s = ml_set_interrupts_enabled(FALSE);
393	lck_spin_lock(kds_spin_lock);
394
395	if (enabled) {
396		kdebug_enable |= trace_type;
397		kd_ctrl_page.kdebug_slowcheck &= ~SLOW_NOLOG;
398		kd_ctrl_page.enabled = 1;
399	} else {
400		kdebug_enable &= ~(KDEBUG_ENABLE_TRACE|KDEBUG_ENABLE_PPT);
401		kd_ctrl_page.kdebug_slowcheck |= SLOW_NOLOG;
402		kd_ctrl_page.enabled = 0;
403	}
404	lck_spin_unlock(kds_spin_lock);
405	ml_set_interrupts_enabled(s);
406
407	if (enabled) {
408		kdbg_iop_list_callback(kd_ctrl_page.kdebug_iops, KD_CALLBACK_KDEBUG_ENABLED, NULL);
409	} else {
410		/*
411		 * If you do not flush the IOP trace buffers, they can linger
412		 * for a considerable period; consider code which disables and
413		 * deallocates without a final sync flush.
414		 */
415		kdbg_iop_list_callback(kd_ctrl_page.kdebug_iops, KD_CALLBACK_KDEBUG_DISABLED, NULL);
416		kdbg_iop_list_callback(kd_ctrl_page.kdebug_iops, KD_CALLBACK_SYNC_FLUSH, NULL);
417	}
418}
419
420static void
421kdbg_set_flags(int slowflag, int enableflag, boolean_t enabled)
422{
423	int s = ml_set_interrupts_enabled(FALSE);
424	lck_spin_lock(kds_spin_lock);
425
426	if (enabled) {
427		kd_ctrl_page.kdebug_slowcheck |= slowflag;
428		kdebug_enable |= enableflag;
429	} else {
430		kd_ctrl_page.kdebug_slowcheck &= ~slowflag;
431		kdebug_enable &= ~enableflag;
432	}
433
434	lck_spin_unlock(kds_spin_lock);
435	ml_set_interrupts_enabled(s);
436}
437
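/*
 * disable_wrap()/enable_wrap() give a caller a window during which storage
 * units will not be recycled: the current slowcheck and flag state is saved,
 * KDBG_WRAPPED is cleared and KDBG_NOWRAP is set; enable_wrap() then restores
 * logging and re-flags KDBG_WRAPPED if events were lost in the meantime.
 */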
438void
439disable_wrap(uint32_t *old_slowcheck, uint32_t *old_flags)
440{
441	int s = ml_set_interrupts_enabled(FALSE);
442	lck_spin_lock(kds_spin_lock);
443
444	*old_slowcheck = kd_ctrl_page.kdebug_slowcheck;
445	*old_flags = kd_ctrl_page.kdebug_flags;
446
447	kd_ctrl_page.kdebug_flags &= ~KDBG_WRAPPED;
448	kd_ctrl_page.kdebug_flags |= KDBG_NOWRAP;
449
450	lck_spin_unlock(kds_spin_lock);
451	ml_set_interrupts_enabled(s);
452}
453
454void
455enable_wrap(uint32_t old_slowcheck, boolean_t lostevents)
456{
457	int s = ml_set_interrupts_enabled(FALSE);
458	lck_spin_lock(kds_spin_lock);
459
460	kd_ctrl_page.kdebug_flags &= ~KDBG_NOWRAP;
461
462	if ( !(old_slowcheck & SLOW_NOLOG))
463		kd_ctrl_page.kdebug_slowcheck &= ~SLOW_NOLOG;
464
465	if (lostevents == TRUE)
466		kd_ctrl_page.kdebug_flags |= KDBG_WRAPPED;
467
468	lck_spin_unlock(kds_spin_lock);
469	ml_set_interrupts_enabled(s);
470}
471
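/*
 * Size and allocate the trace buffers: one kd_bufinfo per cpu (APs plus any
 * registered IOPs), the kdcopybuf used for reads, and enough kd_storage
 * buffers to hold nkdbufs events (with a per-cpu minimum), threading every
 * storage unit onto the global free list. kd_iops is sampled exactly once so
 * the cpu count stays consistent for the life of these buffers.
 */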
472static int
473create_buffers(boolean_t early_trace)
474{
475        int	i;
476	int	p_buffer_size;
477	int	f_buffer_size;
478	int	f_buffers;
479	int	error = 0;
480
481	/*
482	 * For the duration of this allocation, trace code will only reference
483	 * kdebug_iops. Any iops registered after this point will not be
484	 * messaged until the buffers are reallocated.
485	 *
486	 * TLDR; Must read kd_iops once and only once!
487	 */
488	kd_ctrl_page.kdebug_iops = kd_iops;
489
490
491	/*
492	 * If the list is valid, it is sorted, newest -> oldest. Each iop entry
493	 * has a cpu_id of "the older entry + 1", so the highest cpu_id will
494	 * be the list head + 1.
495	 */
496
497	kd_ctrl_page.kdebug_cpus = kd_ctrl_page.kdebug_iops ? kd_ctrl_page.kdebug_iops->cpu_id + 1 : kdbg_cpu_count(early_trace);
498
499	if (kmem_alloc(kernel_map, (vm_offset_t *)&kdbip, sizeof(struct kd_bufinfo) * kd_ctrl_page.kdebug_cpus) != KERN_SUCCESS) {
500		error = ENOSPC;
501		goto out;
502	}
503
504	if (nkdbufs < (kd_ctrl_page.kdebug_cpus * EVENTS_PER_STORAGE_UNIT * MIN_STORAGE_UNITS_PER_CPU))
505		n_storage_units = kd_ctrl_page.kdebug_cpus * MIN_STORAGE_UNITS_PER_CPU;
506	else
507		n_storage_units = nkdbufs / EVENTS_PER_STORAGE_UNIT;
508
509	nkdbufs = n_storage_units * EVENTS_PER_STORAGE_UNIT;
510
511	f_buffers = n_storage_units / N_STORAGE_UNITS_PER_BUFFER;
512	n_storage_buffers = f_buffers;
513
514	f_buffer_size = N_STORAGE_UNITS_PER_BUFFER * sizeof(struct kd_storage);
515	p_buffer_size = (n_storage_units % N_STORAGE_UNITS_PER_BUFFER) * sizeof(struct kd_storage);
516
517	if (p_buffer_size)
518		n_storage_buffers++;
519
520	kd_bufs = NULL;
521
522	if (kdcopybuf == 0) {
523	        if (kmem_alloc(kernel_map, (vm_offset_t *)&kdcopybuf, (vm_size_t)KDCOPYBUF_SIZE) != KERN_SUCCESS) {
524			error = ENOSPC;
525			goto out;
526		}
527	}
528	if (kmem_alloc(kernel_map, (vm_offset_t *)&kd_bufs, (vm_size_t)(n_storage_buffers * sizeof(struct kd_storage_buffers))) != KERN_SUCCESS) {
529		error = ENOSPC;
530		goto out;
531	}
532	bzero(kd_bufs, n_storage_buffers * sizeof(struct kd_storage_buffers));
533
534	for (i = 0; i < f_buffers; i++) {
535		if (kmem_alloc(kernel_map, (vm_offset_t *)&kd_bufs[i].kdsb_addr, (vm_size_t)f_buffer_size) != KERN_SUCCESS) {
536			error = ENOSPC;
537			goto out;
538		}
539		bzero(kd_bufs[i].kdsb_addr, f_buffer_size);
540
541		kd_bufs[i].kdsb_size = f_buffer_size;
542	}
543	if (p_buffer_size) {
544		if (kmem_alloc(kernel_map, (vm_offset_t *)&kd_bufs[i].kdsb_addr, (vm_size_t)p_buffer_size) != KERN_SUCCESS) {
545			error = ENOSPC;
546			goto out;
547		}
548		bzero(kd_bufs[i].kdsb_addr, p_buffer_size);
549
550		kd_bufs[i].kdsb_size = p_buffer_size;
551	}
552	n_storage_units = 0;
553
554	for (i = 0; i < n_storage_buffers; i++) {
555		struct kd_storage *kds;
556		int	n_elements;
557		int	n;
558
559		n_elements = kd_bufs[i].kdsb_size / sizeof(struct kd_storage);
560		kds = kd_bufs[i].kdsb_addr;
561
562		for (n = 0; n < n_elements; n++) {
563			kds[n].kds_next.buffer_index = kd_ctrl_page.kds_free_list.buffer_index;
564			kds[n].kds_next.offset = kd_ctrl_page.kds_free_list.offset;
565
566			kd_ctrl_page.kds_free_list.buffer_index = i;
567			kd_ctrl_page.kds_free_list.offset = n;
568		}
569		n_storage_units += n_elements;
570	}
571
572	bzero((char *)kdbip, sizeof(struct kd_bufinfo) * kd_ctrl_page.kdebug_cpus);
573
574	for (i = 0; i < (int)kd_ctrl_page.kdebug_cpus; i++) {
575		kdbip[i].kd_list_head.raw = KDS_PTR_NULL;
576		kdbip[i].kd_list_tail.raw = KDS_PTR_NULL;
577		kdbip[i].kd_lostevents = FALSE;
578		kdbip[i].num_bufs = 0;
579	}
580
581	kd_ctrl_page.kdebug_flags |= KDBG_BUFINIT;
582
583	kd_ctrl_page.kds_inuse_count = 0;
584	n_storage_threshold = n_storage_units / 2;
585out:
586	if (error)
587		delete_buffers();
588
589	return(error);
590}
591
592static void
593delete_buffers(void)
594{
595	int i;
596
597	if (kd_bufs) {
598		for (i = 0; i < n_storage_buffers; i++) {
599			if (kd_bufs[i].kdsb_addr) {
600				kmem_free(kernel_map, (vm_offset_t)kd_bufs[i].kdsb_addr, (vm_size_t)kd_bufs[i].kdsb_size);
601			}
602		}
603		kmem_free(kernel_map, (vm_offset_t)kd_bufs, (vm_size_t)(n_storage_buffers * sizeof(struct kd_storage_buffers)));
604
605		kd_bufs = NULL;
606		n_storage_buffers = 0;
607	}
608	if (kdcopybuf) {
609		kmem_free(kernel_map, (vm_offset_t)kdcopybuf, KDCOPYBUF_SIZE);
610
611		kdcopybuf = NULL;
612	}
613	kd_ctrl_page.kds_free_list.raw = KDS_PTR_NULL;
614
615	if (kdbip) {
616		kmem_free(kernel_map, (vm_offset_t)kdbip, sizeof(struct kd_bufinfo) * kd_ctrl_page.kdebug_cpus);
617
618		kdbip = NULL;
619	}
620        kd_ctrl_page.kdebug_iops = NULL;
621	kd_ctrl_page.kdebug_cpus = 0;
622	kd_ctrl_page.kdebug_flags &= ~KDBG_BUFINIT;
623}
624
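/*
 * Return a storage unit to the global free list, but only if it is still the
 * head of the given cpu's list; a unit that has already been stolen by
 * allocate_storage_unit() is left alone.
 */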
625void
626release_storage_unit(int cpu, uint32_t kdsp_raw)
627{
628	int s = 0;
629	struct	kd_storage *kdsp_actual;
630	struct kd_bufinfo *kdbp;
631	union kds_ptr kdsp;
632
633	kdsp.raw = kdsp_raw;
634
635	s = ml_set_interrupts_enabled(FALSE);
636	lck_spin_lock(kds_spin_lock);
637
638	kdbp = &kdbip[cpu];
639
640	if (kdsp.raw == kdbp->kd_list_head.raw) {
641		/*
642		 * it's possible for the storage unit pointed to
643		 * by kdsp to have already been stolen... so
644		 * check to see if it's still the head of the list
645		 * now that we're behind the lock that protects
646		 * adding and removing from the queue...
647		 * since we only ever release and steal units from
648		 * that position, if it's no longer the head
649		 * we have nothing to do in this context
650		 */
651		kdsp_actual = POINTER_FROM_KDS_PTR(kdsp);
652		kdbp->kd_list_head = kdsp_actual->kds_next;
653
654		kdsp_actual->kds_next = kd_ctrl_page.kds_free_list;
655		kd_ctrl_page.kds_free_list = kdsp;
656
657		kd_ctrl_page.kds_inuse_count--;
658	}
659	lck_spin_unlock(kds_spin_lock);
660	ml_set_interrupts_enabled(s);
661}
662
663
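/*
 * Attach a fresh storage unit to the tail of the given cpu's list. Units come
 * from the global free list when possible; otherwise, if wrapping is allowed,
 * the oldest fully-written unit on any cpu is stolen and KDBG_WRAPPED is set.
 * Returns FALSE only when no unit could be obtained (wrapping disabled, or no
 * stealable unit), in which case logging is disabled.
 */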
664boolean_t
665allocate_storage_unit(int cpu)
666{
667	union	kds_ptr kdsp;
668	struct	kd_storage *kdsp_actual, *kdsp_next_actual;
669	struct  kd_bufinfo *kdbp, *kdbp_vict, *kdbp_try;
670	uint64_t	oldest_ts, ts;
671	boolean_t	retval = TRUE;
672	int			s = 0;
673
674	s = ml_set_interrupts_enabled(FALSE);
675	lck_spin_lock(kds_spin_lock);
676
677	kdbp = &kdbip[cpu];
678
679	/* If someone beat us to the allocation, return success */
680	if (kdbp->kd_list_tail.raw != KDS_PTR_NULL) {
681		kdsp_actual = POINTER_FROM_KDS_PTR(kdbp->kd_list_tail);
682
683		if (kdsp_actual->kds_bufindx < EVENTS_PER_STORAGE_UNIT)
684			goto out;
685	}
686
687	if ((kdsp = kd_ctrl_page.kds_free_list).raw != KDS_PTR_NULL) {
688		kdsp_actual = POINTER_FROM_KDS_PTR(kdsp);
689		kd_ctrl_page.kds_free_list = kdsp_actual->kds_next;
690
691		kd_ctrl_page.kds_inuse_count++;
692	} else {
693		if (kd_ctrl_page.kdebug_flags & KDBG_NOWRAP) {
694			kd_ctrl_page.kdebug_slowcheck |= SLOW_NOLOG;
695			kdbp->kd_lostevents = TRUE;
696			retval = FALSE;
697			goto out;
698		}
699		kdbp_vict = NULL;
700		oldest_ts = (uint64_t)-1;
701
702		for (kdbp_try = &kdbip[0]; kdbp_try < &kdbip[kd_ctrl_page.kdebug_cpus]; kdbp_try++) {
703
704			if (kdbp_try->kd_list_head.raw == KDS_PTR_NULL) {
705				/*
706				 * no storage unit to steal
707				 */
708				continue;
709			}
710
711			kdsp_actual = POINTER_FROM_KDS_PTR(kdbp_try->kd_list_head);
712
713			if (kdsp_actual->kds_bufcnt < EVENTS_PER_STORAGE_UNIT) {
714				/*
715				 * make sure we don't steal the storage unit
716				 * being actively recorded to...  need to
717				 * move on because we don't want an out-of-order
718				 * set of events showing up later
719				 */
720				continue;
721			}
722			ts = kdbg_get_timestamp(&kdsp_actual->kds_records[0]);
723
724			if (ts < oldest_ts) {
725				/*
726				 * when 'wrapping', we want to steal the
727				 * storage unit that has the 'earliest' time
728				 * associated with it (first event time)
729				 */
730				oldest_ts = ts;
731				kdbp_vict = kdbp_try;
732			}
733		}
734		if (kdbp_vict == NULL) {
735			kdebug_enable = 0;
736			kd_ctrl_page.enabled = 0;
737			retval = FALSE;
738			goto out;
739		}
740		kdsp = kdbp_vict->kd_list_head;
741		kdsp_actual = POINTER_FROM_KDS_PTR(kdsp);
742		kdbp_vict->kd_list_head = kdsp_actual->kds_next;
743
744		if (kdbp_vict->kd_list_head.raw != KDS_PTR_NULL) {
745			kdsp_next_actual = POINTER_FROM_KDS_PTR(kdbp_vict->kd_list_head);
746			kdsp_next_actual->kds_lostevents = TRUE;
747		} else
748			kdbp_vict->kd_lostevents = TRUE;
749
750		kd_ctrl_page.kdebug_flags |= KDBG_WRAPPED;
751	}
752	kdsp_actual->kds_timestamp = mach_absolute_time();
753	kdsp_actual->kds_next.raw = KDS_PTR_NULL;
754	kdsp_actual->kds_bufcnt	  = 0;
755	kdsp_actual->kds_readlast = 0;
756
757	kdsp_actual->kds_lostevents = kdbp->kd_lostevents;
758	kdbp->kd_lostevents = FALSE;
759	kdsp_actual->kds_bufindx  = 0;
760
761	if (kdbp->kd_list_head.raw == KDS_PTR_NULL)
762		kdbp->kd_list_head = kdsp;
763	else
764		POINTER_FROM_KDS_PTR(kdbp->kd_list_tail)->kds_next = kdsp;
765	kdbp->kd_list_tail = kdsp;
766out:
767	lck_spin_unlock(kds_spin_lock);
768	ml_set_interrupts_enabled(s);
769
770	return (retval);
771}
772
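/*
 * Register an IOP for kdebug logging. The new kd_iop is pushed onto the
 * lock-free kd_iops list with a compare-and-swap, and the assigned cpu_id is
 * returned (one past the previous head's, or the AP cpu count for the first
 * registration); 0 indicates allocation failure.
 *
 * Illustrative usage only (not taken from any particular driver; the callback
 * and context names are hypothetical):
 *
 *	kd_callback_t cb = {
 *		.func = my_iop_callback,	// hypothetical callback
 *		.context = my_context,		// hypothetical context pointer
 *		.iop_name = "MYIOP"
 *	};
 *	uint32_t cpu_id = kernel_debug_register_callback(cb);
 *	// later: kernel_debug_enter(cpu_id, debugid, timestamp, ...)
 */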
773int
774kernel_debug_register_callback(kd_callback_t callback)
775{
776	kd_iop_t* iop;
777	if (kmem_alloc(kernel_map, (vm_offset_t *)&iop, sizeof(kd_iop_t)) == KERN_SUCCESS) {
778		memcpy(&iop->callback, &callback, sizeof(kd_callback_t));
779
780		/*
781		 * <rdar://problem/13351477> Some IOP clients are not providing a name.
782		 *
783		 * Remove when fixed.
784		 */
785		{
786			boolean_t is_valid_name = FALSE;
787			for (uint32_t length=0; length<sizeof(callback.iop_name); ++length) {
788				/* This is roughly isprintable(c) */
789				if (callback.iop_name[length] > 0x20 && callback.iop_name[length] < 0x7F)
790					continue;
791				if (callback.iop_name[length] == 0) {
792					if (length)
793						is_valid_name = TRUE;
794					break;
795				}
796			}
797
798			if (!is_valid_name) {
799				strlcpy(iop->callback.iop_name, "IOP-???", sizeof(iop->callback.iop_name));
800			}
801		}
802
803		iop->last_timestamp = 0;
804
805		do {
806			/*
807			 * We use two pieces of state, the old list head
808			 * pointer, and the value of old_list_head->cpu_id.
809			 * If we read kd_iops more than once, it can change
810			 * between reads.
811			 *
812			 * TLDR; Must not read kd_iops more than once per loop.
813			 */
814			iop->next = kd_iops;
815			iop->cpu_id = iop->next ? (iop->next->cpu_id+1) : kdbg_cpu_count(FALSE);
816
817			/*
818			 * Header says OSCompareAndSwapPtr has a memory barrier
819			 */
820		} while (!OSCompareAndSwapPtr(iop->next, iop, (void* volatile*)&kd_iops));
821
822		return iop->cpu_id;
823	}
824
825	return 0;
826}
827
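/*
 * Event entry point for IOPs: the caller passes its registered coreid and its
 * own timestamp. Events go through the same typefilter/range/value checks as
 * AP events (but not the per-pid checks) before being stored on that core's
 * buffer list.
 */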
828void
829kernel_debug_enter(
830	uint32_t	coreid,
831	uint32_t	debugid,
832	uint64_t	timestamp,
833	uintptr_t	arg1,
834	uintptr_t	arg2,
835	uintptr_t	arg3,
836	uintptr_t	arg4,
837	uintptr_t	threadid
838	)
839{
840	uint32_t	bindx;
841	kd_buf		*kd;
842	struct kd_bufinfo *kdbp;
843	struct kd_storage *kdsp_actual;
844	union  kds_ptr kds_raw;
845
846	if (kd_ctrl_page.kdebug_slowcheck) {
847
848		if ( (kd_ctrl_page.kdebug_slowcheck & SLOW_NOLOG) || !(kdebug_enable & (KDEBUG_ENABLE_TRACE|KDEBUG_ENABLE_PPT)))
849			goto out1;
850
851		if (kd_ctrl_page.kdebug_flags & KDBG_TYPEFILTER_CHECK) {
852			if (isset(type_filter_bitmap, EXTRACT_CSC(debugid)))
853				goto record_event;
854			goto out1;
855		}
856		else if (kd_ctrl_page.kdebug_flags & KDBG_RANGECHECK) {
857			if (debugid >= kdlog_beg && debugid <= kdlog_end)
858				goto record_event;
859			goto out1;
860		}
861		else if (kd_ctrl_page.kdebug_flags & KDBG_VALCHECK) {
862			if ((debugid & DBG_FUNC_MASK) != kdlog_value1 &&
863				(debugid & DBG_FUNC_MASK) != kdlog_value2 &&
864				(debugid & DBG_FUNC_MASK) != kdlog_value3 &&
865				(debugid & DBG_FUNC_MASK) != kdlog_value4)
866				goto out1;
867		}
868	}
869
870record_event:
871
872	disable_preemption();
873
874	if (kd_ctrl_page.enabled == 0)
875		goto out;
876
877	kdbp = &kdbip[coreid];
878	timestamp &= KDBG_TIMESTAMP_MASK;
879
880retry_q:
881	kds_raw = kdbp->kd_list_tail;
882
883	if (kds_raw.raw != KDS_PTR_NULL) {
884		kdsp_actual = POINTER_FROM_KDS_PTR(kds_raw);
885		bindx = kdsp_actual->kds_bufindx;
886	} else
887		kdsp_actual = NULL;
888
889	if (kdsp_actual == NULL || bindx >= EVENTS_PER_STORAGE_UNIT) {
890		if (allocate_storage_unit(coreid) == FALSE) {
891			/*
892			 * this can only happen if wrapping
893			 * has been disabled
894			 */
895			goto out;
896		}
897		goto retry_q;
898	}
899	if ( !OSCompareAndSwap(bindx, bindx + 1, &kdsp_actual->kds_bufindx))
900		goto retry_q;
901
902	// IOP entries can be allocated before xnu allocates and inits the buffer
903	if (timestamp < kdsp_actual->kds_timestamp)
904		kdsp_actual->kds_timestamp = timestamp;
905
906	kd = &kdsp_actual->kds_records[bindx];
907
908	kd->debugid = debugid;
909	kd->arg1 = arg1;
910	kd->arg2 = arg2;
911	kd->arg3 = arg3;
912	kd->arg4 = arg4;
913	kd->arg5 = threadid;
914
915	kdbg_set_timestamp_and_cpu(kd, timestamp, coreid);
916
917	OSAddAtomic(1, &kdsp_actual->kds_bufcnt);
918out:
919	enable_preemption();
920out1:
921	if ((kds_waiter && kd_ctrl_page.kds_inuse_count >= n_storage_threshold)) {
922		boolean_t need_kds_wakeup = FALSE;
923		int	s;
924
925		/*
926		 * try to take the lock here to synchronize with the
927		 * waiter entering the blocked state... use the try
928		 * mode to prevent deadlocks caused by re-entering this
929		 * routine due to various trace points triggered in the
930		 * lck_spin_sleep_xxxx routines used to actually enter
931		 * our wait condition... no problem if we fail,
932		 * there will be lots of additional events coming in that
933		 * will eventually succeed in grabbing this lock
934		 */
935		s = ml_set_interrupts_enabled(FALSE);
936
937		if (lck_spin_try_lock(kdw_spin_lock)) {
938
939			if (kds_waiter && kd_ctrl_page.kds_inuse_count >= n_storage_threshold) {
940				kds_waiter = 0;
941				need_kds_wakeup = TRUE;
942			}
943			lck_spin_unlock(kdw_spin_lock);
944
945			ml_set_interrupts_enabled(s);
946
947			if (need_kds_wakeup == TRUE)
948				wakeup(&kds_waiter);
949		}
950	}
951}
952
953
954
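/*
 * Common fast path for events generated on the APs: run the slow-path checks
 * (CHUD hook, per-pid include/exclude, typefilter, range and value filters),
 * then claim a slot in the current cpu's active storage unit with a
 * compare-and-swap and timestamp the record.
 */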
955void
956kernel_debug_internal(
957	uint32_t	debugid,
958	uintptr_t	arg1,
959	uintptr_t	arg2,
960	uintptr_t	arg3,
961	uintptr_t	arg4,
962	uintptr_t	arg5);
963
964__attribute__((always_inline)) void
965kernel_debug_internal(
966	uint32_t	debugid,
967	uintptr_t	arg1,
968	uintptr_t	arg2,
969	uintptr_t	arg3,
970	uintptr_t	arg4,
971	uintptr_t	arg5)
972{
973	struct proc 	*curproc;
974	uint64_t 	now;
975	uint32_t	bindx;
976	boolean_t	s;
977	kd_buf		*kd;
978	int		cpu;
979	struct kd_bufinfo *kdbp;
980	struct kd_storage *kdsp_actual;
981	union  kds_ptr kds_raw;
982
983
984
985	if (kd_ctrl_page.kdebug_slowcheck) {
986
987		if (kdebug_enable & KDEBUG_ENABLE_CHUD) {
988			kd_chudhook_fn chudhook;
989			/*
990			 * Mask interrupts to minimize the interval across
991			 * which the driver providing the hook could be
992			 * unloaded.
993			 */
994			s = ml_set_interrupts_enabled(FALSE);
995			chudhook = kdebug_chudhook;
996			if (chudhook)
997				chudhook(debugid, arg1, arg2, arg3, arg4, arg5);
998			ml_set_interrupts_enabled(s);
999		}
1000		if ( (kd_ctrl_page.kdebug_slowcheck & SLOW_NOLOG) || !(kdebug_enable & (KDEBUG_ENABLE_TRACE|KDEBUG_ENABLE_PPT)))
1001			goto out1;
1002
1003		if ( !ml_at_interrupt_context()) {
1004			if (kd_ctrl_page.kdebug_flags & KDBG_PIDCHECK) {
1005				/*
1006				 * If kdebug flag is not set for current proc, return
1007				 */
1008				curproc = current_proc();
1009
1010				if ((curproc && !(curproc->p_kdebug)) &&
1011				    ((debugid & 0xffff0000) != (MACHDBG_CODE(DBG_MACH_SCHED, 0) | DBG_FUNC_NONE)) &&
1012				      (debugid >> 24 != DBG_TRACE))
1013					goto out1;
1014			}
1015			else if (kd_ctrl_page.kdebug_flags & KDBG_PIDEXCLUDE) {
1016				/*
1017				 * If kdebug flag is set for current proc, return
1018				 */
1019				curproc = current_proc();
1020
1021				if ((curproc && curproc->p_kdebug) &&
1022				    ((debugid & 0xffff0000) != (MACHDBG_CODE(DBG_MACH_SCHED, 0) | DBG_FUNC_NONE)) &&
1023				      (debugid >> 24 != DBG_TRACE))
1024					goto out1;
1025			}
1026		}
1027
1028		if (kd_ctrl_page.kdebug_flags & KDBG_TYPEFILTER_CHECK) {
1029			/* Always record trace system info */
1030			if (EXTRACT_CLASS(debugid) == DBG_TRACE)
1031				goto record_event;
1032
1033			if (isset(type_filter_bitmap, EXTRACT_CSC(debugid)))
1034				goto record_event;
1035			goto out1;
1036		}
1037		else if (kd_ctrl_page.kdebug_flags & KDBG_RANGECHECK) {
1038			/* Always record trace system info */
1039			if (EXTRACT_CLASS(debugid) == DBG_TRACE)
1040				goto record_event;
1041
1042			if (debugid < kdlog_beg || debugid > kdlog_end)
1043				goto out1;
1044		}
1045		else if (kd_ctrl_page.kdebug_flags & KDBG_VALCHECK) {
1046			/* Always record trace system info */
1047			if (EXTRACT_CLASS(debugid) == DBG_TRACE)
1048				goto record_event;
1049
1050			if ((debugid & DBG_FUNC_MASK) != kdlog_value1 &&
1051			    (debugid & DBG_FUNC_MASK) != kdlog_value2 &&
1052			    (debugid & DBG_FUNC_MASK) != kdlog_value3 &&
1053			    (debugid & DBG_FUNC_MASK) != kdlog_value4)
1054				goto out1;
1055		}
1056	}
1057record_event:
1058	disable_preemption();
1059
1060	if (kd_ctrl_page.enabled == 0)
1061		goto out;
1062
1063	cpu = cpu_number();
1064	kdbp = &kdbip[cpu];
1065retry_q:
1066	kds_raw = kdbp->kd_list_tail;
1067
1068	if (kds_raw.raw != KDS_PTR_NULL) {
1069		kdsp_actual = POINTER_FROM_KDS_PTR(kds_raw);
1070		bindx = kdsp_actual->kds_bufindx;
1071	} else
1072		kdsp_actual = NULL;
1073
1074	if (kdsp_actual == NULL || bindx >= EVENTS_PER_STORAGE_UNIT) {
1075		if (allocate_storage_unit(cpu) == FALSE) {
1076			/*
1077			 * this can only happen if wrapping
1078			 * has been disabled
1079			 */
1080			goto out;
1081		}
1082		goto retry_q;
1083	}
1084	now = mach_absolute_time() & KDBG_TIMESTAMP_MASK;
1085
1086	if ( !OSCompareAndSwap(bindx, bindx + 1, &kdsp_actual->kds_bufindx))
1087		goto retry_q;
1088
1089	kd = &kdsp_actual->kds_records[bindx];
1090
1091	kd->debugid = debugid;
1092	kd->arg1 = arg1;
1093	kd->arg2 = arg2;
1094	kd->arg3 = arg3;
1095	kd->arg4 = arg4;
1096	kd->arg5 = arg5;
1097
1098	kdbg_set_timestamp_and_cpu(kd, now, cpu);
1099
1100	OSAddAtomic(1, &kdsp_actual->kds_bufcnt);
1101out:
1102	enable_preemption();
1103out1:
1104	if (kds_waiter && kd_ctrl_page.kds_inuse_count >= n_storage_threshold) {
1105		uint32_t	etype;
1106		uint32_t	stype;
1107
1108		etype = debugid & DBG_FUNC_MASK;
1109		stype = debugid & DBG_SCALL_MASK;
1110
1111		if (etype == INTERRUPT || etype == MACH_vmfault ||
1112		    stype == BSC_SysCall || stype == MACH_SysCall) {
1113
1114			boolean_t need_kds_wakeup = FALSE;
1115
1116			/*
1117			 * try to take the lock here to synchronize with the
1118			 * waiter entering the blocked state... use the try
1119			 * mode to prevent deadlocks caused by re-entering this
1120			 * routine due to various trace points triggered in the
1121			 * lck_spin_sleep_xxxx routines used to actually enter
1122			 * one of our 2 wait conditions... no problem if we fail,
1123			 * there will be lots of additional events coming in that
1124			 * will eventually succeed in grabbing this lock
1125			 */
1126			s = ml_set_interrupts_enabled(FALSE);
1127
1128			if (lck_spin_try_lock(kdw_spin_lock)) {
1129
1130				if (kds_waiter && kd_ctrl_page.kds_inuse_count >= n_storage_threshold) {
1131					kds_waiter = 0;
1132					need_kds_wakeup = TRUE;
1133				}
1134				lck_spin_unlock(kdw_spin_lock);
1135			}
1136			ml_set_interrupts_enabled(s);
1137
1138			if (need_kds_wakeup == TRUE)
1139				wakeup(&kds_waiter);
1140		}
1141	}
1142}
1143
1144void
1145kernel_debug(
1146	uint32_t	debugid,
1147	uintptr_t	arg1,
1148	uintptr_t	arg2,
1149	uintptr_t	arg3,
1150	uintptr_t	arg4,
1151	__unused uintptr_t arg5)
1152{
1153	kernel_debug_internal(debugid, arg1, arg2, arg3, arg4, (uintptr_t)thread_tid(current_thread()));
1154}
1155
1156void
1157kernel_debug1(
1158	uint32_t	debugid,
1159	uintptr_t	arg1,
1160	uintptr_t	arg2,
1161	uintptr_t	arg3,
1162	uintptr_t	arg4,
1163	uintptr_t	arg5)
1164{
1165	kernel_debug_internal(debugid, arg1, arg2, arg3, arg4, arg5);
1166}
1167
1168void
1169kernel_debug_string(const char *message)
1170{
1171	uintptr_t arg[4] = {0, 0, 0, 0};
1172
1173	/* Stuff the message string in the args and log it. */
1174        strncpy((char *)arg, message, MIN(sizeof(arg), strlen(message)));
1175	KERNEL_DEBUG_EARLY(
1176		(TRACEDBG_CODE(DBG_TRACE_INFO, 4)) | DBG_FUNC_NONE,
1177		arg[0], arg[1], arg[2], arg[3]);
1178}
1179
1180extern int	master_cpu;		/* MACH_KERNEL_PRIVATE */
1181/*
1182 * Used prior to start_kern_tracing() being called.
1183 * Log temporarily into a static buffer.
1184 */
1185void
1186kernel_debug_early(
1187	uint32_t	debugid,
1188	uintptr_t	arg1,
1189	uintptr_t	arg2,
1190	uintptr_t	arg3,
1191	uintptr_t	arg4)
1192{
1193	/* If tracing is already initialized, use it */
1194	if (nkdbufs)
1195		KERNEL_DEBUG_CONSTANT(debugid, arg1, arg2, arg3, arg4, 0);
1196
1197	/* Do nothing if the buffer is full or we're not on the boot cpu */
1198	kd_early_overflow = kd_early_index >= KD_EARLY_BUFFER_MAX;
1199	if (kd_early_overflow ||
1200	    cpu_number() != master_cpu)
1201		return;
1202
1203	kd_early_buffer[kd_early_index].debugid = debugid;
1204	kd_early_buffer[kd_early_index].timestamp = mach_absolute_time();
1205	kd_early_buffer[kd_early_index].arg1 = arg1;
1206	kd_early_buffer[kd_early_index].arg2 = arg2;
1207	kd_early_buffer[kd_early_index].arg3 = arg3;
1208	kd_early_buffer[kd_early_index].arg4 = arg4;
1209	kd_early_buffer[kd_early_index].arg5 = 0;
1210	kd_early_index++;
1211}
1212
1213/*
1214 * Transfer the contents of the temporary buffer into the trace buffers.
1215 * Precede that by logging the rebase time (offset) - the TSC-based time (in ns)
1216 * when mach_absolute_time is set to 0.
1217 */
1218static void
1219kernel_debug_early_end(void)
1220{
1221	int	i;
1222
1223	if (cpu_number() != master_cpu)
1224		panic("kernel_debug_early_end() not called on boot processor");
1225
1226	/* Fake sentinel marking the start of kernel time relative to TSC */
1227	kernel_debug_enter(
1228		0,
1229		(TRACEDBG_CODE(DBG_TRACE_INFO, 1)) | DBG_FUNC_NONE,
1230		0,
1231		(uint32_t)(tsc_rebase_abs_time >> 32),
1232		(uint32_t)tsc_rebase_abs_time,
1233		0,
1234		0,
1235		0);
1236	for (i = 0; i < kd_early_index; i++) {
1237		kernel_debug_enter(
1238			0,
1239			kd_early_buffer[i].debugid,
1240			kd_early_buffer[i].timestamp,
1241			kd_early_buffer[i].arg1,
1242			kd_early_buffer[i].arg2,
1243			kd_early_buffer[i].arg3,
1244			kd_early_buffer[i].arg4,
1245			0);
1246	}
1247
1248	/* Cut events-lost event on overflow */
1249	if (kd_early_overflow)
1250		KERNEL_DEBUG_CONSTANT(
1251			TRACEDBG_CODE(DBG_TRACE_INFO, 2), 0, 0, 0, 0, 0);
1252
1253	/* This trace marks the start of kernel tracing */
1254	kernel_debug_string("early trace done");
1255}
1256
1257/*
1258 * Support syscall SYS_kdebug_trace
1259 */
1260int
1261kdebug_trace(__unused struct proc *p, struct kdebug_trace_args *uap, __unused int32_t *retval)
1262{
1263	if ( __probable(kdebug_enable == 0) )
1264		return(0);
1265
1266	kernel_debug_internal(uap->code, uap->arg1, uap->arg2, uap->arg3, uap->arg4, (uintptr_t)thread_tid(current_thread()));
1267
1268	return(0);
1269}
1270
1271
1272static void
1273kdbg_lock_init(void)
1274{
1275	if (kd_ctrl_page.kdebug_flags & KDBG_LOCKINIT)
1276		return;
1277
1278	/*
1279	 * allocate lock group attribute and group
1280	 */
1281	kd_trace_mtx_sysctl_grp_attr = lck_grp_attr_alloc_init();
1282	kd_trace_mtx_sysctl_grp = lck_grp_alloc_init("kdebug", kd_trace_mtx_sysctl_grp_attr);
1283
1284	/*
1285	 * allocate the lock attribute
1286	 */
1287	kd_trace_mtx_sysctl_attr = lck_attr_alloc_init();
1288
1289
1290	/*
1291	 * allocate and initialize mutex's
1292	 */
1293	kd_trace_mtx_sysctl = lck_mtx_alloc_init(kd_trace_mtx_sysctl_grp, kd_trace_mtx_sysctl_attr);
1294	kds_spin_lock = lck_spin_alloc_init(kd_trace_mtx_sysctl_grp, kd_trace_mtx_sysctl_attr);
1295	kdw_spin_lock = lck_spin_alloc_init(kd_trace_mtx_sysctl_grp, kd_trace_mtx_sysctl_attr);
1296
1297	kd_ctrl_page.kdebug_flags |= KDBG_LOCKINIT;
1298}
1299
1300
1301int
1302kdbg_bootstrap(boolean_t early_trace)
1303{
1304        kd_ctrl_page.kdebug_flags &= ~KDBG_WRAPPED;
1305
1306	return (create_buffers(early_trace));
1307}
1308
1309int
1310kdbg_reinit(boolean_t early_trace)
1311{
1312	int ret = 0;
1313
1314	/*
1315	 * Disable trace collecting
1316	 * First make sure we're not in
1317	 * the middle of cutting a trace
1318	 */
1319	kdbg_set_tracing_enabled(FALSE, KDEBUG_ENABLE_TRACE);
1320
1321	/*
1322	 * make sure the SLOW_NOLOG is seen
1323	 * by everyone that might be trying
1324	 * to cut a trace..
1325	 */
1326	IOSleep(100);
1327
1328	delete_buffers();
1329
1330	if ((kd_ctrl_page.kdebug_flags & KDBG_MAPINIT) && kd_mapsize && kd_mapptr) {
1331		kmem_free(kernel_map, (vm_offset_t)kd_mapptr, kd_mapsize);
1332		kd_ctrl_page.kdebug_flags &= ~KDBG_MAPINIT;
1333		kd_mapsize = 0;
1334		kd_mapptr = (kd_threadmap *) 0;
1335		kd_mapcount = 0;
1336	}
1337	ret = kdbg_bootstrap(early_trace);
1338
1339	RAW_file_offset = 0;
1340	RAW_file_written = 0;
1341
1342	return(ret);
1343}
1344
1345void
1346kdbg_trace_data(struct proc *proc, long *arg_pid)
1347{
1348	if (!proc)
1349		*arg_pid = 0;
1350	else
1351		*arg_pid = proc->p_pid;
1352}
1353
1354
1355void
1356kdbg_trace_string(struct proc *proc, long *arg1, long *arg2, long *arg3, long *arg4)
1357{
1358	char *dbg_nameptr;
1359	int dbg_namelen;
1360	long dbg_parms[4];
1361
1362	if (!proc) {
1363		*arg1 = 0;
1364		*arg2 = 0;
1365		*arg3 = 0;
1366		*arg4 = 0;
1367		return;
1368	}
1369	/*
1370	 * Collect the pathname for tracing
1371	 */
1372	dbg_nameptr = proc->p_comm;
1373	dbg_namelen = (int)strlen(proc->p_comm);
1374	dbg_parms[0]=0L;
1375	dbg_parms[1]=0L;
1376	dbg_parms[2]=0L;
1377	dbg_parms[3]=0L;
1378
1379	if(dbg_namelen > (int)sizeof(dbg_parms))
1380		dbg_namelen = (int)sizeof(dbg_parms);
1381
1382	strncpy((char *)dbg_parms, dbg_nameptr, dbg_namelen);
1383
1384	*arg1=dbg_parms[0];
1385	*arg2=dbg_parms[1];
1386	*arg3=dbg_parms[2];
1387	*arg4=dbg_parms[3];
1388}
1389
1390static void
1391kdbg_resolve_map(thread_t th_act, void *opaque)
1392{
1393	kd_threadmap *mapptr;
1394	krt_t *t = (krt_t *)opaque;
1395
1396	if (t->count < t->maxcount) {
1397		mapptr = &t->map[t->count];
1398		mapptr->thread  = (uintptr_t)thread_tid(th_act);
1399
1400		(void) strlcpy (mapptr->command, t->atts->task_comm,
1401				sizeof(t->atts->task_comm));
1402		/*
1403		 * Some kernel threads have no associated pid.
1404		 * We still need to mark the entry as valid.
1405		 */
1406		if (t->atts->pid)
1407			mapptr->valid = t->atts->pid;
1408		else
1409			mapptr->valid = 1;
1410
1411		t->count++;
1412	}
1413}
1414
1415/*
1416 *
1417 * Writes a cpumap for the given iops_list/cpu_count to the provided buffer.
1418 *
1419 * You may provide a buffer and size, or if you set the buffer to NULL, a
1420 * buffer of sufficient size will be allocated.
1421 *
1422 * If you provide a buffer and it is too small, sets cpumap_size to the number
1423 * of bytes required and returns EINVAL.
1424 *
1425 * On success, if you provided a buffer, cpumap_size is set to the number of
1426 * bytes written. If you did not provide a buffer, cpumap is set to the newly
1427 * allocated buffer and cpumap_size is set to the number of bytes allocated.
1428 *
1429 * NOTE: It may seem redundant to pass both iops and a cpu_count.
1430 *
1431 * We may be reporting data from "now", or from the "past".
1432 *
1433 * The "now" data would be for something like kdbg_readcurcpumap().
1434 * The "past" data would be for kdbg_readcpumap().
1435 *
1436 * If we do not pass both iops and cpu_count, and iops is NULL, this function
1437 * will need to read "now" state to get the number of cpus, which would be in
1438 * error if we were reporting "past" state.
1439 */
1440
1441int
1442kdbg_cpumap_init_internal(kd_iop_t* iops, uint32_t cpu_count, uint8_t** cpumap, uint32_t* cpumap_size)
1443{
1444	assert(cpumap);
1445	assert(cpumap_size);
1446	assert(cpu_count);
1447	assert(!iops || iops->cpu_id + 1 == cpu_count);
1448
1449	uint32_t bytes_needed = sizeof(kd_cpumap_header) + cpu_count * sizeof(kd_cpumap);
1450	uint32_t bytes_available = *cpumap_size;
1451	*cpumap_size = bytes_needed;
1452
1453	if (*cpumap == NULL) {
1454		if (kmem_alloc(kernel_map, (vm_offset_t*)cpumap, (vm_size_t)*cpumap_size) != KERN_SUCCESS) {
1455			return ENOMEM;
1456		}
1457	} else if (bytes_available < bytes_needed) {
1458		return EINVAL;
1459	}
1460
1461	kd_cpumap_header* header = (kd_cpumap_header*)(uintptr_t)*cpumap;
1462
1463	header->version_no = RAW_VERSION1;
1464	header->cpu_count = cpu_count;
1465
1466	kd_cpumap* cpus = (kd_cpumap*)&header[1];
1467
1468	int32_t index = cpu_count - 1;
1469	while (iops) {
1470		cpus[index].cpu_id = iops->cpu_id;
1471		cpus[index].flags = KDBG_CPUMAP_IS_IOP;
1472		bzero(cpus[index].name, sizeof(cpus->name));
1473		strlcpy(cpus[index].name, iops->callback.iop_name, sizeof(cpus->name));
1474
1475		iops = iops->next;
1476		index--;
1477	}
1478
1479	while (index >= 0) {
1480		cpus[index].cpu_id = index;
1481		cpus[index].flags = 0;
1482		bzero(cpus[index].name, sizeof(cpus->name));
1483		strlcpy(cpus[index].name, "AP", sizeof(cpus->name));
1484
1485		index--;
1486	}
1487
1488	return KERN_SUCCESS;
1489}
1490
1491void
1492kdbg_thrmap_init(void)
1493{
1494        if (kd_ctrl_page.kdebug_flags & KDBG_MAPINIT)
1495		return;
1496
1497	kd_mapptr = kdbg_thrmap_init_internal(0, &kd_mapsize, &kd_mapcount);
1498
1499	if (kd_mapptr)
1500		kd_ctrl_page.kdebug_flags |= KDBG_MAPINIT;
1501}
1502
1503
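/*
 * Build the thread map: walk allproc to size the buffers (padded by 25% to
 * absorb processes created during allocation), take a task reference on each
 * live process, then record one kd_threadmap entry per thread. If a non-zero
 * count is passed and is smaller than the number of entries required, NULL is
 * returned.
 */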
1504kd_threadmap* kdbg_thrmap_init_internal(unsigned int count, unsigned int *mapsize, unsigned int *mapcount)
1505{
1506	kd_threadmap	*mapptr;
1507	struct proc	*p;
1508	struct krt	akrt;
1509	int		tts_count;    /* number of task-to-string structures */
1510	struct tts	*tts_mapptr;
1511	unsigned int	tts_mapsize = 0;
1512	int		i;
1513	vm_offset_t	kaddr;
1514
1515	/*
1516	 * need to use PROC_SCANPROCLIST with proc_iterate
1517	 */
1518	proc_list_lock();
1519
1520	/*
1521	 * Calculate the sizes of map buffers
1522	 */
1523	for (p = allproc.lh_first, *mapcount=0, tts_count=0; p; p = p->p_list.le_next) {
1524		*mapcount += get_task_numacts((task_t)p->task);
1525		tts_count++;
1526	}
1527	proc_list_unlock();
1528
1529	/*
1530	 * The proc count could change during buffer allocation,
1531	 * so introduce a small fudge factor to bump up the
1532	 * buffer sizes. This gives new tasks some chance of
1533	 * making it into the tables.  Bump up by 25%.
1534	 */
1535	*mapcount += *mapcount/4;
1536	tts_count += tts_count/4;
1537
1538	*mapsize = *mapcount * sizeof(kd_threadmap);
1539
1540	if (count && count < *mapcount)
1541		return (0);
1542
1543	if ((kmem_alloc(kernel_map, &kaddr, (vm_size_t)*mapsize) == KERN_SUCCESS)) {
1544		bzero((void *)kaddr, *mapsize);
1545		mapptr = (kd_threadmap *)kaddr;
1546	} else
1547		return (0);
1548
1549	tts_mapsize = tts_count * sizeof(struct tts);
1550
1551	if ((kmem_alloc(kernel_map, &kaddr, (vm_size_t)tts_mapsize) == KERN_SUCCESS)) {
1552		bzero((void *)kaddr, tts_mapsize);
1553		tts_mapptr = (struct tts *)kaddr;
1554	} else {
1555		kmem_free(kernel_map, (vm_offset_t)mapptr, *mapsize);
1556
1557		return (0);
1558	}
1559	/*
1560	 * We need to save each proc's command string
1561	 * and take a reference on each task associated
1562	 * with a valid process.
1563	 */
1564
1565	proc_list_lock();
1566
1567	/*
1568	 * should use proc_iterate
1569	 */
1570	for (p = allproc.lh_first, i=0; p && i < tts_count; p = p->p_list.le_next) {
1571		if (p->p_lflag & P_LEXIT)
1572			continue;
1573
1574		if (p->task) {
1575			task_reference(p->task);
1576			tts_mapptr[i].task = p->task;
1577			tts_mapptr[i].pid  = p->p_pid;
1578			(void)strlcpy(tts_mapptr[i].task_comm, p->p_comm, sizeof(tts_mapptr[i].task_comm));
1579			i++;
1580		}
1581	}
1582	tts_count = i;
1583
1584	proc_list_unlock();
1585
1586	/*
1587	 * Initialize thread map data
1588	 */
1589	akrt.map = mapptr;
1590	akrt.count = 0;
1591	akrt.maxcount = *mapcount;
1592
1593	for (i = 0; i < tts_count; i++) {
1594		akrt.atts = &tts_mapptr[i];
1595		task_act_iterate_wth_args(tts_mapptr[i].task, kdbg_resolve_map, &akrt);
1596		task_deallocate((task_t) tts_mapptr[i].task);
1597	}
1598	kmem_free(kernel_map, (vm_offset_t)tts_mapptr, tts_mapsize);
1599
1600	*mapcount = akrt.count;
1601
1602	return (mapptr);
1603}
1604
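/*
 * Tear down tracing state: disable event collection, clear the filter and
 * pid-check flags, free the trace and thread-map buffers, and reset the RAW
 * file bookkeeping.
 */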
1605static void
1606kdbg_clear(void)
1607{
1608        /*
1609	 * Clean up the trace buffer
1610	 * First make sure we're not in
1611	 * the middle of cutting a trace
1612	 */
1613	kdbg_set_tracing_enabled(FALSE, KDEBUG_ENABLE_TRACE);
1614
1615	/*
1616	 * make sure the SLOW_NOLOG is seen
1617	 * by everyone that might be trying
1618	 * to cut a trace..
1619	 */
1620	IOSleep(100);
1621
1622        global_state_pid = -1;
1623	kd_ctrl_page.kdebug_flags &= (unsigned int)~KDBG_CKTYPES;
1624	kd_ctrl_page.kdebug_flags &= ~(KDBG_NOWRAP | KDBG_RANGECHECK | KDBG_VALCHECK);
1625	kd_ctrl_page.kdebug_flags &= ~(KDBG_PIDCHECK | KDBG_PIDEXCLUDE);
1626
1627	kdbg_disable_typefilter();
1628
1629	delete_buffers();
1630	nkdbufs	= 0;
1631
1632	/* Clean up the thread map buffer */
1633	kd_ctrl_page.kdebug_flags &= ~KDBG_MAPINIT;
1634	if (kd_mapptr) {
1635		kmem_free(kernel_map, (vm_offset_t)kd_mapptr, kd_mapsize);
1636		kd_mapptr = (kd_threadmap *) 0;
1637	}
1638	kd_mapsize = 0;
1639	kd_mapcount = 0;
1640
1641	RAW_file_offset = 0;
1642	RAW_file_written = 0;
1643}
1644
1645int
1646kdbg_setpid(kd_regtype *kdr)
1647{
1648	pid_t pid;
1649	int flag, ret=0;
1650	struct proc *p;
1651
1652	pid = (pid_t)kdr->value1;
1653	flag = (int)kdr->value2;
1654
1655	if (pid > 0) {
1656		if ((p = proc_find(pid)) == NULL)
1657			ret = ESRCH;
1658		else {
1659			if (flag == 1) {
1660				/*
1661				 * turn on pid check for this and all pids
1662				 */
1663				kd_ctrl_page.kdebug_flags |= KDBG_PIDCHECK;
1664				kd_ctrl_page.kdebug_flags &= ~KDBG_PIDEXCLUDE;
1665				kdbg_set_flags(SLOW_CHECKS, 0, TRUE);
1666
1667				p->p_kdebug = 1;
1668			} else {
1669				/*
1670				 * turn off pid check for this pid value
1671				 * Don't turn off all pid checking though
1672				 *
1673				 * kd_ctrl_page.kdebug_flags &= ~KDBG_PIDCHECK;
1674				 */
1675				p->p_kdebug = 0;
1676			}
1677			proc_rele(p);
1678		}
1679	}
1680	else
1681		ret = EINVAL;
1682
1683	return(ret);
1684}
1685
1686/* This is for pid exclusion in the trace buffer */
1687int
1688kdbg_setpidex(kd_regtype *kdr)
1689{
1690	pid_t pid;
1691	int flag, ret=0;
1692	struct proc *p;
1693
1694	pid = (pid_t)kdr->value1;
1695	flag = (int)kdr->value2;
1696
1697	if (pid > 0) {
1698		if ((p = proc_find(pid)) == NULL)
1699			ret = ESRCH;
1700		else {
1701			if (flag == 1) {
1702				/*
1703				 * turn on pid exclusion
1704				 */
1705				kd_ctrl_page.kdebug_flags |= KDBG_PIDEXCLUDE;
1706				kd_ctrl_page.kdebug_flags &= ~KDBG_PIDCHECK;
1707				kdbg_set_flags(SLOW_CHECKS, 0, TRUE);
1708
1709				p->p_kdebug = 1;
1710			}
1711			else {
1712				/*
1713				 * turn off pid exclusion for this pid value
1714				 * Don't turn off all pid exclusion though
1715				 *
1716				 * kd_ctrl_page.kdebug_flags &= ~KDBG_PIDEXCLUDE;
1717				 */
1718				p->p_kdebug = 0;
1719			}
1720			proc_rele(p);
1721		}
1722	} else
1723		ret = EINVAL;
1724
1725	return(ret);
1726}
1727
1728
1729/*
1730 * This is for setting a maximum decrementer value
1731 */
1732int
1733kdbg_setrtcdec(kd_regtype *kdr)
1734{
1735	int ret = 0;
1736	natural_t decval;
1737
1738	decval = (natural_t)kdr->value1;
1739
1740	if (decval && decval < KDBG_MINRTCDEC)
1741		ret = EINVAL;
1742	else
1743		ret = ENOTSUP;
1744
1745	return(ret);
1746}
1747
1748int
1749kdbg_enable_typefilter(void)
1750{
1751	if (kd_ctrl_page.kdebug_flags & KDBG_TYPEFILTER_CHECK) {
1752		/* free the old filter */
1753		kdbg_disable_typefilter();
1754	}
1755
1756	if (kmem_alloc(kernel_map, (vm_offset_t *)&type_filter_bitmap, KDBG_TYPEFILTER_BITMAP_SIZE) != KERN_SUCCESS) {
1757		return ENOSPC;
1758	}
1759
1760	bzero(type_filter_bitmap, KDBG_TYPEFILTER_BITMAP_SIZE);
1761
1762	/* Turn off range and value checks */
1763	kd_ctrl_page.kdebug_flags &= ~(KDBG_RANGECHECK | KDBG_VALCHECK);
1764
1765	/* Enable filter checking */
1766	kd_ctrl_page.kdebug_flags |= KDBG_TYPEFILTER_CHECK;
1767	kdbg_set_flags(SLOW_CHECKS, 0, TRUE);
1768	return 0;
1769}
1770
1771int
1772kdbg_disable_typefilter(void)
1773{
1774	/*  Disable filter checking */
1775	kd_ctrl_page.kdebug_flags &= ~KDBG_TYPEFILTER_CHECK;
1776
1777	/*  Turn off slow checks unless pid checks are using them */
1778	if ( (kd_ctrl_page.kdebug_flags & (KDBG_PIDCHECK | KDBG_PIDEXCLUDE)) )
1779		kdbg_set_flags(SLOW_CHECKS, 0, TRUE);
1780	else
1781		kdbg_set_flags(SLOW_CHECKS, 0, FALSE);
1782
1783	if(type_filter_bitmap == NULL)
1784		return 0;
1785
1786	vm_offset_t old_bitmap = (vm_offset_t)type_filter_bitmap;
1787	type_filter_bitmap = NULL;
1788
1789	kmem_free(kernel_map, old_bitmap, KDBG_TYPEFILTER_BITMAP_SIZE);
1790	return 0;
1791}
1792
1793int
1794kdbg_setreg(kd_regtype * kdr)
1795{
1796	int ret=0;
1797	unsigned int val_1, val_2, val;
1798	switch (kdr->type) {
1799
1800	case KDBG_CLASSTYPE :
1801		val_1 = (kdr->value1 & 0xff);
1802		val_2 = (kdr->value2 & 0xff);
1803		kdlog_beg = (val_1<<24);
1804		kdlog_end = (val_2<<24);
1805		kd_ctrl_page.kdebug_flags &= (unsigned int)~KDBG_CKTYPES;
1806		kd_ctrl_page.kdebug_flags &= ~KDBG_VALCHECK;       /* Turn off specific value check  */
1807		kd_ctrl_page.kdebug_flags |= (KDBG_RANGECHECK | KDBG_CLASSTYPE);
1808		kdbg_set_flags(SLOW_CHECKS, 0, TRUE);
1809		break;
1810	case KDBG_SUBCLSTYPE :
1811		val_1 = (kdr->value1 & 0xff);
1812		val_2 = (kdr->value2 & 0xff);
1813		val = val_2 + 1;
1814		kdlog_beg = ((val_1<<24) | (val_2 << 16));
1815		kdlog_end = ((val_1<<24) | (val << 16));
1816		kd_ctrl_page.kdebug_flags &= (unsigned int)~KDBG_CKTYPES;
1817		kd_ctrl_page.kdebug_flags &= ~KDBG_VALCHECK;       /* Turn off specific value check  */
1818		kd_ctrl_page.kdebug_flags |= (KDBG_RANGECHECK | KDBG_SUBCLSTYPE);
1819		kdbg_set_flags(SLOW_CHECKS, 0, TRUE);
1820		break;
1821	case KDBG_RANGETYPE :
1822		kdlog_beg = (kdr->value1);
1823		kdlog_end = (kdr->value2);
1824		kd_ctrl_page.kdebug_flags &= (unsigned int)~KDBG_CKTYPES;
1825		kd_ctrl_page.kdebug_flags &= ~KDBG_VALCHECK;       /* Turn off specific value check  */
1826		kd_ctrl_page.kdebug_flags |= (KDBG_RANGECHECK | KDBG_RANGETYPE);
1827		kdbg_set_flags(SLOW_CHECKS, 0, TRUE);
1828		break;
1829	case KDBG_VALCHECK:
1830		kdlog_value1 = (kdr->value1);
1831		kdlog_value2 = (kdr->value2);
1832		kdlog_value3 = (kdr->value3);
1833		kdlog_value4 = (kdr->value4);
1834		kd_ctrl_page.kdebug_flags &= (unsigned int)~KDBG_CKTYPES;
1835		kd_ctrl_page.kdebug_flags &= ~KDBG_RANGECHECK;    /* Turn off range check */
1836		kd_ctrl_page.kdebug_flags |= KDBG_VALCHECK;       /* Turn on specific value check  */
1837		kdbg_set_flags(SLOW_CHECKS, 0, TRUE);
1838		break;
1839	case KDBG_TYPENONE :
1840		kd_ctrl_page.kdebug_flags &= (unsigned int)~KDBG_CKTYPES;
1841
1842		if ( (kd_ctrl_page.kdebug_flags & (KDBG_RANGECHECK | KDBG_VALCHECK   |
1843						   KDBG_PIDCHECK   | KDBG_PIDEXCLUDE |
1844						   KDBG_TYPEFILTER_CHECK)) )
1845			kdbg_set_flags(SLOW_CHECKS, 0, TRUE);
1846		else
1847			kdbg_set_flags(SLOW_CHECKS, 0, FALSE);
1848
1849		kdlog_beg = 0;
1850		kdlog_end = 0;
1851		break;
1852	default :
1853		ret = EINVAL;
1854		break;
1855	}
1856	return(ret);
1857}
1858
1859int
1860kdbg_getreg(__unused kd_regtype * kdr)
1861{
1862#if 0
1863	int i,j, ret=0;
1864	unsigned int val_1, val_2, val;
1865
1866	switch (kdr->type) {
1867	case KDBG_CLASSTYPE :
1868		val_1 = (kdr->value1 & 0xff);
1869		val_2 = val_1 + 1;
1870		kdlog_beg = (val_1<<24);
1871		kdlog_end = (val_2<<24);
1872		kd_ctrl_page.kdebug_flags &= (unsigned int)~KDBG_CKTYPES;
1873		kd_ctrl_page.kdebug_flags |= (KDBG_RANGECHECK | KDBG_CLASSTYPE);
1874		break;
1875	case KDBG_SUBCLSTYPE :
1876		val_1 = (kdr->value1 & 0xff);
1877		val_2 = (kdr->value2 & 0xff);
1878		val = val_2 + 1;
1879		kdlog_beg = ((val_1<<24) | (val_2 << 16));
1880		kdlog_end = ((val_1<<24) | (val << 16));
1881		kd_ctrl_page.kdebug_flags &= (unsigned int)~KDBG_CKTYPES;
1882		kd_ctrl_page.kdebug_flags |= (KDBG_RANGECHECK | KDBG_SUBCLSTYPE);
1883		break;
1884	case KDBG_RANGETYPE :
1885		kdlog_beg = (kdr->value1);
1886		kdlog_end = (kdr->value2);
1887		kd_ctrl_page.kdebug_flags &= (unsigned int)~KDBG_CKTYPES;
1888		kd_ctrl_page.kdebug_flags |= (KDBG_RANGECHECK | KDBG_RANGETYPE);
1889		break;
1890	case KDBG_TYPENONE :
1891		kd_ctrl_page.kdebug_flags &= (unsigned int)~KDBG_CKTYPES;
1892		kdlog_beg = 0;
1893		kdlog_end = 0;
1894		break;
1895	default :
1896		ret = EINVAL;
1897		break;
1898	}
1899#endif /* 0 */
1900	return(EINVAL);
1901}
1902
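/*
 * Copy the current cpumap out to user space. *user_cpumap_size is set to the
 * full cpumap size even if only a truncated copy (or no copy) was written.
 */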
1903int
1904kdbg_readcpumap(user_addr_t user_cpumap, size_t *user_cpumap_size)
1905{
1906	uint8_t* cpumap = NULL;
1907	uint32_t cpumap_size = 0;
1908	int ret = KERN_SUCCESS;
1909
1910	if (kd_ctrl_page.kdebug_flags & KDBG_BUFINIT) {
1911		if (kdbg_cpumap_init_internal(kd_ctrl_page.kdebug_iops, kd_ctrl_page.kdebug_cpus, &cpumap, &cpumap_size) == KERN_SUCCESS) {
1912			if (user_cpumap) {
1913				size_t bytes_to_copy = (*user_cpumap_size >= cpumap_size) ? cpumap_size : *user_cpumap_size;
1914				if (copyout(cpumap, user_cpumap, (size_t)bytes_to_copy)) {
1915					ret = EFAULT;
1916				}
1917			}
1918			*user_cpumap_size = cpumap_size;
1919			kmem_free(kernel_map, (vm_offset_t)cpumap, cpumap_size);
1920		} else
1921			ret = EINVAL;
1922	} else
1923		ret = EINVAL;
1924
1925	return (ret);
1926}
1927
1928int
1929kdbg_readcurthrmap(user_addr_t buffer, size_t *bufsize)
1930{
1931	kd_threadmap *mapptr;
1932	unsigned int mapsize;
1933	unsigned int mapcount;
1934	unsigned int count = 0;
1935	int ret = 0;
1936
1937	count = *bufsize/sizeof(kd_threadmap);
1938	*bufsize = 0;
1939
1940	if ( (mapptr = kdbg_thrmap_init_internal(count, &mapsize, &mapcount)) ) {
1941		if (copyout(mapptr, buffer, mapcount * sizeof(kd_threadmap)))
1942			ret = EFAULT;
1943		else
1944			*bufsize = (mapcount * sizeof(kd_threadmap));
1945
1946		kmem_free(kernel_map, (vm_offset_t)mapptr, mapsize);
1947	} else
1948		ret = EINVAL;
1949
1950	return (ret);
1951}
1952
1953int
1954kdbg_readthrmap(user_addr_t buffer, size_t *number, vnode_t vp, vfs_context_t ctx)
1955{
1956	int avail = *number;
1957	int ret = 0;
1958	uint32_t count = 0;
1959	unsigned int mapsize;
1960
1961	count = avail/sizeof (kd_threadmap);
1962
1963	mapsize = kd_mapcount * sizeof(kd_threadmap);
1964
1965	if (count && (count <= kd_mapcount))
1966	{
1967		if ((kd_ctrl_page.kdebug_flags & KDBG_MAPINIT) && kd_mapsize && kd_mapptr)
1968		{
1969			if (*number < mapsize)
1970				ret = EINVAL;
1971			else
1972			{
1973				if (vp)
1974				{
1975					RAW_header	header;
1976					clock_sec_t	secs;
1977					clock_usec_t	usecs;
1978					char	*pad_buf;
1979					uint32_t pad_size;
1980					uint32_t extra_thread_count = 0;
1981					uint32_t cpumap_size;
1982
1983					/*
1984					 * To write a RAW_VERSION1+ file, we
1985					 * must embed a cpumap in the "padding"
1986					 * used to page align the events following
1987					 * the threadmap. If the threadmap happens
1988					 * to not require enough padding, we
1989					 * artificially increase its footprint
1990					 * until it needs enough padding.
1991					 */
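					/*
					 * A rough worked example (sizes are illustrative, not
					 * the real struct sizes): with a 4096-byte page, a
					 * 36-byte RAW_header, a 64-byte kd_threadmap and 100
					 * threads, the threadmap ends at 36 + 100 * 64 = 6436
					 * bytes, so pad_size = 4096 - (6436 & 4095) = 1756.
					 * If the cpumap needs more than 1756 bytes, we append
					 * (1756 / 64) + 1 = 28 zeroed threadmap entries, pushing
					 * the end of the threadmap past the page boundary so the
					 * recomputed padding below is (nearly) a full page.
					 */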
1992
1993					pad_size = PAGE_SIZE - ((sizeof(RAW_header) + (count * sizeof(kd_threadmap))) & PAGE_MASK_64);
1994					cpumap_size = sizeof(kd_cpumap_header) + kd_ctrl_page.kdebug_cpus * sizeof(kd_cpumap);
1995
1996					if (cpumap_size > pad_size) {
1997						/* Force an overflow onto the next page so we get a full page of padding */
1998						extra_thread_count = (pad_size / sizeof(kd_threadmap)) + 1;
1999					}
2000
2001					header.version_no = RAW_VERSION1;
2002					header.thread_count = count + extra_thread_count;
2003
2004					clock_get_calendar_microtime(&secs, &usecs);
2005					header.TOD_secs = secs;
2006					header.TOD_usecs = usecs;
2007
2008					ret = vn_rdwr(UIO_WRITE, vp, (caddr_t)&header, sizeof(RAW_header), RAW_file_offset,
2009						      UIO_SYSSPACE, IO_NODELOCKED|IO_UNIT, vfs_context_ucred(ctx), (int *) 0, vfs_context_proc(ctx));
2010					if (ret)
2011						goto write_error;
2012					RAW_file_offset += sizeof(RAW_header);
2013
2014					ret = vn_rdwr(UIO_WRITE, vp, (caddr_t)kd_mapptr, mapsize, RAW_file_offset,
2015						      UIO_SYSSPACE, IO_NODELOCKED|IO_UNIT, vfs_context_ucred(ctx), (int *) 0, vfs_context_proc(ctx));
2016					if (ret)
2017						goto write_error;
2018					RAW_file_offset += mapsize;
2019
2020					if (extra_thread_count) {
2021						pad_size = extra_thread_count * sizeof(kd_threadmap);
2022						pad_buf = (char *)kalloc(pad_size);
						if (pad_buf == NULL) {
							/* treat allocation failure like a write error */
							ret = ENOMEM;
							goto write_error;
						}
2023						memset(pad_buf, 0, pad_size);
2024
2025						ret = vn_rdwr(UIO_WRITE, vp, (caddr_t)pad_buf, pad_size, RAW_file_offset,
2026							      UIO_SYSSPACE, IO_NODELOCKED|IO_UNIT, vfs_context_ucred(ctx), (int *) 0, vfs_context_proc(ctx));
2027						kfree(pad_buf, pad_size);
2028
2029						if (ret)
2030							goto write_error;
2031						RAW_file_offset += pad_size;
2032
2033					}
2034
2035					pad_size = PAGE_SIZE - (RAW_file_offset & PAGE_MASK_64);
2036					if (pad_size) {
2037						pad_buf = (char *)kalloc(pad_size);
						if (pad_buf == NULL) {
							/* treat allocation failure like a write error */
							ret = ENOMEM;
							goto write_error;
						}
2038						memset(pad_buf, 0, pad_size);
2039
2040						/*
2041						 * embed a cpumap in the padding bytes.
2042						 * older code will skip this.
2043						 * newer code will know how to read it.
2044						 */
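						/*
						 * The resulting RAW_VERSION1 file layout is roughly:
						 *
						 *	RAW_header
						 *	kd_threadmap[thread_count]  (may include zeroed pad entries)
						 *	padding up to the next page boundary, beginning with
						 *	    kd_cpumap_header + kd_cpumap[kdebug_cpus] when it fits
						 *	kd_buf events, starting page aligned
						 */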
2045						uint32_t temp = pad_size;
2046						if (kdbg_cpumap_init_internal(kd_ctrl_page.kdebug_iops, kd_ctrl_page.kdebug_cpus, (uint8_t**)&pad_buf, &temp) != KERN_SUCCESS) {
2047							memset(pad_buf, 0, pad_size);
2048						}
2049
2050						ret = vn_rdwr(UIO_WRITE, vp, (caddr_t)pad_buf, pad_size, RAW_file_offset,
2051							      UIO_SYSSPACE, IO_NODELOCKED|IO_UNIT, vfs_context_ucred(ctx), (int *) 0, vfs_context_proc(ctx));
2052						kfree(pad_buf, pad_size);
2053
2054						if (ret)
2055							goto write_error;
2056						RAW_file_offset += pad_size;
2057					}
2058					RAW_file_written += sizeof(RAW_header) + mapsize + pad_size;
2059
2060				} else {
2061					if (copyout(kd_mapptr, buffer, mapsize))
2062						ret = EINVAL;
2063				}
2064			}
2065		}
2066		else
2067			ret = EINVAL;
2068	}
2069	else
2070		ret = EINVAL;
2071
2072	if (ret && vp)
2073	{
2074		count = 0;
2075
2076		vn_rdwr(UIO_WRITE, vp, (caddr_t)&count, sizeof(uint32_t), RAW_file_offset,
2077			UIO_SYSSPACE, IO_NODELOCKED|IO_UNIT, vfs_context_ucred(ctx), (int *) 0, vfs_context_proc(ctx));
2078		RAW_file_offset += sizeof(uint32_t);
2079		RAW_file_written += sizeof(uint32_t);
2080	}
2081write_error:
2082	if ((kd_ctrl_page.kdebug_flags & KDBG_MAPINIT) && kd_mapsize && kd_mapptr)
2083	{
2084		kmem_free(kernel_map, (vm_offset_t)kd_mapptr, kd_mapsize);
2085		kd_ctrl_page.kdebug_flags &= ~KDBG_MAPINIT;
2086		kd_mapsize = 0;
2087		kd_mapptr = (kd_threadmap *) 0;
2088		kd_mapcount = 0;
2089	}
2090	return(ret);
2091}
2092
2093
2094static int
2095kdbg_set_nkdbufs(unsigned int value)
2096{
2097	/*
2098	 * We allow a maximum buffer size of 50% of either RAM or the max mapped address space, whichever is smaller.
2099	 * 'value' is the desired number of trace entries.
2100	 */
2101        unsigned int max_entries = (sane_size/2) / sizeof(kd_buf);
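	/*
	 * For example, on an LP64 machine with 8GB of usable memory and a
	 * 64-byte kd_buf (the size is illustrative), max_entries works out to
	 * (8GB / 2) / 64 = 67,108,864 trace entries; larger requests are
	 * silently clamped to that value.
	 */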
2102
2103	if (value <= max_entries)
2104		return (value);
2105	else
2106		return (max_entries);
2107}
2108
2109
2110static int
2111kdbg_enable_bg_trace(void)
2112{
2113	int ret = 0;
2114
2115	if (kdlog_bg_trace == TRUE && kdlog_bg_trace_running == FALSE && n_storage_buffers == 0) {
2116		nkdbufs = bg_nkdbufs;
2117		ret = kdbg_reinit(FALSE);
2118		if (0 == ret) {
2119			kdbg_set_tracing_enabled(TRUE, KDEBUG_ENABLE_TRACE);
2120			kdlog_bg_trace_running = TRUE;
2121		}
2122	}
2123	return ret;
2124}
2125
2126static void
2127kdbg_disable_bg_trace(void)
2128{
2129	if (kdlog_bg_trace_running == TRUE) {
2130		kdlog_bg_trace_running = FALSE;
2131		kdbg_clear();
2132	}
2133}
2134
2135
2136
2137/*
2138 * This function is provided for the CHUD toolkit only.
2139 *    int val:
2140 *        zero disables kdebug_chudhook function call
2141 *        non-zero enables kdebug_chudhook function call
2142 *    char *fn:
2143 *        address of the enabled kdebug_chudhook function
2144 */
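
/*
 * For example, a profiling component would install and later remove its
 * hook roughly as follows (my_trace_hook is a hypothetical callback with
 * the kdebug_chudhook signature):
 *
 *	kdbg_control_chud(1, (void *)my_trace_hook);	-- install the hook
 *	kdbg_control_chud(0, NULL);			-- remove it again
 */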
2145
2146void
2147kdbg_control_chud(int val, void *fn)
2148{
2149	kdbg_lock_init();
2150
2151	if (val) {
2152		/* enable chudhook */
2153		kdebug_chudhook = fn;
2154		kdbg_set_flags(SLOW_CHUD, KDEBUG_ENABLE_CHUD, TRUE);
2155	}
2156	else {
2157		/* disable chudhook */
2158		kdbg_set_flags(SLOW_CHUD, KDEBUG_ENABLE_CHUD, FALSE);
2159		kdebug_chudhook = 0;
2160	}
2161}
2162
2163
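/*
 * kdbg_control is the handler behind the kern.kdebug sysctl.  As a rough
 * sketch, a user-space tracing tool queries the buffer state along these
 * lines (mib layout and error handling are illustrative, not a guaranteed
 * API):
 *
 *	int mib[3] = { CTL_KERN, KERN_KDEBUG, KERN_KDGETBUF };
 *	kbufinfo_t bufinfo;
 *	size_t needed = sizeof(bufinfo);
 *
 *	if (sysctl(mib, 3, &bufinfo, &needed, NULL, 0) < 0)
 *		-- handle the error
 *
 * Commands that carry an argument (KERN_KDSETBUF, KERN_KDENABLE, ...) pass
 * it as an additional mib element after the command; that is the name[1] /
 * namelen check at the top of the function below.
 */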
2164int
2165kdbg_control(int *name, u_int namelen, user_addr_t where, size_t *sizep)
2166{
2167	int ret = 0;
2168	size_t size = *sizep;
2169	unsigned int value = 0;
2170	kd_regtype kd_Reg;
2171	kbufinfo_t kd_bufinfo;
2172	pid_t curpid;
2173	proc_t p, curproc;
2174
2175	if (name[0] == KERN_KDGETENTROPY ||
2176		name[0] == KERN_KDWRITETR ||
2177		name[0] == KERN_KDWRITEMAP ||
2178		name[0] == KERN_KDEFLAGS ||
2179		name[0] == KERN_KDDFLAGS ||
2180		name[0] == KERN_KDENABLE ||
2181		name[0] == KERN_KDENABLE_BG_TRACE ||
2182		name[0] == KERN_KDSETBUF) {
2183
2184		if ( namelen < 2 )
2185			return(EINVAL);
2186		value = name[1];
2187	}
2188
2189	kdbg_lock_init();
2190
2191	if ( !(kd_ctrl_page.kdebug_flags & KDBG_LOCKINIT))
2192		return(ENOSPC);
2193
2194	lck_mtx_lock(kd_trace_mtx_sysctl);
2195
2196	switch(name[0]) {
2197		case KERN_KDGETBUF:
2198			/*
2199			 * Does not alter the global_state_pid
2200			 * This is a passive request.
2201			 */
2202			if (size < sizeof(kd_bufinfo.nkdbufs)) {
2203				/*
2204				 * There is not enough room to return even
2205				 * the first element of the info structure.
2206				 */
2207				ret = EINVAL;
2208				goto out;
2209			}
2210			kd_bufinfo.nkdbufs = nkdbufs;
2211			kd_bufinfo.nkdthreads = kd_mapcount;
2212
2213			if ( (kd_ctrl_page.kdebug_slowcheck & SLOW_NOLOG) )
2214				kd_bufinfo.nolog = 1;
2215			else
2216				kd_bufinfo.nolog = 0;
2217
2218			kd_bufinfo.flags = kd_ctrl_page.kdebug_flags;
2219#if defined(__LP64__)
2220			kd_bufinfo.flags |= KDBG_LP64;
2221#endif
2222			kd_bufinfo.bufid = global_state_pid;
2223
2224			if (size >= sizeof(kd_bufinfo)) {
2225				/*
2226				 * Provide all the info we have
2227				 */
2228				if (copyout(&kd_bufinfo, where, sizeof(kd_bufinfo)))
2229					ret = EINVAL;
2230			} else {
2231				/*
2232				 * For backwards compatibility, only provide
2233				 * as much info as there is room for.
2234				 */
2235				if (copyout(&kd_bufinfo, where, size))
2236					ret = EINVAL;
2237			}
2238			goto out;
2239
2240		case KERN_KDGETENTROPY: {
2241			/* Obsolescent - just fake with a random buffer */
2242			char	*buffer = (char *) kalloc(size);
			if (buffer == NULL) {
				/* 'size' is caller controlled; kalloc can fail */
				ret = ENOMEM;
				goto out;
			}
2243			read_frandom((void *) buffer, size);
2244			ret = copyout(buffer, where, size);
2245			kfree(buffer, size);
2246			goto out;
2247		}
2248
2249		case KERN_KDENABLE_BG_TRACE:
2250			bg_nkdbufs = kdbg_set_nkdbufs(value);
2251			kdlog_bg_trace = TRUE;
2252			ret = kdbg_enable_bg_trace();
2253			goto out;
2254
2255		case KERN_KDDISABLE_BG_TRACE:
2256			kdlog_bg_trace = FALSE;
2257			kdbg_disable_bg_trace();
2258			goto out;
2259	}
2260
2261	if ((curproc = current_proc()) != NULL)
2262		curpid = curproc->p_pid;
2263	else {
2264		ret = ESRCH;
2265		goto out;
2266	}
2267	if (global_state_pid == -1)
2268		global_state_pid = curpid;
2269	else if (global_state_pid != curpid) {
2270		if ((p = proc_find(global_state_pid)) == NULL) {
2271			/*
2272			 * The global pid no longer exists
2273			 */
2274			global_state_pid = curpid;
2275		} else {
2276			/*
2277			 * The global pid exists, deny this request
2278			 */
2279			proc_rele(p);
2280
2281			ret = EBUSY;
2282			goto out;
2283		}
2284	}
2285
2286	switch(name[0]) {
2287		case KERN_KDEFLAGS:
2288			kdbg_disable_bg_trace();
2289
2290			value &= KDBG_USERFLAGS;
2291			kd_ctrl_page.kdebug_flags |= value;
2292			break;
2293		case KERN_KDDFLAGS:
2294			kdbg_disable_bg_trace();
2295
2296			value &= KDBG_USERFLAGS;
2297			kd_ctrl_page.kdebug_flags &= ~value;
2298			break;
2299		case KERN_KDENABLE:
2300			/*
2301			 * Enable tracing mechanism.  Two types:
2302			 * KDEBUG_TRACE is the standard one,
2303			 * and KDEBUG_PPT which is a carefully
2304			 * chosen subset to avoid performance impact.
2305			 */
2306			if (value) {
2307				/*
2308				 * enable only if buffer is initialized
2309				 */
2310				if (!(kd_ctrl_page.kdebug_flags & KDBG_BUFINIT) ||
2311				    !(value == KDEBUG_ENABLE_TRACE || value == KDEBUG_ENABLE_PPT)) {
2312					ret = EINVAL;
2313					break;
2314				}
2315				kdbg_thrmap_init();
2316
2317				kdbg_set_tracing_enabled(TRUE, value);
2318			}
2319			else
2320			{
2321				kdbg_set_tracing_enabled(FALSE, 0);
2322			}
2323			break;
2324		case KERN_KDSETBUF:
2325			kdbg_disable_bg_trace();
2326
2327			nkdbufs = kdbg_set_nkdbufs(value);
2328			break;
2329		case KERN_KDSETUP:
2330			kdbg_disable_bg_trace();
2331
2332			ret = kdbg_reinit(FALSE);
2333			break;
2334		case KERN_KDREMOVE:
2335			kdbg_clear();
2336			ret = kdbg_enable_bg_trace();
2337			break;
2338		case KERN_KDSETREG:
2339			if(size < sizeof(kd_regtype)) {
2340				ret = EINVAL;
2341				break;
2342			}
2343			if (copyin(where, &kd_Reg, sizeof(kd_regtype))) {
2344				ret = EINVAL;
2345				break;
2346			}
2347			kdbg_disable_bg_trace();
2348
2349			ret = kdbg_setreg(&kd_Reg);
2350			break;
2351		case KERN_KDGETREG:
2352			if (size < sizeof(kd_regtype)) {
2353				ret = EINVAL;
2354				break;
2355			}
2356			ret = kdbg_getreg(&kd_Reg);
2357			if (copyout(&kd_Reg, where, sizeof(kd_regtype))) {
2358				ret = EINVAL;
2359			}
2360			kdbg_disable_bg_trace();
2361
2362			break;
2363		case KERN_KDREADTR:
2364			ret = kdbg_read(where, sizep, NULL, NULL);
2365			break;
2366		case KERN_KDWRITETR:
2367		case KERN_KDWRITEMAP:
2368		{
2369			struct	vfs_context context;
2370			struct	fileproc *fp;
2371			size_t	number;
2372			vnode_t	vp;
2373			int	fd;
2374
2375			kdbg_disable_bg_trace();
2376
2377			if (name[0] == KERN_KDWRITETR) {
2378				int s;
2379				int wait_result = THREAD_AWAKENED;
2380				u_int64_t abstime;
2381				u_int64_t ns;
2382
2383				if (*sizep) {
2384					ns = ((u_int64_t)*sizep) * (u_int64_t)(1000 * 1000);
2385					nanoseconds_to_absolutetime(ns,  &abstime );
2386					clock_absolutetime_interval_to_deadline( abstime, &abstime );
2387				} else
2388					abstime = 0;
2389
2390				s = ml_set_interrupts_enabled(FALSE);
2391				lck_spin_lock(kdw_spin_lock);
2392
2393				while (wait_result == THREAD_AWAKENED && kd_ctrl_page.kds_inuse_count < n_storage_threshold) {
2394
2395					kds_waiter = 1;
2396
2397					if (abstime)
2398						wait_result = lck_spin_sleep_deadline(kdw_spin_lock, 0, &kds_waiter, THREAD_ABORTSAFE, abstime);
2399					else
2400						wait_result = lck_spin_sleep(kdw_spin_lock, 0, &kds_waiter, THREAD_ABORTSAFE);
2401
2402					kds_waiter = 0;
2403				}
2404				lck_spin_unlock(kdw_spin_lock);
2405				ml_set_interrupts_enabled(s);
2406			}
2407			p = current_proc();
2408			fd = value;
2409
2410			proc_fdlock(p);
2411			if ( (ret = fp_lookup(p, fd, &fp, 1)) ) {
2412				proc_fdunlock(p);
2413				break;
2414			}
2415			context.vc_thread = current_thread();
2416			context.vc_ucred = fp->f_fglob->fg_cred;
2417
2418			if (FILEGLOB_DTYPE(fp->f_fglob) != DTYPE_VNODE) {
2419				fp_drop(p, fd, fp, 1);
2420				proc_fdunlock(p);
2421
2422				ret = EBADF;
2423				break;
2424			}
2425			vp = (struct vnode *)fp->f_fglob->fg_data;
2426			proc_fdunlock(p);
2427
2428			if ((ret = vnode_getwithref(vp)) == 0) {
2429				RAW_file_offset = fp->f_fglob->fg_offset;
2430				if (name[0] == KERN_KDWRITETR) {
2431					number = nkdbufs * sizeof(kd_buf);
2432
2433					KERNEL_DEBUG_CONSTANT((TRACEDBG_CODE(DBG_TRACE_INFO, 3)) | DBG_FUNC_START, 0, 0, 0, 0, 0);
2434					ret = kdbg_read(0, &number, vp, &context);
2435					KERNEL_DEBUG_CONSTANT((TRACEDBG_CODE(DBG_TRACE_INFO, 3)) | DBG_FUNC_END, number, 0, 0, 0, 0);
2436
2437					*sizep = number;
2438				} else {
2439					number = kd_mapcount * sizeof(kd_threadmap);
2440					kdbg_readthrmap(0, &number, vp, &context);
2441				}
2442				fp->f_fglob->fg_offset = RAW_file_offset;
2443				vnode_put(vp);
2444			}
2445			fp_drop(p, fd, fp, 0);
2446
2447			break;
2448		}
2449		case KERN_KDBUFWAIT:
2450		{
2451			/* WRITETR lite -- just block until there's data */
2452			int s;
2453			int wait_result = THREAD_AWAKENED;
2454			u_int64_t abstime;
2455			u_int64_t ns;
2456			size_t	number = 0;
2457
2458			kdbg_disable_bg_trace();
2459
2460
2461			if (*sizep) {
2462				ns = ((u_int64_t)*sizep) * (u_int64_t)(1000 * 1000);
2463				nanoseconds_to_absolutetime(ns,  &abstime );
2464				clock_absolutetime_interval_to_deadline( abstime, &abstime );
2465			} else
2466				abstime = 0;
2467
2468			s = ml_set_interrupts_enabled(FALSE);
2469			if( !s )
2470				panic("trying to wait with interrupts off");
2471			lck_spin_lock(kdw_spin_lock);
2472
2473			/* drop the mutex so we don't exclude others from
2474			 * accessing trace
2475			 */
2476			lck_mtx_unlock(kd_trace_mtx_sysctl);
2477
2478			while (wait_result == THREAD_AWAKENED &&
2479				kd_ctrl_page.kds_inuse_count < n_storage_threshold) {
2480
2481				kds_waiter = 1;
2482
2483				if (abstime)
2484					wait_result = lck_spin_sleep_deadline(kdw_spin_lock, 0, &kds_waiter, THREAD_ABORTSAFE, abstime);
2485				else
2486					wait_result = lck_spin_sleep(kdw_spin_lock, 0, &kds_waiter, THREAD_ABORTSAFE);
2487
2488				kds_waiter = 0;
2489			}
2490
2491			/* check the count under the spinlock */
2492			number = (kd_ctrl_page.kds_inuse_count >= n_storage_threshold);
2493
2494			lck_spin_unlock(kdw_spin_lock);
2495			ml_set_interrupts_enabled(s);
2496
2497			/* pick the mutex back up again */
2498			lck_mtx_lock(kd_trace_mtx_sysctl);
2499
2500			/* write out whether we've exceeded the threshold */
2501			*sizep = number;
2502			break;
2503		}
2504		case KERN_KDPIDTR:
2505			if (size < sizeof(kd_regtype)) {
2506				ret = EINVAL;
2507				break;
2508			}
2509			if (copyin(where, &kd_Reg, sizeof(kd_regtype))) {
2510				ret = EINVAL;
2511				break;
2512			}
2513			kdbg_disable_bg_trace();
2514
2515			ret = kdbg_setpid(&kd_Reg);
2516			break;
2517		case KERN_KDPIDEX:
2518			if (size < sizeof(kd_regtype)) {
2519				ret = EINVAL;
2520				break;
2521			}
2522			if (copyin(where, &kd_Reg, sizeof(kd_regtype))) {
2523				ret = EINVAL;
2524				break;
2525			}
2526			kdbg_disable_bg_trace();
2527
2528			ret = kdbg_setpidex(&kd_Reg);
2529			break;
2530		case KERN_KDCPUMAP:
2531			ret = kdbg_readcpumap(where, sizep);
2532			break;
2533		case KERN_KDTHRMAP:
2534			ret = kdbg_readthrmap(where, sizep, NULL, NULL);
2535			break;
2536		case KERN_KDREADCURTHRMAP:
2537			ret = kdbg_readcurthrmap(where, sizep);
2538			break;
2539		case KERN_KDSETRTCDEC:
2540			if (size < sizeof(kd_regtype)) {
2541				ret = EINVAL;
2542				break;
2543			}
2544			if (copyin(where, &kd_Reg, sizeof(kd_regtype))) {
2545				ret = EINVAL;
2546				break;
2547			}
2548			kdbg_disable_bg_trace();
2549
2550			ret = kdbg_setrtcdec(&kd_Reg);
2551			break;
2552		case KERN_KDSET_TYPEFILTER:
2553			kdbg_disable_bg_trace();
2554
2555			if ((kd_ctrl_page.kdebug_flags & KDBG_TYPEFILTER_CHECK) == 0){
2556				if ((ret = kdbg_enable_typefilter()))
2557					break;
2558			}
2559
2560			if (size != KDBG_TYPEFILTER_BITMAP_SIZE) {
2561				ret = EINVAL;
2562				break;
2563			}
2564
2565			if (copyin(where, type_filter_bitmap, KDBG_TYPEFILTER_BITMAP_SIZE)) {
2566				ret = EINVAL;
2567				break;
2568			}
2569			kdbg_iop_list_callback(kd_ctrl_page.kdebug_iops, KD_CALLBACK_TYPEFILTER_CHANGED, type_filter_bitmap);
2570			break;
2571		default:
2572			ret = EINVAL;
2573	}
2574out:
2575	lck_mtx_unlock(kd_trace_mtx_sysctl);
2576
2577	return(ret);
2578}
2579
2580
2581/*
2582 * This code can run for the most part concurrently with kernel_debug_internal()...
2583 * 'release_storage_unit' will take the kds_spin_lock which may cause us to briefly
2584 * synchronize with the recording side of this puzzle... otherwise, we are able to
2585 * move through the lists w/o use of any locks
2586 */
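/*
 * The loop below is essentially an N-way merge: on each pass it looks at the
 * oldest unread event in every per-cpu buffer, emits the one with the
 * smallest timestamp, and repeats until the copy buffer fills, the buffers
 * drain, or an event newer than the snapshot barrier is seen.  In rough
 * pseudocode:
 *
 *	while (room left in kdcopybuf) {
 *		for each per-cpu buffer:
 *			peek at its oldest unread event
 *		copy out the event with the smallest timestamp
 *	}
 */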
2587int
2588kdbg_read(user_addr_t buffer, size_t *number, vnode_t vp, vfs_context_t ctx)
2589{
2590	unsigned int count;
2591	unsigned int cpu, min_cpu;
2592	uint64_t  mintime, t, barrier = 0;
2593	int error = 0;
2594	kd_buf *tempbuf;
2595	uint32_t rcursor;
2596	kd_buf lostevent;
2597	union kds_ptr kdsp;
2598	struct kd_storage *kdsp_actual;
2599	struct kd_bufinfo *kdbp;
2600	struct kd_bufinfo *min_kdbp;
2601	uint32_t tempbuf_count;
2602	uint32_t tempbuf_number;
2603	uint32_t old_kdebug_flags;
2604	uint32_t old_kdebug_slowcheck;
2605	boolean_t lostevents = FALSE;
2606	boolean_t out_of_events = FALSE;
2607
2608	count = *number/sizeof(kd_buf);
2609	*number = 0;
2610
2611	if (count == 0 || !(kd_ctrl_page.kdebug_flags & KDBG_BUFINIT) || kdcopybuf == 0)
2612		return EINVAL;
2613
2614	memset(&lostevent, 0, sizeof(lostevent));
2615	lostevent.debugid = TRACEDBG_CODE(DBG_TRACE_INFO, 2);
2616
2617	/* Capture timestamp. Only sort events that have occurred before the timestamp.
2618	 * Since the IOP is being flushed here, it's possible that events occur on the AP
2619	 * while running live tracing. If we are disabled, no new events should
2620	 * occur on the AP.
2621	 */
2622
2623	if (kd_ctrl_page.enabled)
2624	{
2625		// timestamp is non-zero value
2626		barrier = mach_absolute_time() & KDBG_TIMESTAMP_MASK;
2627	}
2628
2629	// Request each IOP to provide us with up to date entries before merging buffers together.
2630	kdbg_iop_list_callback(kd_ctrl_page.kdebug_iops, KD_CALLBACK_SYNC_FLUSH, NULL);
2631
2632	/*
2633	 * because we hold kd_trace_mtx_sysctl, no other control threads can
2634	 * be playing with kdebug_flags... the code that cuts new events could
2635	 * be running, but it grabs kds_spin_lock if it needs to acquire a new
2636	 * storage chunk, which is where it examines kdebug_flags... if it's adding
2637	 * to the same chunk we're reading from, no problem...
2638	 */
2639
2640	disable_wrap(&old_kdebug_slowcheck, &old_kdebug_flags);
2641
2642	if (count > nkdbufs)
2643		count = nkdbufs;
2644
2645	if ((tempbuf_count = count) > KDCOPYBUF_COUNT)
2646	        tempbuf_count = KDCOPYBUF_COUNT;
2647
2648	while (count) {
2649		tempbuf = kdcopybuf;
2650		tempbuf_number = 0;
2651
2652		// While space
2653		while (tempbuf_count) {
2654			mintime = 0xffffffffffffffffULL;
2655			min_kdbp = NULL;
2656			min_cpu = 0;
2657
2658			// Check all CPUs
2659			for (cpu = 0, kdbp = &kdbip[0]; cpu < kd_ctrl_page.kdebug_cpus; cpu++, kdbp++) {
2660
2661				// Find one with raw data
2662				if ((kdsp = kdbp->kd_list_head).raw == KDS_PTR_NULL)
2663				        continue;
2664				/* Debugging aid: maintain a copy of the "kdsp"
2665				 * index.
2666				 */
2667				volatile union kds_ptr kdsp_shadow;
2668
2669				kdsp_shadow = kdsp;
2670
2671				// Get from cpu data to buffer header to buffer
2672				kdsp_actual = POINTER_FROM_KDS_PTR(kdsp);
2673
2674				volatile struct kd_storage *kdsp_actual_shadow;
2675
2676				kdsp_actual_shadow = kdsp_actual;
2677
2678				// See if there is any unread data left in this buffer
2679				rcursor = kdsp_actual->kds_readlast;
2680
2681				if (rcursor == kdsp_actual->kds_bufindx)
2682					continue;
2683
2684				t = kdbg_get_timestamp(&kdsp_actual->kds_records[rcursor]);
2685
2686				if ((t > barrier) && (barrier > 0)) {
2687					/*
2688					 * Need to wait to flush iop again before we
2689					 * sort any more data from the buffers
2690					 */
2691					out_of_events = TRUE;
2692					break;
2693				}
2694				if (t < kdsp_actual->kds_timestamp) {
2695					/*
2696					 * indicates we've not yet completed filling
2697					 * in this event...
2698					 * this should only occur when we're looking
2699					 * at the buf that the record head is utilizing...
2700					 * we'll pick these events up on the next
2701					 * call to kdbg_read...
2702					 * we bail at this point so that we don't
2703					 * get an out-of-order timestream by continuing
2704					 * to read events from the other CPUs' timestream(s)
2705					 */
2706					out_of_events = TRUE;
2707					break;
2708				}
2709				if (t < mintime) {
2710				        mintime = t;
2711					min_kdbp = kdbp;
2712					min_cpu = cpu;
2713				}
2714			}
2715			if (min_kdbp == NULL || out_of_events == TRUE) {
2716				/*
2717				 * all buffers ran empty
2718				 */
2719				out_of_events = TRUE;
2720				break;
2721			}
2722
2723			// Get data
2724			kdsp = min_kdbp->kd_list_head;
2725			kdsp_actual = POINTER_FROM_KDS_PTR(kdsp);
2726
2727			if (kdsp_actual->kds_lostevents == TRUE) {
2728				kdbg_set_timestamp_and_cpu(&lostevent, kdsp_actual->kds_records[kdsp_actual->kds_readlast].timestamp, min_cpu);
2729				*tempbuf = lostevent;
2730
2731				kdsp_actual->kds_lostevents = FALSE;
2732				lostevents = TRUE;
2733
2734				goto nextevent;
2735			}
2736
2737			// Copy into buffer
2738			*tempbuf = kdsp_actual->kds_records[kdsp_actual->kds_readlast++];
2739
2740			if (kdsp_actual->kds_readlast == EVENTS_PER_STORAGE_UNIT)
2741				release_storage_unit(min_cpu, kdsp.raw);
2742
2743			/*
2744			 * Watch for out of order timestamps
2745			 */
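			/*
			 * For example, if kd_prev_timebase is 1000 and this event
			 * claims 998, the event is emitted with timestamp 1001 and
			 * kd_prev_timebase becomes 1001, keeping the output stream
			 * monotonic.
			 */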
2746			if (mintime < min_kdbp->kd_prev_timebase) {
2747				/*
2748				 * if so, use the previous timestamp + 1 cycle
2749				 */
2750				min_kdbp->kd_prev_timebase++;
2751				kdbg_set_timestamp_and_cpu(tempbuf, min_kdbp->kd_prev_timebase, kdbg_get_cpu(tempbuf));
2752			} else
2753				min_kdbp->kd_prev_timebase = mintime;
2754nextevent:
2755			tempbuf_count--;
2756			tempbuf_number++;
2757			tempbuf++;
2758
2759			if ((RAW_file_written += sizeof(kd_buf)) >= RAW_FLUSH_SIZE)
2760				break;
2761		}
2762		if (tempbuf_number) {
2763
2764			if (vp) {
2765				error = vn_rdwr(UIO_WRITE, vp, (caddr_t)kdcopybuf, tempbuf_number * sizeof(kd_buf), RAW_file_offset,
2766						UIO_SYSSPACE, IO_NODELOCKED|IO_UNIT, vfs_context_ucred(ctx), (int *) 0, vfs_context_proc(ctx));
2767
2768				RAW_file_offset += (tempbuf_number * sizeof(kd_buf));
2769
2770				if (RAW_file_written >= RAW_FLUSH_SIZE) {
2771					cluster_push(vp, 0);
2772
2773					RAW_file_written = 0;
2774				}
2775			} else {
2776				error = copyout(kdcopybuf, buffer, tempbuf_number * sizeof(kd_buf));
2777				buffer += (tempbuf_number * sizeof(kd_buf));
2778			}
2779			if (error) {
2780				*number = 0;
2781				error = EINVAL;
2782				break;
2783			}
2784			count   -= tempbuf_number;
2785			*number += tempbuf_number;
2786		}
2787		if (out_of_events == TRUE)
2788		       /*
2789			* all trace buffers are empty
2790			*/
2791		        break;
2792
2793		if ((tempbuf_count = count) > KDCOPYBUF_COUNT)
2794		        tempbuf_count = KDCOPYBUF_COUNT;
2795	}
2796	if ( !(old_kdebug_flags & KDBG_NOWRAP)) {
2797		enable_wrap(old_kdebug_slowcheck, lostevents);
2798	}
2799	return (error);
2800}
2801
2802
2803unsigned char *getProcName(struct proc *proc);
2804unsigned char *getProcName(struct proc *proc) {
2805
2806	return (unsigned char *) &proc->p_comm;	/* Return pointer to the proc name */
2807
2808}
2809
2810#define STACKSHOT_SUBSYS_LOCK() lck_mtx_lock(&stackshot_subsys_mutex)
2811#define STACKSHOT_SUBSYS_UNLOCK() lck_mtx_unlock(&stackshot_subsys_mutex)
2812#if defined(__i386__) || defined (__x86_64__)
2813#define TRAP_DEBUGGER __asm__ volatile("int3");
2814#else
2815#error No TRAP_DEBUGGER definition for this architecture
2816#endif
2817
2818#define SANE_TRACEBUF_SIZE (8 * 1024 * 1024)
2819#define SANE_BOOTPROFILE_TRACEBUF_SIZE (64 * 1024 * 1024)
2820
2821/* Initialize the mutex governing access to the stack snapshot subsystem */
2822__private_extern__ void
2823stackshot_lock_init( void )
2824{
2825	stackshot_subsys_lck_grp_attr = lck_grp_attr_alloc_init();
2826
2827	stackshot_subsys_lck_grp = lck_grp_alloc_init("stackshot_subsys_lock", stackshot_subsys_lck_grp_attr);
2828
2829	stackshot_subsys_lck_attr = lck_attr_alloc_init();
2830
2831	lck_mtx_init(&stackshot_subsys_mutex, stackshot_subsys_lck_grp, stackshot_subsys_lck_attr);
2832}
2833
2834/*
2835 * stack_snapshot:   Obtains a coherent set of stack traces for all threads
2836 *		     on the system, tracing both kernel and user stacks
2837 *		     where available. Uses machine specific trace routines
2838 *		     for each supported architecture.
2839 * Inputs:	     uap->pid - process id of process to be traced, or -1
2840 *		     for the entire system
2841 *		     uap->tracebuf - address of the user space destination
2842 *		     buffer
2843 *		     uap->tracebuf_size - size of the user space trace buffer
2844 *		     uap->options - various options, including the maximum
2845 *		     number of frames to trace.
2846 * Outputs:	     EPERM if the caller is not privileged
2847 *		     EINVAL if the supplied trace buffer isn't sanely sized
2848 *		     ENOMEM if we don't have enough memory to satisfy the
2849 *		     request
2850 *		     ENOENT if the target pid isn't found
2851 *		     ENOSPC if the supplied buffer is insufficient
2852 *		     *retval contains the number of bytes traced, if successful
2853 *		     and -1 otherwise. If the request failed due to
2854 *		     tracebuffer exhaustion, we copyout as much as possible.
2855 */
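/*
 * A minimal sketch of how a privileged user-space caller might invoke this
 * private interface (the SYS_stack_snapshot number/name and the use of
 * syscall(2) here are assumptions, shown for illustration only):
 *
 *	char buf[512 * 1024];
 *	int bytes = syscall(SYS_stack_snapshot, -1, buf, sizeof(buf), 0, 0);
 *	if (bytes < 0)
 *		-- handle the error
 */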
2856int
2857stack_snapshot(struct proc *p, register struct stack_snapshot_args *uap, int32_t *retval) {
2858	int error = 0;
2859
2860	if ((error = suser(kauth_cred_get(), &p->p_acflag)))
2861                return(error);
2862
2863	return stack_snapshot2(uap->pid, uap->tracebuf, uap->tracebuf_size,
2864	    uap->flags, uap->dispatch_offset, retval);
2865}
2866
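/*
 * In-kernel callers can capture a snapshot without going through the syscall
 * path.  A minimal sketch (buffer management and the zero flags value are
 * illustrative only):
 *
 *	unsigned bytes = 0;
 *	if (stack_snapshot_from_kernel(-1, buf, buf_size, 0, &bytes) == 0) {
 *		-- 'bytes' bytes of snapshot data are now in 'buf'
 *	}
 */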
2867int
2868stack_snapshot_from_kernel(pid_t pid, void *buf, uint32_t size, uint32_t flags, unsigned *bytesTraced)
2869{
2870	int error = 0;
2871	boolean_t istate;
2872
2873	if ((buf == NULL) || (size <= 0) || (bytesTraced == NULL)) {
2874		return -1;
2875	}
2876
2877	/* cap an individual stackshot to SANE_TRACEBUF_SIZE */
2878	if (size > SANE_TRACEBUF_SIZE) {
2879		size = SANE_TRACEBUF_SIZE;
2880	}
2881
2882/* Serialize tracing */
2883	STACKSHOT_SUBSYS_LOCK();
2884	istate = ml_set_interrupts_enabled(FALSE);
2885
2886
2887/* Preload trace parameters*/
2888	kdp_snapshot_preflight(pid, buf, size, flags, 0);
2889
2890/* Trap to the debugger to obtain a coherent stack snapshot; this populates
2891 * the trace buffer
2892 */
2893	TRAP_DEBUGGER;
2894
2895	ml_set_interrupts_enabled(istate);
2896
2897	*bytesTraced = kdp_stack_snapshot_bytes_traced();
2898
2899	error = kdp_stack_snapshot_geterror();
2900
2901	STACKSHOT_SUBSYS_UNLOCK();
2902
2903    return error;
2904
2905}
2906
2907int
2908stack_snapshot2(pid_t pid, user_addr_t tracebuf, uint32_t tracebuf_size, uint32_t flags, uint32_t dispatch_offset, int32_t *retval)
2909{
2910	boolean_t istate;
2911	int error = 0;
2912	unsigned bytesTraced = 0;
2913
2914#if CONFIG_TELEMETRY
2915	if (flags & STACKSHOT_GLOBAL_MICROSTACKSHOT_ENABLE) {
2916		telemetry_global_ctl(1);
2917		*retval = 0;
2918		return (0);
2919	} else if (flags & STACKSHOT_GLOBAL_MICROSTACKSHOT_DISABLE) {
2920		telemetry_global_ctl(0);
2921		*retval = 0;
2922		return (0);
2923	}
2924
2925	if (flags & STACKSHOT_WINDOWED_MICROSTACKSHOTS_ENABLE) {
2926		error = telemetry_enable_window();
2927
2928		if (error != KERN_SUCCESS) {
2929			/* We are probably out of memory */
2930			*retval = -1;
2931			return ENOMEM;
2932		}
2933
2934		*retval = 0;
2935		return (0);
2936	} else if (flags & STACKSHOT_WINDOWED_MICROSTACKSHOTS_DISABLE) {
2937		telemetry_disable_window();
2938		*retval = 0;
2939		return (0);
2940	}
2941#endif
2942
2943	*retval = -1;
2944/* Serialize tracing */
2945	STACKSHOT_SUBSYS_LOCK();
2946
2947	if (tracebuf_size <= 0) {
2948		error = EINVAL;
2949		goto error_exit;
2950	}
2951
2952#if CONFIG_TELEMETRY
2953	if (flags & STACKSHOT_GET_MICROSTACKSHOT) {
2954
2955		if (tracebuf_size > SANE_TRACEBUF_SIZE) {
2956			error = EINVAL;
2957			goto error_exit;
2958		}
2959
2960		bytesTraced = tracebuf_size;
2961		error = telemetry_gather(tracebuf, &bytesTraced,
2962		                         (flags & STACKSHOT_SET_MICROSTACKSHOT_MARK) ? TRUE : FALSE);
2963		if (error == KERN_NO_SPACE) {
2964			error = ENOSPC;
2965		}
2966
2967		*retval = (int)bytesTraced;
2968		goto error_exit;
2969	}
2970
2971	if (flags & STACKSHOT_GET_WINDOWED_MICROSTACKSHOTS) {
2972
2973		if (tracebuf_size > SANE_TRACEBUF_SIZE) {
2974			error = EINVAL;
2975			goto error_exit;
2976		}
2977
2978		bytesTraced = tracebuf_size;
2979		error = telemetry_gather_windowed(tracebuf, &bytesTraced);
2980		if (error == KERN_NO_SPACE) {
2981			error = ENOSPC;
2982		}
2983
2984		*retval = (int)bytesTraced;
2985		goto error_exit;
2986	}
2987
2988	if (flags & STACKSHOT_GET_BOOT_PROFILE) {
2989
2990		if (tracebuf_size > SANE_BOOTPROFILE_TRACEBUF_SIZE) {
2991			error = EINVAL;
2992			goto error_exit;
2993		}
2994
2995		bytesTraced = tracebuf_size;
2996		error = bootprofile_gather(tracebuf, &bytesTraced);
2997		if (error == KERN_NO_SPACE) {
2998			error = ENOSPC;
2999		}
3000
3001		*retval = (int)bytesTraced;
3002		goto error_exit;
3003	}
3004#endif
3005
3006	if (tracebuf_size > SANE_TRACEBUF_SIZE) {
3007		error = EINVAL;
3008		goto error_exit;
3009	}
3010
3011	assert(stackshot_snapbuf == NULL);
3012	if (kmem_alloc_kobject(kernel_map, (vm_offset_t *)&stackshot_snapbuf, tracebuf_size) != KERN_SUCCESS) {
3013		error = ENOMEM;
3014		goto error_exit;
3015	}
3016
3017	if (panic_active()) {
3018		error = ENOMEM;
3019		goto error_exit;
3020	}
3021
3022	istate = ml_set_interrupts_enabled(FALSE);
3023/* Preload trace parameters*/
3024	kdp_snapshot_preflight(pid, stackshot_snapbuf, tracebuf_size, flags, dispatch_offset);
3025
3026/* Trap to the debugger to obtain a coherent stack snapshot; this populates
3027 * the trace buffer
3028 */
3029
3030	TRAP_DEBUGGER;
3031
3032	ml_set_interrupts_enabled(istate);
3033
3034	bytesTraced = kdp_stack_snapshot_bytes_traced();
3035
3036	if (bytesTraced > 0) {
3037		if ((error = copyout(stackshot_snapbuf, tracebuf,
3038			((bytesTraced < tracebuf_size) ?
3039			    bytesTraced : tracebuf_size))))
3040			goto error_exit;
3041		*retval = bytesTraced;
3042	}
3043	else {
3044		error = ENOENT;
3045		goto error_exit;
3046	}
3047
3048	error = kdp_stack_snapshot_geterror();
3049	if (error == -1) {
3050		error = ENOSPC;
3051		*retval = -1;
3052		goto error_exit;
3053	}
3054
3055error_exit:
3056	if (stackshot_snapbuf != NULL)
3057		kmem_free(kernel_map, (vm_offset_t) stackshot_snapbuf, tracebuf_size);
3058	stackshot_snapbuf = NULL;
3059	STACKSHOT_SUBSYS_UNLOCK();
3060	return error;
3061}
3062
3063void
3064start_kern_tracing(unsigned int new_nkdbufs, boolean_t need_map)
3065{
3066
3067	if (!new_nkdbufs)
3068		return;
3069	nkdbufs = kdbg_set_nkdbufs(new_nkdbufs);
3070	kdbg_lock_init();
3071
3072	kernel_debug_string("start_kern_tracing");
3073
3074	if (0 == kdbg_reinit(TRUE)) {
3075
3076		if (need_map == TRUE) {
3077			uint32_t old1, old2;
3078
3079			kdbg_thrmap_init();
3080
3081			disable_wrap(&old1, &old2);
3082		}
3083
3084		/* Hold off interrupts until the early traces are cut */
3085		boolean_t	s = ml_set_interrupts_enabled(FALSE);
3086
3087		kdbg_set_tracing_enabled(TRUE, KDEBUG_ENABLE_TRACE);
3088
3089		/*
3090		 * Transfer all very early events from the static buffer
3091		 * into the real buffers.
3092		 */
3093		kernel_debug_early_end();
3094
3095		ml_set_interrupts_enabled(s);
3096
3097		printf("kernel tracing started\n");
3098	} else {
3099		printf("error from kdbg_reinit, kernel tracing not started\n");
3100	}
3101}
3102
3103void
3104start_kern_tracing_with_typefilter(unsigned int new_nkdbufs,
3105		                   boolean_t need_map,
3106		                   unsigned int typefilter)
3107{
3108	/* startup tracing */
3109	start_kern_tracing(new_nkdbufs, need_map);
3110
3111	/* check that tracing was actually enabled */
3112	if (!(kdebug_enable & KDEBUG_ENABLE_TRACE))
3113		return;
3114
3115	/* setup the typefiltering */
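	/*
	 * The typefilter argument packs class and subclass into its low 16
	 * bits; for example (assuming CSC_MASK >> CSC_OFFSET == 0xffff), a
	 * value of 0x0134 sets the bit for class 0x01, subclass 0x34, so only
	 * events with that class/subclass pair are traced.
	 */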
3116	if (0 == kdbg_enable_typefilter())
3117		setbit(type_filter_bitmap, typefilter & (CSC_MASK >> CSC_OFFSET));
3118}
3119
3120void
3121kdbg_dump_trace_to_file(const char *filename)
3122{
3123	vfs_context_t	ctx;
3124	vnode_t		vp;
3125	int		error;
3126	size_t		number;
3127
3128
3129	if ( !(kdebug_enable & KDEBUG_ENABLE_TRACE))
3130		return;
3131
3132        if (global_state_pid != -1) {
3133		if ((proc_find(global_state_pid)) != NULL) {
3134			/*
3135			 * The global pid exists, we're running
3136			 * due to fs_usage, latency, etc...
3137			 * don't cut the panic/shutdown trace file
3138			 * Disable tracing from this point to avoid
3139			 * perturbing state.
3140			 */
3141			kdebug_enable = 0;
3142			kd_ctrl_page.enabled = 0;
3143			return;
3144		}
3145	}
3146	KERNEL_DEBUG_CONSTANT((TRACEDBG_CODE(DBG_TRACE_INFO, 0)) | DBG_FUNC_NONE, 0, 0, 0, 0, 0);
3147
3148	kdebug_enable = 0;
3149	kd_ctrl_page.enabled = 0;
3150
3151	ctx = vfs_context_kernel();
3152
3153	if ((error = vnode_open(filename, (O_CREAT | FWRITE | O_NOFOLLOW), 0600, 0, &vp, ctx)))
3154		return;
3155
3156	number = kd_mapcount * sizeof(kd_threadmap);
3157	kdbg_readthrmap(0, &number, vp, ctx);
3158
3159	number = nkdbufs*sizeof(kd_buf);
3160	kdbg_read(0, &number, vp, ctx);
3161
3162	vnode_close(vp, FWRITE, ctx);
3163
3164	sync(current_proc(), (void *)NULL, (int *)NULL);
3165}
3166
3167/* Helper function for filling in the BSD name for an address space
3168 * Defined here because the machine bindings know only Mach threads
3169 * and nothing about BSD processes.
3170 *
3171 * FIXME: need to grab a lock during this?
3172 */
3173void kdbg_get_task_name(char* name_buf, int len, task_t task)
3174{
3175	proc_t proc;
3176
3177	/* Note: we can't use thread->task (and functions that rely on it) here
3178	 * because it hasn't been initialized yet when this function is called.
3179	 * We use the explicitly-passed task parameter instead.
3180	 */
3181	proc = get_bsdtask_info(task);
3182	if (proc != PROC_NULL)
3183		snprintf(name_buf, len, "%s/%d", proc->p_comm, proc->p_pid);
3184	else
3185		snprintf(name_buf, len, "%p [!bsd]", task);
3186}
3187