/*
 * Copyright (c) 2013 Apple Inc. All rights reserved.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
 *
 * This file contains Original Code and/or Modifications of Original Code
 * as defined in and that are subject to the Apple Public Source License
 * Version 2.0 (the 'License'). You may not use this file except in
 * compliance with the License. The rights granted to you under the License
 * may not be used to create, or enable the creation or redistribution of,
 * unlawful or unlicensed copies of an Apple operating system, or to
 * circumvent, violate, or enable the circumvention or violation of, any
 * terms of an Apple operating system software license agreement.
 *
 * Please obtain a copy of the License at
 * http://www.opensource.apple.com/apsl/ and read it before using this file.
 *
 * The Original Code and all software distributed under the License are
 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
 * Please see the License for the specific language governing rights and
 * limitations under the License.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
 */

#include <mach/mach_types.h>
#include <mach/vm_param.h>
#ifdef IMPORTANCE_INHERITANCE
#include <ipc/ipc_importance.h>
#endif
#include <sys/appleapiopts.h>
#include <kern/debug.h>
#include <uuid/uuid.h>

#include <kdp/kdp_dyld.h>
#include <kdp/kdp_en_debugger.h>

#include <libsa/types.h>
#include <libkern/version.h>

#include <string.h> /* bcopy */

#include <kern/processor.h>
#include <kern/thread.h>
#include <kern/clock.h>
#include <vm/vm_map.h>
#include <vm/vm_kern.h>
#include <vm/vm_pageout.h>
#include <vm/vm_shared_region.h>
#include <libkern/OSKextLibPrivate.h>

extern unsigned int not_in_kdp;

/*
 * TODO: Even hackier than the other pieces.  This should really
 * be moved off of kdp_pmap, and we should probably separate
 * machine_trace_thread out of the kdp code.
 */
extern pmap_t kdp_pmap;
extern addr64_t kdp_vtophys(pmap_t pmap, addr64_t va);
int kdp_snapshot = 0;
static int stack_snapshot_ret = 0;
static unsigned stack_snapshot_bytes_traced = 0;

static void *stack_snapshot_buf;
static uint32_t stack_snapshot_bufsize;
int stack_snapshot_pid;
static uint32_t stack_snapshot_flags;
static uint32_t stack_snapshot_dispatch_offset;
static unsigned int old_debugger;

void			do_stackshot(void);
void			kdp_snapshot_preflight(int pid, void * tracebuf, uint32_t tracebuf_size,
				    uint32_t flags, uint32_t dispatch_offset);
void			kdp_snapshot_postflight(void);
static int		kdp_stackshot(int pid, void *tracebuf, uint32_t tracebuf_size,
				    uint32_t trace_flags, uint32_t dispatch_offset, uint32_t *pbytesTraced);
int			kdp_stack_snapshot_geterror(void);
int			kdp_stack_snapshot_bytes_traced(void);
static int		pid_from_task(task_t task);
static uint64_t		proc_uniqueid_from_task(task_t task);
static void		kdp_mem_and_io_snapshot(struct mem_and_io_snapshot *memio_snap);
static boolean_t	kdp_copyin(pmap_t p, uint64_t uaddr, void *dest, size_t size);
static uint64_t		proc_was_throttled_from_task(task_t task);
static uint64_t		proc_did_throttle_from_task(task_t task);

extern int		proc_pid(void *p);
extern uint64_t		proc_uniqueid(void *p);
extern uint64_t		proc_was_throttled(void *p);
extern uint64_t		proc_did_throttle(void *p);
extern void		proc_name_kdp(task_t task, char *buf, int size);
extern int		proc_threadname_kdp(void *uth, char *buf, size_t size);
extern void		proc_starttime_kdp(void *p, uint64_t *tv_sec, uint64_t *tv_usec);

extern int 		count_busy_buffers(void);   /* must track with declaration in bsd/sys/buf_internal.h */
extern void 		bcopy_phys(addr64_t, addr64_t, vm_size_t);
extern int		machine_trace_thread(thread_t thread, char *tracepos, char *tracebound, int nframes, boolean_t user_p);
extern int		machine_trace_thread64(thread_t thread, char *tracepos, char *tracebound, int nframes, boolean_t user_p);

/* Validates that the given address is both a valid page and has
 * default caching attributes for the current kdp_pmap.  Returns
 * 0 if the address is invalid, and a kernel virtual address for
 * the given address if it is valid.
 */
vm_offset_t machine_trace_thread_get_kva(vm_offset_t cur_target_addr);

/* Clears caching information used by the above validation routine
 * (in case the kdp_pmap has been changed or cleared).
 */
void machine_trace_thread_clear_validation_cache(void);

#define MAX_FRAMES 1000

typedef struct thread_snapshot *thread_snapshot_t;
typedef struct task_snapshot *task_snapshot_t;

#if CONFIG_KDP_INTERACTIVE_DEBUGGING
extern kdp_send_t    kdp_en_send_pkt;
#endif

/*
 * Globals to support machine_trace_thread_get_kva.
 */
static vm_offset_t prev_target_page = 0;
static vm_offset_t prev_target_kva = 0;
static boolean_t validate_next_addr = TRUE;

/*
 * Method for grabbing timer values safely, in the sense that no infinite
 * loop will occur.  Certain flavors of the timer_grab function, which would
 * seem to be the thing to use, can loop infinitely if called while the
 * timer is in the process of being updated.  Unfortunately, it is (rarely)
 * possible to get inconsistent top and bottom halves of the timer using
 * this method.  This seems insoluble, since stackshot runs in a context
 * where the timer might be half-updated, and has no way of yielding
 * control just long enough to finish the update.
 */
static uint64_t safe_grab_timer_value(struct timer *t)
{
#if defined(__LP64__)
	return t->all_bits;
#else
	uint64_t time = t->high_bits;    /* endian independent grab */
	time = (time << 32) | t->low_bits;
	return time;
#endif
}
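
/*
 * Expected flow, as implied by the pre/postflight pairing below: the caller
 * invokes kdp_snapshot_preflight() to cache its parameters and raise
 * kdp_snapshot, then enters debugger context, where do_stackshot() performs
 * the trace.  kdp_stackshot() releases the wait indicator via
 * kdp_snapshot_postflight() on its exit path, after which the caller can
 * retrieve status with kdp_stack_snapshot_geterror() and
 * kdp_stack_snapshot_bytes_traced().
 */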

/* Cache stack snapshot parameters in preparation for a trace */
void
kdp_snapshot_preflight(int pid, void * tracebuf, uint32_t tracebuf_size, uint32_t flags, uint32_t dispatch_offset)
{
	stack_snapshot_pid = pid;
	stack_snapshot_buf = tracebuf;
	stack_snapshot_bufsize = tracebuf_size;
	stack_snapshot_flags = flags;
	stack_snapshot_dispatch_offset = dispatch_offset;
	kdp_snapshot++;
	/* Mark this debugger as active, since the polled mode driver that
	 * ordinarily does this may not be enabled (yet), or since KDB may be
	 * the primary debugger.
	 */
	old_debugger = current_debugger;
	if (old_debugger != KDP_CUR_DB) {
		current_debugger = KDP_CUR_DB;
	}
}

void
kdp_snapshot_postflight(void)
{
	kdp_snapshot--;
#if CONFIG_KDP_INTERACTIVE_DEBUGGING
	if ((kdp_en_send_pkt == NULL) || (old_debugger == KDB_CUR_DB))
		current_debugger = old_debugger;
#else
	current_debugger = old_debugger;
#endif
}

int
kdp_stack_snapshot_geterror(void)
{
	return stack_snapshot_ret;
}

int
kdp_stack_snapshot_bytes_traced(void)
{
	return stack_snapshot_bytes_traced;
}

static int
kdp_stackshot(int pid, void *tracebuf, uint32_t tracebuf_size, uint32_t trace_flags, uint32_t dispatch_offset, uint32_t *pbytesTraced)
{
	char *tracepos = (char *) tracebuf;
	char *tracebound = tracepos + tracebuf_size;
	uint32_t tracebytes = 0;
	int error = 0, i;

	task_t task = TASK_NULL;
	thread_t thread = THREAD_NULL;
	unsigned framesize = 2 * sizeof(vm_offset_t);

	queue_head_t *task_list = &tasks;
	boolean_t is_active_list = TRUE;

	boolean_t dispatch_p = ((trace_flags & STACKSHOT_GET_DQ) != 0);
	boolean_t save_loadinfo_p = ((trace_flags & STACKSHOT_SAVE_LOADINFO) != 0);
	boolean_t save_kextloadinfo_p = ((trace_flags & STACKSHOT_SAVE_KEXT_LOADINFO) != 0);
	boolean_t save_userframes_p = ((trace_flags & STACKSHOT_SAVE_KERNEL_FRAMES_ONLY) == 0);
	boolean_t save_donating_pids_p = ((trace_flags & STACKSHOT_SAVE_IMP_DONATION_PIDS) != 0);

	if (trace_flags & STACKSHOT_GET_GLOBAL_MEM_STATS) {
		if (tracepos + sizeof(struct mem_and_io_snapshot) > tracebound) {
			error = -1;
			goto error_exit;
		}
		kdp_mem_and_io_snapshot((struct mem_and_io_snapshot *)tracepos);
		tracepos += sizeof(struct mem_and_io_snapshot);
	}

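/*
 * Two-pass walk: first the active task list, then (via the goto at the
 * bottom of the function) the terminated task list.  Each task selected
 * by the pid filter contributes a task_snapshot, optional UUID load info
 * and donating-pid list, and a thread_snapshot plus backtrace for each of
 * its threads, all appended sequentially at tracepos.
 */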
walk_list:
	queue_iterate(task_list, task, task_t, tasks) {
		if ((task == NULL) || !ml_validate_nofault((vm_offset_t) task, sizeof(struct task)))
			goto error_exit;

		int task_pid = pid_from_task(task);
		uint64_t task_uniqueid = proc_uniqueid_from_task(task);
		boolean_t task64 = task_has_64BitAddr(task);

		if (!task->active) {
			/*
			 * Not interested in terminated tasks without threads, and
			 * at the moment, stackshot can't handle a task without a name.
			 */
			if (queue_empty(&task->threads) || task_pid == -1) {
				continue;
			}
		}

		/* Trace everything, unless a process was specified */
		if ((pid == -1) || (pid == task_pid)) {
			task_snapshot_t task_snap;
			thread_snapshot_t tsnap = NULL;
			uint32_t uuid_info_count = 0;
			mach_vm_address_t uuid_info_addr = 0;
			boolean_t have_map = (task->map != NULL) &&
				(ml_validate_nofault((vm_offset_t)(task->map), sizeof(struct _vm_map)));
			boolean_t have_pmap = have_map && (task->map->pmap != NULL) &&
				(ml_validate_nofault((vm_offset_t)(task->map->pmap), sizeof(struct pmap)));
			uint64_t shared_cache_base_address = 0;

			if (have_pmap && task->active && save_loadinfo_p && task_pid > 0) {
				// Read the dyld_all_image_infos struct from the task memory to get UUID array count and location
				if (task64) {
					struct user64_dyld_all_image_infos task_image_infos;
					if (kdp_copyin(task->map->pmap, task->all_image_info_addr, &task_image_infos, sizeof(struct user64_dyld_all_image_infos))) {
						uuid_info_count = (uint32_t)task_image_infos.uuidArrayCount;
						uuid_info_addr = task_image_infos.uuidArray;
					}
				} else {
					struct user32_dyld_all_image_infos task_image_infos;
					if (kdp_copyin(task->map->pmap, task->all_image_info_addr, &task_image_infos, sizeof(struct user32_dyld_all_image_infos))) {
						uuid_info_count = task_image_infos.uuidArrayCount;
						uuid_info_addr = task_image_infos.uuidArray;
					}
				}

				// If we get a NULL uuid_info_addr (which can happen when we catch dyld in the middle of updating
				// this data structure), we zero the uuid_info_count so that we won't even try to save load info
				// for this task.
				if (!uuid_info_addr) {
					uuid_info_count = 0;
				}
			}

			if (have_pmap && save_kextloadinfo_p && task_pid == 0) {
				if (ml_validate_nofault((vm_offset_t)(gLoadedKextSummaries), sizeof(OSKextLoadedKextSummaryHeader))) {
					uuid_info_count = gLoadedKextSummaries->numSummaries + 1; /* include main kernel UUID */
				}
			}

			if (tracepos + sizeof(struct task_snapshot) > tracebound) {
				error = -1;
				goto error_exit;
			}

			task_snap = (task_snapshot_t) tracepos;
			task_snap->snapshot_magic = STACKSHOT_TASK_SNAPSHOT_MAGIC;
			task_snap->pid = task_pid;
			task_snap->uniqueid = task_uniqueid;
			task_snap->nloadinfos = uuid_info_count;
			task_snap->donating_pid_count = 0;

			/* Add the BSD process identifiers */
			if (task_pid != -1)
				proc_name_kdp(task, task_snap->p_comm, sizeof(task_snap->p_comm));
			else
				task_snap->p_comm[0] = '\0';
			task_snap->ss_flags = 0;
			if (task64)
				task_snap->ss_flags |= kUser64_p;
			if (task64 && task_pid == 0)
				task_snap->ss_flags |= kKernel64_p;
			if (!task->active)
				task_snap->ss_flags |= kTerminatedSnapshot;
			if (task->pidsuspended)
				task_snap->ss_flags |= kPidSuspended;
			if (task->frozen)
				task_snap->ss_flags |= kFrozen;

			if (task->effective_policy.darwinbg == 1) {
				task_snap->ss_flags |= kTaskDarwinBG;
			}

			if (task->requested_policy.t_role == TASK_FOREGROUND_APPLICATION) {
				task_snap->ss_flags |= kTaskIsForeground;
			}

			if (task->requested_policy.t_boosted == 1) {
				task_snap->ss_flags |= kTaskIsBoosted;
			}

			if (task->effective_policy.t_sup_active == 1)
				task_snap->ss_flags |= kTaskIsSuppressed;
#if IMPORTANCE_INHERITANCE
			if (task->task_imp_base) {
				if (task->task_imp_base->iit_donor) {
					task_snap->ss_flags |= kTaskIsImpDonor;
				}

				if (task->task_imp_base->iit_live_donor) {
					task_snap->ss_flags |= kTaskIsLiveImpDonor;
				}
			}
#endif

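			/*
			 * Report latency QoS in the external LATENCY_QOS_TIER
			 * encoding ((0xFF << 16) | tier); "unspecified" is
			 * passed through unchanged.
			 */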
			task_snap->latency_qos = (task->effective_policy.t_latency_qos == LATENCY_QOS_TIER_UNSPECIFIED) ?
			                         LATENCY_QOS_TIER_UNSPECIFIED : ((0xFF << 16) | task->effective_policy.t_latency_qos);

			task_snap->suspend_count = task->suspend_count;
			task_snap->task_size = have_pmap ? pmap_resident_count(task->map->pmap) : 0;
			task_snap->faults = task->faults;
			task_snap->pageins = task->pageins;
			task_snap->cow_faults = task->cow_faults;

			task_snap->user_time_in_terminated_threads = task->total_user_time;
			task_snap->system_time_in_terminated_threads = task->total_system_time;
			/*
			 * The throttling counters are maintained as 64-bit counters in the proc
			 * structure. However, we reserve 32 bits (each) for them in the task_snapshot
			 * struct, both to save space and because we do not expect them to overflow
			 * 32 bits. If we find these values overflowing in the future, the fix would
			 * be to simply upgrade these counters to 64-bit in the task_snapshot struct.
			 */
			task_snap->was_throttled = (uint32_t) proc_was_throttled_from_task(task);
			task_snap->did_throttle = (uint32_t) proc_did_throttle_from_task(task);

			/* fetch some useful BSD info: */
			task_snap->p_start_sec = task_snap->p_start_usec = 0;
			proc_starttime_kdp(task->bsd_info, &task_snap->p_start_sec, &task_snap->p_start_usec);
			if (task->shared_region && ml_validate_nofault((vm_offset_t)task->shared_region,
														   sizeof(struct vm_shared_region))) {
				struct vm_shared_region *sr = task->shared_region;

				shared_cache_base_address = sr->sr_base_address + sr->sr_first_mapping;
			}
			if (!shared_cache_base_address
				|| !kdp_copyin(task->map->pmap, shared_cache_base_address + offsetof(struct _dyld_cache_header, uuid), task_snap->shared_cache_identifier, sizeof(task_snap->shared_cache_identifier))) {
				memset(task_snap->shared_cache_identifier, 0x0, sizeof(task_snap->shared_cache_identifier));
			}
			if (task->shared_region) {
				/*
				 * No refcounting here, but we are in debugger
				 * context, so that should be safe.
				 */
				task_snap->shared_cache_slide = task->shared_region->sr_slide_info.slide;
			} else {
				task_snap->shared_cache_slide = 0;
			}

			/* I/O Statistics */
			assert(IO_NUM_PRIORITIES == STACKSHOT_IO_NUM_PRIORITIES);

			if (task->task_io_stats) {
				task_snap->disk_reads_count = task->task_io_stats->disk_reads.count;
				task_snap->disk_reads_size = task->task_io_stats->disk_reads.size;
				task_snap->disk_writes_count = (task->task_io_stats->total_io.count - task->task_io_stats->disk_reads.count);
				task_snap->disk_writes_size = (task->task_io_stats->total_io.size - task->task_io_stats->disk_reads.size);
				for (i = 0; i < IO_NUM_PRIORITIES; i++) {
					task_snap->io_priority_count[i] = task->task_io_stats->io_priority[i].count;
					task_snap->io_priority_size[i] = task->task_io_stats->io_priority[i].size;
				}
				task_snap->paging_count = task->task_io_stats->paging.count;
				task_snap->paging_size = task->task_io_stats->paging.size;
				task_snap->non_paging_count = (task->task_io_stats->total_io.count - task->task_io_stats->paging.count);
				task_snap->non_paging_size = (task->task_io_stats->total_io.size - task->task_io_stats->paging.size);
				task_snap->metadata_count = task->task_io_stats->metadata.count;
				task_snap->metadata_size = task->task_io_stats->metadata.size;
				task_snap->data_count = (task->task_io_stats->total_io.count - task->task_io_stats->metadata.count);
				task_snap->data_size = (task->task_io_stats->total_io.size - task->task_io_stats->metadata.size);
			} else {
				/* zero the I/O stats, from disk_reads_count up to metadata_size */
				memset(&task_snap->disk_reads_count, 0, offsetof(struct task_snapshot, metadata_size) - offsetof(struct task_snapshot, disk_reads_count));
			}
			tracepos += sizeof(struct task_snapshot);

			if (task_pid > 0 && uuid_info_count > 0) {
				uint32_t uuid_info_size = (uint32_t)(task64 ? sizeof(struct user64_dyld_uuid_info) : sizeof(struct user32_dyld_uuid_info));
				uint32_t uuid_info_array_size = uuid_info_count * uuid_info_size;

				if (tracepos + uuid_info_array_size > tracebound) {
					error = -1;
					goto error_exit;
				}

				// Copy in the UUID info array.
				// It may be nonresident, in which case just fix up nloadinfos to 0 in the task_snap.
				if (have_pmap && !kdp_copyin(task->map->pmap, uuid_info_addr, tracepos, uuid_info_array_size))
					task_snap->nloadinfos = 0;
				else
					tracepos += uuid_info_array_size;
			} else if (task_pid == 0 && uuid_info_count > 0) {
				uint32_t uuid_info_size = (uint32_t)sizeof(kernel_uuid_info);
				uint32_t uuid_info_array_size = uuid_info_count * uuid_info_size;
				kernel_uuid_info *output_uuids;

				if (tracepos + uuid_info_array_size > tracebound) {
					error = -1;
					goto error_exit;
				}

				output_uuids = (kernel_uuid_info *)tracepos;

				do {
					if (!kernel_uuid || !ml_validate_nofault((vm_offset_t)kernel_uuid, sizeof(uuid_t))) {
						/* Kernel UUID not found or inaccessible */
						task_snap->nloadinfos = 0;
						break;
					}

					output_uuids[0].imageLoadAddress = (uintptr_t)VM_KERNEL_UNSLIDE(vm_kernel_stext);
					memcpy(&output_uuids[0].imageUUID, kernel_uuid, sizeof(uuid_t));

					if (ml_validate_nofault((vm_offset_t)(&gLoadedKextSummaries->summaries[0]),
											gLoadedKextSummaries->entry_size * gLoadedKextSummaries->numSummaries)) {
						uint32_t kexti;

						for (kexti = 0; kexti < gLoadedKextSummaries->numSummaries; kexti++) {
							output_uuids[1 + kexti].imageLoadAddress = (uintptr_t)VM_KERNEL_UNSLIDE(gLoadedKextSummaries->summaries[kexti].address);
							memcpy(&output_uuids[1 + kexti].imageUUID, &gLoadedKextSummaries->summaries[kexti].uuid, sizeof(uuid_t));
						}

						tracepos += uuid_info_array_size;
					} else {
						/* kext summary invalid, but kernel UUID was copied */
						task_snap->nloadinfos = 1;
						tracepos += uuid_info_size;
						break;
					}
				} while (0);
			}

			if (save_donating_pids_p) {
				task_snap->donating_pid_count = task_importance_list_pids(task, TASK_IMP_LIST_DONATING_PIDS, (int *)tracepos, (unsigned int)((tracebound - tracepos)/sizeof(int)));
				tracepos += sizeof(int) * task_snap->donating_pid_count;
			}

			queue_iterate(&task->threads, thread, thread_t, task_threads) {
				uint64_t tval;

				if ((thread == NULL) || !ml_validate_nofault((vm_offset_t) thread, sizeof(struct thread)))
					goto error_exit;

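				/*
				 * Conservative headroom check: beyond its header, a
				 * thread snapshot may be followed by a dispatch queue
				 * serial and stack frames, so require several
				 * snapshots' worth of space up front (the machine
				 * trace routines also bound themselves by tracebound).
				 */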
				if ((tracepos + 4 * sizeof(struct thread_snapshot)) > tracebound) {
					error = -1;
					goto error_exit;
				}
				if (!save_userframes_p && thread->kernel_stack == 0)
					continue;

				/* Populate the thread snapshot header */
				tsnap = (thread_snapshot_t) tracepos;
				tsnap->thread_id = thread_tid(thread);
				tsnap->state = thread->state;
				tsnap->priority = thread->priority;
				tsnap->sched_pri = thread->sched_pri;
				tsnap->sched_flags = thread->sched_flags;
				tsnap->wait_event = VM_KERNEL_UNSLIDE_OR_PERM(thread->wait_event);
				tsnap->continuation = VM_KERNEL_UNSLIDE(thread->continuation);
				tval = safe_grab_timer_value(&thread->user_timer);
				tsnap->user_time = tval;
				tval = safe_grab_timer_value(&thread->system_timer);
				if (thread->precise_user_kernel_time) {
					tsnap->system_time = tval;
				} else {
					tsnap->user_time += tval;
					tsnap->system_time = 0;
				}
				tsnap->snapshot_magic = STACKSHOT_THREAD_SNAPSHOT_MAGIC;
				bzero(&tsnap->pth_name, STACKSHOT_MAX_THREAD_NAME_SIZE);
				proc_threadname_kdp(thread->uthread, &tsnap->pth_name[0], STACKSHOT_MAX_THREAD_NAME_SIZE);
				tracepos += sizeof(struct thread_snapshot);
				tsnap->ss_flags = 0;
				/* I/O Statistics */
				assert(IO_NUM_PRIORITIES == STACKSHOT_IO_NUM_PRIORITIES);
				if (thread->thread_io_stats) {
					tsnap->disk_reads_count = thread->thread_io_stats->disk_reads.count;
					tsnap->disk_reads_size = thread->thread_io_stats->disk_reads.size;
					tsnap->disk_writes_count = (thread->thread_io_stats->total_io.count - thread->thread_io_stats->disk_reads.count);
					tsnap->disk_writes_size = (thread->thread_io_stats->total_io.size - thread->thread_io_stats->disk_reads.size);
					for (i = 0; i < IO_NUM_PRIORITIES; i++) {
						tsnap->io_priority_count[i] = thread->thread_io_stats->io_priority[i].count;
						tsnap->io_priority_size[i] = thread->thread_io_stats->io_priority[i].size;
					}
					tsnap->paging_count = thread->thread_io_stats->paging.count;
					tsnap->paging_size = thread->thread_io_stats->paging.size;
					tsnap->non_paging_count = (thread->thread_io_stats->total_io.count - thread->thread_io_stats->paging.count);
					tsnap->non_paging_size = (thread->thread_io_stats->total_io.size - thread->thread_io_stats->paging.size);
					tsnap->metadata_count = thread->thread_io_stats->metadata.count;
					tsnap->metadata_size = thread->thread_io_stats->metadata.size;
					tsnap->data_count = (thread->thread_io_stats->total_io.count - thread->thread_io_stats->metadata.count);
					tsnap->data_size = (thread->thread_io_stats->total_io.size - thread->thread_io_stats->metadata.size);
				} else {
					/* zero the I/O stats, from disk_reads_count up to metadata_size */
					memset(&tsnap->disk_reads_count, 0,
						offsetof(struct thread_snapshot, metadata_size) - offsetof(struct thread_snapshot, disk_reads_count));
				}

				if (thread->effective_policy.darwinbg) {
					tsnap->ss_flags |= kThreadDarwinBG;
				}

				tsnap->io_tier = proc_get_effective_thread_policy(thread, TASK_POLICY_IO);
				if (proc_get_effective_thread_policy(thread, TASK_POLICY_PASSIVE_IO)) {
					tsnap->ss_flags |= kThreadIOPassive;
				}

				if (thread->suspend_count > 0) {
					tsnap->ss_flags |= kThreadSuspended;
				}
				if (IPC_VOUCHER_NULL != thread->ith_voucher) {
					tsnap->voucher_identifier = VM_KERNEL_ADDRPERM(thread->ith_voucher);
				}

				tsnap->ts_qos = thread->effective_policy.thep_qos;
				tsnap->total_syscalls = thread->syscalls_mach + thread->syscalls_unix;

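				/*
				 * Dispatch queue serial: thread_dispatchqaddr() yields
				 * the user address holding a pointer to the thread's
				 * dispatch queue; the serial number sits dispatch_offset
				 * bytes into that structure (the offset is supplied by
				 * the caller, since the queue layout belongs to
				 * userspace libdispatch).
				 */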
				if (dispatch_p && (task != kernel_task) && (task->active) && have_pmap) {
					uint64_t dqkeyaddr = thread_dispatchqaddr(thread);
					if (dqkeyaddr != 0) {
						uint64_t dqaddr = 0;
						if (kdp_copyin(task->map->pmap, dqkeyaddr, &dqaddr, (task64 ? 8 : 4)) && (dqaddr != 0)) {
							uint64_t dqserialnumaddr = dqaddr + dispatch_offset;
							uint64_t dqserialnum = 0;
							if (kdp_copyin(task->map->pmap, dqserialnumaddr, &dqserialnum, (task64 ? 8 : 4))) {
								tsnap->ss_flags |= kHasDispatchSerial;
								*(uint64_t *)tracepos = dqserialnum;
								tracepos += 8;
							}
						}
					}
				}
				/* Call through to the machine specific trace routines.
				 * Frames are added past the snapshot header.
				 */
				tracebytes = 0;
				if (thread->kernel_stack != 0) {
#if defined(__LP64__)
					tracebytes = machine_trace_thread64(thread, tracepos, tracebound, MAX_FRAMES, FALSE);
					tsnap->ss_flags |= kKernel64_p;
					framesize = 16;
#else
					tracebytes = machine_trace_thread(thread, tracepos, tracebound, MAX_FRAMES, FALSE);
					framesize = 8;
#endif
				}
				tsnap->nkern_frames = tracebytes/framesize;
				tracepos += tracebytes;
				tracebytes = 0;
				/* Trace user stack, if any */
				if (save_userframes_p && task->active && thread->task->map != kernel_map) {
					/* 64-bit task? */
					if (task_has_64BitAddr(thread->task)) {
						tracebytes = machine_trace_thread64(thread, tracepos, tracebound, MAX_FRAMES, TRUE);
						tsnap->ss_flags |= kUser64_p;
						framesize = 16;
					} else {
						tracebytes = machine_trace_thread(thread, tracepos, tracebound, MAX_FRAMES, TRUE);
						framesize = 8;
					}
				}
				tsnap->nuser_frames = tracebytes/framesize;
				tracepos += tracebytes;
				tracebytes = 0;
			}

			if (!save_userframes_p && tsnap == NULL) {
				/*
				 * No thread info was collected, because no thread had
				 * kernel frames; remove this task's snapshot as well.
				 */
				tracepos = (char *)task_snap;
			}
		}
	}

	if (is_active_list) {
		is_active_list = FALSE;
		task_list = &terminated_tasks;
		goto walk_list;
	}

error_exit:
	/* Release stack snapshot wait indicator */
	kdp_snapshot_postflight();

	*pbytesTraced = (uint32_t)(tracepos - (char *) tracebuf);

	return error;
}

static int
pid_from_task(task_t task)
{
	int pid = -1;

	if (task->bsd_info)
		pid = proc_pid(task->bsd_info);

	return pid;
}

static uint64_t
proc_uniqueid_from_task(task_t task)
{
	uint64_t uniqueid = ~(0ULL);

	if (task->bsd_info)
		uniqueid = proc_uniqueid(task->bsd_info);

	return uniqueid;
}

static uint64_t
proc_was_throttled_from_task(task_t task)
{
	uint64_t was_throttled = 0;

	if (task->bsd_info)
		was_throttled = proc_was_throttled(task->bsd_info);

	return was_throttled;
}

static uint64_t
proc_did_throttle_from_task(task_t task)
{
	uint64_t did_throttle = 0;

	if (task->bsd_info)
		did_throttle = proc_did_throttle(task->bsd_info);

	return did_throttle;
}

static void
kdp_mem_and_io_snapshot(struct mem_and_io_snapshot *memio_snap)
{
	unsigned int pages_reclaimed;
	unsigned int pages_wanted;
	kern_return_t kErr;

	processor_t processor;
	vm_statistics64_t stat;
	vm_statistics64_data_t host_vm_stat;

	processor = processor_list;
	stat = &PROCESSOR_DATA(processor, vm_stat);
	host_vm_stat = *stat;

	if (processor_count > 1) {
		/*
		 * processor_list may be in the process of changing as we are
		 * attempting a stackshot.  Ordinarily it will be lock protected,
		 * but it is not safe to lock in the context of the debugger.
		 * Fortunately we never remove elements from the processor list,
		 * and only add to the end of the list, so we SHOULD be able
		 * to walk it.  If we ever want to truly tear down processors,
		 * this will have to change.
		 */
		while ((processor = processor->processor_list) != NULL) {
			stat = &PROCESSOR_DATA(processor, vm_stat);
			host_vm_stat.compressions += stat->compressions;
			host_vm_stat.decompressions += stat->decompressions;
		}
	}

	memio_snap->snapshot_magic = STACKSHOT_MEM_AND_IO_SNAPSHOT_MAGIC;
	memio_snap->free_pages = vm_page_free_count;
	memio_snap->active_pages = vm_page_active_count;
	memio_snap->inactive_pages = vm_page_inactive_count;
	memio_snap->purgeable_pages = vm_page_purgeable_count;
	memio_snap->wired_pages = vm_page_wire_count;
	memio_snap->speculative_pages = vm_page_speculative_count;
	memio_snap->throttled_pages = vm_page_throttled_count;
	memio_snap->busy_buffer_count = count_busy_buffers();
	memio_snap->filebacked_pages = vm_page_pageable_external_count;
	memio_snap->compressions = (uint32_t)host_vm_stat.compressions;
	memio_snap->decompressions = (uint32_t)host_vm_stat.decompressions;
	memio_snap->compressor_size = VM_PAGE_COMPRESSOR_COUNT;
	kErr = mach_vm_pressure_monitor(FALSE, VM_PRESSURE_TIME_WINDOW, &pages_reclaimed, &pages_wanted);

	if (!kErr) {
		memio_snap->pages_wanted = (uint32_t)pages_wanted;
		memio_snap->pages_reclaimed = (uint32_t)pages_reclaimed;
		memio_snap->pages_wanted_reclaimed_valid = 1;
	} else {
		memio_snap->pages_wanted = 0;
		memio_snap->pages_reclaimed = 0;
		memio_snap->pages_wanted_reclaimed_valid = 0;
	}
}

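/*
 * Copy 'size' bytes from user address 'uaddr' in pmap 'p' into the kernel
 * buffer 'dest', one physically-contiguous chunk at a time.  Both ends are
 * translated to physical addresses and copied with bcopy_phys so the copy
 * cannot fault in debugger context; returns TRUE only if the entire range
 * was resident and copied.
 */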
boolean_t
kdp_copyin(pmap_t p, uint64_t uaddr, void *dest, size_t size)
{
	size_t rem = size;
	char *kvaddr = dest;

	while (rem) {
		ppnum_t upn = pmap_find_phys(p, uaddr);
		uint64_t phys_src = ptoa_64(upn) | (uaddr & PAGE_MASK);
		uint64_t phys_dest = kvtophys((vm_offset_t)kvaddr);
		uint64_t src_rem = PAGE_SIZE - (phys_src & PAGE_MASK);
		uint64_t dst_rem = PAGE_SIZE - (phys_dest & PAGE_MASK);
		size_t cur_size = (uint32_t) MIN(src_rem, dst_rem);
		cur_size = MIN(cur_size, rem);

		if (upn && pmap_valid_page(upn) && phys_dest) {
			bcopy_phys(phys_src, phys_dest, cur_size);
		} else {
			break;
		}
		uaddr += cur_size;
		kvaddr += cur_size;
		rem -= cur_size;
	}
	return (rem == 0);
}

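/*
 * Typically invoked from debugger context; consumes the parameters cached
 * by kdp_snapshot_preflight() and records status for
 * kdp_stack_snapshot_geterror()/kdp_stack_snapshot_bytes_traced().
 */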
void
do_stackshot(void)
{
	stack_snapshot_ret = kdp_stackshot(stack_snapshot_pid,
	    stack_snapshot_buf, stack_snapshot_bufsize,
	    stack_snapshot_flags, stack_snapshot_dispatch_offset,
	    &stack_snapshot_bytes_traced);
}

/*
 * A fantastical routine that tries to be fast about returning
 * translations.  Caches the last page we found a translation
 * for, so that we can be quick about multiple queries to the
 * same page.  It turns out this is exactly the workflow
 * machine_trace_thread and its relatives tend to throw at us.
 *
 * Please zero the nasty global this uses after a bulk lookup;
 * this isn't safe across a switch of the kdp_pmap or changes
 * to a pmap.
 *
 * This also means that if zero is a valid KVA, we are
 * screwed.  Sucks to be us.  Fortunately, this should never
 * happen.
 */
vm_offset_t
machine_trace_thread_get_kva(vm_offset_t cur_target_addr)
{
	unsigned cur_wimg_bits;
	vm_offset_t cur_target_page;
	vm_offset_t cur_phys_addr;
	vm_offset_t kern_virt_target_addr;

	cur_target_page = atop(cur_target_addr);

	if ((cur_target_page != prev_target_page) || validate_next_addr) {
		/*
		 * Alright; it wasn't our previous page.  So
		 * we must validate that there is a page
		 * table entry for this address under the
		 * current kdp_pmap, and that it has default
		 * cache attributes (otherwise it may not be
		 * safe to access it).
		 */
		cur_phys_addr = kdp_vtophys(kdp_pmap ? kdp_pmap : kernel_pmap, cur_target_addr);

		if (!pmap_valid_page((ppnum_t) atop(cur_phys_addr))) {
			return 0;
		}

		cur_wimg_bits = pmap_cache_attributes((ppnum_t) atop(cur_phys_addr));

		if ((cur_wimg_bits & VM_WIMG_MASK) != VM_WIMG_DEFAULT) {
			return 0;
		}

#if __x86_64__
		kern_virt_target_addr = (vm_offset_t) PHYSMAP_PTOV(cur_phys_addr);
#else
#error Oh come on... we should really unify the physical -> kernel virtual interface
#endif
		/* We found a translation, so stash this page */
		prev_target_page = cur_target_page;
		prev_target_kva = (kern_virt_target_addr & ~PAGE_MASK);
		validate_next_addr = FALSE;
		return kern_virt_target_addr;
	} else {
		/* Cache hit: reuse the translation we stashed for this page */
		kern_virt_target_addr = prev_target_kva + (cur_target_addr & PAGE_MASK);
		return kern_virt_target_addr;
	}
}

void
machine_trace_thread_clear_validation_cache(void)
{
	validate_next_addr = TRUE;
}