1/*
2 * Copyright (c) 1999-2010 Apple Inc. All rights reserved.
3 *
4 * @APPLE_LICENSE_HEADER_START@
5 *
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. Please obtain a copy of the License at
10 * http://www.opensource.apple.com/apsl/ and read it before using this
11 * file.
12 *
13 * The Original Code and all software distributed under the License are
14 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
15 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
16 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
18 * Please see the License for the specific language governing rights and
19 * limitations under the License.
20 *
21 * @APPLE_LICENSE_HEADER_END@
22 */
23
24/*
25   cc -I/System/Library/Frameworks/System.framework/Versions/B/PrivateHeaders -DPRIVATE -D__APPLE_PRIVATE -arch x86_64 -arch i386 -O -o latency latency.c -lncurses -lutil
26*/
27
28#include <mach/mach.h>
29#include <stdlib.h>
30#include <stdio.h>
31#include <unistd.h>
32#include <signal.h>
33#include <strings.h>
34#include <nlist.h>
35#include <fcntl.h>
36#include <string.h>
37#include <libc.h>
38#include <termios.h>
39#include <curses.h>
40#include <libutil.h>
41#include <errno.h>
42#include <err.h>
43
44#include <sys/types.h>
45#include <sys/param.h>
46#include <sys/time.h>
47#include <sys/sysctl.h>
48#include <sys/ioctl.h>
49
50#ifndef KERNEL_PRIVATE
51#define KERNEL_PRIVATE
52#include <sys/kdebug.h>
53#undef KERNEL_PRIVATE
54#else
55#include <sys/kdebug.h>
56#endif /*KERNEL_PRIVATE*/
57
58#include <mach/mach_error.h>
59#include <mach/mach_types.h>
60#include <mach/message.h>
61#include <mach/mach_syscalls.h>
62#include <mach/clock_types.h>
63#include <mach/mach_time.h>
64
65#include <libkern/OSTypes.h>
66
67
68int      s_usec_10_bins[10];
69int      s_usec_100_bins[10];
70int      s_msec_1_bins[10];
71int      s_msec_10_bins[5];
72int      s_too_slow;
73int      s_max_latency;
74int      s_min_latency = 0;
75long long s_total_latency = 0;
76int      s_total_samples = 0;
77long     s_thresh_hold;
78int      s_exceeded_threshold = 0;
79
80
81#define N_HIGH_RES_BINS 500
82int      use_high_res_bins = false;
83
84struct i_latencies {
85	int      i_usec_10_bins[10];
86	int      i_usec_100_bins[10];
87	int      i_msec_1_bins[10];
88	int      i_msec_10_bins[5];
89	int      i_too_slow;
90	int      i_max_latency;
91	int      i_min_latency;
92	int      i_total_samples;
93	int	 i_total;
94	int      i_exceeded_threshold;
95	uint64_t i_total_latency;
96};
97
98struct	i_latencies *i_lat;
99boolean_t i_latency_per_cpu = FALSE;
100
101int      i_high_res_bins[N_HIGH_RES_BINS];
102
103long     i_thresh_hold;
104
105int	 watch_priority = 97;
106
107long     start_time;
108long     curr_time;
109long     refresh_time;
110
111
112char *kernelpath = NULL;
113
114typedef struct {
115	void	 *k_sym_addr;	/* kernel symbol address from nm */
116	u_int	  k_sym_len;	/* length of kernel symbol string */
117	char     *k_sym_name;	/* kernel symbol string from nm */
118} kern_sym_t;
119
120kern_sym_t *kern_sym_tbl;	/* pointer to the nm table       */
121int        kern_sym_count;	/* number of entries in nm table */
122
123
124
125#define MAX_ENTRIES 4096
126struct ct {
127	int type;
128	char name[32];
129} codes_tab[MAX_ENTRIES];
130
131char *code_file = NULL;
132int	num_of_codes = 0;
133
134
135double divisor;
136sig_atomic_t gotSIGWINCH = 0;
137int	trace_enabled = 0;
138int	need_new_map = 1;
139int	set_remove_flag = 1;	/* By default, remove trace buffer */
140
141int	RAW_flag = 0;
142int	RAW_fd	 = 0;
143
144uint64_t first_now = 0;
145uint64_t last_now = 0;
146int	first_read = 1;
147
148
149#define	SAMPLE_TIME_USECS 50000
150#define SAMPLE_SIZE 300000
151#define MAX_LOG_COUNT  30       /* limits the number of entries dumped in log_decrementer */
152
153kbufinfo_t bufinfo = {0, 0, 0};
154
155FILE *log_fp = NULL;
156
157uint64_t sample_TOD_secs;
158uint32_t sample_TOD_usecs;
159
160uint64_t cpu_mask;
161
162int	sample_generation = 0;
163int	num_i_latency_cpus = 1;
164int	num_cpus;
165char *my_buffer;
166int	num_entries;
167
168kd_buf **last_decrementer_kd;   /* last DECR_TRAP per cpu */
169
170
171#define NUMPARMS 23
172
173typedef struct event *event_t;
174
175struct event {
176	event_t	  ev_next;
177
178	uintptr_t ev_thread;
179	uint32_t  ev_type;
180	uint64_t  ev_timestamp;
181};
182
183
184typedef struct lookup *lookup_t;
185
186struct lookup {
187	lookup_t  lk_next;
188
189	uintptr_t lk_thread;
190	uintptr_t lk_dvp;
191	long	 *lk_pathptr;
192	long	  lk_pathname[NUMPARMS + 1];
193};
194
195
196typedef struct threadmap *threadmap_t;
197
198struct threadmap {
199	threadmap_t	tm_next;
200
201	uintptr_t	tm_thread;
202	uintptr_t	tm_pthread;
203	char		tm_command[MAXCOMLEN + 1];
204	char		tm_orig_command[MAXCOMLEN + 1];
205};
206
207
208typedef struct threadrun *threadrun_t;
209
210struct threadrun {
211	threadrun_t	tr_next;
212
213	uintptr_t	tr_thread;
214	kd_buf		*tr_entry;
215	uint64_t	tr_timestamp;
216};
217
218
219typedef struct thread_entry *thread_entry_t;
220
221struct thread_entry {
222	thread_entry_t	te_next;
223
224	uintptr_t	te_thread;
225};
226
227
228#define HASH_SIZE       1024
229#define HASH_MASK       1023
230
231event_t         event_hash[HASH_SIZE];
232lookup_t        lookup_hash[HASH_SIZE];
233threadmap_t     threadmap_hash[HASH_SIZE];
234threadrun_t	threadrun_hash[HASH_SIZE];
235
236event_t         event_freelist;
237lookup_t        lookup_freelist;
238threadrun_t	threadrun_freelist;
239threadmap_t     threadmap_freelist;
240threadmap_t     threadmap_temp;
241
242thread_entry_t	thread_entry_freelist;
243thread_entry_t	thread_delete_list;
244thread_entry_t	thread_reset_list;
245thread_entry_t	thread_event_list;
246thread_entry_t	thread_lookup_list;
247thread_entry_t	thread_run_list;
248
249
250#ifndef	RAW_VERSION1
251typedef struct {
252	int version_no;
253	int thread_count;
254	uint64_t TOD_secs;
255	uint32_t TOD_usecs;
256} RAW_header;
257
258#define RAW_VERSION0    0x55aa0000
259#define RAW_VERSION1    0x55aa0101
260#endif
261
262
263#define	USER_MODE	0
264#define KERNEL_MODE	1
265
266
267#define TRACE_DATA_NEWTHREAD	0x07000004
268#define TRACE_STRING_NEWTHREAD	0x07010004
269#define TRACE_STRING_EXEC	0x07010008
270
271#define INTERRUPT         	0x01050000
272#define DECR_TRAP         	0x01090000
273#define DECR_SET          	0x01090004
274#define MACH_vmfault     	0x01300008
275#define MACH_sched        	0x01400000
276#define MACH_stkhandoff   	0x01400008
277#define MACH_makerunnable	0x01400018
278#define MACH_idle		0x01400024
279#define VFS_LOOKUP        	0x03010090
280#define IES_action        	0x050b0018
281#define IES_filter        	0x050b001c
282#define TES_action        	0x050c0010
283#define CQ_action         	0x050d0018
284#define CPUPM_CPUSTER_RUNCOUNT	0x05310144
285
286#define BSC_exit          	0x040C0004
287#define BSC_thread_terminate	0x040c05a4
288
289#define DBG_FUNC_MASK	~(DBG_FUNC_START | DBG_FUNC_END)
290
291#define CPU_NUMBER(kp)	kdbg_get_cpu(kp)
292
293#define EMPTYSTRING	""
294
295
296const char *fault_name[] = {
297	"",
298	"ZeroFill",
299	"PageIn",
300	"COW",
301	"CacheHit",
302	"NoZeroFill",
303	"Guard",
304	"PageInFile",
305	"PageInAnon"
306};
307
308const char *sched_reasons[] = {
309	"N",
310	"P",
311	"Q",
312	"?",
313	"u",
314	"U",
315	"?",
316	"?",
317	"H",
318	"?",
319	"?",
320	"?",
321	"?",
322	"?",
323	"?",
324	"?",
325	"Y"
326};
327
328#define ARRAYSIZE(x) ((int)(sizeof(x) / sizeof(*x)))
329#define MAX_REASON ARRAYSIZE(sched_reasons)
330
331static double handle_decrementer(kd_buf *, int);
332static kd_buf *log_decrementer(kd_buf *kd_beg, kd_buf *kd_end, kd_buf *end_of_sample, double i_latency);
333static void read_command_map(void);
334static void enter_syscall(FILE *fp, kd_buf *kd, int thread, int type, char *command, uint64_t now, uint64_t idelta, uint64_t start_bias, int print_info);
335static void exit_syscall(FILE *fp, kd_buf *kd, int thread, int type, char *command, uint64_t now, uint64_t idelta, uint64_t start_bias, int print_info);
336static void print_entry(FILE *fp, kd_buf *kd, int thread, int type, char *command, uint64_t now, uint64_t idelta, uint64_t start_bias, kd_buf *kd_note);
337static void log_info(uint64_t now, uint64_t idelta, uint64_t start_bias, kd_buf *kd, kd_buf *kd_note);
338static char *find_code(int);
339static void pc_to_string(char *pcstring, uintptr_t pc, int max_len, int mode);
340static void getdivisor(void);
341static int sample_sc(void);
342static void init_code_file(void);
343static void do_kernel_nm(void);
344static void open_logfile(const char*);
345static int binary_search(kern_sym_t *list, int low, int high, uintptr_t addr);
346
347static void create_map_entry(uintptr_t, char *);
348static void check_for_thread_update(uintptr_t thread, int debugid_base, kd_buf *kbufp, char **command);
349static void log_scheduler(kd_buf *kd_start, kd_buf *kd_stop, kd_buf *end_of_sample, double s_latency, uintptr_t thread);
350static int check_for_scheduler_latency(int type, uintptr_t *thread, uint64_t now, kd_buf *kd, kd_buf **kd_start, double *latency);
351static void open_rawfile(const char *path);
352
353static void screen_update(FILE *);
354
355static void set_enable(int);
356static void set_remove(void);
357
358static int
359quit(char *s)
360{
361	if (!RAW_flag) {
362		if (trace_enabled) {
363			set_enable(0);
364		}
365		/*
366		 *  This flag is turned off when calling
367		 * quit() due to a set_remove() failure.
368		 */
369		if (set_remove_flag) {
370			set_remove();
371		}
372	}
373	printf("latency: ");
374	if (s) {
375		printf("%s", s);
376	}
377	exit(1);
378}
379
void
set_enable(int val)
{
	/*
	 * Turn kernel trace collection on (val != 0) or off (val == 0)
	 * via the KERN_KDENABLE sysctl.  Any failure is fatal.
	 */
	size_t needed;
	int mib[] = { CTL_KERN, KERN_KDEBUG, KERN_KDENABLE, val };

	if (sysctl(mib, ARRAYSIZE(mib), NULL, &needed, NULL, 0) < 0) {
		quit("trace facility failure, KERN_KDENABLE\n");
	}
}
390
void
set_numbufs(int nbufs)
{
	/*
	 * Request 'nbufs' kernel trace buffer entries (KERN_KDSETBUF)
	 * and then allocate them (KERN_KDSETUP).  Either failure is
	 * fatal.
	 */
	size_t needed;
	int mib_setbuf[] = { CTL_KERN, KERN_KDEBUG, KERN_KDSETBUF, nbufs };
	int mib_setup[]  = { CTL_KERN, KERN_KDEBUG, KERN_KDSETUP };

	if (sysctl(mib_setbuf, ARRAYSIZE(mib_setbuf), NULL, &needed, NULL, 0) < 0) {
		quit("trace facility failure, KERN_KDSETBUF\n");
	}
	if (sysctl(mib_setup, ARRAYSIZE(mib_setup), NULL, &needed, NULL, 0) < 0) {
		quit("trace facility failure, KERN_KDSETUP\n");
	}
}
405
void
set_pidexclude(int pid, int on_off)
{
	/*
	 * Exclude (on_off != 0) or re-include (on_off == 0) 'pid' from
	 * trace collection via KERN_KDPIDEX.
	 *
	 * NOTE(review): the sysctl result is deliberately not checked
	 * here, unlike the sibling wrappers -- presumably because the
	 * re-include call at shutdown may race process exit.  Confirm
	 * before adding error handling.
	 */
	size_t needed = sizeof(kd_regtype);
	int mib[] = { CTL_KERN, KERN_KDEBUG, KERN_KDPIDEX };
	kd_regtype kr = {
		.type = KDBG_TYPENONE,
		.value1 = pid,
		.value2 = on_off
	};

	sysctl(mib, ARRAYSIZE(mib), &kr, &needed, NULL, 0);
}
420
void
get_bufinfo(kbufinfo_t *val)
{
	/*
	 * Fetch the current trace-buffer configuration (entry count,
	 * thread map size, flags) into *val via KERN_KDGETBUF.
	 * Failure is fatal.
	 */
	int mib[] = { CTL_KERN, KERN_KDEBUG, KERN_KDGETBUF };
	size_t needed = sizeof (*val);

	/*
	 * Pass NULL/0 for the unused "new value" arguments, matching
	 * the other KERN_KDEBUG sysctl wrappers in this file (the
	 * original passed bare 0s here).
	 */
	if (sysctl(mib, ARRAYSIZE(mib), val, &needed, NULL, 0) < 0) {
		quit("trace facility failure, KERN_KDGETBUF\n");
	}
}
431
void
set_remove(void)
{
	/*
	 * Release the kernel trace buffer (KERN_KDREMOVE).  EBUSY means
	 * another tracing tool currently owns the facility; any failure
	 * clears set_remove_flag first so quit() does not try to remove
	 * the buffer again on its way out.
	 */
	size_t needed;
	int mib[] = { CTL_KERN, KERN_KDEBUG, KERN_KDREMOVE };

	errno = 0;

	if (sysctl(mib, ARRAYSIZE(mib), NULL, &needed, NULL, 0) < 0) {
		set_remove_flag = 0;

		quit(errno == EBUSY
		     ? "the trace facility is currently in use...\n         fs_usage, sc_usage, and latency use this feature.\n\n"
		     : "trace facility failure, KERN_KDREMOVE\n");
	}
}
449
450
451void
452write_high_res_latencies(void)
453{
454	int i;
455	FILE *f;
456
457	if (use_high_res_bins) {
458		if ((f = fopen("latencies.csv","w"))) {
459			for (i = 0; i < N_HIGH_RES_BINS; i++) {
460				fprintf(f, "%d,%d\n", i, i_high_res_bins[i]);
461			}
462			fclose(f);
463		}
464	}
465}
466
void
sigintr(int signo __attribute__((unused)))
{
	/*
	 * SIGINT handler: flush the optional high-res histogram to disk,
	 * stop tracing, re-include our own pid, write one final summary
	 * (to the log file if open, else the curses screen), tear down
	 * curses, release the trace buffer, and exit.
	 *
	 * The call order matters: screen_update() must run before
	 * endwin() destroys the curses state it may draw into.
	 */
	write_high_res_latencies();

	set_enable(0);
	set_pidexclude(getpid(), 0);
	screen_update(log_fp);
	endwin();
	set_remove();

	exit(1);
}
480
481/* exit under normal conditions -- signal handler */
void
leave(int signo __attribute__((unused)))
{
	/*
	 * SIGQUIT/SIGTERM/SIGHUP handler: same teardown sequence as
	 * sigintr(), except no final summary is printed.  Order matters
	 * here too: curses must be shut down (endwin) before exiting.
	 */
	write_high_res_latencies();

	set_enable(0);
	set_pidexclude(getpid(), 0);
	endwin();
	set_remove();

	exit(1);
}
494
void
sigwinch(int signo __attribute__((unused)))
{
	/*
	 * SIGWINCH handler: only record that the terminal was resized.
	 * The main sampling loop notices the flag and re-initializes
	 * the curses screen -- no unsafe work is done in the handler.
	 */
	gotSIGWINCH = 1;
}
500
void
print_total(FILE *fp, char *s, int total)
{
	/*
	 * Emit one histogram-summary line: the label 's', the
	 * scheduler-side total, the aggregate interrupt total, and
	 * (with -m) one column per cpu.  Output goes to 'fp' when
	 * logging, otherwise to the curses screen.
	 *
	 * Side effect: each cpu's running i_total accumulator is reset
	 * to zero for the next group of bins.
	 */
	int  cpu;
	int  clen;
	int  itotal;
	struct i_latencies *il;
	char tbuf[512];

	/* Sum the per-cpu interrupt bucket totals accumulated by screen_update(). */
	for (itotal = 0, cpu = 0; cpu < num_i_latency_cpus; cpu++) {
		il = &i_lat[cpu];
		itotal += il->i_total;
	}
	clen = sprintf(tbuf, "%s  %10d      %9d", s, total, itotal);

	for (cpu = 0; cpu < num_i_latency_cpus; cpu++) {
		il = &i_lat[cpu];

		if (i_latency_per_cpu == TRUE) {
			clen += sprintf(&tbuf[clen], " %9d", il->i_total);
		}
		/* Start the next accumulation interval from zero. */
		il->i_total = 0;
	}
	sprintf(&tbuf[clen], "\n");
	if (fp) {
		fprintf(fp, "%s", tbuf);
	} else {
		/*
		 * Pass tbuf as an argument, not as the format string:
		 * printw(tbuf) would misinterpret any '%' that ends up
		 * in the buffer (format-string hazard).
		 */
		printw("%s", tbuf);
	}
}
532
533
534
void
screen_update(FILE *fp)
{
	/*
	 * Render the complete latency report: to the curses screen when
	 * fp == NULL, or appended to the log file 'fp' otherwise.
	 *
	 * Reads the scheduler-side globals (s_*) and the per-cpu
	 * interrupt statistics (i_lat[]).  Side effect: each cpu's
	 * i_total is accumulated per bin group and reset by the
	 * print_total() calls, so the function must emit the groups in
	 * order.
	 */
	int i;
	int cpu;
	int clen;
	int itotal, stotal;
	int elapsed_secs;
	int elapsed_mins;
	int elapsed_hours;
	/* min_lat/max_lat are seeded on the cpu == 0 iteration below. */
	int min_lat, max_lat;
	uint64_t tot_lat;
	unsigned int average_s_latency;
	unsigned int average_i_latency;
	struct i_latencies *il;
	char tbuf[1024];

	if (fp == NULL) {
		erase();
		move(0, 0);
	} else {
		/* Separator between successive snapshots in the log file. */
		fprintf(fp,"\n\n===================================================================================================\n");
	}
	/*
	 *  Display the current time.
	 *  "ctime" always returns a string that looks like this:
	 *
	 *	Sun Sep 16 01:03:52 1973
	 *      012345678901234567890123
	 *	          1         2
	 *
	 *  We want indices 11 thru 18 (length 8).
	 */
	if (RAW_flag) {
		/*
		 * Replaying a raw file: derive the wall clock and the
		 * elapsed time from the recorded samples, not from the
		 * live clock.
		 */
		curr_time = sample_TOD_secs;
		elapsed_secs = ((last_now - first_now) / divisor) / 1000000;
	} else {
		elapsed_secs = curr_time - start_time;
	}

	/* Break the elapsed time down into h:mm:ss fields. */
	elapsed_hours = elapsed_secs / 3600;
	elapsed_secs -= elapsed_hours * 3600;
	elapsed_mins = elapsed_secs / 60;
	elapsed_secs -= elapsed_mins * 60;

	sprintf(tbuf, "%-19.19s                            %2ld:%02ld:%02ld\n", &(ctime(&curr_time)[0]),
		(long)elapsed_hours, (long)elapsed_mins, (long)elapsed_secs);
	if (fp) {
		fprintf(fp, "%s", tbuf);
	} else {
		printw(tbuf);
	}

	/* Column headers: scheduler column, then interrupt column(s). */
	sprintf(tbuf, "                     SCHEDULER     INTERRUPTS\n");
	if (fp) {
		fprintf(fp, "%s", tbuf);
	} else {
		printw(tbuf);
	}

	if (i_latency_per_cpu == TRUE) {
		/* -m mode: add one sub-header per cpu after the Total column. */
		clen = sprintf(tbuf, "                                        Total");

		for (cpu = 0; cpu < num_i_latency_cpus; cpu++) {
			if (cpu <= 9) {
				clen += sprintf(&tbuf[clen], "     CPU %d", cpu);
			} else {
				/* Two-digit cpu numbers get one less leading space. */
				clen += sprintf(&tbuf[clen], "    CPU %d", cpu);
			}
		}
		if (fp) {
			fprintf(fp, "%s", tbuf);
		} else {
			printw(tbuf);
		}

		clen = sprintf(tbuf, "\n-------------------------------------------------------");

		for (cpu = 1; cpu < num_i_latency_cpus; cpu++) {
			clen += sprintf(&tbuf[clen], "----------");
		}
		if (fp) {
			fprintf(fp, "%s", tbuf);
		} else {
			printw(tbuf);
		}
	} else {
		sprintf(tbuf, "---------------------------------------------");
		if (fp) {
			fprintf(fp, "%s", tbuf);
		} else {
			printw(tbuf);
		}
	}
	/* Total sample counts, scheduler-side vs. interrupt-side. */
	for (itotal = 0, cpu = 0; cpu < num_i_latency_cpus; cpu++) {
		il = &i_lat[cpu];
		itotal += il->i_total_samples;
	}
	clen = sprintf(tbuf, "\ntotal_samples       %10d      %9d", s_total_samples, itotal);

	if (i_latency_per_cpu == TRUE) {
		for (cpu = 0; cpu < num_i_latency_cpus; cpu++) {
			il = &i_lat[cpu];

			clen += sprintf(&tbuf[clen], " %9d", il->i_total_samples);
		}
	}
	sprintf(&tbuf[clen], "\n");
	if (fp) {
		fprintf(fp, "%s", tbuf);
	} else {
		printw(tbuf);
	}


	/* Bin group 1: 0..100 usecs in 10-usec steps. */
	for (stotal = 0, i = 0; i < 10; i++) {
		for (itotal = 0, cpu = 0; cpu < num_i_latency_cpus; cpu++) {
			il = &i_lat[cpu];

			itotal += il->i_usec_10_bins[i];
			il->i_total += il->i_usec_10_bins[i];
		}
		clen = sprintf(tbuf, "\ndelays < %3d usecs  %10d      %9d", (i + 1) * 10, s_usec_10_bins[i], itotal);

		stotal += s_usec_10_bins[i];

		if (i_latency_per_cpu == TRUE) {
			for (cpu = 0; cpu < num_i_latency_cpus; cpu++) {
				il = &i_lat[cpu];

				clen += sprintf(&tbuf[clen], " %9d", il->i_usec_10_bins[i]);
			}
		}
		if (fp) {
			fprintf(fp, "%s", tbuf);
		} else {
			printw(tbuf);
		}
	}
	print_total(fp, "\ntotal  < 100 usecs", stotal);

	/* Bin group 2: 100 usecs..1 msec in 100-usec steps (bin 0 is covered above). */
	for (stotal = 0, i = 1; i < 10; i++) {
		for (itotal = 0, cpu = 0; cpu < num_i_latency_cpus; cpu++) {
			il = &i_lat[cpu];

			itotal += il->i_usec_100_bins[i];
			il->i_total += il->i_usec_100_bins[i];
		}
		if (i < 9) {
			clen = sprintf(tbuf, "\ndelays < %3d usecs  %10d      %9d", (i + 1) * 100, s_usec_100_bins[i], itotal);
		} else {
			/* The last bucket is labelled in msec rather than "1000 usecs". */
			clen = sprintf(tbuf, "\ndelays <   1 msec   %10d      %9d", s_usec_100_bins[i], itotal);
		}

		stotal += s_usec_100_bins[i];

		if (i_latency_per_cpu == TRUE) {
			for (cpu = 0; cpu < num_i_latency_cpus; cpu++) {
				il = &i_lat[cpu];

				clen += sprintf(&tbuf[clen], " %9d", il->i_usec_100_bins[i]);
			}
		}
		if (fp) {
			fprintf(fp, "%s", tbuf);
		} else {
			printw(tbuf);
		}
	}
	print_total(fp, "\ntotal  <   1 msec ", stotal);


	/* Bin group 3: 1..10 msecs in 1-msec steps. */
	for (stotal = 0, i = 1; i < 10; i++) {
		for (itotal = 0, cpu = 0; cpu < num_i_latency_cpus; cpu++) {
			il = &i_lat[cpu];

			itotal += il->i_msec_1_bins[i];
			il->i_total += il->i_msec_1_bins[i];
		}
		clen = sprintf(tbuf, "\ndelays < %3d msecs  %10d      %9d", (i + 1), s_msec_1_bins[i], itotal);

		stotal += s_msec_1_bins[i];

		if (i_latency_per_cpu == TRUE) {
			for (cpu = 0; cpu < num_i_latency_cpus; cpu++) {
				il = &i_lat[cpu];

				clen += sprintf(&tbuf[clen], " %9d", il->i_msec_1_bins[i]);
			}
		}
		if (fp) {
			fprintf(fp, "%s", tbuf);
		} else {
			printw(tbuf);
		}
	}
	print_total(fp, "\ntotal  <  10 msecs", stotal);

	/* Bin group 4: 10..50 msecs in 10-msec steps. */
	for (stotal = 0, i = 1; i < 5; i++) {
		for (itotal = 0, cpu = 0; cpu < num_i_latency_cpus; cpu++) {
			il = &i_lat[cpu];

			itotal += il->i_msec_10_bins[i];
			il->i_total += il->i_msec_10_bins[i];
		}
		clen = sprintf(tbuf, "\ndelays < %3d msecs  %10d      %9d", (i + 1)*10, s_msec_10_bins[i], itotal);

		stotal += s_msec_10_bins[i];

		if (i_latency_per_cpu == TRUE) {
			for (cpu = 0; cpu < num_i_latency_cpus; cpu++) {
				il = &i_lat[cpu];

				clen += sprintf(&tbuf[clen], " %9d", il->i_msec_10_bins[i]);
			}
		}
		if (fp) {
			fprintf(fp, "%s", tbuf);
		} else {
			printw(tbuf);
		}
	}
	print_total(fp, "\ntotal  <  50 msecs", stotal);


	/* Overflow bucket: everything slower than 50 msecs. */
	for (itotal = 0, cpu = 0; cpu < num_i_latency_cpus; cpu++) {
		il = &i_lat[cpu];
		itotal += il->i_too_slow;
	}
	clen = sprintf(tbuf, "\ndelays >  50 msecs  %10d      %9d", s_too_slow, itotal);

	if (i_latency_per_cpu == TRUE) {
		for (cpu = 0; cpu < num_i_latency_cpus; cpu++) {
			il = &i_lat[cpu];

			clen += sprintf(&tbuf[clen], " %9d", il->i_too_slow);
		}
	}
	if (fp) {
		fprintf(fp, "%s", tbuf);
	} else {
		printw(tbuf);
	}

	/* Minimum latency: seeded on cpu 0, then min across the rest. */
	for (cpu = 0; cpu < num_i_latency_cpus; cpu++) {
		il = &i_lat[cpu];

		if (cpu == 0 || (il->i_min_latency < min_lat)) {
			min_lat = il->i_min_latency;
		}
	}
	clen = sprintf(tbuf, "\n\nminimum latency(usecs) %7d      %9d", s_min_latency, min_lat);

	if (i_latency_per_cpu == TRUE) {
		for (cpu = 0; cpu < num_i_latency_cpus; cpu++) {
			il = &i_lat[cpu];

			clen += sprintf(&tbuf[clen], " %9d", il->i_min_latency);
		}
	}
	if (fp) {
		fprintf(fp, "%s", tbuf);
	} else {
		printw(tbuf);
	}


	/* Maximum latency: same seeding pattern as the minimum above. */
	for (cpu = 0; cpu < num_i_latency_cpus; cpu++) {
		il = &i_lat[cpu];

		if (cpu == 0 || (il->i_max_latency > max_lat)) {
			max_lat = il->i_max_latency;
		}
	}
	clen = sprintf(tbuf, "\nmaximum latency(usecs) %7d      %9d", s_max_latency, max_lat);

	if (i_latency_per_cpu == TRUE) {
		for (cpu = 0; cpu < num_i_latency_cpus; cpu++) {
			il = &i_lat[cpu];

			clen += sprintf(&tbuf[clen], " %9d", il->i_max_latency);
		}
	}
	if (fp) {
		fprintf(fp, "%s", tbuf);
	} else {
		printw(tbuf);
	}

	/* Average scheduler latency; guard against division by zero. */
	if (s_total_samples) {
		average_s_latency = (unsigned int)(s_total_latency/s_total_samples);
	} else {
		average_s_latency = 0;
	}

	/* Aggregate average interrupt latency across all cpus. */
	for (itotal = 0, tot_lat = 0, cpu = 0; cpu < num_i_latency_cpus; cpu++) {
		il = &i_lat[cpu];

		itotal += il->i_total_samples;
		tot_lat += il->i_total_latency;
	}
	if (itotal) {
		average_i_latency = (unsigned)(tot_lat/itotal);
	} else {
		average_i_latency = 0;
	}

	clen = sprintf(tbuf, "\naverage latency(usecs) %7d      %9d", average_s_latency, average_i_latency);

	if (i_latency_per_cpu == TRUE) {
		/* Recompute the average per cpu for the extra columns. */
		for (cpu = 0; cpu < num_i_latency_cpus; cpu++) {
			il = &i_lat[cpu];

			if (il->i_total_samples) {
				average_i_latency = (unsigned int)(il->i_total_latency/il->i_total_samples);
			} else {
				average_i_latency = 0;
			}

			clen += sprintf(&tbuf[clen], " %9d", average_i_latency);
		}
	}
	if (fp) {
		fprintf(fp, "%s", tbuf);
	} else {
		printw(tbuf);
	}

	/* Counts of samples that exceeded the -st / -it thresholds. */
	for (itotal = 0, cpu = 0; cpu < num_i_latency_cpus; cpu++) {
		il = &i_lat[cpu];

		itotal += il->i_exceeded_threshold;
	}
	clen = sprintf(tbuf, "\nexceeded threshold     %7d      %9d", s_exceeded_threshold, itotal);

	if (i_latency_per_cpu == TRUE) {
		for (cpu = 0; cpu < num_i_latency_cpus; cpu++) {
			il = &i_lat[cpu];

			clen += sprintf(&tbuf[clen], " %9d", il->i_exceeded_threshold);
		}
	}
	sprintf(&tbuf[clen], "\n");

	if (fp) {
		fprintf(fp, "%s", tbuf);
	} else {
		printw(tbuf);
	}

	/* Push the output: repaint the screen or flush the log file. */
	if (fp == NULL) {
		refresh();
	} else {
		fflush(fp);
	}
}
891
int
exit_usage(void)
{
	/*
	 * Print the command-line synopsis and option summary to stderr,
	 * then terminate with status 1.  The text is kept in a table so
	 * the output reads top-to-bottom in one place.
	 */
	static const char * const usage_text[] = {
		"Usage: latency [-p priority] [-h] [-m] [-st threshold] [-it threshold]\n",
		"               [-c codefile] [-l logfile] [-R rawfile] [-n kernel]\n\n",
		"  -p    specify scheduling priority to watch... default is realtime\n",
		"  -h    Display high resolution interrupt latencies and write them to latencies.csv (truncate existing file) upon exit.\n",
		"  -st   set scheduler latency threshold in microseconds... if latency exceeds this, then log trace\n",
		"  -m    specify per-CPU interrupt latency reporting\n",
		"  -it   set interrupt latency threshold in microseconds... if latency exceeds this, then log trace\n",
		"  -c    specify name of codes file... default is /usr/share/misc/trace.codes\n",
		"  -l    specify name of file to log trace entries to when the specified threshold is exceeded\n",
		"  -R    specify name of raw trace file to process\n",
		"  -n    specify kernel... default is /mach_kernel\n",
		"\nlatency must be run as root\n\n",
	};
	int i;

	for (i = 0; i < ARRAYSIZE(usage_text); i++) {
		fputs(usage_text[i], stderr);
	}

	exit(1);
}
912
913
int
main(int argc, char *argv[])
{
	/*
	 * Entry point.  Parses options, loads the kernel symbol table
	 * and trace codes, then either replays a raw trace file (-R) or
	 * configures live kernel tracing and runs the curses sampling
	 * loop forever (exit is via the signal handlers).
	 */
	int i;

	/*
	 * Re-exec ourselves if necessary so our architecture slice
	 * matches the running kernel (fat binary: x86_64 vs i386).
	 */
	if (0 != reexec_to_match_kernel()) {
		fprintf(stderr, "Could not re-execute: %d\n", errno);
		exit(1);
	}
	/*
	 * Hand-rolled option parsing: each value-taking flag consumes
	 * the following argv entry; unknown flags print usage and exit.
	 */
	while (argc > 1) {

		if (strcmp(argv[1], "-R") == 0) {
			argc--;
			argv++;

			if (argc > 1) {
				open_rawfile(argv[1]);
			} else {
				exit_usage();
			}

			RAW_flag = 1;

		} else if (strcmp(argv[1], "-p") == 0) {
			argc--;
			argv++;

			if (argc > 1) {
				watch_priority = atoi(argv[1]);
			} else {
				exit_usage();
			}
		} else if (strcmp(argv[1], "-st") == 0) {
			argc--;
			argv++;

			if (argc > 1) {
				s_thresh_hold = atoi(argv[1]);
			} else {
				exit_usage();
			}
		} else if (strcmp(argv[1], "-it") == 0) {
			argc--;
			argv++;

			if (argc > 1) {
				i_thresh_hold = atoi(argv[1]);
			} else {
				exit_usage();
			}
		} else if (strcmp(argv[1], "-c") == 0) {
			argc--;
			argv++;

			if (argc > 1) {
				code_file = argv[1];
			} else {
				exit_usage();
			}
		} else if (strcmp(argv[1], "-l") == 0) {
			argc--;
			argv++;

			if (argc > 1) {
				open_logfile(argv[1]);
			} else {
				exit_usage();
			}
		} else if (strcmp(argv[1], "-n") == 0) {
			argc--;
			argv++;

			if (argc > 1) {
				kernelpath = argv[1];
			} else {
				exit_usage();
			}
		} else if (strcmp(argv[1], "-h") == 0) {
			use_high_res_bins = TRUE;

		} else if (strcmp(argv[1], "-m") == 0) {
			i_latency_per_cpu = TRUE;

		} else {
			exit_usage();
		}

		argc--;
		argv++;
	}
	/* Live tracing requires root; replaying a raw file does not. */
	if (!RAW_flag) {
		if (geteuid() != 0) {
			printf("'latency' must be run as root...\n");
			exit(1);
		}
	}
	/* Fall back to the documented defaults for -n and -c. */
	if (kernelpath == NULL) {
		kernelpath = "/mach_kernel";
	}

	if (code_file == NULL) {
		code_file = "/usr/share/misc/trace.codes";
	}

	/* Load kernel symbols (for pc_to_string) and the trace code names. */
	do_kernel_nm();

	getdivisor();

	init_code_file();

	if (!RAW_flag) {
		/* Live mode: bring up curses and install the signal handlers. */
		if (initscr() == NULL) {
			printf("Unrecognized TERM type, try vt100\n");
			exit(1);
		}
		clear();
		refresh();

		signal(SIGWINCH, sigwinch);
		signal(SIGINT, sigintr);
		signal(SIGQUIT, leave);
		signal(SIGTERM, leave);
		signal(SIGHUP, leave);

		/*
		 * grab the number of cpus and scale the buffer size
		 */
		int mib[] = { CTL_HW, HW_NCPU };
		size_t len = sizeof(num_cpus);

		sysctl(mib, ARRAYSIZE(mib), &num_cpus, &len, NULL, 0);

		/* Reset and size the kernel trace buffer, excluding our own pid. */
		set_remove();
		set_numbufs(SAMPLE_SIZE * num_cpus);

		get_bufinfo(&bufinfo);

		set_enable(0);

		set_pidexclude(getpid(), 1);
		set_enable(1);

		num_entries = bufinfo.nkdbufs;
	} else {
		/* Raw replay: fixed read chunk, worst-case cpu count. */
		num_entries = 50000;
		num_cpus    = 128;
	}

	/* One bit per cpu, used when walking per-cpu decrementer state. */
	for (cpu_mask = 0, i = 0; i < num_cpus; i++)
		cpu_mask |= ((uint64_t)1 << i);

	if ((my_buffer = malloc(num_entries * sizeof(kd_buf))) == NULL) {
		quit("can't allocate memory for tracing info\n");
	}

	if ((last_decrementer_kd = (kd_buf **)malloc(num_cpus * sizeof(kd_buf *))) == NULL) {
		quit("can't allocate memory for decrementer tracing info\n");
	}

	/* Without -m all interrupt latencies collapse into one bucket. */
	if (i_latency_per_cpu == FALSE) {
		num_i_latency_cpus = 1;
	} else {
		num_i_latency_cpus = num_cpus;
	}

	if ((i_lat = (struct i_latencies *)malloc(num_i_latency_cpus * sizeof(struct i_latencies))) == NULL) {
		quit("can't allocate memory for interrupt latency info\n");
	}

	bzero((char *)i_lat, num_i_latency_cpus * sizeof(struct i_latencies));

	if (RAW_flag) {
		/* Consume the whole raw file, then print one final report. */
		while (sample_sc()) {
			continue;
		}

		if (log_fp) {
			screen_update(log_fp);
		}

		screen_update(stdout);

	} else {
		uint64_t adelay;
		double	fdelay;
		double	nanosecs_to_sleep;

		/* Convert the sampling interval to mach absolute-time units. */
		nanosecs_to_sleep = (double)(SAMPLE_TIME_USECS * 1000);
		fdelay = nanosecs_to_sleep * (divisor /1000);
		adelay = (uint64_t)fdelay;

		trace_enabled = 1;

		start_time = time(NULL);
		refresh_time = start_time;

		/*
		 * Main loop: repaint at most once a second, sleep one
		 * sampling interval, drain the trace buffer, and handle
		 * any pending terminal resize.  Never returns; the
		 * signal handlers perform cleanup and exit.
		 */
		for (;;) {
			curr_time = time(NULL);

			if (curr_time >= refresh_time) {
				screen_update(NULL);
				refresh_time = curr_time + 1;
			}
			mach_wait_until(mach_absolute_time() + adelay);

			sample_sc();

			if (gotSIGWINCH) {
				/*
				 * No need to check for initscr error return.
				 * We won't get here if it fails on the first call.
				 */
				endwin();
				clear();
				refresh();

				gotSIGWINCH = 0;
			}
		}
	}
}
1135
1136
1137
void
read_command_map(void)
{
	/*
	 * Populate the thread -> command-name map, either from the
	 * header of a raw trace file (-R) or from the live kernel via
	 * KERN_KDTHRMAP.  Failure to read the map is non-fatal: events
	 * simply display without command names.
	 */
	kd_threadmap *mapptr = 0;
	int	total_threads = 0;
	size_t	size;
	off_t	offset;
	int	i;
	RAW_header header = {0};

	if (RAW_flag) {
		if (read(RAW_fd, &header, sizeof(RAW_header)) != sizeof(RAW_header)) {
			perror("read failed");
			exit(2);
		}
		if (header.version_no != RAW_VERSION1) {
			/*
			 * Legacy (version-0) files have no real header:
			 * the file starts with just the thread count.
			 * Rewind and re-read accordingly, synthesizing
			 * a timestamp of "now".
			 */
			header.version_no = RAW_VERSION0;
			header.TOD_secs = time(NULL);
			header.TOD_usecs = 0;

			lseek(RAW_fd, (off_t)0, SEEK_SET);

			if (read(RAW_fd, &header.thread_count, sizeof(int)) != sizeof(int)) {
				perror("read failed");
				exit(2);
			}
		}
		total_threads = header.thread_count;

		sample_TOD_secs = header.TOD_secs;
		sample_TOD_usecs = header.TOD_usecs;

		if (total_threads == 0 && header.version_no != RAW_VERSION0) {
			/*
			 * Version-1 files pad the (empty) thread map out
			 * to a 4KB boundary; skip to the event data.
			 */
			offset = lseek(RAW_fd, (off_t)0, SEEK_CUR);
			offset = (offset + (4095)) & ~4095;

			lseek(RAW_fd, offset, SEEK_SET);
		}
	} else {
		total_threads = bufinfo.nkdthreads;
	}

	size = total_threads * sizeof(kd_threadmap);

	if (size == 0 || ((mapptr = (kd_threadmap *) malloc(size)) == 0)) {
		return;
	}
	bzero (mapptr, size);

	/*
	 * Now read the threadmap
	 */
	if (RAW_flag) {
		if (read(RAW_fd, mapptr, size) != size) {
			printf("Can't read the thread map -- this is not fatal\n");
		}
		if (header.version_no != RAW_VERSION0) {
			/* Again skip the 4KB padding that follows the map. */
			offset = lseek(RAW_fd, (off_t)0, SEEK_CUR);
			offset = (offset + (4095)) & ~4095;

			lseek(RAW_fd, offset, SEEK_SET);
		}
	} else {
		int mib[] = { CTL_KERN, KERN_KDEBUG, KERN_KDTHRMAP};
		if (sysctl(mib, ARRAYSIZE(mib), mapptr, &size, NULL, 0) < 0) {
			/*
			 * This is not fatal -- just means I cant map command strings
			 */
			printf("Can't read the thread map -- this is not fatal\n");

			total_threads = 0;
		}
	}
	/* Hash every (thread, command) pair into the threadmap table. */
	for (i = 0; i < total_threads; i++) {
		create_map_entry(mapptr[i].thread, &mapptr[i].command[0]);
	}
	free(mapptr);
}
1216
1217void
1218create_map_entry(uintptr_t thread, char *command)
1219{
1220	threadmap_t tme;
1221
1222	if ((tme = threadmap_freelist)) {
1223		threadmap_freelist = tme->tm_next;
1224	} else {
1225		tme = (threadmap_t)malloc(sizeof(struct threadmap));
1226	}
1227
1228	tme->tm_thread = thread;
1229
1230	(void)strncpy (tme->tm_command, command, MAXCOMLEN);
1231	tme->tm_command[MAXCOMLEN] = '\0';
1232	tme->tm_orig_command[0] = '\0';
1233
1234	int hashid = thread & HASH_MASK;
1235
1236	tme->tm_next = threadmap_hash[hashid];
1237	threadmap_hash[hashid] = tme;
1238}
1239
1240void
1241delete_thread_entry(uintptr_t thread)
1242{
1243	threadmap_t tme;
1244
1245	int hashid = thread & HASH_MASK;
1246
1247	if ((tme = threadmap_hash[hashid])) {
1248		if (tme->tm_thread == thread) {
1249			threadmap_hash[hashid] = tme->tm_next;
1250		} else {
1251			threadmap_t tme_prev = tme;
1252
1253			for (tme = tme->tm_next; tme; tme = tme->tm_next) {
1254				if (tme->tm_thread == thread) {
1255					tme_prev->tm_next = tme->tm_next;
1256					break;
1257				}
1258				tme_prev = tme;
1259			}
1260		}
1261		if (tme) {
1262			tme->tm_next = threadmap_freelist;
1263			threadmap_freelist = tme;
1264		}
1265	}
1266}
1267
1268void
1269find_and_insert_tmp_map_entry(uintptr_t pthread, char *command)
1270{
1271	threadmap_t tme;
1272
1273	if ((tme = threadmap_temp)) {
1274		if (tme->tm_pthread == pthread) {
1275			threadmap_temp = tme->tm_next;
1276		} else {
1277			threadmap_t tme_prev = tme;
1278
1279			for (tme = tme->tm_next; tme; tme = tme->tm_next) {
1280				if (tme->tm_pthread == pthread) {
1281					tme_prev->tm_next = tme->tm_next;
1282					break;
1283				}
1284				tme_prev = tme;
1285			}
1286		}
1287		if (tme) {
1288			(void)strncpy (tme->tm_command, command, MAXCOMLEN);
1289			tme->tm_command[MAXCOMLEN] = '\0';
1290			tme->tm_orig_command[0] = '\0';
1291
1292			int hashid = tme->tm_thread & HASH_MASK;
1293			tme->tm_next = threadmap_hash[hashid];
1294			threadmap_hash[hashid] = tme;
1295		}
1296	}
1297}
1298
1299void
1300create_tmp_map_entry(uintptr_t thread, uintptr_t pthread)
1301{
1302	threadmap_t tme;
1303
1304	if ((tme = threadmap_freelist)) {
1305		threadmap_freelist = tme->tm_next;
1306	} else {
1307		tme = malloc(sizeof(struct threadmap));
1308	}
1309
1310	tme->tm_thread = thread;
1311	tme->tm_pthread = pthread;
1312	tme->tm_command[0] = '\0';
1313	tme->tm_orig_command[0] = '\0';
1314
1315	tme->tm_next = threadmap_temp;
1316	threadmap_temp = tme;
1317}
1318
1319threadmap_t
1320find_thread_entry(uintptr_t thread)
1321{
1322	threadmap_t tme;
1323
1324	int hashid = thread & HASH_MASK;
1325
1326	for (tme = threadmap_hash[hashid]; tme; tme = tme->tm_next) {
1327		if (tme->tm_thread == thread) {
1328			return tme;
1329		}
1330	}
1331	return 0;
1332}
1333
1334void
1335find_thread_name(uintptr_t thread, char **command)
1336{
1337	threadmap_t     tme;
1338
1339	if ((tme = find_thread_entry(thread))) {
1340		*command = tme->tm_command;
1341	} else {
1342		*command = EMPTYSTRING;
1343	}
1344}
1345
1346void
1347add_thread_entry_to_list(thread_entry_t *list, uintptr_t thread)
1348{
1349	thread_entry_t	te;
1350
1351	if ((te = thread_entry_freelist)) {
1352		thread_entry_freelist = te->te_next;
1353	} else {
1354		te = (thread_entry_t)malloc(sizeof(struct thread_entry));
1355	}
1356
1357	te->te_thread = thread;
1358	te->te_next = *list;
1359	*list = te;
1360}
1361
1362void
1363exec_thread_entry(uintptr_t thread, char *command)
1364{
1365	threadmap_t	tme;
1366
1367	if ((tme = find_thread_entry(thread))) {
1368		if (tme->tm_orig_command[0] == '\0') {
1369			(void)strncpy (tme->tm_orig_command, tme->tm_command, MAXCOMLEN);
1370			tme->tm_orig_command[MAXCOMLEN] = '\0';
1371		}
1372		(void)strncpy (tme->tm_command, command, MAXCOMLEN);
1373		tme->tm_command[MAXCOMLEN] = '\0';
1374
1375		add_thread_entry_to_list(&thread_reset_list, thread);
1376	} else {
1377		create_map_entry(thread, command);
1378	}
1379}
1380
void
record_thread_entry_for_gc(uintptr_t thread)
{
	/*
	 * Queue 'thread' for deferred removal from the threadmap; the
	 * actual deletion happens later in gc_thread_entries().
	 */
	add_thread_entry_to_list(&thread_delete_list, thread);
}
1386
1387void
1388gc_thread_entries(void)
1389{
1390	thread_entry_t te;
1391	thread_entry_t te_next;
1392	int count = 0;
1393
1394	for (te = thread_delete_list; te; te = te_next) {
1395		delete_thread_entry(te->te_thread);
1396
1397		te_next = te->te_next;
1398		te->te_next = thread_entry_freelist;
1399		thread_entry_freelist = te;
1400
1401		count++;
1402	}
1403	thread_delete_list = 0;
1404}
1405
1406void
1407gc_reset_entries(void)
1408{
1409	thread_entry_t te;
1410	thread_entry_t te_next;
1411	int count = 0;
1412
1413	for (te = thread_reset_list; te; te = te_next) {
1414		te_next = te->te_next;
1415		te->te_next = thread_entry_freelist;
1416		thread_entry_freelist = te;
1417
1418		count++;
1419	}
1420	thread_reset_list = 0;
1421}
1422
1423void
1424reset_thread_names(void)
1425{
1426	thread_entry_t te;
1427	thread_entry_t te_next;
1428	int count = 0;
1429
1430	for (te = thread_reset_list; te; te = te_next) {
1431		threadmap_t     tme;
1432
1433		if ((tme = find_thread_entry(te->te_thread))) {
1434			if (tme->tm_orig_command[0]) {
1435				(void)strncpy (tme->tm_command, tme->tm_orig_command, MAXCOMLEN);
1436				tme->tm_command[MAXCOMLEN] = '\0';
1437				tme->tm_orig_command[0] = '\0';
1438			}
1439		}
1440		te_next = te->te_next;
1441		te->te_next = thread_entry_freelist;
1442		thread_entry_freelist = te;
1443
1444		count++;
1445	}
1446	thread_reset_list = 0;
1447}
1448
1449void
1450delete_all_thread_entries(void)
1451{
1452	threadmap_t tme = 0;
1453	threadmap_t tme_next = 0;
1454	int i;
1455
1456	for (i = 0; i < HASH_SIZE; i++) {
1457		for (tme = threadmap_hash[i]; tme; tme = tme_next) {
1458			tme_next = tme->tm_next;
1459                        tme->tm_next = threadmap_freelist;
1460			threadmap_freelist = tme;
1461		}
1462		threadmap_hash[i] = 0;
1463	}
1464}
1465
1466
1467
1468
1469static void
1470insert_run_event(uintptr_t thread, kd_buf *kd, uint64_t now)
1471{
1472	threadrun_t	trp;
1473
1474	int hashid = thread & HASH_MASK;
1475
1476	for (trp = threadrun_hash[hashid]; trp; trp = trp->tr_next) {
1477		if (trp->tr_thread == thread) {
1478			break;
1479		}
1480	}
1481	if (trp == NULL) {
1482		if ((trp = threadrun_freelist)) {
1483			threadrun_freelist = trp->tr_next;
1484		} else {
1485			trp = (threadrun_t)malloc(sizeof(struct threadrun));
1486		}
1487
1488		trp->tr_thread = thread;
1489
1490		trp->tr_next = threadrun_hash[hashid];
1491		threadrun_hash[hashid] = trp;
1492
1493		add_thread_entry_to_list(&thread_run_list, thread);
1494	}
1495	trp->tr_entry = kd;
1496	trp->tr_timestamp = now;
1497}
1498
1499static threadrun_t
1500find_run_event(uintptr_t thread)
1501{
1502	threadrun_t trp;
1503	int hashid = thread & HASH_MASK;
1504
1505	for (trp = threadrun_hash[hashid]; trp; trp = trp->tr_next) {
1506		if (trp->tr_thread == thread) {
1507			return trp;
1508		}
1509	}
1510	return 0;
1511}
1512
1513static void
1514delete_run_event(uintptr_t thread)
1515{
1516	threadrun_t	trp = 0;
1517	threadrun_t trp_prev;
1518
1519	int hashid = thread & HASH_MASK;
1520
1521	if ((trp = threadrun_hash[hashid])) {
1522		if (trp->tr_thread == thread) {
1523			threadrun_hash[hashid] = trp->tr_next;
1524		} else {
1525			trp_prev = trp;
1526
1527			for (trp = trp->tr_next; trp; trp = trp->tr_next) {
1528				if (trp->tr_thread == thread) {
1529					trp_prev->tr_next = trp->tr_next;
1530					break;
1531				}
1532				trp_prev = trp;
1533			}
1534		}
1535		if (trp) {
1536			trp->tr_next = threadrun_freelist;
1537			threadrun_freelist = trp;
1538		}
1539	}
1540}
1541
1542static void
1543gc_run_events(void) {
1544	thread_entry_t te;
1545	thread_entry_t te_next;
1546	threadrun_t	trp;
1547	threadrun_t	trp_next;
1548	int count = 0;
1549
1550	for (te = thread_run_list; te; te = te_next) {
1551		int hashid = te->te_thread & HASH_MASK;
1552
1553		for (trp = threadrun_hash[hashid]; trp; trp = trp_next) {
1554			trp_next = trp->tr_next;
1555			trp->tr_next = threadrun_freelist;
1556			threadrun_freelist = trp;
1557			count++;
1558		}
1559		threadrun_hash[hashid] = 0;
1560
1561		te_next = te->te_next;
1562		te->te_next = thread_entry_freelist;
1563		thread_entry_freelist = te;
1564	}
1565	thread_run_list = 0;
1566}
1567
1568
1569
1570static void
1571insert_start_event(uintptr_t thread, int type, uint64_t now)
1572{
1573	event_t evp;
1574
1575	int hashid = thread & HASH_MASK;
1576
1577	for (evp = event_hash[hashid]; evp; evp = evp->ev_next) {
1578		if (evp->ev_thread == thread && evp->ev_type == type) {
1579			break;
1580		}
1581	}
1582	if (evp == NULL) {
1583		if ((evp = event_freelist)) {
1584			event_freelist = evp->ev_next;
1585		} else {
1586			evp = (event_t)malloc(sizeof(struct event));
1587		}
1588
1589		evp->ev_thread = thread;
1590		evp->ev_type = type;
1591
1592		evp->ev_next = event_hash[hashid];
1593		event_hash[hashid] = evp;
1594
1595		add_thread_entry_to_list(&thread_event_list, thread);
1596	}
1597	evp->ev_timestamp = now;
1598}
1599
1600
1601static uint64_t
1602consume_start_event(uintptr_t thread, int type, uint64_t now)
1603{
1604	event_t evp;
1605	event_t evp_prev;
1606	uint64_t elapsed = 0;
1607
1608	int hashid = thread & HASH_MASK;
1609
1610	if ((evp = event_hash[hashid])) {
1611		if (evp->ev_thread == thread && evp->ev_type == type) {
1612			event_hash[hashid] = evp->ev_next;
1613		} else {
1614			evp_prev = evp;
1615
1616			for (evp = evp->ev_next; evp; evp = evp->ev_next) {
1617				if (evp->ev_thread == thread && evp->ev_type == type) {
1618					evp_prev->ev_next = evp->ev_next;
1619					break;
1620				}
1621				evp_prev = evp;
1622			}
1623		}
1624		if (evp) {
1625			elapsed = now - evp->ev_timestamp;
1626
1627			if (now < evp->ev_timestamp) {
1628				printf("consume: now = %qd,  timestamp = %qd\n", now, evp->ev_timestamp);
1629				elapsed = 0;
1630			}
1631			evp->ev_next = event_freelist;
1632			event_freelist = evp;
1633		}
1634	}
1635	return elapsed;
1636}
1637
1638static void
1639gc_start_events(void)
1640{
1641	thread_entry_t	te;
1642	thread_entry_t	te_next;
1643	event_t         evp;
1644	event_t         evp_next;
1645	int		count = 0;
1646        int		hashid;
1647
1648	for (te = thread_event_list; te; te = te_next) {
1649
1650		hashid = te->te_thread & HASH_MASK;
1651
1652		for (evp = event_hash[hashid]; evp; evp = evp_next) {
1653			evp_next = evp->ev_next;
1654                        evp->ev_next = event_freelist;
1655                        event_freelist = evp;
1656			count++;
1657		}
1658		event_hash[hashid] = 0;
1659
1660		te_next = te->te_next;
1661		te->te_next = thread_entry_freelist;
1662		thread_entry_freelist = te;
1663	}
1664	thread_event_list = 0;
1665}
1666
1667int
1668thread_in_user_mode(uintptr_t thread, char *command)
1669{
1670	event_t evp;
1671
1672	if (strcmp(command, "kernel_task") == 0) {
1673		return 0;
1674	}
1675
1676	int hashid = thread & HASH_MASK;
1677
1678	for (evp = event_hash[hashid]; evp; evp = evp->ev_next) {
1679		if (evp->ev_thread == thread) {
1680			return 0;
1681		}
1682	}
1683	return 1;
1684}
1685
1686
1687
static lookup_t
handle_lookup_event(uintptr_t thread, int debugid, kd_buf *kdp)
{
	/*
	 * Accumulate the pathname carried piecewise in VFS_LOOKUP trace
	 * records.  Each record supplies up to four args' worth of path
	 * bytes; the DBG_FUNC_START record additionally carries the
	 * directory vnode pointer in arg1.  Returns the completed lookup
	 * when the DBG_FUNC_END record arrives (or the buffer fills),
	 * otherwise 0.
	 */
	lookup_t lkp;
	boolean_t first_record = FALSE;

	int hashid = thread & HASH_MASK;

	if (debugid & DBG_FUNC_START) {
		first_record = TRUE;
	}

	for (lkp = lookup_hash[hashid]; lkp; lkp = lkp->lk_next) {
		if (lkp->lk_thread == thread) {
			break;
		}
	}
	if (lkp == NULL) {
		/* a continuation with no recorded start: nothing to append to */
		if (first_record == FALSE) {
			return 0;
		}

		if ((lkp = lookup_freelist)) {
			lookup_freelist = lkp->lk_next;
		} else {
			lkp = (lookup_t)malloc(sizeof(struct lookup));
		}

		lkp->lk_thread = thread;

		lkp->lk_next = lookup_hash[hashid];
		lookup_hash[hashid] = lkp;

		add_thread_entry_to_list(&thread_lookup_list, thread);
	}

	if (first_record == TRUE) {
		/* on the start record arg1 is the vnode, not path data */
		lkp->lk_pathptr = lkp->lk_pathname;
		lkp->lk_dvp = kdp->arg1;
	} else {
		/* buffer nearly full: stop appending and hand back what we have */
		if (lkp->lk_pathptr > &lkp->lk_pathname[NUMPARMS-4]) {
			return lkp;
		}
		*lkp->lk_pathptr++ = kdp->arg1;
	}
	*lkp->lk_pathptr++ = kdp->arg2;
	*lkp->lk_pathptr++ = kdp->arg3;
	*lkp->lk_pathptr++ = kdp->arg4;
	*lkp->lk_pathptr = 0;

	if (debugid & DBG_FUNC_END) {
		return lkp;
	}

	return 0;
}
1744
1745static void
1746delete_lookup_event(uintptr_t thread, lookup_t lkp_to_delete)
1747{
1748	lookup_t	lkp;
1749	lookup_t	lkp_prev;
1750	int		hashid;
1751
1752	hashid = thread & HASH_MASK;
1753
1754	if ((lkp = lookup_hash[hashid])) {
1755		if (lkp == lkp_to_delete) {
1756			lookup_hash[hashid] = lkp->lk_next;
1757		} else {
1758			lkp_prev = lkp;
1759
1760			for (lkp = lkp->lk_next; lkp; lkp = lkp->lk_next) {
1761				if (lkp == lkp_to_delete) {
1762					lkp_prev->lk_next = lkp->lk_next;
1763					break;
1764				}
1765				lkp_prev = lkp;
1766			}
1767		}
1768		if (lkp) {
1769			lkp->lk_next = lookup_freelist;
1770			lookup_freelist = lkp;
1771		}
1772	}
1773}
1774
1775static void
1776gc_lookup_events(void) {
1777	thread_entry_t	te;
1778	thread_entry_t	te_next;
1779	lookup_t	lkp;
1780	lookup_t	lkp_next;
1781	int		count = 0;
1782        int		hashid;
1783
1784	for (te = thread_lookup_list; te; te = te_next) {
1785		hashid = te->te_thread & HASH_MASK;
1786
1787		for (lkp = lookup_hash[hashid]; lkp; lkp = lkp_next) {
1788			lkp_next = lkp->lk_next;
1789                        lkp->lk_next = lookup_freelist;
1790                        lookup_freelist = lkp;
1791			count++;
1792		}
1793		lookup_hash[hashid] = 0;
1794
1795		te_next = te->te_next;
1796		te->te_next = thread_entry_freelist;
1797		thread_entry_freelist = te;
1798	}
1799	thread_lookup_list = 0;
1800}
1801
int
sample_sc(void)
{
	kd_buf	*kd, *end_of_sample;
	int	keep_going = 1;
	int	count, i;

	/*
	 * Pull one buffer's worth of kdebug trace records (from the live
	 * kernel or a raw trace file), run every record through the
	 * decrementer/scheduler latency handlers, then garbage collect the
	 * per-sample tracking state.  Returns 0 when a raw file is
	 * exhausted, 1 to keep sampling.
	 */
	if (!RAW_flag) {
		/*
		 * Get kernel buffer information
		 */
		get_bufinfo(&bufinfo);
	}
	if (need_new_map) {
		delete_all_thread_entries();
		read_command_map();
		need_new_map = 0;
	}
	if (RAW_flag) {
		/*
		 * read(2) returns ssize_t; the original stored it in a
		 * uint32_t, which made the -1 error check rely on implicit
		 * conversion.  A signed type keeps the check sound.
		 */
		ssize_t bytes_read;

		bytes_read = read(RAW_fd, my_buffer, num_entries * sizeof(kd_buf));

		if (bytes_read == -1) {
			perror("read failed");
			exit(2);
		}
		count = bytes_read / sizeof(kd_buf);

		if (count != num_entries) {
			/* short read: the raw file is exhausted after this pass */
			keep_going = 0;
		}

		if (first_read) {
			kd = (kd_buf *)my_buffer;
			first_now = kd->timestamp & KDBG_TIMESTAMP_MASK;
			first_read = 0;
		}

	} else {
		int mib[] = { CTL_KERN, KERN_KDEBUG, KERN_KDREADTR };
		size_t needed = bufinfo.nkdbufs * sizeof(kd_buf);

		if (sysctl(mib, ARRAYSIZE(mib), my_buffer, &needed, NULL, 0) < 0) {
			quit("trace facility failure, KERN_KDREADTR\n");
		}

		/* KERN_KDREADTR reports the number of records via 'needed' */
		count = needed;
		sample_generation++;

		if (bufinfo.flags & KDBG_WRAPPED) {
			/* trace buffer wrapped: the thread map is stale, rebuild it */
			need_new_map = 1;

			if (log_fp) {
				fprintf(log_fp, "\n\n%-19.19s   sample = %d   <<<<<<< trace buffer wrapped >>>>>>>\n\n",
					&(ctime(&curr_time)[0]), sample_generation);
			}
			set_enable(0);
			set_enable(1);
		}
	}
	end_of_sample = &((kd_buf *)my_buffer)[count];

	/*
	 * Always reinitialize the DECR_TRAP array
	 */
	for (i = 0; i < num_cpus; i++) {
		last_decrementer_kd[i] = (kd_buf *)my_buffer;
	}

	for (kd = (kd_buf *)my_buffer; kd < end_of_sample; kd++) {
		kd_buf *kd_start;
		uintptr_t thread = kd->arg5;
		int	type = kd->debugid & DBG_FUNC_MASK;

		(void)check_for_thread_update(thread, type, kd, NULL);

		uint64_t now = kd->timestamp & KDBG_TIMESTAMP_MASK;
		last_now = now;

		if (type == DECR_TRAP) {
			int cpunum = CPU_NUMBER(kd);
			double i_latency = handle_decrementer(kd, cpunum);

			if (log_fp) {
				if (i_thresh_hold && (int)i_latency > i_thresh_hold) {
					kd_start = last_decrementer_kd[cpunum];

					log_decrementer(kd_start, kd, end_of_sample, i_latency);
				}
				last_decrementer_kd[cpunum] = kd;
			}
		} else {
			double s_latency;
			if (check_for_scheduler_latency(type, &thread, now, kd, &kd_start, &s_latency)) {
				log_scheduler(kd_start, kd, end_of_sample, s_latency, thread);
			}
		}
	}
	if (log_fp) {
		fflush(log_fp);
	}

	gc_thread_entries();
	gc_reset_entries();
	gc_run_events();

	return keep_going;
}
1911
1912
1913
void
enter_syscall(FILE *fp, kd_buf *kd, int thread, int type, char *command, uint64_t now, uint64_t idelta, uint64_t start_bias, int print_info)
{
	/*
	 * Handle a DBG_FUNC_START record: optionally log it, then remember
	 * the start time so exit_syscall() can report the elapsed time.
	 * NOTE(review): 'thread' is an int here but uintptr_t elsewhere in
	 * this file — possible truncation on LP64; confirm against callers.
	 */
	char	*p;
	double	timestamp;
	double	delta;
	char	pcstring[128];

	int cpunum = CPU_NUMBER(kd);

	if (print_info && fp) {
		timestamp = (double)(now - start_bias) / divisor;
		delta = (double)idelta / divisor;

		if ((p = find_code(type))) {
			if (type == INTERRUPT) {
				int mode;

				/* arg3 non-zero means the interrupt arrived in user mode */
				if (kd->arg3) {
					mode = USER_MODE;
				} else {
					mode = KERNEL_MODE;
				}

				pc_to_string(&pcstring[0], kd->arg2, 58, mode);

				fprintf(fp, "%9.1f %8.1f\t\tINTERRUPT[%2lx] @ %-58.58s                       %8x   %2d  %s\n",
					timestamp, delta, kd->arg1, &pcstring[0], thread, cpunum, command);
			} else if (type == MACH_vmfault) {
				fprintf(fp, "%9.1f %8.1f\t\t%-28.28s                                                                     %8x   %2d  %s\n",
					timestamp, delta, p, thread, cpunum, command);
			} else {
				fprintf(fp, "%9.1f %8.1f\t\t%-28.28s %-16lx %-16lx %-16lx %-16lx %8x   %2d  %s\n",
					timestamp, delta, p, kd->arg1, kd->arg2, kd->arg3, kd->arg4,
					thread, cpunum, command);
			}
		} else {
			/* unknown debugid: print it raw in hex */
			fprintf(fp, "%9.1f %8.1f\t\t%-8x                     %-16lx %-16lx %-16lx %-16lx %8x   %2d  %s\n",
				timestamp, delta, type, kd->arg1, kd->arg2, kd->arg3, kd->arg4,
				thread, cpunum, command);
	       }
	}
	/* terminating threads will never produce a matching END record */
	if (type != BSC_thread_terminate && type != BSC_exit) {
		insert_start_event(thread, type, now);
	}
}
1960
1961
void
exit_syscall(FILE *fp, kd_buf *kd, int thread, int type, char *command, uint64_t now, uint64_t idelta, uint64_t start_bias, int print_info)
{
	/*
	 * Handle a DBG_FUNC_END record: consume the matching start event
	 * (computing the elapsed time in usecs) and optionally log it.
	 */
	char   *p;
	uint64_t user_addr;
	double	timestamp;
	double	delta;
	double  elapsed_timestamp;

	/* consume even when not printing, so the start-event table stays clean */
	elapsed_timestamp = (double)consume_start_event(thread, type, now) / divisor;

	if (print_info && fp) {
		int cpunum = CPU_NUMBER(kd);

		timestamp = (double)(now - start_bias) / divisor;
		delta = (double)idelta / divisor;

		fprintf(fp, "%9.1f %8.1f(%.1f) \t", timestamp, delta, elapsed_timestamp);

		if ((p = find_code(type))) {
			if (type == INTERRUPT) {
				fprintf(fp, "INTERRUPT                                                                                        %8x   %2d  %s\n", thread, cpunum, command);
			} else if (type == MACH_vmfault && kd->arg4 <= DBG_PAGEIND_FAULT) {
				/* for page faults, arg1:arg2 form the 64-bit faulting address */
				user_addr = ((uint64_t)kd->arg1 << 32) | (uint32_t)kd->arg2;

			    fprintf(fp, "%-28.28s %-10.10s   %-16qx                                       %8x   %2d  %s\n",
					p, fault_name[kd->arg4], user_addr,
					thread, cpunum, command);
		       } else {
				fprintf(fp, "%-28.28s %-16lx %-16lx                                   %8x   %2d  %s\n",
					p, kd->arg1, kd->arg2,
					thread, cpunum, command);
		       }
		} else {
			/* unknown debugid: print it raw in hex */
			fprintf(fp, "%-8x                     %-16lx %-16lx                                   %8x   %2d  %s\n",
				type, kd->arg1, kd->arg2,
				thread, cpunum, command);
		}
	}
}
2002
2003
2004void
2005print_entry(FILE *fp, kd_buf *kd, int thread, int type, char *command, uint64_t now, uint64_t idelta, uint64_t start_bias, kd_buf *kd_note)
2006{
2007	char	*p;
2008
2009	if (!fp) {
2010		return;
2011	}
2012
2013	int cpunum = CPU_NUMBER(kd);
2014
2015	double timestamp = (double)(now - start_bias) / divisor;
2016	double delta = (double)idelta / divisor;
2017
2018	if ((p = find_code(type))) {
2019		if (kd == kd_note) {
2020			fprintf(fp, "%9.1f %8.1f\t**\t", timestamp, delta);
2021		} else {
2022			fprintf(fp, "%9.1f %8.1f\t\t", timestamp, delta);
2023		}
2024		fprintf(fp, "%-28.28s %-16lx %-16lx %-16lx %-16lx %8x   %2d  %s\n",
2025			p, kd->arg1, kd->arg2, kd->arg3, kd->arg4, thread, cpunum, command);
2026	} else {
2027		fprintf(fp, "%9.1f %8.1f\t\t%-8x                     %-16lx %-16lx %-16lx %-16lx %8x   %2d  %s\n",
2028			timestamp, delta, type, kd->arg1, kd->arg2, kd->arg3, kd->arg4,
2029			thread, cpunum, command);
2030	}
2031}
2032
2033
2034void
2035check_for_thread_update(uintptr_t thread, int debugid_base, kd_buf *kbufp, char **command)
2036{
2037	if (debugid_base == TRACE_DATA_NEWTHREAD) {
2038		/*
2039		 * Save the create thread data
2040		 */
2041		create_tmp_map_entry(kbufp->arg1, thread);
2042	} else if (debugid_base == TRACE_STRING_NEWTHREAD) {
2043		/*
2044		 * process new map entry
2045		 */
2046		find_and_insert_tmp_map_entry(thread, (char *)&kbufp->arg1);
2047	} else if (debugid_base == TRACE_STRING_EXEC) {
2048		exec_thread_entry(thread, (char *)&kbufp->arg1);
2049	} else {
2050		if (debugid_base == BSC_exit || debugid_base == BSC_thread_terminate) {
2051			record_thread_entry_for_gc(thread);
2052		}
2053		if (command) {
2054			find_thread_name(thread, command);
2055		}
2056	}
2057}
2058
2059
void
log_info(uint64_t now, uint64_t idelta, uint64_t start_bias, kd_buf *kd, kd_buf *kd_note)
{
	/*
	 * Format and write one trace record to log_fp, dispatching on the
	 * record type (interrupt/timer events, context switches, VFS
	 * lookups, and generic start/end/step records).
	 */
	lookup_t	lkp;
	int		mode;
	int		reason;
	char		*p;
	char		*command;
	char		*command1;
	char		command_buf[32];
	char		sched_info[64];
	char		pcstring[128];
	const char *sched_reason;
	double		i_latency;
	double		timestamp;
	double		delta;
	char joe[32];

	int thread  = kd->arg5;
	int cpunum	= CPU_NUMBER(kd);
	int debugid = kd->debugid;
	int type    = kd->debugid & DBG_FUNC_MASK;

	/*
	 * NOTE(review): 'command' is only assigned on the default path of
	 * check_for_thread_update(); the TRACE_* paths appear to be
	 * filtered out by the DBG_TRACE check below — confirm.
	 */
	(void)check_for_thread_update(thread, type, kd, &command);

	/* skip trace-class housekeeping records, except DBG_TRACE_INFO */
	if ((type >> 24) == DBG_TRACE) {
		if (((type >> 16) & 0xff) != DBG_TRACE_INFO) {
			return;
		}
	}
	timestamp = (double)(now - start_bias) / divisor;
	delta = (double)idelta / divisor;

	switch (type) {

		case CQ_action:
			pc_to_string(&pcstring[0], kd->arg1, 84, KERNEL_MODE);

			fprintf(log_fp, "%9.1f %8.1f\t\tCQ_action @ %-84.84s %8x   %2d  %s\n",
				timestamp, delta, &pcstring[0], thread, cpunum, command);
			break;

		case TES_action:
			pc_to_string(&pcstring[0], kd->arg1, 83, KERNEL_MODE);

			fprintf(log_fp, "%9.1f %8.1f\t\tTES_action @ %-83.83s %8x   %2d  %s\n",
				timestamp, delta, &pcstring[0], thread, cpunum, command);
			break;

		case IES_action:
			pc_to_string(&pcstring[0], kd->arg1, 83, KERNEL_MODE);

			fprintf(log_fp, "%9.1f %8.1f\t\tIES_action @ %-83.83s %8x   %2d  %s\n",
				timestamp, delta, &pcstring[0], thread, cpunum, command);
			break;

		case IES_filter:
			pc_to_string(&pcstring[0], kd->arg1, 83, KERNEL_MODE);

			fprintf(log_fp, "%9.1f %8.1f\t\tIES_filter @ %-83.83s %8x   %2d  %s\n",
				timestamp, delta, &pcstring[0], thread, cpunum, command);
			break;

		case DECR_TRAP:
			/* arg1 < 0 encodes the (negated) interrupt latency */
			if ((int)kd->arg1 >= 0) {
				i_latency = 0;
			} else {
				i_latency = (((double)(-1 - kd->arg1)) / divisor);
			}

			/* '*' marks latencies that exceeded the reporting threshold */
			if (i_thresh_hold && (int)i_latency > i_thresh_hold) {
				p = "*";
			} else {
				p = " ";
			}

			/* arg3 non-zero means the trap arrived in user mode */
			if (kd->arg3) {
				mode = USER_MODE;
			} else {
				mode = KERNEL_MODE;
			}

			pc_to_string(&pcstring[0], kd->arg2, 84, mode);

			fprintf(log_fp, "%9.1f %8.1f[%.1f]%s\tDECR_TRAP @ %-84.84s %8x   %2d  %s\n",
					timestamp, delta, i_latency, p, &pcstring[0], thread, cpunum, command);
			break;

		case DECR_SET:
			fprintf(log_fp, "%9.1f %8.1f[%.1f]  \t%-28.28s                                                                     %8x   %2d  %s\n",
					timestamp, delta, (double)kd->arg1/divisor, "DECR_SET", thread, cpunum, command);
			break;

		case MACH_sched:
		case MACH_stkhandoff:

			/* arg2 is the incoming thread; resolve its name */
			find_thread_name(kd->arg2, &command1);

			if (command1 == EMPTYSTRING) {
				command1 = command_buf;
				sprintf(command1, "%-8lx", kd->arg2);
			}
			if (thread_in_user_mode(kd->arg2, command1)) {
				p = "U";
			} else {
				p = "K";
			}

			reason = kd->arg1;

			if (reason > MAX_REASON) {
				sched_reason = "?";
			} else {
				sched_reason = sched_reasons[reason];
			}

			/* unknown reason codes are printed numerically */
			if (sched_reason[0] == '?') {
				sprintf(joe, "%x", reason);
				sched_reason = joe;
			}
			sprintf(sched_info, "%16.16s @ pri %3lu  -->  %16.16s @ pri %3lu%s", command, kd->arg3, command1, kd->arg4, p);

			fprintf(log_fp, "%9.1f %8.1f\t\t%-10.10s[%s]     %s                   %8x   %2d\n",
				timestamp, delta, "MACH_SCHED", sched_reason, sched_info, thread, cpunum);
			break;

		case VFS_LOOKUP:
			if ((lkp = handle_lookup_event(thread, debugid, kd))) {
				/*
				 * print the tail end of the pathname
				 */
				p = (char *)lkp->lk_pathname;
				int clen = strlen(p);

				if (clen > 45) {
					clen -= 45;
				} else {
					clen = 0;
				}

				fprintf(log_fp, "%9.1f %8.1f\t\t%-14.14s %-59s    %-16lx   %8x   %2d  %s\n",
					timestamp, delta, "VFS_LOOKUP",
					&p[clen], lkp->lk_dvp, thread, cpunum, command);

				delete_lookup_event(thread, lkp);
			}
			break;

		default:
			if (debugid & DBG_FUNC_START) {
				enter_syscall(log_fp, kd, thread, type, command, now, idelta, start_bias, 1);
			} else if (debugid & DBG_FUNC_END) {
				exit_syscall(log_fp, kd, thread, type, command, now, idelta, start_bias, 1);
			} else {
				print_entry(log_fp, kd, thread, type, command, now, idelta, start_bias, kd_note);
			}
			break;
	}
}
2219
2220
2221
2222void
2223log_range(kd_buf *kd_buffer, kd_buf *kd_start, kd_buf *kd_stop, kd_buf *kd_note, char *buf1)
2224{
2225	uint64_t last_timestamp = 0;
2226	uint64_t delta = 0;
2227	uint64_t start_bias = 0;
2228	uint64_t now;
2229	kd_buf	*kd;
2230	int	clen;
2231	char buf2[128];
2232
2233	clen = strlen(buf1);
2234	memset(buf2, '-', clen);
2235	buf2[clen] = 0;
2236	fprintf(log_fp, "\n\n%s\n", buf2);
2237	fprintf(log_fp, "%s\n\n", buf1);
2238
2239	fprintf(log_fp, "RelTime(Us)  Delta              debugid                      arg1             arg2             arg3             arg4               thread  cpu  command\n\n");
2240
2241	reset_thread_names();
2242
2243	last_timestamp = kd_start->timestamp & KDBG_TIMESTAMP_MASK;
2244	start_bias = last_timestamp;
2245
2246	for (kd = kd_buffer; kd <= kd_stop; kd++) {
2247		now = kd->timestamp & KDBG_TIMESTAMP_MASK;
2248
2249		if (kd >= kd_start) {
2250			delta = now - last_timestamp;
2251
2252			log_info(now, delta, start_bias, kd, kd_note);
2253
2254			last_timestamp = now;
2255		} else {
2256			int	debugid = kd->debugid;
2257			int	thread = kd->arg5;
2258			int	type = kd->debugid & DBG_FUNC_MASK;
2259
2260			if ((type >> 24) == DBG_TRACE) {
2261				if (((type >> 16) & 0xff) != DBG_TRACE_INFO) {
2262					continue;
2263				}
2264			}
2265			if (type == BSC_thread_terminate || type == BSC_exit) {
2266				continue;
2267			}
2268
2269			if (debugid & DBG_FUNC_START) {
2270				insert_start_event(thread, type, now);
2271			} else if (debugid & DBG_FUNC_END) {
2272				(void)consume_start_event(thread, type, now);
2273			}
2274		}
2275	}
2276	gc_start_events();
2277	gc_lookup_events();
2278}
2279
2280
kd_buf *
log_decrementer(kd_buf *kd_beg, kd_buf *kd_end, kd_buf *end_of_sample, double i_latency)
{
	/*
	 * A decrementer (timer) latency exceeded the threshold: pick a
	 * window of records around [kd_beg, kd_end] and log it.  The
	 * window is grown backwards to the previous DECR_TRAP on the same
	 * cpu (bounded by MAX_LOG_COUNT records) and forwards to the next
	 * INTERRUPT on that cpu.  Returns the last record logged so the
	 * caller can resume after it.
	 */
	kd_buf *kd_start, *kd_stop;
	int kd_count; /* Limit the boundary of kd_start */
	uint64_t now;
	double sample_timestamp;
	char buf1[128];

	int thread = kd_beg->arg5;
	int cpunum = CPU_NUMBER(kd_end);

	/* scan backwards for the start of the window */
	for (kd_count = 0, kd_start = kd_beg - 1; (kd_start >= (kd_buf *)my_buffer); kd_start--, kd_count++) {
		if (kd_count == MAX_LOG_COUNT) {
		        break;
		}

		/* only records on the interrupted cpu delimit the window */
		if (CPU_NUMBER(kd_start) != cpunum) {
		        continue;
		}

		if ((kd_start->debugid & DBG_FUNC_MASK) == DECR_TRAP) {
			break;
		}

		if (kd_start->arg5 != thread) {
			break;
		}
	}
	if (kd_start < (kd_buf *)my_buffer) {
		kd_start = (kd_buf *)my_buffer;
	}

	thread = kd_end->arg5;

	/* scan forwards for the end of the window */
	for (kd_stop = kd_end + 1; kd_stop < end_of_sample; kd_stop++) {
		if (CPU_NUMBER(kd_stop) != cpunum) {
			continue;
		}

		if ((kd_stop->debugid & DBG_FUNC_MASK) == INTERRUPT) {
			break;
		}

		if (kd_stop->arg5 != thread) {
			break;
		}
	}
	if (kd_stop >= end_of_sample) {
		kd_stop = end_of_sample - 1;
	}

	if (RAW_flag) {
		/* raw files carry their own time base; reconstruct wall time */
		time_t TOD_secs;
		uint64_t TOD_usecs;

		now = kd_start->timestamp & KDBG_TIMESTAMP_MASK;
		sample_timestamp = (double)(now - first_now) / divisor;

		TOD_usecs = (uint64_t)sample_timestamp;
		TOD_secs = sample_TOD_secs + ((sample_TOD_usecs + TOD_usecs) / 1000000);

		sprintf(buf1, "%-19.19s     interrupt latency = %.1fus [timestamp %.1f]", ctime(&TOD_secs), i_latency, sample_timestamp);
	} else {
		sprintf(buf1, "%-19.19s     interrupt latency = %.1fus [sample %d]", &(ctime(&curr_time)[0]), i_latency, sample_generation);
	}

	log_range((kd_buf *)my_buffer, kd_start, kd_stop, 0, buf1);

	return kd_stop;
}
2352
2353
void
log_scheduler(kd_buf *kd_beg, kd_buf *kd_end, kd_buf *end_of_sample, double s_latency, uintptr_t thread)
{
	/*
	 * A scheduling latency exceeded the threshold: pick a window of
	 * records around [kd_beg, kd_end] and log it, marking kd_beg as
	 * the record of note.  The window is grown backwards until about
	 * 100 records past the point where every cpu has been seen, and
	 * forwards to the next record from the scheduled thread.
	 */
	kd_buf *kd_start, *kd_stop;
	uint64_t now;
	int      count;
	int	 cpunum;
	uint64_t cmask = 0;
	double sample_timestamp;
	char buf1[128];

	for (count = 0, kd_start = kd_beg; (kd_start >= (kd_buf *)my_buffer); kd_start--) {
		cpunum = CPU_NUMBER(kd_start);

		cmask |= ((uint64_t)1 << cpunum);

		/* once all cpus are represented, take ~100 more for context */
		if (cmask == cpu_mask) {
			if (count++ > 100)
				break;
		}
	}
	if (kd_start < (kd_buf *)my_buffer) {
		kd_start = (kd_buf *)my_buffer;
	}

	for (kd_stop = kd_end + 1; kd_stop < end_of_sample; kd_stop++) {
		if (kd_stop->arg5 == thread) {
			break;
		}
	}
	if (kd_stop >= end_of_sample) {
	        kd_stop = end_of_sample - 1;
	}

	if (RAW_flag) {
		/* raw files carry their own time base; reconstruct wall time */
		time_t	TOD_secs;
		uint64_t TOD_usecs;

		now = kd_start->timestamp & KDBG_TIMESTAMP_MASK;
		sample_timestamp = (double)(now - first_now) / divisor;

		TOD_usecs = (uint64_t)sample_timestamp;
		TOD_secs = sample_TOD_secs + ((sample_TOD_usecs + TOD_usecs) / 1000000);

		sprintf(buf1, "%-19.19s     priority = %d,  scheduling latency = %.1fus [timestamp %.1f]", ctime(&TOD_secs), watch_priority, s_latency, sample_timestamp);
	} else {
		sprintf(buf1, "%-19.19s     priority = %d,  scheduling latency = %.1fus [sample %d]", &(ctime(&curr_time)[0]), watch_priority, s_latency, sample_generation);
	}

	log_range((kd_buf *)my_buffer, kd_start, kd_stop, kd_beg, buf1);
}
2405
2406
2407
2408int
2409check_for_scheduler_latency(int type, uintptr_t *thread, uint64_t now, kd_buf *kd, kd_buf **kd_start, double *latency)
2410{
2411	int found_latency = 0;
2412
2413	if (type == MACH_makerunnable) {
2414		if (watch_priority == kd->arg2) {
2415			insert_run_event(kd->arg1, kd, now);
2416		}
2417	} else if (type == MACH_sched || type == MACH_stkhandoff) {
2418		threadrun_t	trp;
2419
2420		if (type == MACH_sched || type == MACH_stkhandoff) {
2421			*thread = kd->arg2;
2422		}
2423
2424		if ((trp = find_run_event(*thread))) {
2425			double d_s_latency = (((double)(now - trp->tr_timestamp)) / divisor);
2426			int s_latency = (int)d_s_latency;
2427
2428			if (s_latency) {
2429				if (s_latency < 100) {
2430					s_usec_10_bins[s_latency/10]++;
2431				}
2432				if (s_latency < 1000) {
2433					s_usec_100_bins[s_latency/100]++;
2434				} else if (s_latency < 10000) {
2435					s_msec_1_bins[s_latency/1000]++;
2436				} else if (s_latency < 50000) {
2437					s_msec_10_bins[s_latency/10000]++;
2438				} else {
2439					s_too_slow++;
2440				}
2441
2442				if (s_latency > s_max_latency) {
2443					s_max_latency = s_latency;
2444				}
2445				if (s_latency < s_min_latency || s_total_samples == 0) {
2446					s_min_latency = s_latency;
2447				}
2448				s_total_latency += s_latency;
2449				s_total_samples++;
2450
2451				if (s_thresh_hold && s_latency > s_thresh_hold) {
2452					s_exceeded_threshold++;
2453
2454					if (log_fp) {
2455						*kd_start = trp->tr_entry;
2456						*latency = d_s_latency;
2457						found_latency = 1;
2458					}
2459				}
2460			}
2461			delete_run_event(*thread);
2462		}
2463	}
2464	return found_latency;
2465}
2466
2467
2468double
2469handle_decrementer(kd_buf *kd, int cpunum)
2470{
2471	struct i_latencies *il;
2472	double latency;
2473	long   elapsed_usecs;
2474
2475	if (i_latency_per_cpu == FALSE) {
2476		cpunum = 0;
2477	}
2478
2479	il = &i_lat[cpunum];
2480
2481	if ((long)(kd->arg1) >= 0) {
2482		latency = 1;
2483	} else {
2484		latency = (((double)(-1 - kd->arg1)) / divisor);
2485	}
2486	elapsed_usecs = (long)latency;
2487
2488	if (elapsed_usecs < 100) {
2489		il->i_usec_10_bins[elapsed_usecs/10]++;
2490	}
2491
2492	if (elapsed_usecs < 1000) {
2493		il->i_usec_100_bins[elapsed_usecs/100]++;
2494	} else if (elapsed_usecs < 10000) {
2495		il->i_msec_1_bins[elapsed_usecs/1000]++;
2496	} else if (elapsed_usecs < 50000) {
2497		il->i_msec_10_bins[elapsed_usecs/10000]++;
2498	} else {
2499		il->i_too_slow++;
2500	}
2501
2502	if (use_high_res_bins && elapsed_usecs < N_HIGH_RES_BINS) {
2503		i_high_res_bins[elapsed_usecs]++;
2504	}
2505	if (i_thresh_hold && elapsed_usecs > i_thresh_hold) {
2506	        il->i_exceeded_threshold++;
2507	}
2508	if (elapsed_usecs > il->i_max_latency) {
2509	        il->i_max_latency = elapsed_usecs;
2510	}
2511	if (elapsed_usecs < il->i_min_latency || il->i_total_samples == 0) {
2512	        il->i_min_latency = elapsed_usecs;
2513	}
2514	il->i_total_latency += elapsed_usecs;
2515	il->i_total_samples++;
2516
2517	return latency;
2518}
2519
2520
2521
2522char *
2523find_code(int type)
2524{
2525	int i;
2526	for (i = 0; i < num_of_codes; i++) {
2527		if (codes_tab[i].type == type) {
2528			return codes_tab[i].name;
2529		}
2530	}
2531	return NULL;
2532}
2533
2534
2535void
2536init_code_file(void)
2537{
2538	FILE *fp;
2539	int i;
2540
2541	if ((fp = fopen(code_file, "r")) == NULL) {
2542		if (log_fp) {
2543			fprintf(log_fp, "open of %s failed\n", code_file);
2544		}
2545		return;
2546	}
2547	for (i = 0; i < MAX_ENTRIES; i++) {
2548		int code;
2549		char name[128];
2550		int n = fscanf(fp, "%x%127s\n", &code, name);
2551
2552		if (n == 1 && i == 0) {
2553			/*
2554			 * old code file format, just skip
2555			 */
2556			continue;
2557		}
2558		if (n != 2) {
2559			break;
2560		}
2561
2562		strncpy(codes_tab[i].name, name, 32);
2563		codes_tab[i].type = code;
2564	}
2565	num_of_codes = i;
2566
2567	fclose(fp);
2568}
2569
2570
2571void
2572do_kernel_nm(void)
2573{
2574	int i, len;
2575	FILE *fp = NULL;
2576	char tmp_nm_file[128];
2577	char tmpstr[1024];
2578	char inchr;
2579
2580	bzero(tmp_nm_file, 128);
2581	bzero(tmpstr, 1024);
2582
2583	/*
2584	 * Build the temporary nm file path
2585	 */
2586	strcpy(tmp_nm_file,"/tmp/knm.out.XXXXXX");
2587
2588	if (!mktemp(tmp_nm_file)) {
2589		fprintf(stderr, "Error in mktemp call\n");
2590		return;
2591	}
2592
2593	/*
2594	 * Build the nm command and create a tmp file with the output
2595	 */
2596	sprintf (tmpstr, "/usr/bin/nm -f -n -s __TEXT __text %s > %s",
2597		 kernelpath, tmp_nm_file);
2598	system(tmpstr);
2599
2600	/*
2601	 * Parse the output from the nm command
2602	 */
2603	if ((fp = fopen(tmp_nm_file, "r")) == NULL) {
2604		/* Hmmm, let's not treat this as fatal */
2605		fprintf(stderr, "Failed to open nm symbol file [%s]\n", tmp_nm_file);
2606		return;
2607	}
2608	/*
2609	 * Count the number of symbols in the nm symbol table
2610	 */
2611	kern_sym_count = 0;
2612
2613	while ((inchr = getc(fp)) != -1) {
2614		if (inchr == '\n') {
2615			kern_sym_count++;
2616		}
2617	}
2618	rewind(fp);
2619
2620	/*
2621	 * Malloc the space for symbol table
2622	 */
2623	if (kern_sym_count > 0) {
2624		kern_sym_tbl = malloc(kern_sym_count * sizeof(kern_sym_t));
2625
2626		if (!kern_sym_tbl) {
2627			/*
2628			 * Hmmm, lets not treat this as fatal
2629			 */
2630			fprintf(stderr, "Can't allocate memory for kernel symbol table\n");
2631		} else {
2632			bzero(kern_sym_tbl, kern_sym_count * sizeof(kern_sym_t));
2633		}
2634	} else {
2635		/*
2636		 * Hmmm, lets not treat this as fatal
2637		 */
2638		fprintf(stderr, "No kernel symbol table \n");
2639	}
2640	for (i = 0; i < kern_sym_count; i++) {
2641		bzero(tmpstr, 1024);
2642
2643		if (fscanf(fp, "%p %c %s", &kern_sym_tbl[i].k_sym_addr, &inchr, tmpstr) != 3) {
2644			break;
2645		} else {
2646			len = strlen(tmpstr);
2647			kern_sym_tbl[i].k_sym_name = malloc(len + 1);
2648
2649			if (kern_sym_tbl[i].k_sym_name == NULL) {
2650				fprintf(stderr, "Can't allocate memory for symbol name [%s]\n", tmpstr);
2651				kern_sym_tbl[i].k_sym_name = NULL;
2652				len = 0;
2653			} else {
2654				strcpy(kern_sym_tbl[i].k_sym_name, tmpstr);
2655			}
2656
2657			kern_sym_tbl[i].k_sym_len = len;
2658		}
2659	}
2660	if (i != kern_sym_count) {
2661		/*
2662		 * Hmmm, didn't build up entire table from nm
2663		 * scrap the entire thing
2664		 */
2665		free(kern_sym_tbl);
2666		kern_sym_tbl = NULL;
2667		kern_sym_count = 0;
2668	}
2669	fclose(fp);
2670
2671	/*
2672	 * Remove the temporary nm file
2673	 */
2674	unlink(tmp_nm_file);
2675#if 0
2676	/*
2677	 * Dump the kernel symbol table
2678	 */
2679	for (i = 0; i < kern_sym_count; i++) {
2680		if (kern_sym_tbl[i].k_sym_name) {
2681			printf ("[%d] %-16p    %s\n", i,
2682				kern_sym_tbl[i].k_sym_addr, kern_sym_tbl[i].k_sym_name);
2683		} else {
2684			printf ("[%d] %-16p    %s\n", i,
2685				kern_sym_tbl[i].k_sym_addr, "No symbol name");
2686		}
2687	}
2688#endif
2689}
2690
2691void
2692pc_to_string(char *pcstring, uintptr_t pc, int max_len, int mode)
2693{
2694	int ret;
2695	int len;
2696
2697	if (mode == USER_MODE) {
2698		sprintf(pcstring, "%-16lx [usermode addr]", pc);
2699		return;
2700	}
2701	ret = binary_search(kern_sym_tbl, 0, kern_sym_count-1, pc);
2702
2703	if (ret == -1 || kern_sym_tbl[ret].k_sym_name == NULL) {
2704		sprintf(pcstring, "%-16lx", pc);
2705		return;
2706	}
2707	if ((len = kern_sym_tbl[ret].k_sym_len) > (max_len - 8)) {
2708		len = max_len - 8;
2709	}
2710
2711	memcpy(pcstring, kern_sym_tbl[ret].k_sym_name, len);
2712
2713	sprintf(&pcstring[len], "+0x%-5lx", pc - (uintptr_t)kern_sym_tbl[ret].k_sym_addr);
2714}
2715
2716
2717/*
2718 * Return -1 if not found, else return index
2719 */
2720int
2721binary_search(kern_sym_t *list, int low, int high, uintptr_t addr)
2722{
2723	int mid;
2724
2725	if (kern_sym_count == 0) {
2726		return -1;
2727	}
2728
2729	if (low > high) {
2730		return -1;   /* failed */
2731	}
2732
2733	if (low + 1 == high) {
2734		if ((uintptr_t)list[low].k_sym_addr <= addr && addr < (uintptr_t)list[high].k_sym_addr) {
2735			/*
2736			 * We have a range match
2737			 */
2738			return low;
2739		}
2740		if ((uintptr_t)list[high].k_sym_addr <= addr) {
2741			return high;
2742		}
2743		/*
2744		 * Failed
2745		 */
2746		return -1;
2747	}
2748	mid = (low + high) / 2;
2749
2750	if (addr < (uintptr_t)list[mid].k_sym_addr) {
2751		return binary_search(list, low, mid, addr);
2752	}
2753
2754	return binary_search(list, mid, high, addr);
2755}
2756
2757
2758void
2759open_logfile(const char *path)
2760{
2761	log_fp = fopen(path, "a");
2762
2763	if (!log_fp) {
2764		/*
2765		 * failed to open path
2766		 */
2767		fprintf(stderr, "latency: failed to open logfile [%s]\n", path);
2768		exit_usage();
2769	}
2770}
2771
2772
2773void
2774open_rawfile(const char *path)
2775{
2776	RAW_fd = open(path, O_RDONLY);
2777
2778	if (RAW_fd == -1) {
2779		/*
2780		 * failed to open path
2781		 */
2782		fprintf(stderr, "latency: failed to open RAWfile [%s]\n", path);
2783		exit_usage();
2784	}
2785}
2786
2787
2788void
2789getdivisor(void)
2790{
2791	mach_timebase_info_data_t info;
2792
2793	(void)mach_timebase_info(&info);
2794
2795	divisor = ((double)info.denom / (double)info.numer) * 1000;
2796}
2797