#!/usr/bin/ksh
#
# dexplorer - DTrace system explorer, runs a collection of scripts.
#             Written using DTrace (Solaris 10 3/05).
#
# This program automatically runs a collection of DTrace scripts to examine
# many areas of the system, and places the output in a meaningful directory
# structure that is tar'd and gzip'd.
#
# 28-Jun-2005, ver 0.76     (check for newer versions)
#
# USAGE: dexplorer [-qyDT] [-d outputdir] [-i interval]
#
#        -q               # quiet mode
#        -y               # "yes", don't prompt for confirmation
#        -D               # don't delete output dir
#        -T               # don't create output tar.gz
#        -d outputdir     # output directory
#        -i interval      # interval for each sample
#    eg,
#        dexplorer            # default is 5 second samples
#        dexplorer -y -i30    # no prompting, with 30 second samples
#
# SEE ALSO: DTraceToolkit
#
# THANKS: David Visser, et al. for the idea and encouragement.
#
# COPYRIGHT: Copyright (c) 2005 Brendan Gregg.
#
# CDDL HEADER START
#
#  The contents of this file are subject to the terms of the
#  Common Development and Distribution License, Version 1.0 only
#  (the "License").  You may not use this file except in compliance
#  with the License.
#
#  You can obtain a copy of the license at Docs/cddl1.txt
#  or http://www.opensolaris.org/os/licensing.
#  See the License for the specific language governing permissions
#  and limitations under the License.
#
# CDDL HEADER END
#
# CODE:
#
#  This is currently a monolithic script, and while it contains only
#  a few dozen straightforward DTrace scripts I think it's desirable to
#  keep it that way. The scripts themselves have been designed to be very
#  generic (eg, switching on all sdt:::), and are aggregations to keep a
#  limit on the size of the output.
#
# Author: Brendan Gregg  [Sydney, Australia]
#
# 23-Jun-2005   Brendan Gregg   Created this.

#
# Default variables
#
interval=5                              # time of each sample
verbose=1                               # print screen output
prompt=1                                # prompt before run
tar=1                                   # create tar file
delete=1                                # delete output dirs
dtrace=/usr/sbin/dtrace                 # path to dtrace
root=.                                  # default output dir
PATH=/usr/bin:/usr/sbin                 # safe path
dir=de_`uname -n`_`date +%Y%m%d%H%M`    # OUTPUT FILENAME
samples=20                              # max number of tests
current=0                               # current sample

#
# usage - print the command line synopsis to stderr.
#
function usage {
    cat <<END >&2
USAGE: dexplorer [-qyDT] [-d outputdir] [-i interval]

       -q               # quiet mode
       -y               # "yes", don't prompt for confirmation
       -D               # don't delete output dir
       -T               # don't create output tar.gz
       -d outputdir     # output directory
       -i interval      # interval for each sample
   eg,
       dexplorer            # default is 5 second samples
       dexplorer -y -i30    # no prompting, with 30 second samples
END
}

#
# Process options
#
while getopts d:hi:qyDT name
do
    case $name in
    d)      root=$OPTARG ;;
    i)      interval=$OPTARG ;;
    q)      verbose=0 ;;
    y)      prompt=0 ;;
    D)      delete=0 ;;
    T)      tar=0 ;;
    h|?)    usage
            exit 1 ;;
    esac
done
shift $(( OPTIND - 1 ))

#
# Confirm path
#
if [[ "$prompt" == "1" ]] ; then
    if [[ "$root" == "." ]]; then
        print "Output dir will be the current dir ($PWD)."
    else
        print "Output dir will be $root"
    fi
    print -n "Hit enter for yes, or type path: "
    read ans junk
    if [[ "$ans" == [yY] || "$ans" == [yY]es ]]; then
        print "WARNING: I didn't ask for \"$ans\"!"
        print "\tI was asking for the path or just enter."
        print "\tignoring \"$ans\"..."
        # Really ignore it: without this the answer would still be
        # taken as the output path by the test below.
        ans=""
    fi
    if [[ "$ans" != "" ]]; then
        root=$ans
        print "Output is now $root."
    fi
fi

#
# Sanity checks
#
# The interval is spliced into the D program (tick-<interval>sec) and used
# in shell arithmetic, so it must be a positive whole number of seconds.
if [[ -z "$interval" || "$interval" == +(0) ]]; then
    print "ERROR3: Length of interval $interval too short.\n"
    print "Minimum 1 second."
    exit 3
fi
if [[ "$interval" != +([0-9]) ]]; then
    print "ERROR2: Invalid interval $interval.\n"
    print "Please use a number of seconds."
    exit 2
fi
interval=${interval##+(0)}      # drop leading zeros, eg "05" -> "5"
if [[ ! -d "$root" ]]; then
    print "ERROR4: Output directory \"$root\" does not exist.\n"
    print "Perhaps try a mkdir first?"
    print "or use an existing dir, eg \"/tmp\""
    exit 4
fi
if [[ ! -w "$root" ]]; then
    print "ERROR5: Can't write to output directory \"$root\".\n"
    print "Are you logged in as root?"
    print "Perhaps try another directory, eg \"/tmp\""
    exit 5
fi
# A trivial D program that prints its pid; empty output means dtrace
# could not be run.
if [[ `$dtrace -b1k -qn 'BEGIN { trace(pid); exit(0); }'` == "" ]]; then
    print "ERROR6: Unable to run dtrace!\n"
    print "Perhaps this is a permission problem? Try running as root."
    exit 6
fi

# calculate total time
(( total = interval * samples ))
if (( total > 180 )); then
    (( total = total / 60 ))
    total="$total minutes"
else
    total="$total seconds"
fi

#
# Common Functions
#

# decho - print the arguments, unless in quiet mode.
function decho {
    if (( verbose )); then print "$*"; fi
}

# clean - filter that removes empty lines from dtrace output.
clean="sed /^\$/d"

# header - D clauses prepended to every test: print a one line banner
# at start, and end the test after $interval seconds.
header='dtrace:::BEGIN {
        printf("%Y, ", walltimestamp);
        printf("%s %s %s %s %s, ", `utsname.sysname, `utsname.nodename,
            `utsname.release, `utsname.version, `utsname.machine);
        printf("%d secs\n",'$interval');
    }
    profile:::tick-'$interval'sec { exit(0); }
    '

# dstatus - print percent complete and the name of the next test,
# unless in quiet mode.
function dstatus {
    if (( verbose )); then
        (( percent = current * 100 / samples ))
        # message is passed as data, not as part of the format string
        printf "%3d%% %s\n" $percent "$*"
        (( current = current + 1 ))
    fi
}

########################################
#  START                               #
########################################

#
# Make dirs
#
err=0
cd "$root"
(( err = err + $? ))
mkdir "$dir"
(( err = err + $? ))
cd "$dir"
(( err = err + $? ))
# double check that we really are in the new directory
base1=${PWD##*/}
base2=${dir##*/}
if [[ "$base1" != "$base2" || "$err" != "0" ]]; then
    print "ERROR7: tried to mkdir $dir from $root, but something failed.\n"
    print "Check directories before rerunning."
    exit 7
fi
mkdir Cpu
mkdir Disk
mkdir Mem
mkdir Net
mkdir Proc
mkdir Info

#
# Create Log
#
decho "Starting dexplorer ver 0.76."
decho "Sample interval is $interval seconds. Total run is > $total."
( print "dexplorer ver 0.76\n------------------"
print -n "System: "
uname -a
print -n "Start: "
date ) > log

#
# Capture Standard Info
#
args='pid,ppid,uid,gid,projid,zoneid,pset,pri,nice,'
args=$args'class,vsz,rss,time,pcpu,pmem,args'
uname -a > Info/uname-a         # System
psrinfo -v > Info/psrinfo-v     # CPU
prtconf > Info/prtconf          # Memory (+ devices)
df -k > Info/df-k               # Disk
ifconfig -a > Info/ifconfig-a   # Network
ps -eo $args > Info/ps-o        # Processes
uptime > Info/uptime            # Load

#
# Cpu Tests, DTrace
#

dstatus "Interrupts by CPU..."
$dtrace -qn "$header"'
    sdt:::interrupt-start { @num[cpu] = count(); }
    dtrace:::END
    {
        printf("%-16s %16s\n", "CPU", "INTERRUPTS");
        printa("%-16d %@16d\n", @num);
    }
' | $clean > Cpu/interrupt_by_cpu

dstatus "Interrupt times..."
$dtrace -qn "$header"'
    sdt:::interrupt-start { self->ts = vtimestamp; }
    sdt:::interrupt-complete
    /self->ts && arg0 != 0/
    {
        this->devi = (struct dev_info *)arg0;
        self->name = this->devi != 0 ?
            stringof(`devnamesp[this->devi->devi_major].dn_name) : "?";
        this->inst = this->devi != 0 ? this->devi->devi_instance : 0;
        @num[self->name, this->inst] = sum(vtimestamp - self->ts);
        self->name = 0;
    }
    sdt:::interrupt-complete { self->ts = 0; }
    dtrace:::END
    {
        printf("%11s %16s\n", "DEVICE", "TIME (ns)");
        printa("%10s%-3d %@16d\n", @num);
    }
' | $clean > Cpu/interrupt_time

dstatus "Dispatcher queue length by CPU..."
# Dispatcher queue length by CPU, sampled at 1000 Hz.
# ($dtrace, $header, $clean and dstatus are defined earlier in the script.)
$dtrace -qn "$header"'
    profile:::profile-1000
    {
        this->num = curthread->t_cpu->cpu_disp->disp_nrunnable;
        @length[cpu] = lquantize(this->num, 0, 100, 1);
    }
    dtrace:::END { printa(" CPU %d%@d\n", @length); }
' | $clean > Cpu/dispqlen_by_cpu

dstatus "Sdt counts..."
$dtrace -qn "$header"'
    sdt:::{ @num[probefunc, probename] = count(); }
    dtrace:::END
    {
        printf("%-32s %-32s %10s\n", "FUNC", "NAME", "COUNT");
        printa("%-32s %-32s %@10d\n", @num);
    }
' | $clean > Cpu/sdt_count

#
# Disk Tests, DTrace
#

dstatus "Pages paged in by process..."
$dtrace -qn "$header"'
    vminfo:::pgpgin { @pg[pid, execname] = sum(arg0); }
    dtrace:::END
    {
        printf("%6s %-16s %16s\n", "PID", "CMD", "PAGES");
        printa("%6d %-16s %@16d\n", @pg);
    }
' | $clean > Disk/pgpgin_by_process

# The pathname is saved on entry and only counted if the open succeeded.
dstatus "Files opened successfully count..."
$dtrace -qn "$header"'
    syscall::open*:entry { self->file = copyinstr(arg0); self->ok = 1; }
    syscall::open*:return /self->ok && arg0 != -1/
    {
        @num[self->file] = count();
    }
    syscall::open*:return /self->ok/ { self->file = 0; self->ok = 0; }
    dtrace:::END
    {
        printf("%-64s %8s\n", "FILE", "COUNT");
        printa("%-64s %@8d\n", @num);
    }
' | $clean > Disk/fileopen_count

# No END clause here: the aggregation is left for dtrace to print on exit.
dstatus "Disk I/O size distribution by process..."
$dtrace -qn "$header"'
    io:::start { @size[pid, execname] = quantize(args[0]->b_bcount); }
' | $clean > Disk/sizedist_by_process

#
# Mem Tests, DTrace
#

dstatus "Minor faults by process..."
$dtrace -qn "$header"'
    vminfo:::as_fault { @mem[pid, execname] = sum(arg0); }
    dtrace:::END
    {
        printf("%6s %-16s %16s\n", "PID", "CMD", "MINFAULTS");
        printa("%6d %-16s %@16d\n", @mem);
    }
' | $clean > Mem/minf_by_process


dstatus "Vminfo data by process..."
# Vminfo data by process: sums arg0 of every vminfo probe by
# pid, command and probe name.
# ($dtrace, $header, $clean and dstatus are defined earlier in the script.)
$dtrace -qn "$header"'
    vminfo::: { @data[pid, execname, probename] = sum(arg0); }
    dtrace:::END
    {
        printf("%6s %-16s %-16s %16s\n",
            "PID", "CMD", "STATISTIC", "VALUE");
        printa("%6d %-16s %-16s %@16d\n", @data);
    }
' | $clean > Mem/vminfo_by_process

#
# Net Tests, DTrace
#

dstatus "Mib data by mib statistic..."
$dtrace -qn "$header"'
    mib::: { @data[probename] = sum(arg0); }
    dtrace:::END
    {
        printf("%-32s %16s\n", "STATISTIC", "VALUE");
        printa("%-32s %@16d\n", @data);
    }
' | $clean > Net/mib_data

dstatus "TCP write bytes by process..."
$dtrace -qn "$header"'
    fbt:ip:tcp_output:entry
    {
        this->size = msgdsize(args[1]);
        @size[pid, execname] = sum(this->size);
    }
    dtrace:::END
    {
        printf("%6s %-16s %12s\n", "PID", "CMD", "BYTES");
        printa("%6d %-16s %@12d\n", @size);
    }
' | $clean > Net/tcpw_by_process

#
# Proc Tests, DTrace
#

dstatus "Sample process @ 1000 Hz..."
$dtrace -qn "$header"'
    profile:::profile-1000
    {
        @num[pid, curpsinfo->pr_psargs] = count();
    }
    dtrace:::END
    {
        printf("%6s %12s %s\n", "PID", "SAMPLES", "ARGS");
        printa("%6d %@12d %S\n", @num);
    }
' | $clean > Proc/sample_process

dstatus "Syscall count by process..."
$dtrace -qn "$header"'
    syscall:::entry { @num[pid, execname, probefunc] = count(); }
    dtrace:::END
    {
        printf("%6s %-24s %-24s %8s\n",
            "PID", "CMD", "SYSCALL", "COUNT");
        printa("%6d %-24s %-24s %@8d\n", @num);
    }
' | $clean > Proc/syscall_by_process

dstatus "Syscall count by syscall..."
$dtrace -qn "$header"'
    syscall:::entry { @num[probefunc] = count(); }
    dtrace:::END
    {
        printf("%-32s %16s\n", "SYSCALL", "COUNT");
        printa("%-32s %@16d\n", @num);
    }
' | $clean > Proc/syscall_count

dstatus "Read bytes by process..."
# Read bytes by process: sums arg0 of sysinfo:::readch by pid and command.
# ($dtrace, $header, $clean and dstatus are defined earlier in the script.)
$dtrace -qn "$header"'
    sysinfo:::readch { @bytes[pid, execname] = sum(arg0); }
    dtrace:::END
    {
        printf("%6s %-16s %16s\n", "PID", "CMD", "BYTES");
        printa("%6d %-16s %@16d\n", @bytes);
    }
' | $clean > Proc/readb_by_process

dstatus "Write bytes by process..."
$dtrace -qn "$header"'
    sysinfo:::writech { @bytes[pid, execname] = sum(arg0); }
    dtrace:::END
    {
        printf("%6s %-16s %16s\n", "PID", "CMD", "BYTES");
        printa("%6d %-16s %@16d\n", @bytes);
    }
' | $clean > Proc/writeb_by_process

dstatus "Sysinfo counts by process..."
$dtrace -qn "$header"'
    sysinfo::: { @num[pid, execname, probename] = sum(arg0); }
    dtrace:::END
    {
        printf("%6s %-16s %-16s %16s\n",
            "PID", "CMD", "STATISTIC", "COUNT");
        printa("%6d %-16s %-16s %@16d\n", @num);
    }
' | $clean > Proc/sysinfo_by_process

dstatus "New process counts with arguments..."
$dtrace -qn "$header"'
    proc:::exec-success
    {
        @num[pid, ppid, curpsinfo->pr_psargs] = count();
    }
    dtrace:::END
    {
        printf("%6s %6s %8s %s\n", "PID", "PPID", "COUNT", "ARGS");
        printa("%6d %6d %@8d %S\n", @num);
    }
' | $clean > Proc/newprocess_count

dstatus "Signal counts..."
$dtrace -qn "$header"'
    proc:::signal-send {
        @num[execname,args[2],stringof(args[1]->pr_fname)] = count();
    }
    dtrace:::END
    {
        printf("%-16s %-8s %-16s %8s\n",
            "FROM", "SIG", "TO", "COUNT");
        printa("%-16s %-8d %-16s %@8d\n", @num);
    }
' | $clean > Proc/signal_count

dstatus "Syscall error counts..."
# Syscall error counts: counts syscall returns of -1 by pid, command,
# syscall and errno.
# ($dtrace, $header, $clean and decho are defined earlier in the script.)
$dtrace -qn "$header"'
    syscall:::return /(int)arg0 == -1/
    {
        @num[pid, execname, probefunc, errno] = count();
    }
    dtrace:::END
    {
        printf("%6s %-16s %-32s %-6s %8s\n",
            "PID", "CMD", "SYSCALL", "ERRNO", "COUNT");
        printa("%6d %-16s %-32s %-6d %@8d\n", @num);
    }
' | $clean > Proc/syscall_errors


###########
#  Done
#
( print -n "End: "
date ) >> log
decho "100% Done."

# archived is set only once both tar and gzip have succeeded, so that the
# output directory is never removed unless the tar.gz really exists.
archived=0
if (( tar )); then
    cd ..
    if tar cf "$dir.tar" "$dir" && gzip "$dir.tar"; then
        archived=1
        decho "File is $dir.tar.gz"
    else
        print "ERROR8: Unable to create $dir.tar.gz.\n"
        print "The output directory has been kept."
    fi
fi
if (( delete && archived )); then
    # Never run the rm list below from the wrong directory.
    cd "$dir" || exit 8
    # this could all be an "rm -r $dir", but since it will be run
    # as root on production servers - let's be deliberately cautious,
    rm Cpu/interrupt_by_cpu
    rm Cpu/interrupt_time
    rm Cpu/dispqlen_by_cpu
    rm Cpu/sdt_count
    rm Disk/pgpgin_by_process
    rm Disk/fileopen_count
    rm Disk/sizedist_by_process
    rm Mem/minf_by_process
    rm Mem/vminfo_by_process
    rm Net/mib_data
    rm Net/tcpw_by_process
    rm Proc/sample_process
    rm Proc/syscall_by_process
    rm Proc/syscall_count
    rm Proc/readb_by_process
    rm Proc/writeb_by_process
    rm Proc/sysinfo_by_process
    rm Proc/newprocess_count
    rm Proc/signal_count
    rm Proc/syscall_errors
    rmdir Cpu
    rmdir Disk
    rmdir Mem
    rmdir Net
    rmdir Proc
    rm Info/uname-a
    rm Info/psrinfo-v
    rm Info/prtconf
    rm Info/df-k
    rm Info/ifconfig-a
    rm Info/ps-o
    rm Info/uptime
    rmdir Info
    rm log
    cd ..
    rmdir "$dir"
else
    decho "Directory is $dir"
fi

# Report a failed archive to the caller.
if (( tar && ! archived )); then
    exit 8
fi
