#!/usr/bin/ksh
#
# dexplorer - DTrace system explorer, runs a collection of scripts.
#             Written using DTrace (Solaris 10 3/05).
#
# This program automatically runs a collection of DTrace scripts to examine
# many areas of the system, and places the output in a meaningful directory
# structure that is tar'd and gzip'd.
#
# $Id: dexplorer 3 2007-08-01 10:50:08Z brendan $
#
# USAGE:    dexplorer [-qyDT] [-d outputdir] [-i interval]
#
#           -q              # quiet mode
#           -y              # "yes", don't prompt for confirmation
#           -D              # don't delete output dir
#           -T              # don't create output tar.gz
#           -d outputdir    # output directory
#           -i interval     # interval for each sample
#   eg,
#           dexplorer       # default is 5 second samples
#           dexplorer -y -i30   # no prompting, with 30 second samples
#
# SEE ALSO: DTraceToolkit
#
# THANKS: David Visser, et al. for the idea and encouragement.
#
# COPYRIGHT: Copyright (c) 2005 Brendan Gregg.
#
# CDDL HEADER START
#
#  The contents of this file are subject to the terms of the
#  Common Development and Distribution License, Version 1.0 only
#  (the "License").  You may not use this file except in compliance
#  with the License.
#
#  You can obtain a copy of the license at Docs/cddl1.txt
#  or http://www.opensolaris.org/os/licensing.
#  See the License for the specific language governing permissions
#  and limitations under the License.
#
# CDDL HEADER END
#
# CODE:
#
#  This is currently a monolithic script, and while it contains only
#  a few dozen straightforward DTrace scripts I think it's desirable to
#  keep it that way. The scripts themselves have been designed to be very
#  generic (eg, switching on all sdt:::), and are aggregations to keep a
#  limit on the size of the output.
#
# Author: Brendan Gregg  [Sydney, Australia]
#
# 23-Jun-2005   Brendan Gregg   Created this.
# 28-Jun-2005      "      "     Last update.

#
# Default variables
#
interval=5                              # time of each sample
verbose=1                               # print screen output
prompt=1                                # prompt before run
tar=1                                   # create tar file
delete=1                                # delete output dirs
dtrace=/usr/sbin/dtrace                 # path to dtrace
root=.                                  # default output dir
PATH=/usr/bin:/usr/sbin                 # safe path
dir=de_`uname -n`_`date +%Y%m%d%H%M`    # OUTPUT FILENAME
samples=20                              # max number of tests
current=0                               # current sample

#
# usage - print the option summary to stderr.
#
function usage {
    cat <<END >&2
USAGE: dexplorer [-qyDT] [-d outputdir] [-i interval]

        -q               # quiet mode
        -y               # "yes", don't prompt for confirmation
        -D               # don't delete output dir
        -T               # don't create output tar.gz
        -d outputdir     # output directory
        -i interval      # interval for each sample
   eg,
       dexplorer         # default is 5 second samples
       dexplorer -y -i30 # no prompting, with 30 second samples
END
}

#
# Process options
#
while getopts d:hi:qyDT name
do
    case $name in
    d)      root=$OPTARG ;;
    i)      interval=$OPTARG ;;
    q)      verbose=0 ;;
    y)      prompt=0 ;;
    D)      delete=0 ;;
    T)      tar=0 ;;
    h|?)    usage
            exit 1
    esac
done
shift $(( OPTIND - 1 ))

#
# Confirm path
#
if [[ "$prompt" == "1" ]]; then
    if [[ "$root" == "." ]]; then
        print "Output dir will be the current dir ($PWD)."
    else
        print "Output dir will be $root"
    fi
    print -n "Hit enter for yes, or type path: "
    read ans junk
    if [[ "$ans" == [yY] || "$ans" == [yY]es ]]; then
        print "WARNING: I didn't ask for \"$ans\"!"
        print "\tI was asking for the path or just enter."
        print "\tignoring \"$ans\"..."
        # Really ignore the answer; otherwise "y" would become the
        # output directory despite the message above.
        ans=""
    fi
    if [[ "$ans" != "" ]]; then
        root=$ans
        print "Output is now $root."
    fi
fi

#
# Sanity checks
#
# The interval is spliced into a "tick-<N>sec" probe name and into shell
# arithmetic, so it must be a plain positive integer: reject anything
# that is not all digits (letters, signs, fractions), then reject empty
# or all-zero values.
if [[ -n "$interval" && "$interval" != +([0-9]) ]]; then
    print "ERROR2: Invalid interval $interval.\n"
    print "Please use a number of seconds."
    exit 2
fi
if [[ -z "$interval" || "$interval" == +(0) ]]; then
    print "ERROR3: Length of interval $interval too short.\n"
    print "Minimum 1 second."
    exit 3
fi
if [[ ! -d "$root" ]]; then
    print "ERROR4: Output directory \"$root\" does not exist.\n"
    print "Perhaps try a mkdir first?"
    print "or use an existing dir, eg \"/tmp\""
    exit 4
fi
if [[ ! -w "$root" ]]; then
    print "ERROR5: Can't write to output directory \"$root\".\n"
    print "Are you logged in as root?"
    print "Perhaps try another directory, eg \"/tmp\""
    exit 5
fi
# A trivial D program that prints its pid; empty output means dtrace
# could not be run.
if [[ `$dtrace -b1k -q -n 'BEGIN { trace(pid); exit(0); }'` == "" ]]; then
    print "ERROR6: Unable to run dtrace!\n"
    print "Perhaps this is a permission problem? Try running as root."
    exit 6
fi

# calculate total time
(( total = interval * samples ))
if (( total > 180 )); then
    (( total = total / 60 ))
    total="$total minutes"
else
    total="$total seconds"
fi

#
# Common Functions
#

# decho - print the arguments unless quiet mode (-q) was requested.
function decho {
    if (( verbose )); then print "$*"; fi
}

# Filter that strips empty lines from dtrace output. It is expanded
# unquoted at each use so that it splits into "sed" and its argument.
clean="sed /^\$/d"

# Common D prologue prepended to every test: prints a one line banner
# (time, uname fields, interval) and ends the trace after $interval secs.
header='dtrace:::BEGIN {
        printf("%Y, ", walltimestamp);
        printf("%s %s %s %s %s, ", `utsname.sysname, `utsname.nodename,
            `utsname.release, `utsname.version, `utsname.machine);
        printf("%d secs\n",'$interval');
 }
 profile:::tick-'$interval'sec { exit(0); }
 '

# dstatus - print a percent-complete progress line for the next test.
# The message is passed as a printf argument rather than as part of the
# format string, so any "%" or "\" in it is printed literally.
function dstatus {
    if (( verbose )); then
        (( percent = current * 100 / samples ))
        printf "%3d%% %s\n" $percent "$*"
        (( current = current + 1 ))
    fi
}

########################################
#  START                               #
########################################

#
# Make dirs
#
# The steps are chained so that a failed cd never lets mkdir create the
# output directory somewhere unexpected; paths are quoted so that
# directories containing spaces work.
err=0
cd "$root" && mkdir "$dir" && cd "$dir"
(( err = err + $? ))
base1=${PWD##*/}
base2=${dir##*/}
if [[ "$base1" != "$base2" || "$err" != "0" ]]; then
    print "ERROR7: tried to mkdir $dir from $root, but something failed.\n"
    print "Check directories before rerunning."
    exit 7
fi
mkdir Cpu
mkdir Disk
mkdir Mem
mkdir Net
mkdir Proc
mkdir Info

#
# Create Log
#
decho "Starting dexplorer ver 0.76."
decho "Sample interval is $interval seconds. Total run is > $total."
( print "dexplorer ver 0.76\n------------------"
print -n "System: "
uname -a
print -n "Start:  "
date ) > log

#
# Capture Standard Info
#
args='pid,ppid,uid,gid,projid,zoneid,pset,pri,nice,'
args=$args'class,vsz,rss,time,pcpu,pmem,args'
uname -a > Info/uname-a         # System
psrinfo -v > Info/psrinfo-v     # CPU
prtconf > Info/prtconf          # Memory (+ devices)
df -k > Info/df-k               # Disk
ifconfig -a > Info/ifconfig-a   # Network
ps -eo $args > Info/ps-o        # Processes
uptime > Info/uptime            # Load

#
# Cpu Tests, DTrace
#

dstatus "Interrupts by CPU..."
$dtrace -q -n "$header"'
 sdt:::interrupt-start { @num[cpu] = count(); }
 dtrace:::END
 {
        printf("%-16s %16s\n", "CPU", "INTERRUPTS");
        printa("%-16d %@16d\n", @num);
 }
' | $clean > Cpu/interrupt_by_cpu

dstatus "Interrupt times..."
$dtrace -q -n "$header"'
 sdt:::interrupt-start { self->ts = vtimestamp; }
 sdt:::interrupt-complete
 /self->ts && arg0 != 0/
 {
        this->devi = (struct dev_info *)arg0;
        self->name = this->devi != 0 ?
            stringof(`devnamesp[this->devi->devi_major].dn_name) : "?";
        this->inst = this->devi != 0 ? this->devi->devi_instance : 0;
        @num[self->name, this->inst] = sum(vtimestamp - self->ts);
        self->name = 0;
 }
 sdt:::interrupt-complete { self->ts = 0; }
 dtrace:::END
 {
        printf("%11s    %16s\n", "DEVICE", "TIME (ns)");
        printa("%10s%-3d %@16d\n", @num);
 }
' | $clean > Cpu/interrupt_time

dstatus "Dispatcher queue length by CPU..."
# Dispatcher queue length: samples disp_nrunnable of the current CPU at
# 1000 Hz and prints one lquantize distribution per CPU.
$dtrace -q -n "$header"'
 profile:::profile-1000
 {
        this->num = curthread->t_cpu->cpu_disp->disp_nrunnable;
        @length[cpu] = lquantize(this->num, 0, 100, 1);
 }
 dtrace:::END { printa(" CPU %d%@d\n", @length); }
' | $clean > Cpu/dispqlen_by_cpu

# Count of every sdt probe that fired, keyed by function and probe name.
dstatus "Sdt counts..."
$dtrace -q -n "$header"'
 sdt:::{ @num[probefunc, probename] = count(); }
 dtrace:::END
 {
        printf("%-32s %-32s %10s\n", "FUNC", "NAME", "COUNT");
        printa("%-32s %-32s %@10d\n", @num);
 }
' | $clean > Cpu/sdt_count

#
# Disk Tests, DTrace
#

# Sum of vminfo pgpgin arg0 per process.
dstatus "Pages paged in by process..."
$dtrace -q -n "$header"'
 vminfo:::pgpgin { @pg[pid, execname] = sum(arg0); }
 dtrace:::END
 {
        printf("%6s %-16s %16s\n", "PID", "CMD", "PAGES");
        printa("%6d %-16s %@16d\n", @pg);
 }
' | $clean > Disk/pgpgin_by_process

# Pathnames passed to open*() calls that did not return -1, with counts.
# The thread-local variables are cleared on every return.
dstatus "Files opened successfully count..."
$dtrace -q -n "$header"'
 syscall::open*:entry { self->file = copyinstr(arg0); self->ok = 1; }
 syscall::open*:return /self->ok && arg0 != -1/
 {
        @num[self->file] = count();
 }
 syscall::open*:return /self->ok/ { self->file = 0; self->ok = 0; }
 dtrace:::END
 {
        printf("%-64s %8s\n", "FILE", "COUNT");
        printa("%-64s %@8d\n", @num);
 }
' | $clean > Disk/fileopen_count

# Power-of-two distribution of b_bcount per process; there is no END
# clause, so the aggregation is printed in dtrace's default format.
dstatus "Disk I/O size distribution by process..."
$dtrace -q -n "$header"'
 io:::start { @size[pid, execname] = quantize(args[0]->b_bcount); }
' | $clean > Disk/sizedist_by_process

#
# Mem Tests, DTrace
#

# Sum of vminfo as_fault arg0 per process.
dstatus "Minor faults by process..."
$dtrace -q -n "$header"'
 vminfo:::as_fault { @mem[pid, execname] = sum(arg0); }
 dtrace:::END
 {
        printf("%6s %-16s %16s\n", "PID", "CMD", "MINFAULTS");
        printa("%6d %-16s %@16d\n", @mem);
 }
' | $clean > Mem/minf_by_process


dstatus "Vminfo data by process..."
# Every vminfo statistic summed per process and probe name.
$dtrace -q -n "$header"'
 vminfo::: { @data[pid, execname, probename] = sum(arg0); }
 dtrace:::END
 {
        printf("%6s %-16s %-16s %16s\n",
            "PID", "CMD", "STATISTIC", "VALUE");
        printa("%6d %-16s %-16s %@16d\n", @data);
 }
' | $clean > Mem/vminfo_by_process

#
# Net Tests, DTrace
#

# Every mib statistic summed by probe name.
dstatus "Mib data by mib statistic..."
$dtrace -q -n "$header"'
 mib::: { @data[probename] = sum(arg0); }
 dtrace:::END
 {
        printf("%-32s %16s\n", "STATISTIC", "VALUE");
        printa("%-32s %@16d\n", @data);
 }
' | $clean > Net/mib_data

# Sum of msgdsize(args[1]) at entry to tcp_output in the ip module,
# per process.
dstatus "TCP write bytes by process..."
$dtrace -q -n "$header"'
 fbt:ip:tcp_output:entry
 {
        this->size = msgdsize(args[1]);
        @size[pid, execname] = sum(this->size);
 }
 dtrace:::END
 {
        printf("%6s %-16s %12s\n", "PID", "CMD", "BYTES");
        printa("%6d %-16s %@12d\n", @size);
 }
' | $clean > Net/tcpw_by_process

#
# Proc Tests, DTrace
#

# Counts 1000 Hz profile samples by pid and process arguments.
dstatus "Sample process @ 1000 Hz..."
$dtrace -q -n "$header"'
 profile:::profile-1000
 {
        @num[pid, curpsinfo->pr_psargs] = count();
 }
 dtrace:::END
 {
        printf("%6s %12s %s\n", "PID", "SAMPLES", "ARGS");
        printa("%6d %@12d %S\n", @num);
 }
' | $clean > Proc/sample_process

# System call entries counted per process and syscall name.
dstatus "Syscall count by process..."
$dtrace -q -n "$header"'
 syscall:::entry { @num[pid, execname, probefunc] = count(); }
 dtrace:::END
 {
        printf("%6s %-24s %-24s %8s\n",
            "PID", "CMD", "SYSCALL", "COUNT");
        printa("%6d %-24s %-24s %@8d\n", @num);
 }
' | $clean > Proc/syscall_by_process

# System call entries counted per syscall name only.
dstatus "Syscall count by syscall..."
$dtrace -q -n "$header"'
 syscall:::entry { @num[probefunc] = count(); }
 dtrace:::END
 {
        printf("%-32s %16s\n", "SYSCALL", "COUNT");
        printa("%-32s %@16d\n", @num);
 }
' | $clean > Proc/syscall_count

dstatus "Read bytes by process..."
# Sum of sysinfo readch arg0 per process.
$dtrace -q -n "$header"'
 sysinfo:::readch { @bytes[pid, execname] = sum(arg0); }
 dtrace:::END
 {
        printf("%6s %-16s %16s\n", "PID", "CMD", "BYTES");
        printa("%6d %-16s %@16d\n", @bytes);
 }
' | $clean > Proc/readb_by_process

# Sum of sysinfo writech arg0 per process.
dstatus "Write bytes by process..."
$dtrace -q -n "$header"'
 sysinfo:::writech { @bytes[pid, execname] = sum(arg0); }
 dtrace:::END
 {
        printf("%6s %-16s %16s\n", "PID", "CMD", "BYTES");
        printa("%6d %-16s %@16d\n", @bytes);
 }
' | $clean > Proc/writeb_by_process

# Every sysinfo statistic summed per process and probe name.
dstatus "Sysinfo counts by process..."
$dtrace -q -n "$header"'
 sysinfo::: { @num[pid, execname, probename] = sum(arg0); }
 dtrace:::END
 {
        printf("%6s %-16s %-16s %16s\n",
            "PID", "CMD", "STATISTIC", "COUNT");
        printa("%6d %-16s %-16s %@16d\n", @num);
 }
' | $clean > Proc/sysinfo_by_process

# Successful exec()s counted by pid, ppid and the new process arguments.
dstatus "New process counts with arguments..."
$dtrace -q -n "$header"'
 proc:::exec-success
 {
        @num[pid, ppid, curpsinfo->pr_psargs] = count();
 }
 dtrace:::END
 {
        printf("%6s %6s %8s %s\n", "PID", "PPID", "COUNT", "ARGS");
        printa("%6d %6d %@8d %S\n", @num);
 }
' | $clean > Proc/newprocess_count

# Signals sent, keyed by sender name, args[2] (printed under SIG) and
# the pr_fname of args[1] (printed under TO).
dstatus "Signal counts..."
$dtrace -q -n "$header"'
 proc:::signal-send {
        @num[execname,args[2],stringof(args[1]->pr_fname)] = count();
 }
 dtrace:::END
 {
        printf("%-16s %-8s %-16s %8s\n",
            "FROM", "SIG", "TO", "COUNT");
        printa("%-16s %-8d %-16s %@8d\n", @num);
 }
' | $clean > Proc/signal_count

dstatus "Syscall error counts..."
# System calls that returned -1, counted by process, syscall and errno.
$dtrace -q -n "$header"'
 syscall:::return /(int)arg0 == -1/
 {
        @num[pid, execname, probefunc, errno] = count();
 }
 dtrace:::END
 {
        printf("%6s %-16s %-32s %-6s %8s\n",
            "PID", "CMD", "SYSCALL", "ERRNO", "COUNT");
        printa("%6d %-16s %-32s %-6d %@8d\n", @num);
 }
' | $clean > Proc/syscall_errors


###########
#  Done
#
( print -n "End:    "
date ) >> log
decho "100% Done."

# archived becomes 1 only when both tar and gzip succeed. The collected
# output is deleted only in that case, so a failed archive (full disk,
# existing .tar.gz, ...) never destroys the data.
archived=0
if (( tar )); then
    cd ..
    if tar cf "$dir.tar" "$dir" && gzip "$dir.tar"; then
        archived=1
        decho "File is $dir.tar.gz"
    else
        print "ERROR8: Unable to create $dir.tar.gz.\n"
        print "The output directory $dir has been kept."
    fi
fi
if (( delete && archived )); then
    # The rm commands below use relative paths, so stop if the output
    # directory cannot be entered.
    if ! cd "$dir"; then
        print "ERROR9: Unable to cd to $dir for cleanup.\n"
        exit 9
    fi
    # this could be all an "rm -r $dir", but since it will be run
    # as root on production servers - let's be overly cautious and
    # remove only the files this script created,
    rm Cpu/interrupt_by_cpu
    rm Cpu/interrupt_time
    rm Cpu/dispqlen_by_cpu
    rm Cpu/sdt_count
    rm Disk/pgpgin_by_process
    rm Disk/fileopen_count
    rm Disk/sizedist_by_process
    rm Mem/minf_by_process
    rm Mem/vminfo_by_process
    rm Net/mib_data
    rm Net/tcpw_by_process
    rm Proc/sample_process
    rm Proc/syscall_by_process
    rm Proc/syscall_count
    rm Proc/readb_by_process
    rm Proc/writeb_by_process
    rm Proc/sysinfo_by_process
    rm Proc/newprocess_count
    rm Proc/signal_count
    rm Proc/syscall_errors
    rmdir Cpu
    rmdir Disk
    rmdir Mem
    rmdir Net
    rmdir Proc
    rm Info/uname-a
    rm Info/psrinfo-v
    rm Info/prtconf
    rm Info/df-k
    rm Info/ifconfig-a
    rm Info/ps-o
    rm Info/uptime
    rmdir Info
    rm log
    cd ..
    rmdir "$dir"
else
    decho "Directory is $dir"
fi

# Report a failed archive through the exit status as well.
if (( tar && ! archived )); then
    exit 8
fi