#!/bin/ksh
##!/usr/bin/ksh
#
# iotop - display top disk I/O events by process.
#         Written using DTrace (Solaris 10 3/05).
#
# This is measuring disk events that have made it past system caches.
#
# 20-Apr-2006, ver 0.76
#
# USAGE:  iotop [-C] [-D|-o|-P] [-j|-Z] [-d device] [-f filename]
#               [-m mount_point] [-t top] [interval [count]]
#
#         iotop              # default output, 5 second intervals
#
#         -C                 # don't clear the screen
#         -D                 # print delta times, elapsed, us
#         -j                 # print project ID
#         -o                 # print disk delta times, us
#         -P                 # print %I/O (disk delta times)
#         -Z                 # print zone ID
#         -d device          # instance name to snoop (eg, dad0)
#         -f filename        # full pathname of file to snoop
#         -m mount_point     # this FS only (will skip raw events)
#         -t top             # print top number only
#   eg,
#         iotop 1            # 1 second samples
#         iotop -C           # don't clear the screen
#         iotop -P           # print %I/O (time based)
#         iotop -j           # print project IDs
#         iotop -Z           # print zone IDs
#         iotop -t 20        # print top 20 lines only
#         iotop -C 5 12      # print 12 x 5 second samples
#
# FIELDS:
#         UID        user ID
#         PID        process ID
#         PPID       parent process ID
#         PROJ       project ID
#         ZONE       zone ID
#         CMD        process command name
#         DEVICE     device name
#         MAJ        device major number
#         MIN        device minor number
#         D          direction, Read or Write
#         BYTES      total size of operations, bytes
#         ELAPSED    total elapsed from request to completion, us
#         DISKTIME   total time for disk to complete request, us
#         %I/O       percent disk I/O, based on time (DISKTIME)
#         load       1 min load average
#         disk_r     total disk read Kbytes for sample
#         disk_w     total disk write Kbytes for sample
#
# NOTE:
# * There are two different delta times reported. -D prints the
#   elapsed time from the disk request (strategy) to the disk completion
#   (iodone); -o prints the time for the disk to complete that event
#   since its last event (time between iodones), or, the time to the
#   strategy if the disk had been idle.
# * The %I/O value can exceed 100%. It represents how busy a process is
#   making the disks, in terms of a single disk. A value of 200% could
#   mean 2 disks are busy at 100%, or 4 disks at 50%...
# * interval, count and the -t value must be positive decimal integers;
#   they are pasted into the D program as integer literals.
#
# SEE ALSO: iosnoop
#           BigAdmin: DTrace, http://www.sun.com/bigadmin/content/dtrace
#           Solaris Dynamic Tracing Guide, http://docs.sun.com
#           DTrace Tools, http://www.brendangregg.com/dtrace.html
#
# INSPIRATION: top(1) by William LeFebvre
#
# COPYRIGHT: Copyright (c) 2005, 2006 Brendan Gregg.
#
# CDDL HEADER START
#
#  The contents of this file are subject to the terms of the
#  Common Development and Distribution License, Version 1.0 only
#  (the "License").  You may not use this file except in compliance
#  with the License.
#
#  You can obtain a copy of the license at Docs/cddl1.txt
#  or http://www.opensolaris.org/os/licensing.
#  See the License for the specific language governing permissions
#  and limitations under the License.
#
# CDDL HEADER END
#
# KNOWN BUGS:
# - This can print errors while running on servers with Veritas volumes.
#
# Author: Brendan Gregg  [Sydney, Australia]
#
# 15-Jul-2005   Brendan Gregg   Created this.
#


##############################
# --- Process Arguments ---
#

### default variables
opt_device=0; opt_file=0; opt_mount=0; opt_clear=1; opt_proj=0; opt_zone=0
opt_percent=0; opt_def=1; opt_bytes=1; filter=0; device=.; filename=.; mount=.
opt_top=0; opt_elapsed=0; opt_dtime=0; interval=5; count=-1; top=0

### helpers
# need_posint label value
#	Exit with an error on stderr unless value is a positive decimal
#	integer (leading zeros tolerated). These values become integer
#	literals in the D program, so anything else would otherwise only
#	surface as a dtrace compile error, or as an interval of zero that
#	never fires.
function need_posint
{
	if [[ "$2" != *(0)[1-9]*([0-9]) ]]; then
		echo "ERROR: $1 must be a positive integer, got \"$2\"" >&2
		exit 1
	fi
}

### process options
while getopts CDd:f:hjm:oPt:Z name
do
	case $name in
	C)	opt_clear=0 ;;
	D)	opt_elapsed=1; opt_bytes=0 ;;
	d)	opt_device=1; device=$OPTARG ;;
	f)	opt_file=1; filename=$OPTARG ;;
	j)	opt_proj=1; opt_def=0 ;;
	m)	opt_mount=1; mount=$OPTARG ;;
	o)	opt_dtime=1; opt_bytes=0 ;;
	P)	opt_percent=1; opt_dtime=1; opt_bytes=0 ;;
	t)	opt_top=1; top=$OPTARG ;;
	Z)	opt_zone=1; opt_def=0 ;;
	h|?)	cat <<-END >&2
		USAGE: iotop [-C] [-D|-o|-P] [-j|-Z] [-d device] [-f filename]
		             [-m mount_point] [-t top] [interval [count]]

		                -C      # don't clear the screen
		                -D      # print delta times, elapsed, us
		                -j      # print project ID
		                -o      # print disk delta times, us
		                -P      # print %I/O (disk delta times)
		                -Z      # print zone ID
		                -d device       # instance name to snoop
		                -f filename     # snoop this file only
		                -m mount_point  # this FS only
		                -t top          # print top number only
		   eg,
		        iotop         # default output, 5 second samples
		        iotop 1       # 1 second samples
		        iotop -P      # print %I/O (time based)
		        iotop -m /    # snoop events on filesystem / only
		        iotop -t 20   # print top 20 lines only
		        iotop -C 5 12 # print 12 x 5 second samples
		END
		exit 1
	esac
done

shift $(( $OPTIND - 1 ))

### option logic
# A positional argument is only consumed when it sorts after "0" as a
# string; "0", an empty argument or one starting with "-" is left alone
# and the default stays in force. Leading zeros are stripped because D
# would read them as an octal prefix.
if [[ "$1" > 0 ]]; then
	need_posint interval "$1"
	interval=${1##+(0)}; shift
fi
if [[ "$1" > 0 ]]; then
	need_posint count "$1"
	count=${1##+(0)}; shift
fi
if (( opt_top )); then
	need_posint "-t top" "$top"
	top=${top##+(0)}
fi
# -Z wins over -j, and the disk-time modes (-o, -P) win over -D
if (( opt_proj && opt_zone )); then
	opt_proj=0
fi
if (( opt_elapsed && opt_dtime )); then
	opt_elapsed=0
fi
if (( opt_device || opt_mount || opt_file )); then
	filter=1
fi
# capture the terminal clear sequence once; the D program prints it
if (( opt_clear )); then
	clearstr=`clear`
else
	clearstr=.
fi



#################################
# --- Main Program, DTrace ---
#
# The shell variables are spliced into the single-quoted D program below.
# The string values are expanded inside double quotes so that whitespace
# in a device, file or mount name cannot split the -n argument.
# NOTE(review): a double quote or backslash inside those values would
# still end up unescaped inside the D string literal.
#
/usr/sbin/dtrace -n '
 /*
  * Command line arguments
  */
 inline int OPT_def 	= '$opt_def';
 inline int OPT_proj 	= '$opt_proj';
 inline int OPT_zone 	= '$opt_zone';
 inline int OPT_clear 	= '$opt_clear';
 inline int OPT_bytes 	= '$opt_bytes';
 inline int OPT_elapsed = '$opt_elapsed';
 inline int OPT_dtime 	= '$opt_dtime';
 inline int OPT_percent = '$opt_percent';
 inline int OPT_device 	= '$opt_device';
 inline int OPT_mount 	= '$opt_mount';
 inline int OPT_file 	= '$opt_file';
 inline int OPT_top 	= '$opt_top';
 inline int INTERVAL 	= '$interval';
 inline int COUNTER 	= '$count';
 inline int FILTER 	= '$filter';
 inline int TOP 	= '$top';
 inline string DEVICE 	= "'"$device"'";
 inline string FILENAME = "'"$filename"'";
 inline string MOUNT 	= "'"$mount"'";
 inline string CLEAR 	= "'"$clearstr"'";

 #pragma D option quiet

 /*
  * Print header
  */
 dtrace:::BEGIN
 {
	last_event[""] = 0;

	/* starting values */
	counts = COUNTER;
	secs = INTERVAL;
	disk_r = 0;
	disk_w = 0;

	printf("Tracing... Please wait.\n");
 }

 /*
  * Check event is being traced
  */
 io:::start,
 io:::done
 {
	/* default is to trace unless filtering, */
	this->ok = FILTER ? 0 : 1;

	/* check each filter, any one match enables the event */
	(OPT_device == 1 && DEVICE == args[1]->dev_statname)? this->ok = 1 : 1;
	(OPT_file == 1 && FILENAME == args[2]->fi_pathname) ? this->ok = 1 : 1;
	(OPT_mount == 1 && MOUNT == args[2]->fi_mount)  ? this->ok = 1 : 1;
 }

 /*
  * Reset last_event for disk idle -> start
  * this prevents idle time being counted as disk time.
  */
 io:::start
 /! pending[args[1]->dev_statname]/
 {
	/* save last disk event */
	last_event[args[1]->dev_statname] = timestamp;
 }

 /*
  * Store entry details
  */
 io:::start
 /this->ok/
 {
	/* these are used as a unique disk event key, */
	this->dev = args[0]->b_edev;
	this->blk = args[0]->b_blkno;

	/* save disk event details, */
	start_uid[this->dev, this->blk] = uid;
	start_pid[this->dev, this->blk] = pid;
	start_ppid[this->dev, this->blk] = ppid;
	start_comm[this->dev, this->blk] = execname;
	start_time[this->dev, this->blk] = timestamp;
	start_proj[this->dev, this->blk] = curpsinfo->pr_projid;
	start_zone[this->dev, this->blk] = curpsinfo->pr_zoneid;
	start_rw[this->dev, this->blk] = args[0]->b_flags & B_READ ? "R" : "W";

	/* per-sample byte totals for the status line */
	disk_r += args[0]->b_flags & B_READ ? args[0]->b_bcount : 0;
	disk_w += args[0]->b_flags & B_READ ? 0 : args[0]->b_bcount;

	/* increase disk event pending count */
	pending[args[1]->dev_statname]++;
 }

 /*
  * Process and Print completion
  */
 io:::done
 /this->ok/
 {
	/* decrease disk event pending count */
	pending[args[1]->dev_statname]--;

	/*
	 * Process details
	 */

	/* fetch entry values */
	this->dev = args[0]->b_edev;
	this->blk = args[0]->b_blkno;
	this->suid = (int)start_uid[this->dev, this->blk];
	this->spid = start_pid[this->dev, this->blk];
	this->sppid = start_ppid[this->dev, this->blk];
	this->sproj = (int)start_proj[this->dev, this->blk];
	this->szone = (int)start_zone[this->dev, this->blk];
	self->scomm = start_comm[this->dev, this->blk];
	this->stime = start_time[this->dev, this->blk];
	this->etime = timestamp;	/* endtime */
	this->elapsed = this->etime - this->stime;
	self->rw = start_rw[this->dev, this->blk];
	this->dtime = last_event[args[1]->dev_statname] == 0 ? 0 :
	    timestamp - last_event[args[1]->dev_statname];

	/* memory cleanup */
	start_uid[this->dev, this->blk]  = 0;
	start_pid[this->dev, this->blk]  = 0;
	start_ppid[this->dev, this->blk] = 0;
	start_time[this->dev, this->blk] = 0;
	start_comm[this->dev, this->blk] = 0;
	start_zone[this->dev, this->blk] = 0;
	start_proj[this->dev, this->blk] = 0;
	start_rw[this->dev, this->blk]   = 0;

	/*
	 * Choose statistic to track
	 * (the two time deltas are divided by 1000 to report us)
	 */
	OPT_bytes   ? this->value = args[0]->b_bcount    : 1;
	OPT_elapsed ? this->value = this->elapsed / 1000 : 1;
	OPT_dtime   ? this->value = this->dtime / 1000   : 1;

	/*
	 * Save details
	 * (the first key is UID, project ID or zone ID, by option)
	 */
	OPT_def ? @out[this->suid, this->spid, this->sppid, self->scomm,
	    args[1]->dev_statname, args[1]->dev_major, args[1]->dev_minor,
	    self->rw] = sum(this->value) : 1;
	OPT_proj ? @out[this->sproj, this->spid, this->sppid, self->scomm,
	    args[1]->dev_statname, args[1]->dev_major, args[1]->dev_minor,
	    self->rw] = sum(this->value) : 1;
	OPT_zone ? @out[this->szone, this->spid, this->sppid, self->scomm,
	    args[1]->dev_statname, args[1]->dev_major, args[1]->dev_minor,
	    self->rw] = sum(this->value) : 1;

	/* save last disk event */
	last_event[args[1]->dev_statname] = timestamp;

	self->scomm = 0;
	self->rw = 0;
 }

 /*
  * Prevent pending from underflowing
  * this can happen if this program is started during disk events.
  */
 io:::done
 /pending[args[1]->dev_statname] < 0/
 {
	pending[args[1]->dev_statname] = 0;
 }

 /*
  * Timer
  */
 profile:::tick-1sec
 {
	secs--;
 }

 /*
  * Print Report
  */
 profile:::tick-1sec
 /secs == 0/
 {
	/* fetch 1 min load average */
	/*
	this->load1a  = `hp_avenrun[0] / 65536;
	this->load1b  = ((`hp_avenrun[0] % 65536) * 100) / 65536;
	*/
	this->fscale = `averunnable.fscale;
	this->load1a  = `averunnable.ldavg[0] / this->fscale;
	this->load1b  = ((`averunnable.ldavg[0] % this->fscale) * 100) / this->fscale;

	/* convert counters to Kbytes */
	disk_r /= 1024;
	disk_w /= 1024;

	/* print status */
	OPT_clear ? printf("%s", CLEAR) : 1;
	printf("%Y, load: %d.%02d, disk_r: %6d KB, disk_w: %6d KB\n\n",
	    walltimestamp, this->load1a, this->load1b, disk_r, disk_w);

	/* print headers, the first one is as wide as the %5d key column */
	OPT_def  ? printf("  UID ") : 1;
	OPT_proj ? printf(" PROJ ") : 1;
	OPT_zone ? printf(" ZONE ") : 1;
	printf("%6s %6s %-16s %-7s %3s %3s %1s",
	    "PID", "PPID", "CMD", "DEVICE", "MAJ", "MIN", "D");
	OPT_bytes   ? printf(" %16s\n", "BYTES") : 1;
	OPT_elapsed ? printf(" %16s\n", "ELAPSED") : 1;
	OPT_dtime && ! OPT_percent ? printf(" %16s\n", "DISKTIME") : 1;
	OPT_dtime &&   OPT_percent ? printf(" %6s\n", "%I/O") : 1;

	/* truncate to top lines if needed */
	OPT_top ? trunc(@out, TOP) : 1;

	/*
	 * normalise to percentage if needed
	 * (values are us, so us / (INTERVAL * 10000) is percent of sample)
	 */
	OPT_percent ? normalize(@out, INTERVAL * 10000) : 1;

	/* print data */
	! OPT_percent ?
	    printa("%5d %6d %6d %-16s %-7s %3d %3d %1s %16@d\n", @out) :
	    printa("%5d %6d %6d %-16s %-7s %3d %3d %1s %6@d\n", @out);
	printf("\n");

	/* clear data */
	trunc(@out);
	disk_r = 0;
	disk_w = 0;
	secs = INTERVAL;
	counts--;
 }

 /*
  * End of program
  * (COUNTER defaults to -1, which never reaches 0 here)
  */
 profile:::tick-1sec
 /counts == 0/
 {
	exit(0);
 }

 /*
  * Cleanup for Ctrl-C
  */
 dtrace:::END
 {
	trunc(@out);
 }
'
