1#!/bin/sh
2##!/usr/bin/sh
3#
4# iosnoop - A program to print disk I/O events as they happen, with useful
5#           details such as UID, PID, filename, command, etc. 
6#           Written using DTrace (Solaris 10 3/05).
7#
8# This is measuring disk events that have made it past system caches.
9#
10# 17-Sep-2005, ver 1.55
11#
12# USAGE: 	iosnoop [-a|-A|-DeghiNostv] [-d device] [-f filename] 
13#			[-m mount_point] [-n name] [-p PID]
14#
15#		iosnoop		# default output
16#
17#		-a		# print all data (mostly)
18#		-A		# dump all data, space delimited
19#		-D		# print time delta, us (elapsed)
20#		-e		# print device name
21#		-g		# print command arguments
22#		-i		# print device instance
23#		-N		# print major and minor numbers
24#		-o		# print disk delta time, us 
25#		-s		# print start time, us
26#		-t 		# print completion time, us
27#		-v		# print completion time, string
28#		-d device	# instance name to snoop (eg, dad0)
29#		-f filename	# full pathname of file to snoop
30#		-m mount_point	# this FS only (will skip raw events)
31#		-n name		# this process name only
32#		-p PID		# this PID only
33#  eg,
34#		iosnoop -v	# human readable timestamps
35#		iosnoop -N	# print major and minor numbers
36#		iosnoop -m /	# snoop events on the root filesystem only
37# 	
38# FIELDS:
39#		UID		user ID
40#		PID		process ID
#		PPID		parent process ID
42#		COMM		command name for the process
43#		ARGS		argument listing for the process
44#		SIZE		size of operation, bytes
45#		BLOCK		disk block for the operation (location)
46#		STIME	 	timestamp for the disk request, us
47#		TIME		timestamp for the disk completion, us
48#		DELTA		elapsed time from request to completion, us
49#		DTIME		time for disk to complete request, us
50#		STRTIME		timestamp for the disk completion, string
51#		DEVICE  	device name
52#		INS     	device instance number
53#		D		direction, Read or Write
54#		MOUNT		mount point
55#		FILE		filename (basename) for io operation
56# 
57# NOTE:
58# - There are two different delta times reported. -D prints the
59#   elapsed time from the disk request (strategy) to the disk completion
60#   (iodone); -o prints the time for the disk to complete that event 
#   since its last event (time between iodones), or, the time to the
62#   strategy if the disk had been idle. 
63# - When filtering on PID or process name, be aware that poor disk event
64#   times may be due to events that have been filtered away, for example
65#   another process that may be seeking the disk heads elsewhere.
66#
67# SEE ALSO: BigAdmin: DTrace, http://www.sun.com/bigadmin/content/dtrace
68#	    Solaris Dynamic Tracing Guide, http://docs.sun.com
69#	    DTrace Tools, http://www.brendangregg.com/dtrace.html
70#
71# COPYRIGHT: Copyright (c) 2005 Brendan Gregg.
72#
73# CDDL HEADER START
74#
75#  The contents of this file are subject to the terms of the
76#  Common Development and Distribution License, Version 1.0 only
77#  (the "License").  You may not use this file except in compliance
78#  with the License.
79#
80#  You can obtain a copy of the license at Docs/cddl1.txt
81#  or http://www.opensolaris.org/os/licensing.
82#  See the License for the specific language governing permissions
83#  and limitations under the License.
84#
85# CDDL HEADER END
86#
87# Author: Brendan Gregg  [Sydney, Australia]
88#
89# 12-Mar-2004	Brendan Gregg	Created this, build 51.
90# 23-May-2004	   "	  "	Fixed mntpt bug.
91# 10-Oct-2004	   "      "	Rewritten to use the io provider, build 63.
92# 04-Jan-2005	   "	  "	Wrapped in sh to provide options.
# 08-May-2005	   "      "	Rewritten for performance.
94# 15-Jul-2005	   "      "	Improved DTIME calculation.
95# 25-Jul-2005	   "      "	Added -p, -n. Improved code.
96# 17-Sep-2005	   "      "	Increased switchrate.
97#
98
99
100##############################
101# --- Process Arguments ---
102#
103
### default variables

# output and mode flags
opt_dump=0
opt_delta=0
opt_devname=0
opt_args=0
opt_start=0
opt_end=0
opt_endstr=0
opt_ins=0
opt_nums=0
opt_dtime=0

# filter flags, and the summary flag that is derived from them later
opt_device=0
opt_file=0
opt_mount=0
opt_name=0
opt_pid=0
filter=0

# filter values; "." and 0 are placeholders until an option supplies one
device=.
filename=.
mount=.
pname=.
pid=0
109
### process options

#
# process_options - parse the command line into the opt_* flags and the
# filter values (device, filename, mount, pname, pid).
#
# Arguments: the script arguments ("$@").
# Exits:     1 after printing the usage text to stderr for -h or an
#            unrecognised option, and 1 when -p is not a decimal number.
#
process_options()
{
	while getopts aAd:Def:ghim:Nn:op:stv name
	do
		case $name in
		a)	opt_devname=1; opt_args=1; opt_endstr=1; opt_nums=1 ;;
		A)	opt_dump=1 ;;
		d)	opt_device=1; device=$OPTARG ;;
		D)	opt_delta=1 ;;
		e)	opt_devname=1 ;;
		f)	opt_file=1; filename=$OPTARG ;;
		g)	opt_args=1 ;;
		i)	opt_ins=1 ;;
		N)	opt_nums=1 ;;
		n)	opt_name=1; pname=$OPTARG ;;
		o)	opt_dtime=1 ;;
		p)	opt_pid=1; pid=$OPTARG
			# the PID is spliced into the D program as an integer
			# literal, so reject anything else here with a clear
			# message
			case $pid in
			''|*[!0-9]*)
				echo "ERROR: -p requires a numeric PID: $pid" >&2
				exit 1 ;;
			esac ;;
		m)	opt_mount=1; mount=$OPTARG ;;
		s)	opt_start=1 ;;
		t)	opt_end=1 ;;
		v)	opt_endstr=1 ;;
		# getopts reports an unknown option as a literal "?"; the
		# backslash stops it acting as a single-character glob
		h|\?)	cat <<-END >&2
			USAGE: iosnoop [-a|-A|-DeghiNostv] [-d device] [-f filename]
			               [-m mount_point] [-n name] [-p PID]
			       iosnoop          # default output
			                -a      # print all data (mostly)
			                -A      # dump all data, space delimited
			                -D      # print time delta, us (elapsed)
			                -e      # print device name
			                -g      # print command arguments
			                -i      # print device instance
			                -N      # print major and minor numbers
			                -o      # print disk delta time, us
			                -s      # print start time, us
			                -t      # print completion time, us
			                -v      # print completion time, string
			                -d device       # instance name to snoop 
			                -f filename     # snoop this file only
			                -m mount_point  # this FS only 
			                -n name         # this process name only 
			                -p PID          # this PID only 
			   eg,
			        iosnoop -v      # human readable timestamps
			        iosnoop -N      # print major and minor numbers
			        iosnoop -m /    # snoop events on filesystem / only
			END
			exit 1 ;;
		esac
	done
}

process_options "$@"
158
### option logic

#
# apply_option_logic - reconcile the parsed flags before the D program
# is started.
#
# Reads:  opt_dump, opt_device, opt_file, opt_mount, opt_name, opt_pid
# Writes: the optional-column flags (when -A was given) and filter
#
apply_option_logic()
{
	# -A dumps every field on one space-delimited line, so the
	# individual optional columns are switched off; opt_args=2 selects
	# neither the COMM nor the ARGS trailer.
	if [ "$opt_dump" -eq 1 ]; then
		opt_delta=0; opt_devname=0; opt_args=2; opt_start=0
		opt_end=0; opt_endstr=0; opt_nums=0; opt_ins=0; opt_dtime=0
	fi

	# any one of the five filter options turns filtering on
	if [ "$opt_device" -eq 1 ] || [ "$opt_file" -eq 1 ] ||
	    [ "$opt_mount" -eq 1 ] || [ "$opt_name" -eq 1 ] ||
	    [ "$opt_pid" -eq 1 ]; then
		filter=1
	fi
}

apply_option_logic
168
169
170#################################
171# --- Main Program, DTrace ---
172#
# Each option value is spliced into the D program by closing the
# single-quoted script, expanding the variable inside double quotes, and
# reopening the single quote.  The double quotes keep a value containing
# whitespace (eg, -f "/tmp/my file") inside the one -n argument instead of
# letting the shell split it into extra dtrace arguments.
# NOTE: the values are not escaped for D, so a double quote or backslash
# in a device, file, mount point or process name still ends up verbatim
# inside the D string literal.
/usr/sbin/dtrace -n '
 /*
  * Command line arguments
  */
 inline int OPT_dump 	= '"$opt_dump"';
 inline int OPT_device 	= '"$opt_device"';
 inline int OPT_delta 	= '"$opt_delta"';
 inline int OPT_devname = '"$opt_devname"';
 inline int OPT_file 	= '"$opt_file"';
 inline int OPT_args 	= '"$opt_args"';
 inline int OPT_ins  	= '"$opt_ins"';
 inline int OPT_nums  	= '"$opt_nums"';
 inline int OPT_dtime 	= '"$opt_dtime"';
 inline int OPT_mount 	= '"$opt_mount"';
 inline int OPT_start 	= '"$opt_start"';
 inline int OPT_pid 	= '"$opt_pid"';
 inline int OPT_name 	= '"$opt_name"';
 inline int OPT_end 	= '"$opt_end"';
 inline int OPT_endstr 	= '"$opt_endstr"';
 inline int FILTER 	= '"$filter"';
 inline int PID 	= '"$pid"';
 inline string DEVICE 	= "'"$device"'";
 inline string FILENAME = "'"$filename"'";
 inline string MOUNT 	= "'"$mount"'";
 inline string NAME 	= "'"$pname"'";
 
 #pragma D option quiet
 #pragma D option switchrate=10hz
201
202 /*
203  * Print header
204  */
205 dtrace:::BEGIN 
206 {
207	last_event[""] = 0;
208
209	/* print optional headers */
210 	OPT_start   ? printf("%-14s ","STIME")   : 1;
211 	OPT_end     ? printf("%-14s ","TIME")    : 1;
212 	OPT_endstr  ? printf("%-20s ","STRTIME") : 1;
213 	OPT_devname ? printf("%-7s ","DEVICE")   : 1;
214 	OPT_ins     ? printf("%-3s ","INS")      : 1;
215 	OPT_nums    ? printf("%-3s %-3s ","MAJ","MIN") : 1;
216 	OPT_delta   ? printf("%-10s ","DELTA")   : 1;
217 	OPT_dtime   ? printf("%-10s ","DTIME")   : 1;
218
219	/* print main headers */
220	OPT_dump ? 
221	    printf("%s %s %s %s %s %s %s %s %s %s %s %s %s %s %s %s %s %s\n",
222 	    "TIME", "STIME", "DELTA", "DEVICE", "INS", "MAJ", "MIN", "UID",
223	    "PID", "PPID", "D", "BLOCK", "SIZE", "MOUNT", "FILE", "PATH",
224	    "COMM","ARGS") :
225 	    printf("%5s %5s %1s %8s %6s ", "UID", "PID", "D", "BLOCK", "SIZE");
226	OPT_args == 0 ? printf("%10s %s\n", "COMM", "PATHNAME") : 1;
227 	OPT_args == 1 ? printf("%28s %s\n", "PATHNAME", "ARGS") : 1;
228 }
229
230 /*
231  * Check event is being traced
232  */
233 io:::start
234 { 
235	/* default is to trace unless filtering, */
236	self->ok = FILTER ? 0 : 1;
237
238	/* check each filter, */
239	(OPT_device == 1 && DEVICE == args[1]->dev_statname)? self->ok = 1 : 1;
240	(OPT_file == 1 && FILENAME == args[2]->fi_pathname) ? self->ok = 1 : 1;
241	(OPT_mount == 1 && MOUNT == args[2]->fi_mount) ? self->ok = 1 : 1;
242	(OPT_name == 1 && NAME == strstr(NAME, execname)) ? self->ok = 1 : 1;
243	(OPT_name == 1 && execname == strstr(execname, NAME)) ? self->ok = 1 : 1;
244	(OPT_pid == 1 && PID == pid) ? self->ok = 1 : 1;
245 }
246
247 /*
248  * Reset last_event for disk idle -> start
249  * this prevents idle time being counted as disk time.
250  */
251 io:::start
252 /! pending[args[1]->dev_statname]/
253 {
254	/* save last disk event */
255	last_event[args[1]->dev_statname] = timestamp;
256 }
257
258 /*
259  * Store entry details
260  */
261 io:::start
262 /self->ok/
263 {
264	/* these are used as a unique disk event key, */
265 	this->dev = args[0]->b_edev;
266 	this->blk = args[0]->b_blkno;
267
268	/* save disk event details, */
269 	start_uid[this->dev, this->blk] = (int)uid;
270 	start_pid[this->dev, this->blk] = pid;
271 	start_ppid[this->dev, this->blk] = ppid;
272 	start_args[this->dev, this->blk] = (char *)curpsinfo->pr_psargs;
273 	start_comm[this->dev, this->blk] = execname;
274 	start_time[this->dev, this->blk] = timestamp;
275
276	/* increase disk event pending count */
277	pending[args[1]->dev_statname]++;
278
279	self->ok = 0;
280 }
281
282 /*
283  * Process and Print completion
284  */
285 io:::done
286 /start_time[args[0]->b_edev, args[0]->b_blkno]/
287 {
288	/* decrease disk event pending count */
289	pending[args[1]->dev_statname]--;
290
291	/*
292	 * Process details
293	 */
294
295 	/* fetch entry values */
296 	this->dev = args[0]->b_edev;
297 	this->blk = args[0]->b_blkno;
298 	this->suid = start_uid[this->dev, this->blk];
299 	this->spid = start_pid[this->dev, this->blk];
300 	this->sppid = start_ppid[this->dev, this->blk];
301 	self->sargs = (int)start_args[this->dev, this->blk] == 0 ? 
302 	    "" : start_args[this->dev, this->blk];
303 	self->scomm = start_comm[this->dev, this->blk];
304 	this->stime = start_time[this->dev, this->blk];
305	this->etime = timestamp; /* endtime */
306	this->delta = this->etime - this->stime;
307	this->dtime = last_event[args[1]->dev_statname] == 0 ? 0 :
308	    timestamp - last_event[args[1]->dev_statname];
309
310 	/* memory cleanup */
311 	start_uid[this->dev, this->blk]  = 0;
312 	start_pid[this->dev, this->blk]  = 0;
313 	start_ppid[this->dev, this->blk] = 0;
314 	start_args[this->dev, this->blk] = 0;
315 	start_time[this->dev, this->blk] = 0;
316 	start_comm[this->dev, this->blk] = 0;
317 	start_rw[this->dev, this->blk]   = 0;
318
319	/*
320	 * Print details
321	 */
322
323	/* print optional fields */
324 	OPT_start   ? printf("%-14d ", this->stime/1000) : 1;
325 	OPT_end     ? printf("%-14d ", this->etime/1000) : 1;
326 	OPT_endstr  ? printf("%-20Y ", walltimestamp) : 1;
327 	OPT_devname ? printf("%-7s ", args[1]->dev_statname) : 1;
328 	OPT_ins     ? printf("%3d ", args[1]->dev_instance) : 1;
329 	OPT_nums    ? printf("%3d %3d ",
330	    args[1]->dev_major, args[1]->dev_minor) : 1;
331 	OPT_delta   ? printf("%-10d ", this->delta/1000) : 1;
332 	OPT_dtime   ? printf("%-10d ", this->dtime/1000) : 1;
333
334	/* print main fields */
335	OPT_dump ? 
336 	    printf("%d %d %d %s %d %d %d %d %d %d %s %d %d %s %s %s %s %S\n",
337 	    this->etime/1000, this->stime/1000, this->delta/1000,
338 	    args[1]->dev_statname, args[1]->dev_instance, args[1]->dev_major,
339	    args[1]->dev_minor, this->suid, this->spid, this->sppid, 
340	    args[0]->b_flags & B_READ ? "R" : "W", 
341	    args[0]->b_blkno, args[0]->b_bcount, args[2]->fi_mount,
342	    args[2]->fi_name, args[2]->fi_pathname, self->scomm, self->sargs) :
343 	    printf("%5d %5d %1s %8d %6d ",
344 	    this->suid, this->spid, args[0]->b_flags & B_READ ? "R" : "W",
345	    args[0]->b_blkno, args[0]->b_bcount);
346	OPT_args == 0 ? printf("%10s %s\n", self->scomm, args[2]->fi_pathname)
347	    : 1;
348 	OPT_args == 1 ? printf("%28s %S\n",
349	    args[2]->fi_pathname, self->sargs) : 1;
350
351	/* save last disk event */
352	last_event[args[1]->dev_statname] = timestamp;
353
354	/* cleanup */
355	self->scomm = 0;
356	self->sargs = 0;
357 }
358
359 /*
360  * Prevent pending from underflowing
361  * this can happen if this program is started during disk events.
362  */
363 io:::done
364 /pending[args[1]->dev_statname] < 0/
365 {
366	pending[args[1]->dev_statname] = 0;
367 }
368'
369
370