1#!/bin/sh
2#
3# iosnoop - A program to print disk I/O events as they happen, with useful
4#           details such as UID, PID, filename (if available), command, etc.
5#           Written using DTrace (Solaris 10 3/05, MacOS X 10.5).
6#
7# This is measuring events that have made it past system caches, such as
8# disk events for local file systems, and network events for remote
9# filesystems (such as NFS.)
10#
11# $Id: iosnoop,v 1.2 2018/10/01 13:28:07 sevan Exp $
12#
13# USAGE: 	iosnoop [-a|-A|-DeghiNostv] [-d device] [-f filename] 
14#			[-m mount_point] [-n name] [-p PID]
15#
16#		iosnoop		# default output
17#
18#		-a		# print all data (mostly)
19#		-A		# dump all data, space delimited
20#		-D		# print time delta, us (elapsed)
21#		-e		# print device name
22#		-g		# print command arguments
23#		-i		# print device instance
24#		-N		# print major and minor numbers
25#		-o		# print disk delta time, us 
26#		-s		# print start time, us
27#		-t 		# print completion time, us
28#		-v		# print completion time, string
29#		-d device	# instance name to snoop (eg, dad0)
30#		-f filename	# full pathname of file to snoop
31#		-m mount_point	# this FS only (will skip raw events)
32#		-n name		# this process name only
33#		-p PID		# this PID only
34#  eg,
35#		iosnoop -v	# human readable timestamps
36#		iosnoop -N	# print major and minor numbers
37#		iosnoop -m /	# snoop events on the root filesystem only
38# 	
39# FIELDS:
40#		UID		user ID
41#		PID		process ID
42#		PPID		parennt process ID
43#		COMM		command name for the process
44#		ARGS		argument listing for the process
45#		SIZE		size of operation, bytes
46#		BLOCK		disk block for the operation (location)
47#		STIME	 	timestamp for the disk request, us
48#		TIME		timestamp for the disk completion, us
49#		DELTA		elapsed time from request to completion, us
50#		DTIME		time for disk to complete request, us
51#		STRTIME		timestamp for the disk completion, string
52#		DEVICE  	device name
53#		INS     	device instance number
54#		D		direction, Read or Write
55#		MOUNT		mount point
56#		FILE		filename (basename) for io operation
57# 
58# NOTE:
59# - There are two different delta times reported. -D prints the
60#   elapsed time from the disk request (strategy) to the disk completion
61#   (iodone); -o prints the time for the disk to complete that event 
62#   since it's last event (time between iodones), or, the time to the 
63#   strategy if the disk had been idle. 
64# - When filtering on PID or process name, be aware that poor disk event
65#   times may be due to events that have been filtered away, for example
66#   another process that may be seeking the disk heads elsewhere.
67#
68# SEE ALSO: BigAdmin: DTrace, http://www.sun.com/bigadmin/content/dtrace
69#	    Solaris Dynamic Tracing Guide, http://docs.sun.com
70#	    DTrace Tools, http://www.brendangregg.com/dtrace.html
71#
72# COPYRIGHT: Copyright (c) 2009 Brendan Gregg.
73#
74# CDDL HEADER START
75#
76#  The contents of this file are subject to the terms of the
77#  Common Development and Distribution License, Version 1.0 only
78#  (the "License").  You may not use this file except in compliance
79#  with the License.
80#
81#  You can obtain a copy of the license at Docs/cddl1.txt
82#  or http://www.opensolaris.org/os/licensing.
83#  See the License for the specific language governing permissions
84#  and limitations under the License.
85#
86# CDDL HEADER END
87#
88# 12-Mar-2004	Brendan Gregg	Created this, build 51.
89# 23-May-2004	   "	  "	Fixed mntpt bug.
90# 10-Oct-2004	   "      "	Rewritten to use the io provider, build 63.
91# 04-Jan-2005	   "	  "	Wrapped in sh to provide options.
92# 08-May-2005	   "      "	Rewritten for perfromance.
93# 15-Jul-2005	   "      "	Improved DTIME calculation.
94# 25-Jul-2005	   "      "	Added -p, -n. Improved code.
95# 17-Sep-2005	   "      "	Increased switchrate.
96# 15-Sep-2009	   "      "	Removed genunix for both MacOS X and NFS.
97#
98
99
100##############################
101# --- Process Arguments ---
102#
103
104### default variables
105opt_dump=0; opt_device=0; opt_delta=0; opt_devname=0; opt_file=0; opt_args=0; 
106opt_mount=0; opt_start=0 opt_end=0; opt_endstr=0; opt_ins=0; opt_nums=0
107opt_dtime=0; filter=0; device=.; filename=.; mount=.; pname=.; pid=0
108opt_name=0; opt_pid=0
109
110### process options
111while getopts aAd:Def:ghim:Nn:op:stv name
112do
113	case $name in
114	a)	opt_devname=1; opt_args=1; opt_endstr=1; opt_nums=1 ;;
115	A)	opt_dump=1 ;;
116	d)	opt_device=1; device=$OPTARG ;;
117	D)	opt_delta=1 ;;
118	e)	opt_devname=1 ;;
119	f)	opt_file=1; filename=$OPTARG ;;
120	g)	opt_args=1 ;;
121	i)	opt_ins=1 ;;
122	N)	opt_nums=1 ;;
123	n)	opt_name=1; pname=$OPTARG ;;
124	o)	opt_dtime=1 ;;
125	p)	opt_pid=1; pid=$OPTARG ;;
126	m)	opt_mount=1; mount=$OPTARG ;;
127	s)	opt_start=1 ;;
128	t)	opt_end=1 ;;
129	v)	opt_endstr=1 ;;
130	h|?)	cat <<-END >&2
131		USAGE: iosnoop [-a|-A|-DeghiNostv] [-d device] [-f filename]
132		               [-m mount_point] [-n name] [-p PID]
133		       iosnoop          # default output
134		                -a      # print all data (mostly)
135		                -A      # dump all data, space delimited
136		                -D      # print time delta, us (elapsed)
137		                -e      # print device name
138		                -g      # print command arguments
139		                -i      # print device instance
140		                -N      # print major and minor numbers
141		                -o      # print disk delta time, us
142		                -s      # print start time, us
143		                -t      # print completion time, us
144		                -v      # print completion time, string
145		                -d device       # instance name to snoop 
146		                -f filename     # snoop this file only
147		                -m mount_point  # this FS only 
148		                -n name         # this process name only 
149		                -p PID          # this PID only 
150		   eg,
151		        iosnoop -v      # human readable timestamps
152		        iosnoop -N      # print major and minor numbers
153		        iosnoop -m /    # snoop events on filesystem / only
154		END
155		exit 1
156	esac
157done
158
159### option logic
160if [ $opt_dump -eq 1 ]; then
161	opt_delta=0; opt_devname=0; opt_args=2; opt_start=0; 
162	opt_end=0; opt_endstr=0; opt_nums=0; opt_ins=0; opt_dtime=0
163fi
164if [ $opt_device -eq 1 -o $opt_file -eq 1 -o $opt_mount -eq 1 -o \
165    $opt_name -eq 1 -o $opt_pid -eq 1 ]; then
166	filter=1
167fi
168
169
170#################################
171# --- Main Program, DTrace ---
172#
173/usr/sbin/dtrace -n '
174 /*
175  * Command line arguments
176  */
177 inline int OPT_dump 	= '$opt_dump';
178 inline int OPT_device 	= '$opt_device';
179 inline int OPT_delta 	= '$opt_delta';
180 inline int OPT_devname = '$opt_devname';
181 inline int OPT_file 	= '$opt_file';
182 inline int OPT_args 	= '$opt_args';
183 inline int OPT_ins  	= '$opt_ins';
184 inline int OPT_nums  	= '$opt_nums';
185 inline int OPT_dtime 	= '$opt_dtime';
186 inline int OPT_mount 	= '$opt_mount';
187 inline int OPT_start 	= '$opt_start';
188 inline int OPT_pid 	= '$opt_pid';
189 inline int OPT_name 	= '$opt_name';
190 inline int OPT_end 	= '$opt_end';
191 inline int OPT_endstr 	= '$opt_endstr';
192 inline int FILTER 	= '$filter';
193 inline int PID 	= '$pid';
194 inline string DEVICE 	= "'$device'";
195 inline string FILENAME = "'$filename'";
196 inline string MOUNT 	= "'$mount'";
197 inline string NAME 	= "'$pname'";
198 
199 #pragma D option quiet
200 #pragma D option switchrate=10hz
201
202 /* boost the following if you get "dynamic variable drops" */
203 #pragma D option dynvarsize=16m
204
205 /*
206  * Print header
207  */
208 dtrace:::BEGIN 
209 {
210	last_event[""] = 0;
211
212	/* print optional headers */
213 	OPT_start   ? printf("%-14s ","STIME(us)")   : 1;
214 	OPT_end     ? printf("%-14s ","TIME(us)")    : 1;
215 	OPT_endstr  ? printf("%-20s ","STRTIME") : 1;
216 	OPT_devname ? printf("%-7s ","DEVICE")   : 1;
217 	OPT_ins     ? printf("%-3s ","INS")      : 1;
218 	OPT_nums    ? printf("%-3s %-3s ","MAJ","MIN") : 1;
219 	OPT_delta   ? printf("%-10s ","DELTA(us)")   : 1;
220 	OPT_dtime   ? printf("%-10s ","DTIME(us)")   : 1;
221
222	/* print main headers */
223	OPT_dump ? 
224	    printf("%s %s %s %s %s %s %s %s %s %s %s %s %s %s %s %s %s %s\n",
225 	    "TIME", "STIME", "DELTA", "DEVICE", "INS", "MAJ", "MIN", "UID",
226	    "PID", "PPID", "D", "BLOCK", "SIZE", "MOUNT", "FILE", "PATH",
227	    "COMM","ARGS") :
228 	    printf("%5s %5s %1s %8s %6s ", "UID", "PID", "D", "BLOCK", "SIZE");
229	OPT_args == 0 ? printf("%10s %s\n", "COMM", "PATHNAME") : 1;
230 	OPT_args == 1 ? printf("%28s %s\n", "PATHNAME", "ARGS") : 1;
231 }
232
233 /*
234  * Check event is being traced
235  */
236 io:::start
237 { 
238	/* default is to trace unless filtering, */
239	self->ok = FILTER ? 0 : 1;
240
241	/* check each filter, */
242	(OPT_device == 1 && DEVICE == args[1]->dev_statname)? self->ok = 1 : 1;
243	(OPT_file == 1 && FILENAME == args[2]->fi_pathname) ? self->ok = 1 : 1;
244	(OPT_mount == 1 && MOUNT == args[2]->fi_mount) ? self->ok = 1 : 1;
245	(OPT_name == 1 && NAME == execname) ? self->ok = 1 : 1;
246	(OPT_pid == 1 && PID == pid) ? self->ok = 1 : 1;
247 }
248
249 /*
250  * Reset last_event for disk idle -> start
251  * this prevents idle time being counted as disk time.
252  */
253 io:::start
254 /! pending[args[1]->dev_statname]/
255 {
256	/* save last disk event */
257	last_event[args[1]->dev_statname] = timestamp;
258 }
259
260 /*
261  * Store entry details
262  */
263 io:::start
264 /self->ok/
265 {
266	/* these are used as a unique disk event key, */
267 	this->dev = args[0]->b_edev;
268 	this->blk = args[0]->b_blkno;
269
270	/* save disk event details, */
271 	start_uid[this->dev, this->blk] = uid;
272 	start_pid[this->dev, this->blk] = pid;
273 	start_ppid[this->dev, this->blk] = ppid;
274 	start_args[this->dev, this->blk] = (char *)curpsinfo->pr_psargs;
275 	start_comm[this->dev, this->blk] = execname;
276 	start_time[this->dev, this->blk] = timestamp;
277
278	/* increase disk event pending count */
279	pending[args[1]->dev_statname]++;
280
281	self->ok = 0;
282 }
283
284 /*
285  * Process and Print completion
286  */
287 io:::done
288 /start_time[args[0]->b_edev, args[0]->b_blkno]/
289 {
290	/* decrease disk event pending count */
291	pending[args[1]->dev_statname]--;
292
293	/*
294	 * Process details
295	 */
296
297 	/* fetch entry values */
298 	this->dev = args[0]->b_edev;
299 	this->blk = args[0]->b_blkno;
300 	this->suid = start_uid[this->dev, this->blk];
301 	this->spid = start_pid[this->dev, this->blk];
302 	this->sppid = start_ppid[this->dev, this->blk];
303 	self->sargs = (int)start_args[this->dev, this->blk] == 0 ? 
304 	    "" : start_args[this->dev, this->blk];
305 	self->scomm = start_comm[this->dev, this->blk];
306 	this->stime = start_time[this->dev, this->blk];
307	this->etime = timestamp; /* endtime */
308	this->delta = this->etime - this->stime;
309	this->dtime = last_event[args[1]->dev_statname] == 0 ? 0 :
310	    timestamp - last_event[args[1]->dev_statname];
311
312 	/* memory cleanup */
313 	start_uid[this->dev, this->blk]  = 0;
314 	start_pid[this->dev, this->blk]  = 0;
315 	start_ppid[this->dev, this->blk] = 0;
316 	start_args[this->dev, this->blk] = 0;
317 	start_time[this->dev, this->blk] = 0;
318 	start_comm[this->dev, this->blk] = 0;
319 	start_rw[this->dev, this->blk]   = 0;
320
321	/*
322	 * Print details
323	 */
324
325	/* print optional fields */
326 	OPT_start   ? printf("%-14d ", this->stime/1000) : 1;
327 	OPT_end     ? printf("%-14d ", this->etime/1000) : 1;
328 	OPT_endstr  ? printf("%-20Y ", walltimestamp) : 1;
329 	OPT_devname ? printf("%-7s ", args[1]->dev_statname) : 1;
330 	OPT_ins     ? printf("%3d ", args[1]->dev_instance) : 1;
331 	OPT_nums    ? printf("%3d %3d ",
332	    args[1]->dev_major, args[1]->dev_minor) : 1;
333 	OPT_delta   ? printf("%-10d ", this->delta/1000) : 1;
334 	OPT_dtime   ? printf("%-10d ", this->dtime/1000) : 1;
335
336	/* print main fields */
337	OPT_dump ? 
338 	    printf("%d %d %d %s %d %d %d %d %d %d %s %d %d %s %s %s %s %S\n",
339 	    this->etime/1000, this->stime/1000, this->delta/1000,
340 	    args[1]->dev_statname, args[1]->dev_instance, args[1]->dev_major,
341	    args[1]->dev_minor, this->suid, this->spid, this->sppid, 
342	    args[0]->b_flags & B_READ ? "R" : "W", 
343	    args[0]->b_blkno, args[0]->b_bcount, args[2]->fi_mount,
344	    args[2]->fi_name, args[2]->fi_pathname, self->scomm, self->sargs) :
345 	    printf("%5d %5d %1s %8d %6d ",
346 	    this->suid, this->spid, args[0]->b_flags & B_READ ? "R" : "W",
347	    args[0]->b_blkno, args[0]->b_bcount);
348	OPT_args == 0 ? printf("%10s %s\n", self->scomm, args[2]->fi_pathname)
349	    : 1;
350 	OPT_args == 1 ? printf("%28s %S\n",
351	    args[2]->fi_pathname, self->sargs) : 1;
352
353	/* save last disk event */
354	last_event[args[1]->dev_statname] = timestamp;
355
356	/* cleanup */
357	self->scomm = 0;
358	self->sargs = 0;
359 }
360
361 /*
362  * Prevent pending from underflowing
363  * this can happen if this program is started during disk events.
364  */
365 io:::done
366 /pending[args[1]->dev_statname] < 0/
367 {
368	pending[args[1]->dev_statname] = 0;
369 }
370'
371