1235368Sgnn#!/usr/sbin/dtrace -s
2235368Sgnn/*
3235368Sgnn * fsrw.d - file system read/write event tracing.
4235368Sgnn *          Written using DTrace (Solaris 10 3/05)
5235368Sgnn *
6235368Sgnn * This traces file related activity: system call reads and writes,
7235368Sgnn * vnode logical read and writes (fop), and disk I/O. It can be used
8235368Sgnn * to examine the behaviour of each I/O layer, from the syscall
9235368Sgnn * interface to what the disk is doing. Behaviour such as read-ahead, and
10235368Sgnn * max I/O size breakup can be observed.
11235368Sgnn *
12235368Sgnn * $Id: fsrw.d 3 2007-08-01 10:50:08Z brendan $
13235368Sgnn *
14235368Sgnn * USAGE:	fsrw.d
15235368Sgnn *
16235368Sgnn * FIELDS:
17235368Sgnn *		Event		Traced event (see EVENTS below)
18235368Sgnn *		Device		Device, for disk I/O
19235368Sgnn *		RW		Either Read or Write
20235368Sgnn *		Size		Size of I/O in bytes
21235368Sgnn *		Offset		Offset of I/O in kilobytes
22235368Sgnn *		Path		Path to file on disk
23235368Sgnn *
24235368Sgnn * EVENTS:
25235368Sgnn *		sc-read		System call read
26235368Sgnn *		sc-write	System call write
27235368Sgnn *		fop_read	Logical read
28235368Sgnn *		fop_write	Logical write
29235368Sgnn *		disk_io		Physical disk I/O
30235368Sgnn *		disk_ra		Physical disk I/O, read ahead
31235368Sgnn *
32235368Sgnn * The events are drawn with a level of indentation, which can sometimes
33235368Sgnn * help identify related events.
34235368Sgnn *
35235368Sgnn * SEE ALSO: fspaging.d
36235368Sgnn *
37235368Sgnn * IDEA: Richard McDougall, Solaris Internals 2nd Ed, FS Chapter.
38235368Sgnn *
39235368Sgnn * COPYRIGHT: Copyright (c) 2006 Brendan Gregg.
40235368Sgnn *
41235368Sgnn * CDDL HEADER START
42235368Sgnn *
43235368Sgnn *  The contents of this file are subject to the terms of the
44235368Sgnn *  Common Development and Distribution License, Version 1.0 only
45235368Sgnn *  (the "License").  You may not use this file except in compliance
46235368Sgnn *  with the License.
47235368Sgnn *
48235368Sgnn *  You can obtain a copy of the license at Docs/cddl1.txt
49235368Sgnn *  or http://www.opensolaris.org/os/licensing.
50235368Sgnn *  See the License for the specific language governing permissions
51235368Sgnn *  and limitations under the License.
52235368Sgnn *
53235368Sgnn * CDDL HEADER END
54235368Sgnn *
55235368Sgnn * ToDo: readv()
56235368Sgnn *
57235368Sgnn * 20-Mar-2006  Brendan Gregg   Created this.
58235368Sgnn * 23-Apr-2006	   "      "	Last update.
59235368Sgnn */
60235368Sgnn
61235368Sgnn#pragma D option quiet
62235368Sgnn#pragma D option switchrate=10hz
63235368Sgnn
64235368Sgnndtrace:::BEGIN
65235368Sgnn{
66235368Sgnn	printf("%-12s %10s %2s %8s %6s %s\n",
67235368Sgnn	    "Event", "Device", "RW", "Size", "Offset", "Path");
68235368Sgnn}
69235368Sgnn
70235368Sgnnsyscall::*read:entry,
71235368Sgnnsyscall::*write*:entry
72235368Sgnn{
73235368Sgnn	/*
74235368Sgnn	 * starting with a file descriptior, dig out useful info
75235368Sgnn	 * from the corresponding file_t and vnode_t.
76235368Sgnn	 */
77235368Sgnn	this->filistp = curthread->t_procp->p_user.u_finfo.fi_list;
78235368Sgnn	this->ufentryp = (uf_entry_t *)((uint64_t)this->filistp +
79235368Sgnn	    (uint64_t)arg0 * (uint64_t)sizeof (uf_entry_t));
80235368Sgnn	this->filep = this->ufentryp->uf_file;
81235368Sgnn	self->offset = this->filep->f_offset;
82235368Sgnn	this->vnodep = this->filep != 0 ? this->filep->f_vnode : 0;
83235368Sgnn	self->vpath = this->vnodep ? (this->vnodep->v_path != 0 ?
84235368Sgnn	    cleanpath(this->vnodep->v_path) : "<unknown>") : "<unknown>";
85235368Sgnn
86235368Sgnn	/* only trace activity to regular files and directories, as */
87235368Sgnn	self->sc_trace = this->vnodep ? this->vnodep->v_type == VREG ||
88235368Sgnn	    this->vnodep->v_type == VDIR ? 1 : 0 : 0;
89235368Sgnn}
90235368Sgnn
91235368Sgnnsyscall::*read:entry
92235368Sgnn/self->sc_trace/
93235368Sgnn{
94235368Sgnn	printf("sc-%-9s %10s %2s %8d %6d %s\n", probefunc, ".", "R",
95235368Sgnn	    (int)arg2, self->offset / 1024, self->vpath);
96235368Sgnn}
97235368Sgnn
98235368Sgnnsyscall::*write*:entry
99235368Sgnn/self->sc_trace/
100235368Sgnn{
101235368Sgnn	printf("sc-%-9s %10s %2s %8d %6d %s\n", probefunc, ".", "W",
102235368Sgnn	    (int)arg2, self->offset / 1024, self->vpath);
103235368Sgnn}
104235368Sgnn
105235368Sgnnsyscall::*read:return,
106235368Sgnnsyscall::*write*:return
107235368Sgnn{
108235368Sgnn	self->vpath = 0;
109235368Sgnn	self->offset = 0;
110235368Sgnn	self->sc_trace = 0;
111235368Sgnn}
112235368Sgnn
113235368Sgnnfbt::fop_read:entry,
114235368Sgnnfbt::fop_write:entry
115235368Sgnn/self->sc_trace && args[0]->v_path/
116235368Sgnn{
117235368Sgnn	printf("  %-10s %10s %2s %8d %6d %s\n", probefunc, ".",
118235368Sgnn	    probefunc == "fop_read" ? "R" : "W", args[1]->uio_resid,
119235368Sgnn	    args[1]->_uio_offset._f / 1024, cleanpath(args[0]->v_path));
120235368Sgnn}
121235368Sgnn
122235368Sgnnfbt:ufs:ufs_getpage_ra:entry
123235368Sgnn{
124235368Sgnn	/* fetch the real offset (file_t is unaware of this) */
125235368Sgnn	self->ra_offset = ((inode_t *)args[0]->v_data)->i_nextrio;
126235368Sgnn	self->read_ahead = 1;
127235368Sgnn}
128235368Sgnn
129235368Sgnnfbt:ufs:ufs_getpage_ra:return
130235368Sgnn{
131235368Sgnn	self->read_ahead = 0;
132235368Sgnn	self->ra_offset = 0;
133235368Sgnn}
134235368Sgnn
135235368Sgnnio::bdev_strategy:start
136235368Sgnn{
137235368Sgnn	this->offset = self->read_ahead ? self->ra_offset : args[2]->fi_offset;
138235368Sgnn	printf("    %-8s %10s %2s %8d %6d %s\n",
139235368Sgnn	    self->read_ahead ? "disk_ra" : "disk_io", args[1]->dev_statname,
140235368Sgnn	    args[0]->b_flags & B_READ ? "R" : "W", args[0]->b_bcount,
141235368Sgnn	    this->offset / 1024, args[2]->fi_pathname);
142235368Sgnn	/*
143235368Sgnn	 * it would seem to make sense to only trace disk events during
144235368Sgnn	 * an fop event, easily coded with a self->fop_trace flag. However
145235368Sgnn	 * writes are asynchronous to the fop_write calls (they are flushed
146235368Sgnn	 * at some later time), and so this approach will miss tracing
147235368Sgnn	 * most of the disk writes.
148235368Sgnn	 */
149235368Sgnn}
150