#!/usr/sbin/dtrace -s
/*
 * fsrw.d - file system read/write event tracing.
 *          Written using DTrace (Solaris 10 3/05)
 *
 * This traces file related activity: system call reads and writes,
 * vnode logical read and writes (fop), and disk I/O. It can be used
 * to examine the behaviour of each I/O layer, from the syscall
 * interface to what the disk is doing. Behaviour such as read-ahead, and
 * max I/O size breakup can be observed.
 *
 * 23-Apr-2006, ver 0.50
 *
 * USAGE:	fsrw.d
 *
 * FIELDS:
 *		Event		Traced event (see EVENTS below)
 *		Device		Device, for disk I/O
 *		RW		Either Read or Write
 *		Size		Size of I/O in bytes
 *		Offset		Offset of I/O in kilobytes
 *		Path		Path to file on disk
 *
 * EVENTS:
 *		sc-read		System call read
 *		sc-write	System call write
 *		fop_read	Logical read
 *		fop_write	Logical write
 *		disk_io		Physical disk I/O
 *		disk_ra		Physical disk I/O, read ahead
 *
 * The events are drawn with a level of indentation, which can sometimes
 * help identify related events.
 *
 * SEE ALSO: fspaging.d
 *
 * IDEA: Richard McDougall, Solaris Internals 2nd Ed, FS Chapter.
 *
 * COPYRIGHT: Copyright (c) 2006 Brendan Gregg.
 *
 * CDDL HEADER START
 *
 *  The contents of this file are subject to the terms of the
 *  Common Development and Distribution License, Version 1.0 only
 *  (the "License").  You may not use this file except in compliance
 *  with the License.
 *
 *  You can obtain a copy of the license at Docs/cddl1.txt
 *  or http://www.opensolaris.org/os/licensing.
 *  See the License for the specific language governing permissions
 *  and limitations under the License.
 *
 * CDDL HEADER END
 *
 * ToDo: readv()
 *
 * 20-Mar-2006	Brendan Gregg	Created this.
 */

#pragma D option quiet
#pragma D option switchrate=10hz

/* Print the column headings once, when tracing begins. */
dtrace:::BEGIN
{
	printf("%-12s %10s %2s %8s %6s %s\n",
	    "Event", "Device", "RW", "Size", "Offset", "Path");
}

/*
 * Syscall entry: resolve the file descriptor (arg0) to its file_t and
 * vnode_t, and stash the offset, path and a trace flag in thread-local
 * variables for the clauses that follow.
 */
syscall::*read:entry,
syscall::*write*:entry
{
	/*
	 * starting with a file descriptor, dig out useful info
	 * from the corresponding file_t and vnode_t.
	 */
	this->filistp = curthread->t_procp->p_user.u_finfo.fi_list;
	this->ufentryp = (uf_entry_t *)((uint64_t)this->filistp +
	    (uint64_t)arg0 * (uint64_t)sizeof (uf_entry_t));
	this->filep = this->ufentryp->uf_file;

	/*
	 * uf_file may be NULL (the vnode lookup below already allows for
	 * that), so guard the f_offset read as well; otherwise the clause
	 * aborts with an invalid address error before sc_trace is set.
	 */
	self->offset = this->filep != 0 ? this->filep->f_offset : 0;
	this->vnodep = this->filep != 0 ? this->filep->f_vnode : 0;
	self->vpath = this->vnodep ? (this->vnodep->v_path != 0 ?
	    cleanpath(this->vnodep->v_path) : "<unknown>") : "<unknown>";

	/*
	 * only trace activity to regular files and directories; any
	 * other vnode type (or a missing vnode) leaves sc_trace at 0.
	 */
	self->sc_trace = this->vnodep ? (this->vnodep->v_type == VREG ||
	    this->vnodep->v_type == VDIR ? 1 : 0) : 0;
}

/*
 * Report a read system call: arg2 is printed as the size, and the
 * offset saved at entry is printed in kilobytes.
 * NOTE(review): the offset shown is the file_t's f_offset; pread()
 * supplies its own offset as an argument, which is not consulted here.
 */
syscall::*read:entry
/self->sc_trace/
{
	printf("sc-%-9s %10s %2s %8d %6d %s\n", probefunc, ".", "R",
	    (int)arg2, self->offset / 1024, self->vpath);
}

/*
 * Report a write system call, in the same format as reads.
 * NOTE(review): "*write*" also matches writev(), whose arg2 is an
 * iovec count rather than a byte count; confirm before trusting the
 * Size column for that call.
 */
syscall::*write*:entry
/self->sc_trace/
{
	printf("sc-%-9s %10s %2s %8d %6d %s\n", probefunc, ".", "W",
	    (int)arg2, self->offset / 1024, self->vpath);
}

/* Syscall return: release the thread-local state set at entry. */
syscall::*read:return,
syscall::*write*:return
{
	self->vpath = 0;
	self->offset = 0;
	self->sc_trace = 0;
}

/*
 * Logical (vnode layer) reads and writes that occur while a traced
 * syscall is in progress on this thread. The event name is indented
 * by two spaces; with the 10 character name field this keeps the
 * Event column at the same 12 character width as the heading.
 */
fbt::fop_read:entry,
fbt::fop_write:entry
/self->sc_trace && args[0]->v_path/
{
	printf("  %-10s %10s %2s %8d %6d %s\n", probefunc, ".",
	    probefunc == "fop_read" ? "R" : "W", args[1]->uio_resid,
	    args[1]->_uio_offset._f / 1024, cleanpath(args[0]->v_path));
}

/* UFS read-ahead begins: remember its offset and flag the thread. */
fbt:ufs:ufs_getpage_ra:entry
{
	/* fetch the real offset (file_t is unaware of this) */
	self->ra_offset = ((inode_t *)args[0]->v_data)->i_nextrio;
	self->read_ahead = 1;
}

/* UFS read-ahead ends: clear the read-ahead state. */
fbt:ufs:ufs_getpage_ra:return
{
	self->read_ahead = 0;
	self->ra_offset = 0;
}

/*
 * Physical disk I/O. Events issued while the thread is inside
 * ufs_getpage_ra are labelled disk_ra and use the saved read-ahead
 * offset; all others are labelled disk_io and use the io provider's
 * fi_offset. The event name is indented by four spaces; with the
 * 8 character name field the Event column is again 12 characters wide.
 */
io::bdev_strategy:start
{
	this->offset = self->read_ahead ? self->ra_offset : args[2]->fi_offset;
	printf("    %-8s %10s %2s %8d %6d %s\n",
	    self->read_ahead ? "disk_ra" : "disk_io", args[1]->dev_statname,
	    args[0]->b_flags & B_READ ? "R" : "W", args[0]->b_bcount,
	    this->offset / 1024, args[2]->fi_pathname);
	/*
	 * it would seem to make sense to only trace disk events during
	 * an fop event, easily coded with a self->fop_trace flag. However
	 * writes are asynchronous to the fop_write calls (they are flushed
	 * at some later time), and so this approach will miss tracing
	 * most of the disk writes.
	 */
}