#!/usr/sbin/dtrace -s
/*
 * fsrw.d - file system read/write event tracing.
 *          Written using DTrace (Solaris 10 3/05)
 *
 * This traces file related activity: system call reads and writes,
 * vnode logical read and writes (fop), and disk I/O. It can be used
 * to examine the behaviour of each I/O layer, from the syscall
 * interface to what the disk is doing. Behaviour such as read-ahead, and
 * max I/O size breakup can be observed.
 *
 * $Id: fsrw.d 3 2007-08-01 10:50:08Z brendan $
 *
 * USAGE:       fsrw.d
 *
 * FIELDS:
 *              Event           Traced event (see EVENTS below)
 *              Device          Device, for disk I/O
 *              RW              Either Read or Write
 *              Size            Size of I/O in bytes
 *              Offset          Offset of I/O in kilobytes
 *              Path            Path to file on disk
 *
 * EVENTS:
 *              sc-read         System call read
 *              sc-write        System call write
 *              fop_read        Logical read
 *              fop_write       Logical write
 *              disk_io         Physical disk I/O
 *              disk_ra         Physical disk I/O, read ahead
 *
 * The events are drawn with a level of indentation, which can sometimes
 * help identify related events.
 *
 * SEE ALSO: fspaging.d
 *
 * IDEA: Richard McDougall, Solaris Internals 2nd Ed, FS Chapter.
 *
 * COPYRIGHT: Copyright (c) 2006 Brendan Gregg.
 *
 * CDDL HEADER START
 *
 *  The contents of this file are subject to the terms of the
 *  Common Development and Distribution License, Version 1.0 only
 *  (the "License").  You may not use this file except in compliance
 *  with the License.
 *
 *  You can obtain a copy of the license at Docs/cddl1.txt
 *  or http://www.opensolaris.org/os/licensing.
 *  See the License for the specific language governing permissions
 *  and limitations under the License.
 *
 * CDDL HEADER END
 *
 * ToDo: readv()
 *
 * 20-Mar-2006  Brendan Gregg   Created this.
 * 23-Apr-2006     "      "     Last update.
 */

#pragma D option quiet
#pragma D option switchrate=10hz

/* Print the column headings once, before any event is reported. */
dtrace:::BEGIN
{
	printf("%-12s %10s %2s %8s %6s %s\n",
	    "Event", "Device", "RW", "Size", "Offset", "Path");
}

/*
 * Common setup for every traced read/write syscall: resolve the file
 * descriptor in arg0 to its file_t and vnode_t, then stash the offset,
 * path and a "should we trace this?" flag in thread-local variables for
 * the clauses that follow.
 */
syscall::*read:entry,
syscall::*write*:entry
{
	/*
	 * starting with a file descriptor, dig out useful info
	 * from the corresponding file_t and vnode_t.
	 * (arg0 is used as an index into fi_list without a range check.)
	 */
	this->filistp = curthread->t_procp->p_user.u_finfo.fi_list;
	this->ufentryp = (uf_entry_t *)((uint64_t)this->filistp +
	    (uint64_t)arg0 * (uint64_t)sizeof (uf_entry_t));
	this->filep = this->ufentryp->uf_file;

	/*
	 * The fd slot may hold no file_t. Test filep before every
	 * dereference so that such a call leaves the clause with
	 * sc_trace == 0 instead of aborting it on an invalid address.
	 */
	self->offset = this->filep != 0 ? this->filep->f_offset : 0;
	this->vnodep = this->filep != 0 ? this->filep->f_vnode : 0;
	self->vpath = this->vnodep ? (this->vnodep->v_path != 0 ?
	    cleanpath(this->vnodep->v_path) : "<unknown>") : "<unknown>";

	/*
	 * only trace activity to regular files and directories (VREG,
	 * VDIR); any other vnode type leaves sc_trace clear.
	 */
	self->sc_trace = this->vnodep ?
	    ((this->vnodep->v_type == VREG ||
	    this->vnodep->v_type == VDIR) ? 1 : 0) : 0;
}

/* Report a read syscall on a traced file; arg2 is printed as the size. */
syscall::*read:entry
/self->sc_trace/
{
	printf("sc-%-9s %10s %2s %8d %6d %s\n", probefunc, ".", "R",
	    (int)arg2, self->offset / 1024, self->vpath);
}

/* Report a write syscall on a traced file; arg2 is printed as the size. */
syscall::*write*:entry
/self->sc_trace/
{
	printf("sc-%-9s %10s %2s %8d %6d %s\n", probefunc, ".", "W",
	    (int)arg2, self->offset / 1024, self->vpath);
}

/* Syscall finished: release the thread-local state set at entry. */
syscall::*read:return,
syscall::*write*:return
{
	self->vpath = 0;
	self->offset = 0;
	self->sc_trace = 0;
}

/*
 * Logical (vnode layer) read/write made while a traced syscall is in
 * progress on this thread. The event name is indented two spaces; with
 * %-10s that keeps the Event column 12 characters wide, in line with
 * the header row and the "sc-" rows.
 */
fbt::fop_read:entry,
fbt::fop_write:entry
/self->sc_trace && args[0]->v_path/
{
	printf("  %-10s %10s %2s %8d %6d %s\n", probefunc, ".",
	    probefunc == "fop_read" ? "R" : "W", args[1]->uio_resid,
	    args[1]->_uio_offset._f / 1024, cleanpath(args[0]->v_path));
}

/*
 * Entering UFS read-ahead: remember that any disk I/O issued by this
 * thread until the matching return is read-ahead, and where it starts.
 */
fbt:ufs:ufs_getpage_ra:entry
{
	/* fetch the real offset (file_t is unaware of this) */
	self->ra_offset = ((inode_t *)args[0]->v_data)->i_nextrio;
	self->read_ahead = 1;
}

/* Leaving UFS read-ahead: clear the read-ahead markers. */
fbt:ufs:ufs_getpage_ra:return
{
	self->read_ahead = 0;
	self->ra_offset = 0;
}

/*
 * Physical disk I/O. The event name is indented four spaces; with %-8s
 * that keeps the Event column 12 characters wide. During read-ahead the
 * offset saved at ufs_getpage_ra entry is reported instead of the
 * fileinfo offset.
 */
io::bdev_strategy:start
{
	this->offset = self->read_ahead ? self->ra_offset : args[2]->fi_offset;
	printf("    %-8s %10s %2s %8d %6d %s\n",
	    self->read_ahead ? "disk_ra" : "disk_io", args[1]->dev_statname,
	    args[0]->b_flags & B_READ ? "R" : "W", args[0]->b_bcount,
	    this->offset / 1024, args[2]->fi_pathname);
	/*
	 * it would seem to make sense to only trace disk events during
	 * an fop event, easily coded with a self->fop_trace flag. However
	 * writes are asynchronous to the fop_write calls (they are flushed
	 * at some later time), and so this approach will miss tracing
	 * most of the disk writes.
	 */
}