1235368Sgnn#!/usr/sbin/dtrace -s 2235368Sgnn/* 3235368Sgnn * fsrw.d - file system read/write event tracing. 4235368Sgnn * Written using DTrace (Solaris 10 3/05) 5235368Sgnn * 6235368Sgnn * This traces file related activity: system call reads and writes, 7235368Sgnn * vnode logical read and writes (fop), and disk I/O. It can be used 8235368Sgnn * to examine the behaviour of each I/O layer, from the syscall 9235368Sgnn * interface to what the disk is doing. Behaviour such as read-ahead, and 10235368Sgnn * max I/O size breakup can be observed. 11235368Sgnn * 12235368Sgnn * $Id: fsrw.d 3 2007-08-01 10:50:08Z brendan $ 13235368Sgnn * 14235368Sgnn * USAGE: fsrw.d 15235368Sgnn * 16235368Sgnn * FIELDS: 17235368Sgnn * Event Traced event (see EVENTS below) 18235368Sgnn * Device Device, for disk I/O 19235368Sgnn * RW Either Read or Write 20235368Sgnn * Size Size of I/O in bytes 21235368Sgnn * Offset Offset of I/O in kilobytes 22235368Sgnn * Path Path to file on disk 23235368Sgnn * 24235368Sgnn * EVENTS: 25235368Sgnn * sc-read System call read 26235368Sgnn * sc-write System call write 27235368Sgnn * fop_read Logical read 28235368Sgnn * fop_write Logical write 29235368Sgnn * disk_io Physical disk I/O 30235368Sgnn * disk_ra Physical disk I/O, read ahead 31235368Sgnn * 32235368Sgnn * The events are drawn with a level of indentation, which can sometimes 33235368Sgnn * help identify related events. 34235368Sgnn * 35235368Sgnn * SEE ALSO: fspaging.d 36235368Sgnn * 37235368Sgnn * IDEA: Richard McDougall, Solaris Internals 2nd Ed, FS Chapter. 38235368Sgnn * 39235368Sgnn * COPYRIGHT: Copyright (c) 2006 Brendan Gregg. 40235368Sgnn * 41235368Sgnn * CDDL HEADER START 42235368Sgnn * 43235368Sgnn * The contents of this file are subject to the terms of the 44235368Sgnn * Common Development and Distribution License, Version 1.0 only 45235368Sgnn * (the "License"). You may not use this file except in compliance 46235368Sgnn * with the License. 47235368Sgnn * 48235368Sgnn * You can obtain a copy of the license at Docs/cddl1.txt 49235368Sgnn * or http://www.opensolaris.org/os/licensing. 50235368Sgnn * See the License for the specific language governing permissions 51235368Sgnn * and limitations under the License. 52235368Sgnn * 53235368Sgnn * CDDL HEADER END 54235368Sgnn * 55235368Sgnn * ToDo: readv() 56235368Sgnn * 57235368Sgnn * 20-Mar-2006 Brendan Gregg Created this. 58235368Sgnn * 23-Apr-2006 " " Last update. 59235368Sgnn */ 60235368Sgnn 61235368Sgnn#pragma D option quiet 62235368Sgnn#pragma D option switchrate=10hz 63235368Sgnn 64235368Sgnndtrace:::BEGIN 65235368Sgnn{ 66235368Sgnn printf("%-12s %10s %2s %8s %6s %s\n", 67235368Sgnn "Event", "Device", "RW", "Size", "Offset", "Path"); 68235368Sgnn} 69235368Sgnn 70235368Sgnnsyscall::*read:entry, 71235368Sgnnsyscall::*write*:entry 72235368Sgnn{ 73235368Sgnn /* 74235368Sgnn * starting with a file descriptior, dig out useful info 75235368Sgnn * from the corresponding file_t and vnode_t. 76235368Sgnn */ 77235368Sgnn this->filistp = curthread->t_procp->p_user.u_finfo.fi_list; 78235368Sgnn this->ufentryp = (uf_entry_t *)((uint64_t)this->filistp + 79235368Sgnn (uint64_t)arg0 * (uint64_t)sizeof (uf_entry_t)); 80235368Sgnn this->filep = this->ufentryp->uf_file; 81235368Sgnn self->offset = this->filep->f_offset; 82235368Sgnn this->vnodep = this->filep != 0 ? this->filep->f_vnode : 0; 83235368Sgnn self->vpath = this->vnodep ? (this->vnodep->v_path != 0 ? 84235368Sgnn cleanpath(this->vnodep->v_path) : "<unknown>") : "<unknown>"; 85235368Sgnn 86235368Sgnn /* only trace activity to regular files and directories, as */ 87235368Sgnn self->sc_trace = this->vnodep ? this->vnodep->v_type == VREG || 88235368Sgnn this->vnodep->v_type == VDIR ? 1 : 0 : 0; 89235368Sgnn} 90235368Sgnn 91235368Sgnnsyscall::*read:entry 92235368Sgnn/self->sc_trace/ 93235368Sgnn{ 94235368Sgnn printf("sc-%-9s %10s %2s %8d %6d %s\n", probefunc, ".", "R", 95235368Sgnn (int)arg2, self->offset / 1024, self->vpath); 96235368Sgnn} 97235368Sgnn 98235368Sgnnsyscall::*write*:entry 99235368Sgnn/self->sc_trace/ 100235368Sgnn{ 101235368Sgnn printf("sc-%-9s %10s %2s %8d %6d %s\n", probefunc, ".", "W", 102235368Sgnn (int)arg2, self->offset / 1024, self->vpath); 103235368Sgnn} 104235368Sgnn 105235368Sgnnsyscall::*read:return, 106235368Sgnnsyscall::*write*:return 107235368Sgnn{ 108235368Sgnn self->vpath = 0; 109235368Sgnn self->offset = 0; 110235368Sgnn self->sc_trace = 0; 111235368Sgnn} 112235368Sgnn 113235368Sgnnfbt::fop_read:entry, 114235368Sgnnfbt::fop_write:entry 115235368Sgnn/self->sc_trace && args[0]->v_path/ 116235368Sgnn{ 117235368Sgnn printf(" %-10s %10s %2s %8d %6d %s\n", probefunc, ".", 118235368Sgnn probefunc == "fop_read" ? "R" : "W", args[1]->uio_resid, 119235368Sgnn args[1]->_uio_offset._f / 1024, cleanpath(args[0]->v_path)); 120235368Sgnn} 121235368Sgnn 122235368Sgnnfbt:ufs:ufs_getpage_ra:entry 123235368Sgnn{ 124235368Sgnn /* fetch the real offset (file_t is unaware of this) */ 125235368Sgnn self->ra_offset = ((inode_t *)args[0]->v_data)->i_nextrio; 126235368Sgnn self->read_ahead = 1; 127235368Sgnn} 128235368Sgnn 129235368Sgnnfbt:ufs:ufs_getpage_ra:return 130235368Sgnn{ 131235368Sgnn self->read_ahead = 0; 132235368Sgnn self->ra_offset = 0; 133235368Sgnn} 134235368Sgnn 135235368Sgnnio::bdev_strategy:start 136235368Sgnn{ 137235368Sgnn this->offset = self->read_ahead ? self->ra_offset : args[2]->fi_offset; 138235368Sgnn printf(" %-8s %10s %2s %8d %6d %s\n", 139235368Sgnn self->read_ahead ? "disk_ra" : "disk_io", args[1]->dev_statname, 140235368Sgnn args[0]->b_flags & B_READ ? "R" : "W", args[0]->b_bcount, 141235368Sgnn this->offset / 1024, args[2]->fi_pathname); 142235368Sgnn /* 143235368Sgnn * it would seem to make sense to only trace disk events during 144235368Sgnn * an fop event, easily coded with a self->fop_trace flag. However 145235368Sgnn * writes are asynchronous to the fop_write calls (they are flushed 146235368Sgnn * at some later time), and so this approach will miss tracing 147235368Sgnn * most of the disk writes. 148235368Sgnn */ 149235368Sgnn} 150