1/* $NetBSD: subr_iostat.c,v 1.26 2024/05/04 13:33:18 mlelstv Exp $ */ 2/* NetBSD: subr_disk.c,v 1.69 2005/05/29 22:24:15 christos Exp */ 3 4/*- 5 * Copyright (c) 1996, 1997, 1999, 2000, 2009 The NetBSD Foundation, Inc. 6 * All rights reserved. 7 * 8 * This code is derived from software contributed to The NetBSD Foundation 9 * by Jason R. Thorpe of the Numerical Aerospace Simulation Facility, 10 * NASA Ames Research Center. 11 * 12 * Redistribution and use in source and binary forms, with or without 13 * modification, are permitted provided that the following conditions 14 * are met: 15 * 1. Redistributions of source code must retain the above copyright 16 * notice, this list of conditions and the following disclaimer. 17 * 2. Redistributions in binary form must reproduce the above copyright 18 * notice, this list of conditions and the following disclaimer in the 19 * documentation and/or other materials provided with the distribution. 20 * 21 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS 22 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED 23 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 24 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS 25 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 26 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 27 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 28 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 29 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 30 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 31 * POSSIBILITY OF SUCH DAMAGE. 32 */ 33 34/* 35 * Copyright (c) 1982, 1986, 1988, 1993 36 * The Regents of the University of California. All rights reserved. 37 * (c) UNIX System Laboratories, Inc. 38 * All or some portions of this file are derived from material licensed 39 * to the University of California by American Telephone and Telegraph 40 * Co. or Unix System Laboratories, Inc. and are reproduced herein with 41 * the permission of UNIX System Laboratories, Inc. 42 * 43 * Redistribution and use in source and binary forms, with or without 44 * modification, are permitted provided that the following conditions 45 * are met: 46 * 1. Redistributions of source code must retain the above copyright 47 * notice, this list of conditions and the following disclaimer. 48 * 2. Redistributions in binary form must reproduce the above copyright 49 * notice, this list of conditions and the following disclaimer in the 50 * documentation and/or other materials provided with the distribution. 51 * 3. Neither the name of the University nor the names of its contributors 52 * may be used to endorse or promote products derived from this software 53 * without specific prior written permission. 54 * 55 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 56 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 57 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 58 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 59 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 60 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 61 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 62 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 63 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 64 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 65 * SUCH DAMAGE. 66 * 67 * @(#)ufs_disksubr.c 8.5 (Berkeley) 1/21/94 68 */ 69 70#include <sys/cdefs.h> 71__KERNEL_RCSID(0, "$NetBSD: subr_iostat.c,v 1.26 2024/05/04 13:33:18 mlelstv Exp $"); 72 73#include <sys/param.h> 74#include <sys/kernel.h> 75#include <sys/kmem.h> 76#include <sys/iostat.h> 77#include <sys/sysctl.h> 78#include <sys/rwlock.h> 79 80/* 81 * Function prototypes for sysctl nodes 82 */ 83static int sysctl_hw_disknames(SYSCTLFN_PROTO); 84static int sysctl_hw_iostatnames(SYSCTLFN_PROTO); 85static int sysctl_hw_iostats(SYSCTLFN_PROTO); 86 87static int 88iostati_getnames(int disk_only, char *oldp, size_t *oldlenp, const void *newp, 89 u_int namelen); 90 91/* 92 * A global list of all drives attached to the system. May grow or 93 * shrink over time. 94 */ 95struct iostatlist_head iostatlist = TAILQ_HEAD_INITIALIZER(iostatlist); 96int iostat_count; /* number of drives in global drivelist */ 97krwlock_t iostatlist_lock; 98 99static void sysctl_io_stats_setup(struct sysctllog **); 100 101/* 102 * Initialise the iostat subsystem. 103 */ 104void 105iostat_init(void) 106{ 107 108 rw_init(&iostatlist_lock); 109 sysctl_io_stats_setup(NULL); 110} 111 112/* 113 * Searches the iostatlist for the iostat corresponding to the 114 * name provided. 115 */ 116struct io_stats * 117iostat_find(const char *name) 118{ 119 struct io_stats *iostatp; 120 121 KASSERT(name != NULL); 122 123 rw_enter(&iostatlist_lock, RW_READER); 124 TAILQ_FOREACH(iostatp, &iostatlist, io_link) { 125 if (strcmp(iostatp->io_name, name) == 0) { 126 break; 127 } 128 } 129 rw_exit(&iostatlist_lock); 130 131 return iostatp; 132} 133 134/* 135 * Allocate and initialise memory for the i/o statistics. 136 */ 137struct io_stats * 138iostat_alloc(int32_t type, void *parent, const char *name) 139{ 140 struct io_stats *stats; 141 142 stats = kmem_zalloc(sizeof(*stats), KM_SLEEP); 143 stats->io_type = type; 144 stats->io_parent = parent; 145 (void)strlcpy(stats->io_name, name, sizeof(stats->io_name)); 146 147 /* 148 * Set the attached timestamp. 149 */ 150 getmicrouptime(&stats->io_attachtime); 151 152 /* 153 * Link into the drivelist. 154 */ 155 rw_enter(&iostatlist_lock, RW_WRITER); 156 TAILQ_INSERT_TAIL(&iostatlist, stats, io_link); 157 iostat_count++; 158 rw_exit(&iostatlist_lock); 159 160 return stats; 161} 162 163/* 164 * Remove i/o from stats collection. 165 */ 166void 167iostat_free(struct io_stats *stats) 168{ 169 170 /* 171 * Remove from the iostat list. 172 */ 173 if (iostat_count == 0) 174 panic("iostat_free: iostat_count == 0"); 175 rw_enter(&iostatlist_lock, RW_WRITER); 176 TAILQ_REMOVE(&iostatlist, stats, io_link); 177 iostat_count--; 178 rw_exit(&iostatlist_lock); 179 kmem_free(stats, sizeof(*stats)); 180} 181 182/* 183 * Rename i/o stats. 184 */ 185void 186iostat_rename(struct io_stats *stats, const char *name) 187{ 188 189 rw_enter(&iostatlist_lock, RW_WRITER); 190 (void)strlcpy(stats->io_name, name, sizeof(stats->io_name)); 191 rw_exit(&iostatlist_lock); 192} 193 194/* 195 * multiply timeval by unsigned integer and add to result 196 */ 197static void 198timermac(struct timeval *a, uint64_t count, struct timeval *res) 199{ 200 struct timeval part = *a; 201 202 while (count) { 203 if (count & 1) 204 timeradd(res, &part, res); 205 timeradd(&part, &part, &part); 206 count >>= 1; 207 } 208} 209 210/* 211 * Increment the iostat wait counter. 212 * Accumulate wait time and timesum. 213 * 214 * Wait time is spent in the device bufq. 215 */ 216void 217iostat_wait(struct io_stats *stats) 218{ 219 struct timeval dv_time, diff_time; 220 int32_t count; 221 222 KASSERT(stats->io_wait >= 0); 223 224 getmicrouptime(&dv_time); 225 226 timersub(&dv_time, &stats->io_waitstamp, &diff_time); 227 count = stats->io_wait++; 228 if (count != 0) { 229 timermac(&diff_time, count, &stats->io_waitsum); 230 timeradd(&stats->io_waittime, &diff_time, &stats->io_waittime); 231 } 232 stats->io_waitstamp = dv_time; 233} 234 235/* 236 * Decrement the iostat wait counter. 237 * Increment the iostat busy counter. 238 * Accumulate wait and busy times and timesums. 239 * 240 * Busy time is spent being processed by the device. 241 * 242 * Old devices do not yet measure wait time, so skip 243 * processing it if the counter is still zero. 244 */ 245void 246iostat_busy(struct io_stats *stats) 247{ 248 struct timeval dv_time, diff_time; 249 int32_t count; 250 251 KASSERT(stats->io_wait >= 0); /* > 0 when iostat_wait is used */ 252 KASSERT(stats->io_busy >= 0); 253 254 getmicrouptime(&dv_time); 255 256 timersub(&dv_time, &stats->io_waitstamp, &diff_time); 257 if (stats->io_wait != 0) { 258 count = stats->io_wait--; 259 timermac(&diff_time, count, &stats->io_waitsum); 260 timeradd(&stats->io_waittime, &diff_time, &stats->io_waittime); 261 } 262 stats->io_waitstamp = dv_time; 263 264 timersub(&dv_time, &stats->io_busystamp, &diff_time); 265 count = stats->io_busy++; 266 if (count != 0) { 267 timermac(&diff_time, count, &stats->io_busysum); 268 timeradd(&stats->io_busytime, &diff_time, &stats->io_busytime); 269 } 270 stats->io_busystamp = dv_time; 271} 272 273/* 274 * Decrement the iostat busy counter, increment the byte count. 275 * Accumulate busy time and timesum. 276 */ 277void 278iostat_unbusy(struct io_stats *stats, long bcount, int read) 279{ 280 struct timeval dv_time, diff_time; 281 int32_t count; 282 283 KASSERT(stats->io_busy > 0); 284 285 getmicrouptime(&dv_time); 286 stats->io_timestamp = dv_time; 287 288 /* any op */ 289 timersub(&dv_time, &stats->io_busystamp, &diff_time); 290 count = stats->io_busy--; 291 timermac(&diff_time, count, &stats->io_busysum); 292 timeradd(&stats->io_busytime, &diff_time, &stats->io_busytime); 293 stats->io_busystamp = dv_time; 294 295 if (bcount > 0) { 296 if (read) { 297 stats->io_rbytes += bcount; 298 stats->io_rxfer++; 299 } else { 300 stats->io_wbytes += bcount; 301 stats->io_wxfer++; 302 } 303 } 304} 305 306/* 307 * Return non-zero if a device has an I/O request in flight. 308 */ 309bool 310iostat_isbusy(struct io_stats *stats) 311{ 312 313 return stats->io_busy != 0; 314} 315 316/* 317 * Increment the seek counter. This does look almost redundant but it 318 * abstracts the stats gathering. 319 */ 320void 321iostat_seek(struct io_stats *stats) 322{ 323 324 stats->io_seek++; 325} 326 327static int 328sysctl_hw_disknames(SYSCTLFN_ARGS) 329{ 330 331 return iostati_getnames(1, oldp, oldlenp, newp, namelen); 332} 333 334static int 335sysctl_hw_iostatnames(SYSCTLFN_ARGS) 336{ 337 338 return iostati_getnames(0, oldp, oldlenp, newp, namelen); 339} 340 341static int 342iostati_getnames(int disk_only, char *oldp, size_t *oldlenp, const void *newp, 343 u_int namelen) 344{ 345 char bf[IOSTATNAMELEN + 1]; 346 char *where = oldp; 347 struct io_stats *stats; 348 size_t needed, left, slen; 349 int error, first; 350 351 if (newp != NULL) 352 return (EPERM); 353 if (namelen != 0) 354 return (EINVAL); 355 356 first = 1; 357 error = 0; 358 needed = 0; 359 left = *oldlenp; 360 361 rw_enter(&iostatlist_lock, RW_READER); 362 for (stats = TAILQ_FIRST(&iostatlist); stats != NULL; 363 stats = TAILQ_NEXT(stats, io_link)) { 364 if ((disk_only == 1) && (stats->io_type != IOSTAT_DISK)) 365 continue; 366 367 if (where == NULL) 368 needed += strlen(stats->io_name) + 1; 369 else { 370 memset(bf, 0, sizeof(bf)); 371 if (first) { 372 strncpy(bf, stats->io_name, sizeof(bf)); 373 /* account for trailing NUL byte */ 374 needed += 1; 375 first = 0; 376 } else { 377 bf[0] = ' '; 378 strncpy(bf + 1, stats->io_name, 379 sizeof(bf) - 1); 380 } 381 bf[IOSTATNAMELEN] = '\0'; 382 slen = strlen(bf); 383 if (left < slen + 1) 384 break; 385 /* +1 to copy out the trailing NUL byte */ 386 error = copyout(bf, where, slen + 1); 387 if (error) 388 break; 389 where += slen; 390 needed += slen; 391 left -= slen; 392 } 393 } 394 rw_exit(&iostatlist_lock); 395 *oldlenp = needed; 396 return (error); 397} 398 399static int 400sysctl_hw_iostats(SYSCTLFN_ARGS) 401{ 402 struct io_sysctl sdrive; 403 struct io_stats *stats; 404 char *where = oldp; 405 size_t tocopy, left; 406 int error; 407 408 if (newp != NULL) 409 return (EPERM); 410 411 /* 412 * The original hw.diskstats call was broken and did not require 413 * the userland to pass in its size of struct disk_sysctl. This 414 * was fixed after NetBSD 1.6 was released. 415 */ 416 if (namelen == 0) 417 tocopy = offsetof(struct io_sysctl, busy); 418 else 419 tocopy = name[0]; 420 421 if (where == NULL) { 422 *oldlenp = iostat_count * tocopy; 423 return (0); 424 } 425 426 error = 0; 427 left = *oldlenp; 428 memset(&sdrive, 0, sizeof(sdrive)); 429 *oldlenp = 0; 430 431 rw_enter(&iostatlist_lock, RW_READER); 432 TAILQ_FOREACH(stats, &iostatlist, io_link) { 433 if (left < tocopy) 434 break; 435 436 strncpy(sdrive.name, stats->io_name, sizeof(sdrive.name)); 437 sdrive.attachtime_sec = stats->io_attachtime.tv_sec; 438 sdrive.attachtime_usec = stats->io_attachtime.tv_usec; 439 sdrive.timestamp_sec = stats->io_busystamp.tv_sec; 440 sdrive.timestamp_usec = stats->io_busystamp.tv_usec; 441 442 sdrive.time_sec = stats->io_busytime.tv_sec; 443 sdrive.time_usec = stats->io_busytime.tv_usec; 444 445 sdrive.seek = stats->io_seek; 446 447 sdrive.rxfer = stats->io_rxfer; 448 sdrive.wxfer = stats->io_wxfer; 449 sdrive.xfer = stats->io_rxfer + stats->io_wxfer; 450 451 sdrive.rbytes = stats->io_rbytes; 452 sdrive.wbytes = stats->io_wbytes; 453 sdrive.bytes = stats->io_rbytes + stats->io_wbytes; 454 455 sdrive.wait_sec = stats->io_waittime.tv_sec; 456 sdrive.wait_usec = stats->io_waittime.tv_usec; 457 458 sdrive.time_sec = stats->io_busytime.tv_sec; 459 sdrive.time_usec = stats->io_busytime.tv_usec; 460 461 sdrive.waitsum_sec = stats->io_waitsum.tv_sec; 462 sdrive.waitsum_usec = stats->io_waitsum.tv_usec; 463 464 sdrive.busysum_sec = stats->io_busysum.tv_sec; 465 sdrive.busysum_usec = stats->io_busysum.tv_usec; 466 467 sdrive.busy = stats->io_busy; 468 469 error = copyout(&sdrive, where, uimin(tocopy, sizeof(sdrive))); 470 if (error) 471 break; 472 where += tocopy; 473 *oldlenp += tocopy; 474 left -= tocopy; 475 } 476 rw_exit(&iostatlist_lock); 477 return (error); 478} 479 480static void 481sysctl_io_stats_setup(struct sysctllog **clog) 482{ 483 484 sysctl_createv(clog, 0, NULL, NULL, 485 CTLFLAG_PERMANENT, 486 CTLTYPE_STRING, "disknames", 487 SYSCTL_DESCR("List of disk drives present"), 488 sysctl_hw_disknames, 0, NULL, 0, 489 CTL_HW, HW_DISKNAMES, CTL_EOL); 490 sysctl_createv(clog, 0, NULL, NULL, 491 CTLFLAG_PERMANENT, 492 CTLTYPE_STRING, "iostatnames", 493 SYSCTL_DESCR("I/O stats are being collected for these" 494 " devices"), 495 sysctl_hw_iostatnames, 0, NULL, 0, 496 CTL_HW, HW_IOSTATNAMES, CTL_EOL); 497 sysctl_createv(clog, 0, NULL, NULL, 498 CTLFLAG_PERMANENT, 499 CTLTYPE_STRUCT, "iostats", 500 SYSCTL_DESCR("Statistics on device I/O operations"), 501 sysctl_hw_iostats, 0, NULL, 0, 502 CTL_HW, HW_IOSTATS, CTL_EOL); 503} 504