/* geom_io.c revision 111119 */
1/*- 2 * Copyright (c) 2002 Poul-Henning Kamp 3 * Copyright (c) 2002 Networks Associates Technology, Inc. 4 * All rights reserved. 5 * 6 * This software was developed for the FreeBSD Project by Poul-Henning Kamp 7 * and NAI Labs, the Security Research Division of Network Associates, Inc. 8 * under DARPA/SPAWAR contract N66001-01-C-8035 ("CBOSS"), as part of the 9 * DARPA CHATS research program. 10 * 11 * Redistribution and use in source and binary forms, with or without 12 * modification, are permitted provided that the following conditions 13 * are met: 14 * 1. Redistributions of source code must retain the above copyright 15 * notice, this list of conditions and the following disclaimer. 16 * 2. Redistributions in binary form must reproduce the above copyright 17 * notice, this list of conditions and the following disclaimer in the 18 * documentation and/or other materials provided with the distribution. 19 * 3. The names of the authors may not be used to endorse or promote 20 * products derived from this software without specific prior written 21 * permission. 22 * 23 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 24 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 25 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 26 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 27 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 28 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 29 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 30 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 31 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 32 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 33 * SUCH DAMAGE. 
34 * 35 * $FreeBSD: head/sys/geom/geom_io.c 111119 2003-02-19 05:47:46Z imp $ 36 */ 37 38 39#include <sys/param.h> 40#include <sys/stdint.h> 41#ifndef _KERNEL 42#include <stdio.h> 43#include <string.h> 44#include <stdlib.h> 45#include <signal.h> 46#include <err.h> 47#include <sched.h> 48#else 49#include <sys/systm.h> 50#include <sys/kernel.h> 51#include <sys/malloc.h> 52#include <sys/bio.h> 53#endif 54 55#include <sys/errno.h> 56#include <geom/geom.h> 57#include <geom/geom_int.h> 58#include <geom/geom_stats.h> 59 60static struct g_bioq g_bio_run_down; 61static struct g_bioq g_bio_run_up; 62static struct g_bioq g_bio_run_task; 63static struct g_bioq g_bio_idle; 64 65static u_int pace; 66 67#include <machine/atomic.h> 68 69static void 70g_bioq_lock(struct g_bioq *bq) 71{ 72 73 mtx_lock(&bq->bio_queue_lock); 74} 75 76static void 77g_bioq_unlock(struct g_bioq *bq) 78{ 79 80 mtx_unlock(&bq->bio_queue_lock); 81} 82 83#if 0 84static void 85g_bioq_destroy(struct g_bioq *bq) 86{ 87 88 mtx_destroy(&bq->bio_queue_lock); 89} 90#endif 91 92static void 93g_bioq_init(struct g_bioq *bq) 94{ 95 96 TAILQ_INIT(&bq->bio_queue); 97 mtx_init(&bq->bio_queue_lock, "bio queue", NULL, MTX_DEF); 98} 99 100static struct bio * 101g_bioq_first(struct g_bioq *bq) 102{ 103 struct bio *bp; 104 105 bp = TAILQ_FIRST(&bq->bio_queue); 106 if (bp != NULL) { 107 TAILQ_REMOVE(&bq->bio_queue, bp, bio_queue); 108 bq->bio_queue_length--; 109 } 110 return (bp); 111} 112 113static void 114g_bioq_enqueue_tail(struct bio *bp, struct g_bioq *rq) 115{ 116 117 g_bioq_lock(rq); 118 TAILQ_INSERT_TAIL(&rq->bio_queue, bp, bio_queue); 119 rq->bio_queue_length++; 120 g_bioq_unlock(rq); 121} 122 123struct bio * 124g_new_bio(void) 125{ 126 struct bio *bp; 127 128 g_bioq_lock(&g_bio_idle); 129 bp = g_bioq_first(&g_bio_idle); 130 g_bioq_unlock(&g_bio_idle); 131 if (bp == NULL) 132 bp = g_malloc(sizeof *bp, M_NOWAIT | M_ZERO); 133 /* g_trace(G_T_BIO, "g_new_bio() = %p", bp); */ 134 return (bp); 135} 136 137void 
138g_destroy_bio(struct bio *bp) 139{ 140 141 /* g_trace(G_T_BIO, "g_destroy_bio(%p)", bp); */ 142 bzero(bp, sizeof *bp); 143 g_bioq_enqueue_tail(bp, &g_bio_idle); 144} 145 146struct bio * 147g_clone_bio(struct bio *bp) 148{ 149 struct bio *bp2; 150 151 bp2 = g_new_bio(); 152 if (bp2 != NULL) { 153 bp2->bio_parent = bp; 154 bp2->bio_cmd = bp->bio_cmd; 155 bp2->bio_length = bp->bio_length; 156 bp2->bio_offset = bp->bio_offset; 157 bp2->bio_data = bp->bio_data; 158 bp2->bio_attribute = bp->bio_attribute; 159 bp->bio_children++; 160 } 161 /* g_trace(G_T_BIO, "g_clone_bio(%p) = %p", bp, bp2); */ 162 return(bp2); 163} 164 165void 166g_io_init() 167{ 168 169 g_bioq_init(&g_bio_run_down); 170 g_bioq_init(&g_bio_run_up); 171 g_bioq_init(&g_bio_run_task); 172 g_bioq_init(&g_bio_idle); 173} 174 175int 176g_io_setattr(const char *attr, struct g_consumer *cp, int len, void *ptr) 177{ 178 struct bio *bp; 179 int error; 180 181 g_trace(G_T_BIO, "bio_setattr(%s)", attr); 182 bp = g_new_bio(); 183 bp->bio_cmd = BIO_SETATTR; 184 bp->bio_done = NULL; 185 bp->bio_attribute = attr; 186 bp->bio_length = len; 187 bp->bio_data = ptr; 188 g_io_request(bp, cp); 189 error = biowait(bp, "gsetattr"); 190 g_destroy_bio(bp); 191 return (error); 192} 193 194 195int 196g_io_getattr(const char *attr, struct g_consumer *cp, int *len, void *ptr) 197{ 198 struct bio *bp; 199 int error; 200 201 g_trace(G_T_BIO, "bio_getattr(%s)", attr); 202 bp = g_new_bio(); 203 bp->bio_cmd = BIO_GETATTR; 204 bp->bio_done = NULL; 205 bp->bio_attribute = attr; 206 bp->bio_length = *len; 207 bp->bio_data = ptr; 208 g_io_request(bp, cp); 209 error = biowait(bp, "ggetattr"); 210 *len = bp->bio_completed; 211 g_destroy_bio(bp); 212 return (error); 213} 214 215static int 216g_io_check(struct bio *bp) 217{ 218 struct g_consumer *cp; 219 struct g_provider *pp; 220 221 cp = bp->bio_from; 222 pp = bp->bio_to; 223 224 /* Fail if access counters dont allow the operation */ 225 switch(bp->bio_cmd) { 226 case BIO_READ: 227 case 
BIO_GETATTR: 228 if (cp->acr == 0) 229 return (EPERM); 230 break; 231 case BIO_WRITE: 232 case BIO_DELETE: 233 case BIO_SETATTR: 234 if (cp->acw == 0) 235 return (EPERM); 236 break; 237 default: 238 return (EPERM); 239 } 240 /* if provider is marked for error, don't disturb. */ 241 if (pp->error) 242 return (pp->error); 243 244 switch(bp->bio_cmd) { 245 case BIO_READ: 246 case BIO_WRITE: 247 case BIO_DELETE: 248 /* Reject I/O not on sector boundary */ 249 if (bp->bio_offset % pp->sectorsize) 250 return (EINVAL); 251 /* Reject I/O not integral sector long */ 252 if (bp->bio_length % pp->sectorsize) 253 return (EINVAL); 254 /* Reject requests past the end of media. */ 255 if (bp->bio_offset > pp->mediasize) 256 return (EIO); 257 break; 258 default: 259 break; 260 } 261 return (0); 262} 263 264void 265g_io_request(struct bio *bp, struct g_consumer *cp) 266{ 267 struct g_provider *pp; 268 struct bintime bt; 269 270 pp = cp->provider; 271 KASSERT(cp != NULL, ("NULL cp in g_io_request")); 272 KASSERT(bp != NULL, ("NULL bp in g_io_request")); 273 KASSERT(bp->bio_data != NULL, ("NULL bp->data in g_io_request")); 274 KASSERT(pp != NULL, ("consumer not attached in g_io_request")); 275 276 bp->bio_from = cp; 277 bp->bio_to = pp; 278 bp->bio_error = 0; 279 bp->bio_completed = 0; 280 281 if (g_collectstats) { 282 binuptime(&bt); 283 bp->bio_t0 = bt; 284 if (cp->stat->nop == cp->stat->nend) 285 cp->stat->wentbusy = bt; /* Consumer is idle */ 286 if (pp->stat->nop == pp->stat->nend) 287 pp->stat->wentbusy = bt; /* Provider is idle */ 288 } 289 cp->stat->nop++; 290 pp->stat->nop++; 291 292 /* Pass it on down. 
*/ 293 g_trace(G_T_BIO, "bio_request(%p) from %p(%s) to %p(%s) cmd %d", 294 bp, cp, cp->geom->name, pp, pp->name, bp->bio_cmd); 295 g_bioq_enqueue_tail(bp, &g_bio_run_down); 296 wakeup(&g_wait_down); 297} 298 299void 300g_io_deliver(struct bio *bp, int error) 301{ 302 struct g_consumer *cp; 303 struct g_provider *pp; 304 struct bintime t1, dt; 305 int idx; 306 307 cp = bp->bio_from; 308 pp = bp->bio_to; 309 KASSERT(bp != NULL, ("NULL bp in g_io_deliver")); 310 KASSERT(cp != NULL, ("NULL bio_from in g_io_deliver")); 311 KASSERT(cp->geom != NULL, ("NULL bio_from->geom in g_io_deliver")); 312 KASSERT(pp != NULL, ("NULL bio_to in g_io_deliver")); 313 314 g_trace(G_T_BIO, 315"g_io_deliver(%p) from %p(%s) to %p(%s) cmd %d error %d off %jd len %jd", 316 bp, cp, cp->geom->name, pp, pp->name, bp->bio_cmd, error, 317 (intmax_t)bp->bio_offset, (intmax_t)bp->bio_length); 318 319 if (g_collectstats) { 320 switch (bp->bio_cmd) { 321 case BIO_READ: idx = G_STAT_IDX_READ; break; 322 case BIO_WRITE: idx = G_STAT_IDX_WRITE; break; 323 case BIO_DELETE: idx = G_STAT_IDX_DELETE; break; 324 case BIO_GETATTR: idx = -1; break; 325 case BIO_SETATTR: idx = -1; break; 326 default: 327 panic("unknown bio_cmd in g_io_deliver"); 328 break; 329 } 330 binuptime(&t1); 331 /* Raise the "inconsistent" flag for userland */ 332 atomic_add_acq_int(&cp->stat->seq0, 1); 333 atomic_add_acq_int(&pp->stat->seq0, 1); 334 if (idx >= 0) { 335 /* Account the service time */ 336 dt = t1; 337 bintime_sub(&dt, &bp->bio_t0); 338 bintime_add(&cp->stat->ops[idx].dt, &dt); 339 bintime_add(&pp->stat->ops[idx].dt, &dt); 340 /* ... and the metrics */ 341 pp->stat->ops[idx].nbyte += bp->bio_completed; 342 cp->stat->ops[idx].nbyte += bp->bio_completed; 343 pp->stat->ops[idx].nop++; 344 cp->stat->ops[idx].nop++; 345 /* ... 
and any errors */ 346 if (error == ENOMEM) { 347 cp->stat->ops[idx].nmem++; 348 pp->stat->ops[idx].nmem++; 349 } else if (error != 0) { 350 cp->stat->ops[idx].nerr++; 351 pp->stat->ops[idx].nerr++; 352 } 353 } 354 /* Account for busy time on the consumer */ 355 dt = t1; 356 bintime_sub(&dt, &cp->stat->wentbusy); 357 bintime_add(&cp->stat->bt, &dt); 358 cp->stat->wentbusy = t1; 359 /* Account for busy time on the provider */ 360 dt = t1; 361 bintime_sub(&dt, &pp->stat->wentbusy); 362 bintime_add(&pp->stat->bt, &dt); 363 pp->stat->wentbusy = t1; 364 /* Mark the structures as consistent again */ 365 atomic_add_acq_int(&cp->stat->seq1, 1); 366 atomic_add_acq_int(&pp->stat->seq1, 1); 367 } 368 cp->stat->nend++; 369 pp->stat->nend++; 370 371 if (error == ENOMEM) { 372 printf("ENOMEM %p on %p(%s)\n", bp, pp, pp->name); 373 g_io_request(bp, cp); 374 pace++; 375 return; 376 } 377 bp->bio_error = error; 378 g_bioq_enqueue_tail(bp, &g_bio_run_up); 379 wakeup(&g_wait_up); 380} 381 382void 383g_io_schedule_down(struct thread *tp __unused) 384{ 385 struct bio *bp; 386 off_t excess; 387 int error; 388 struct mtx mymutex; 389 390 bzero(&mymutex, sizeof mymutex); 391 mtx_init(&mymutex, "g_xdown", MTX_DEF, 0); 392 393 for(;;) { 394 g_bioq_lock(&g_bio_run_down); 395 bp = g_bioq_first(&g_bio_run_down); 396 if (bp == NULL) { 397 msleep(&g_wait_down, &g_bio_run_down.bio_queue_lock, 398 PRIBIO | PDROP, "g_down", hz/10); 399 continue; 400 } 401 g_bioq_unlock(&g_bio_run_down); 402 error = g_io_check(bp); 403 if (error) { 404 g_io_deliver(bp, error); 405 continue; 406 } 407 switch (bp->bio_cmd) { 408 case BIO_READ: 409 case BIO_WRITE: 410 case BIO_DELETE: 411 /* Truncate requests to the end of providers media. */ 412 excess = bp->bio_offset + bp->bio_length; 413 if (excess > bp->bio_to->mediasize) { 414 excess -= bp->bio_to->mediasize; 415 bp->bio_length -= excess; 416 } 417 /* Deliver zero length transfers right here. 
*/ 418 if (bp->bio_length == 0) { 419 g_io_deliver(bp, 0); 420 continue; 421 } 422 break; 423 default: 424 break; 425 } 426 mtx_lock(&mymutex); 427 bp->bio_to->geom->start(bp); 428 mtx_unlock(&mymutex); 429 if (pace) { 430 pace--; 431 break; 432 } 433 } 434} 435 436void 437bio_taskqueue(struct bio *bp, bio_task_t *func, void *arg) 438{ 439 bp->bio_task = func; 440 bp->bio_task_arg = arg; 441 /* 442 * The taskqueue is actually just a second queue off the "up" 443 * queue, so we use the same lock. 444 */ 445 g_bioq_lock(&g_bio_run_up); 446 TAILQ_INSERT_TAIL(&g_bio_run_task.bio_queue, bp, bio_queue); 447 g_bio_run_task.bio_queue_length++; 448 wakeup(&g_wait_up); 449 g_bioq_unlock(&g_bio_run_up); 450} 451 452 453void 454g_io_schedule_up(struct thread *tp __unused) 455{ 456 struct bio *bp; 457 struct mtx mymutex; 458 459 bzero(&mymutex, sizeof mymutex); 460 mtx_init(&mymutex, "g_xup", MTX_DEF, 0); 461 for(;;) { 462 g_bioq_lock(&g_bio_run_up); 463 bp = g_bioq_first(&g_bio_run_task); 464 if (bp != NULL) { 465 g_bioq_unlock(&g_bio_run_up); 466 mtx_lock(&mymutex); 467 bp->bio_task(bp, bp->bio_task_arg); 468 mtx_unlock(&mymutex); 469 continue; 470 } 471 bp = g_bioq_first(&g_bio_run_up); 472 if (bp != NULL) { 473 g_bioq_unlock(&g_bio_run_up); 474 mtx_lock(&mymutex); 475 biodone(bp); 476 mtx_unlock(&mymutex); 477 continue; 478 } 479 msleep(&g_wait_up, &g_bio_run_up.bio_queue_lock, 480 PRIBIO | PDROP, "g_up", hz/10); 481 } 482} 483 484void * 485g_read_data(struct g_consumer *cp, off_t offset, off_t length, int *error) 486{ 487 struct bio *bp; 488 void *ptr; 489 int errorc; 490 491 bp = g_new_bio(); 492 bp->bio_cmd = BIO_READ; 493 bp->bio_done = NULL; 494 bp->bio_offset = offset; 495 bp->bio_length = length; 496 ptr = g_malloc(length, M_WAITOK); 497 bp->bio_data = ptr; 498 g_io_request(bp, cp); 499 errorc = biowait(bp, "gread"); 500 if (error != NULL) 501 *error = errorc; 502 g_destroy_bio(bp); 503 if (errorc) { 504 g_free(ptr); 505 ptr = NULL; 506 } 507 return (ptr); 508} 509 
510int 511g_write_data(struct g_consumer *cp, off_t offset, void *ptr, off_t length) 512{ 513 struct bio *bp; 514 int error; 515 516 bp = g_new_bio(); 517 bp->bio_cmd = BIO_WRITE; 518 bp->bio_done = NULL; 519 bp->bio_offset = offset; 520 bp->bio_length = length; 521 bp->bio_data = ptr; 522 g_io_request(bp, cp); 523 error = biowait(bp, "gwrite"); 524 g_destroy_bio(bp); 525 return (error); 526} 527