/*-
 * Copyright (c) 2002 Poul-Henning Kamp
 * Copyright (c) 2002 Networks Associates Technology, Inc.
 * All rights reserved.
 *
 * This software was developed for the FreeBSD Project by Poul-Henning Kamp
 * and NAI Labs, the Security Research Division of Network Associates, Inc.
 * under DARPA/SPAWAR contract N66001-01-C-8035 ("CBOSS"), as part of the
 * DARPA CHATS research program.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. The names of the authors may not be used to endorse or promote
 *    products derived from this software without specific prior written
 *    permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 * $FreeBSD: head/sys/geom/geom_io.c 112367 2003-03-18 08:45:25Z phk $
 */

#include <sys/param.h>
#ifndef _KERNEL
#include <stdio.h>
#include <string.h>
#include <stdlib.h>
#include <signal.h>
#include <err.h>
#include <sched.h>
#else
#include <sys/systm.h>
#include <sys/kernel.h>
#include <sys/malloc.h>
#include <sys/bio.h>
#endif

#include <sys/errno.h>
#include <geom/geom.h>
#include <geom/geom_int.h>
#include <geom/geom_stats.h>

static struct g_bioq g_bio_run_down;
static struct g_bioq g_bio_run_up;
static struct g_bioq g_bio_run_task;
static struct g_bioq g_bio_idle;

static u_int pace;

#include <machine/atomic.h>

static void
g_bioq_lock(struct g_bioq *bq)
{

	mtx_lock(&bq->bio_queue_lock);
}

static void
g_bioq_unlock(struct g_bioq *bq)
{

	mtx_unlock(&bq->bio_queue_lock);
}

#if 0
static void
g_bioq_destroy(struct g_bioq *bq)
{

	mtx_destroy(&bq->bio_queue_lock);
}
#endif

static void
g_bioq_init(struct g_bioq *bq)
{

	TAILQ_INIT(&bq->bio_queue);
	mtx_init(&bq->bio_queue_lock, "bio queue", NULL, MTX_DEF);
}

static struct bio *
g_bioq_first(struct g_bioq *bq)
{
	struct bio *bp;

	bp = TAILQ_FIRST(&bq->bio_queue);
	if (bp != NULL) {
		TAILQ_REMOVE(&bq->bio_queue, bp, bio_queue);
		bq->bio_queue_length--;
	}
	return (bp);
}

static void
g_bioq_enqueue_tail(struct bio *bp, struct g_bioq *rq)
{

	g_bioq_lock(rq);
	TAILQ_INSERT_TAIL(&rq->bio_queue, bp, bio_queue);
	rq->bio_queue_length++;
	g_bioq_unlock(rq);
}

/*
 * Allocate a bio, preferably by recycling one off the idle queue.
 * The fallback is a M_NOWAIT allocation, so this can return NULL
 * and callers must be prepared for that.
 */
struct bio *
g_new_bio(void)
{
	struct bio *bp;

	g_bioq_lock(&g_bio_idle);
	bp = g_bioq_first(&g_bio_idle);
	g_bioq_unlock(&g_bio_idle);
	if (bp == NULL)
		bp = g_malloc(sizeof *bp, M_NOWAIT | M_ZERO);
	/* g_trace(G_T_BIO, "g_new_bio() = %p", bp); */
	return (bp);
}

void
g_destroy_bio(struct bio *bp)
{

	/* g_trace(G_T_BIO, "g_destroy_bio(%p)", bp); */
	bzero(bp, sizeof *bp);
	g_bioq_enqueue_tail(bp, &g_bio_idle);
}
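/*
 * Editorial usage sketch (not compiled in): how a caller is expected to
 * drive the allocate/recycle cycle above.  The consumer "cp" is assumed
 * to exist in the surrounding code; the wait channel name is arbitrary.
 */
#if 0
	struct bio *bp;

	bp = g_new_bio();
	if (bp == NULL)
		return (ENOMEM);	/* M_NOWAIT allocation can fail */
	/* ... fill in bio_cmd, bio_offset, bio_length, bio_data ... */
	g_io_request(bp, cp);
	(void)biowait(bp, "example");
	g_destroy_bio(bp);		/* back onto g_bio_idle for reuse */
#endif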
struct bio *
g_clone_bio(struct bio *bp)
{
	struct bio *bp2;

	bp2 = g_new_bio();
	if (bp2 != NULL) {
		bp2->bio_parent = bp;
		bp2->bio_cmd = bp->bio_cmd;
		bp2->bio_length = bp->bio_length;
		bp2->bio_offset = bp->bio_offset;
		bp2->bio_data = bp->bio_data;	/* Clone shares the parent's buffer. */
		bp2->bio_attribute = bp->bio_attribute;
		bp->bio_children++;
	}
	/* g_trace(G_T_BIO, "g_clone_bio(%p) = %p", bp, bp2); */
	return (bp2);
}

void
g_io_init(void)
{

	g_bioq_init(&g_bio_run_down);
	g_bioq_init(&g_bio_run_up);
	g_bioq_init(&g_bio_run_task);
	g_bioq_init(&g_bio_idle);
}

int
g_io_setattr(const char *attr, struct g_consumer *cp, int len, void *ptr)
{
	struct bio *bp;
	int error;

	g_trace(G_T_BIO, "bio_setattr(%s)", attr);
	bp = g_new_bio();
	if (bp == NULL)
		return (ENOMEM);	/* g_new_bio() is a M_NOWAIT allocation. */
	bp->bio_cmd = BIO_SETATTR;
	bp->bio_done = NULL;
	bp->bio_attribute = attr;
	bp->bio_length = len;
	bp->bio_data = ptr;
	g_io_request(bp, cp);
	error = biowait(bp, "gsetattr");
	g_destroy_bio(bp);
	return (error);
}

int
g_io_getattr(const char *attr, struct g_consumer *cp, int *len, void *ptr)
{
	struct bio *bp;
	int error;

	g_trace(G_T_BIO, "bio_getattr(%s)", attr);
	bp = g_new_bio();
	if (bp == NULL)
		return (ENOMEM);
	bp->bio_cmd = BIO_GETATTR;
	bp->bio_done = NULL;
	bp->bio_attribute = attr;
	bp->bio_length = *len;
	bp->bio_data = ptr;
	g_io_request(bp, cp);
	error = biowait(bp, "ggetattr");
	*len = bp->bio_completed;
	g_destroy_bio(bp);
	return (error);
}

static int
g_io_check(struct bio *bp)
{
	struct g_consumer *cp;
	struct g_provider *pp;

	cp = bp->bio_from;
	pp = bp->bio_to;

	/* Fail if the access counters don't allow the operation. */
	switch (bp->bio_cmd) {
	case BIO_READ:
	case BIO_GETATTR:
		if (cp->acr == 0)
			return (EPERM);
		break;
	case BIO_WRITE:
	case BIO_DELETE:
	case BIO_SETATTR:
		if (cp->acw == 0)
			return (EPERM);
		break;
	default:
		return (EPERM);
	}
	/* If the provider is marked for error, don't disturb it. */
	if (pp->error)
		return (pp->error);

	switch (bp->bio_cmd) {
	case BIO_READ:
	case BIO_WRITE:
	case BIO_DELETE:
		/* Reject I/O not on a sector boundary. */
		if (bp->bio_offset % pp->sectorsize)
			return (EINVAL);
		/* Reject I/O not an integral number of sectors long. */
		if (bp->bio_length % pp->sectorsize)
			return (EINVAL);
		/* Reject requests starting past the end of the media. */
		if (bp->bio_offset > pp->mediasize)
			return (EIO);
		break;
	default:
		break;
	}
	return (0);
}

void
g_io_request(struct bio *bp, struct g_consumer *cp)
{
	struct g_provider *pp;
	struct bintime bt;

	KASSERT(cp != NULL, ("NULL cp in g_io_request"));
	KASSERT(bp != NULL, ("NULL bp in g_io_request"));
	KASSERT(bp->bio_data != NULL, ("NULL bp->data in g_io_request"));
	/* Only dereference the consumer once the KASSERTs have seen it. */
	pp = cp->provider;
	KASSERT(pp != NULL, ("consumer not attached in g_io_request"));

	bp->bio_from = cp;
	bp->bio_to = pp;
	bp->bio_error = 0;
	bp->bio_completed = 0;

	if (g_collectstats) {
		binuptime(&bt);
		bp->bio_t0 = bt;
		if (cp->nstart == cp->nend)
			cp->stat->wentbusy = bt; /* Consumer is idle. */
		if (pp->nstart == pp->nend)
			pp->stat->wentbusy = bt; /* Provider is idle. */
		cp->stat->nop++;
		pp->stat->nop++;
	}
	cp->nstart++;
	pp->nstart++;

	/* Pass it on down. */
	g_trace(G_T_BIO, "bio_request(%p) from %p(%s) to %p(%s) cmd %d",
	    bp, cp, cp->geom->name, pp, pp->name, bp->bio_cmd);
	g_bioq_enqueue_tail(bp, &g_bio_run_down);
	wakeup(&g_wait_down);
}
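/*
 * Editorial sketch of the intended request flow (not compiled in): a
 * transforming class typically clones the bio it receives in its start()
 * method, sends the clone down its own consumer, and finishes the parent
 * from the clone's done routine via g_io_deliver().  The names
 * example_start/example_done are hypothetical, and this assumes a simple
 * one-to-one clone per parent request.
 */
#if 0
static void
example_done(struct bio *bp2)
{
	struct bio *bp;

	bp = bp2->bio_parent;
	bp->bio_completed = bp2->bio_completed;
	g_io_deliver(bp, bp2->bio_error);
	g_destroy_bio(bp2);
}

static void
example_start(struct bio *bp)
{
	struct g_geom *gp;
	struct bio *bp2;

	gp = bp->bio_to->geom;
	bp2 = g_clone_bio(bp);
	if (bp2 == NULL) {
		g_io_deliver(bp, ENOMEM);	/* triggers the pacing below */
		return;
	}
	bp2->bio_done = example_done;
	g_io_request(bp2, LIST_FIRST(&gp->consumer));
}
#endif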
void
g_io_deliver(struct bio *bp, int error)
{
	struct g_consumer *cp;
	struct g_provider *pp;
	struct bintime t1, dt;
	int idx;

	KASSERT(bp != NULL, ("NULL bp in g_io_deliver"));
	cp = bp->bio_from;
	pp = bp->bio_to;
	KASSERT(cp != NULL, ("NULL bio_from in g_io_deliver"));
	KASSERT(cp->geom != NULL, ("NULL bio_from->geom in g_io_deliver"));
	KASSERT(pp != NULL, ("NULL bio_to in g_io_deliver"));

	g_trace(G_T_BIO,
"g_io_deliver(%p) from %p(%s) to %p(%s) cmd %d error %d off %jd len %jd",
	    bp, cp, cp->geom->name, pp, pp->name, bp->bio_cmd, error,
	    (intmax_t)bp->bio_offset, (intmax_t)bp->bio_length);

	if (g_collectstats) {
		switch (bp->bio_cmd) {
		case BIO_READ:    idx = G_STAT_IDX_READ;   break;
		case BIO_WRITE:   idx = G_STAT_IDX_WRITE;  break;
		case BIO_DELETE:  idx = G_STAT_IDX_DELETE; break;
		case BIO_GETATTR: idx = -1; break;
		case BIO_SETATTR: idx = -1; break;
		default:
			panic("unknown bio_cmd in g_io_deliver");
			break;
		}
		binuptime(&t1);
		/* Raise the "inconsistent" flag for userland. */
		atomic_add_acq_int(&cp->stat->seq0, 1);
		atomic_add_acq_int(&pp->stat->seq0, 1);
		if (idx >= 0) {
			/* Account the service time... */
			dt = t1;
			bintime_sub(&dt, &bp->bio_t0);
			bintime_add(&cp->stat->ops[idx].dt, &dt);
			bintime_add(&pp->stat->ops[idx].dt, &dt);
			/* ... and the metrics... */
			pp->stat->ops[idx].nbyte += bp->bio_completed;
			cp->stat->ops[idx].nbyte += bp->bio_completed;
			pp->stat->ops[idx].nop++;
			cp->stat->ops[idx].nop++;
			/* ... and any errors. */
			if (error == ENOMEM) {
				cp->stat->ops[idx].nmem++;
				pp->stat->ops[idx].nmem++;
			} else if (error != 0) {
				cp->stat->ops[idx].nerr++;
				pp->stat->ops[idx].nerr++;
			}
		}
		/* Account for busy time on the consumer... */
		dt = t1;
		bintime_sub(&dt, &cp->stat->wentbusy);
		bintime_add(&cp->stat->bt, &dt);
		cp->stat->wentbusy = t1;
		/* ... and on the provider. */
		dt = t1;
		bintime_sub(&dt, &pp->stat->wentbusy);
		bintime_add(&pp->stat->bt, &dt);
		pp->stat->wentbusy = t1;
		/* Mark the structures as consistent again. */
		atomic_add_acq_int(&cp->stat->seq1, 1);
		atomic_add_acq_int(&pp->stat->seq1, 1);
		cp->stat->nend++;
		pp->stat->nend++;
	}
	cp->nend++;
	pp->nend++;

	if (error == ENOMEM) {
		/*
		 * The request ran out of memory down below; resubmit it
		 * and ask g_io_schedule_down() to pace itself for a while.
		 */
		printf("ENOMEM %p on %p(%s)\n", bp, pp, pp->name);
		g_io_request(bp, cp);
		pace++;
		return;
	}
	bp->bio_error = error;
	g_bioq_enqueue_tail(bp, &g_bio_run_up);
	wakeup(&g_wait_up);
}

void
g_io_schedule_down(struct thread *tp __unused)
{
	struct bio *bp;
	off_t excess;
	int error;
	struct mtx mymutex;

	bzero(&mymutex, sizeof mymutex);
	mtx_init(&mymutex, "g_xdown", NULL, MTX_DEF);

	for (;;) {
		g_bioq_lock(&g_bio_run_down);
		bp = g_bioq_first(&g_bio_run_down);
		if (bp == NULL) {
			msleep(&g_wait_down, &g_bio_run_down.bio_queue_lock,
			    PRIBIO | PDROP, "g_down", hz/10);
			continue;
		}
		g_bioq_unlock(&g_bio_run_down);
		error = g_io_check(bp);
		if (error) {
			g_io_deliver(bp, error);
			continue;
		}
		switch (bp->bio_cmd) {
		case BIO_READ:
		case BIO_WRITE:
		case BIO_DELETE:
			/* Truncate requests to the end of the provider's media. */
			excess = bp->bio_offset + bp->bio_length;
			if (excess > bp->bio_to->mediasize) {
				excess -= bp->bio_to->mediasize;
				bp->bio_length -= excess;
			}
			/* Deliver zero-length transfers right here. */
			if (bp->bio_length == 0) {
				g_io_deliver(bp, 0);
				continue;
			}
			break;
		default:
			break;
		}
		/*
		 * Hold a private mutex across the start() call, presumably
		 * so that a method which tries to sleep trips the kernel's
		 * sleeping-with-mutex-held assertion instead of silently
		 * stalling the down path.
		 */
		mtx_lock(&mymutex);
		bp->bio_to->geom->start(bp);
		mtx_unlock(&mymutex);
		if (pace) {
			pace--;	/* See the ENOMEM handling in g_io_deliver(). */
			break;
		}
	}
}
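/*
 * Scheduling note (editorial): these loops are intended as the bodies of
 * dedicated kernel threads -- presumably the "g_down" and "g_up" threads
 * created elsewhere in GEOM (geom_kern.c in this tree) -- so each queue
 * is drained by exactly one thread, and the private "mymutex" makes any
 * sleeping start()/done routine fail loudly rather than stall a queue.
 */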
void
bio_taskqueue(struct bio *bp, bio_task_t *func, void *arg)
{

	bp->bio_task = func;
	bp->bio_task_arg = arg;
	/*
	 * The taskqueue is actually just a second queue off the "up"
	 * queue, so we use the same lock.
	 */
	g_bioq_lock(&g_bio_run_up);
	TAILQ_INSERT_TAIL(&g_bio_run_task.bio_queue, bp, bio_queue);
	g_bio_run_task.bio_queue_length++;
	wakeup(&g_wait_up);
	g_bioq_unlock(&g_bio_run_up);
}

void
g_io_schedule_up(struct thread *tp __unused)
{
	struct bio *bp;
	struct mtx mymutex;

	bzero(&mymutex, sizeof mymutex);
	mtx_init(&mymutex, "g_xup", NULL, MTX_DEF);
	for (;;) {
		/* Task bios take priority over ordinary completions. */
		g_bioq_lock(&g_bio_run_up);
		bp = g_bioq_first(&g_bio_run_task);
		if (bp != NULL) {
			g_bioq_unlock(&g_bio_run_up);
			mtx_lock(&mymutex);
			bp->bio_task(bp, bp->bio_task_arg);
			mtx_unlock(&mymutex);
			continue;
		}
		bp = g_bioq_first(&g_bio_run_up);
		if (bp != NULL) {
			g_bioq_unlock(&g_bio_run_up);
			mtx_lock(&mymutex);
			biodone(bp);
			mtx_unlock(&mymutex);
			continue;
		}
		msleep(&g_wait_up, &g_bio_run_up.bio_queue_lock,
		    PRIBIO | PDROP, "g_up", hz/10);
	}
}

void *
g_read_data(struct g_consumer *cp, off_t offset, off_t length, int *error)
{
	struct bio *bp;
	void *ptr;
	int errorc;

	bp = g_new_bio();
	if (bp == NULL) {
		if (error != NULL)
			*error = ENOMEM;
		return (NULL);
	}
	bp->bio_cmd = BIO_READ;
	bp->bio_done = NULL;
	bp->bio_offset = offset;
	bp->bio_length = length;
	ptr = g_malloc(length, M_WAITOK);
	bp->bio_data = ptr;
	g_io_request(bp, cp);
	errorc = biowait(bp, "gread");
	if (error != NULL)
		*error = errorc;
	g_destroy_bio(bp);
	if (errorc) {
		g_free(ptr);
		ptr = NULL;
	}
	return (ptr);
}

int
g_write_data(struct g_consumer *cp, off_t offset, void *ptr, off_t length)
{
	struct bio *bp;
	int error;

	bp = g_new_bio();
	if (bp == NULL)
		return (ENOMEM);
	bp->bio_cmd = BIO_WRITE;
	bp->bio_done = NULL;
	bp->bio_offset = offset;
	bp->bio_length = length;
	bp->bio_data = ptr;
	g_io_request(bp, cp);
	error = biowait(bp, "gwrite");
	g_destroy_bio(bp);
	return (error);
}
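/*
 * Editorial usage sketch for the synchronous helpers (not compiled in):
 * reading one sector of metadata off a provider, the way a class's taste
 * method typically probes for its on-disk magic.  The consumer "cp" is
 * assumed to be attached and opened for reading, and EXAMPLE_MAGIC is
 * hypothetical.
 */
#if 0
	struct g_provider *pp = cp->provider;
	u_char *buf;
	int error;

	buf = g_read_data(cp, pp->mediasize - pp->sectorsize,
	    pp->sectorsize, &error);
	if (buf == NULL)
		return (error);
	if (bcmp(buf, EXAMPLE_MAGIC, sizeof EXAMPLE_MAGIC - 1) == 0) {
		/* ... recognized our metadata, configure the geom ... */
	}
	g_free(buf);
#endif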