/* geom_io.c — FreeBSD GEOM I/O request path, revision 112027 */
1/*- 2 * Copyright (c) 2002 Poul-Henning Kamp 3 * Copyright (c) 2002 Networks Associates Technology, Inc. 4 * All rights reserved. 5 * 6 * This software was developed for the FreeBSD Project by Poul-Henning Kamp 7 * and NAI Labs, the Security Research Division of Network Associates, Inc. 8 * under DARPA/SPAWAR contract N66001-01-C-8035 ("CBOSS"), as part of the 9 * DARPA CHATS research program. 10 * 11 * Redistribution and use in source and binary forms, with or without 12 * modification, are permitted provided that the following conditions 13 * are met: 14 * 1. Redistributions of source code must retain the above copyright 15 * notice, this list of conditions and the following disclaimer. 16 * 2. Redistributions in binary form must reproduce the above copyright 17 * notice, this list of conditions and the following disclaimer in the 18 * documentation and/or other materials provided with the distribution. 19 * 3. The names of the authors may not be used to endorse or promote 20 * products derived from this software without specific prior written 21 * permission. 22 * 23 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 24 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 25 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 26 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 27 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 28 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 29 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 30 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 31 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 32 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 33 * SUCH DAMAGE. 
34 * 35 * $FreeBSD: head/sys/geom/geom_io.c 112027 2003-03-09 09:59:48Z phk $ 36 */ 37 38 39#include <sys/param.h> 40#include <sys/stdint.h> 41#ifndef _KERNEL 42#include <stdio.h> 43#include <string.h> 44#include <stdlib.h> 45#include <signal.h> 46#include <err.h> 47#include <sched.h> 48#else 49#include <sys/systm.h> 50#include <sys/kernel.h> 51#include <sys/malloc.h> 52#include <sys/bio.h> 53#endif 54 55#include <sys/errno.h> 56#include <geom/geom.h> 57#include <geom/geom_int.h> 58#include <geom/geom_stats.h> 59 60static struct g_bioq g_bio_run_down; 61static struct g_bioq g_bio_run_up; 62static struct g_bioq g_bio_run_task; 63static struct g_bioq g_bio_idle; 64 65static u_int pace; 66 67#include <machine/atomic.h> 68 69static void 70g_bioq_lock(struct g_bioq *bq) 71{ 72 73 mtx_lock(&bq->bio_queue_lock); 74} 75 76static void 77g_bioq_unlock(struct g_bioq *bq) 78{ 79 80 mtx_unlock(&bq->bio_queue_lock); 81} 82 83#if 0 84static void 85g_bioq_destroy(struct g_bioq *bq) 86{ 87 88 mtx_destroy(&bq->bio_queue_lock); 89} 90#endif 91 92static void 93g_bioq_init(struct g_bioq *bq) 94{ 95 96 TAILQ_INIT(&bq->bio_queue); 97 mtx_init(&bq->bio_queue_lock, "bio queue", NULL, MTX_DEF); 98} 99 100static struct bio * 101g_bioq_first(struct g_bioq *bq) 102{ 103 struct bio *bp; 104 105 bp = TAILQ_FIRST(&bq->bio_queue); 106 if (bp != NULL) { 107 TAILQ_REMOVE(&bq->bio_queue, bp, bio_queue); 108 bq->bio_queue_length--; 109 } 110 return (bp); 111} 112 113static void 114g_bioq_enqueue_tail(struct bio *bp, struct g_bioq *rq) 115{ 116 117 g_bioq_lock(rq); 118 TAILQ_INSERT_TAIL(&rq->bio_queue, bp, bio_queue); 119 rq->bio_queue_length++; 120 g_bioq_unlock(rq); 121} 122 123struct bio * 124g_new_bio(void) 125{ 126 struct bio *bp; 127 128 g_bioq_lock(&g_bio_idle); 129 bp = g_bioq_first(&g_bio_idle); 130 g_bioq_unlock(&g_bio_idle); 131 if (bp == NULL) 132 bp = g_malloc(sizeof *bp, M_NOWAIT | M_ZERO); 133 /* g_trace(G_T_BIO, "g_new_bio() = %p", bp); */ 134 return (bp); 135} 136 137void 
138g_destroy_bio(struct bio *bp) 139{ 140 141 /* g_trace(G_T_BIO, "g_destroy_bio(%p)", bp); */ 142 bzero(bp, sizeof *bp); 143 g_bioq_enqueue_tail(bp, &g_bio_idle); 144} 145 146struct bio * 147g_clone_bio(struct bio *bp) 148{ 149 struct bio *bp2; 150 151 bp2 = g_new_bio(); 152 if (bp2 != NULL) { 153 bp2->bio_parent = bp; 154 bp2->bio_cmd = bp->bio_cmd; 155 bp2->bio_length = bp->bio_length; 156 bp2->bio_offset = bp->bio_offset; 157 bp2->bio_data = bp->bio_data; 158 bp2->bio_attribute = bp->bio_attribute; 159 bp->bio_children++; 160 } 161 /* g_trace(G_T_BIO, "g_clone_bio(%p) = %p", bp, bp2); */ 162 return(bp2); 163} 164 165void 166g_io_init() 167{ 168 169 g_bioq_init(&g_bio_run_down); 170 g_bioq_init(&g_bio_run_up); 171 g_bioq_init(&g_bio_run_task); 172 g_bioq_init(&g_bio_idle); 173} 174 175int 176g_io_setattr(const char *attr, struct g_consumer *cp, int len, void *ptr) 177{ 178 struct bio *bp; 179 int error; 180 181 g_trace(G_T_BIO, "bio_setattr(%s)", attr); 182 bp = g_new_bio(); 183 bp->bio_cmd = BIO_SETATTR; 184 bp->bio_done = NULL; 185 bp->bio_attribute = attr; 186 bp->bio_length = len; 187 bp->bio_data = ptr; 188 g_io_request(bp, cp); 189 error = biowait(bp, "gsetattr"); 190 g_destroy_bio(bp); 191 return (error); 192} 193 194 195int 196g_io_getattr(const char *attr, struct g_consumer *cp, int *len, void *ptr) 197{ 198 struct bio *bp; 199 int error; 200 201 g_trace(G_T_BIO, "bio_getattr(%s)", attr); 202 bp = g_new_bio(); 203 bp->bio_cmd = BIO_GETATTR; 204 bp->bio_done = NULL; 205 bp->bio_attribute = attr; 206 bp->bio_length = *len; 207 bp->bio_data = ptr; 208 g_io_request(bp, cp); 209 error = biowait(bp, "ggetattr"); 210 *len = bp->bio_completed; 211 g_destroy_bio(bp); 212 return (error); 213} 214 215static int 216g_io_check(struct bio *bp) 217{ 218 struct g_consumer *cp; 219 struct g_provider *pp; 220 221 cp = bp->bio_from; 222 pp = bp->bio_to; 223 224 /* Fail if access counters dont allow the operation */ 225 switch(bp->bio_cmd) { 226 case BIO_READ: 227 case 
BIO_GETATTR: 228 if (cp->acr == 0) 229 return (EPERM); 230 break; 231 case BIO_WRITE: 232 case BIO_DELETE: 233 case BIO_SETATTR: 234 if (cp->acw == 0) 235 return (EPERM); 236 break; 237 default: 238 return (EPERM); 239 } 240 /* if provider is marked for error, don't disturb. */ 241 if (pp->error) 242 return (pp->error); 243 244 switch(bp->bio_cmd) { 245 case BIO_READ: 246 case BIO_WRITE: 247 case BIO_DELETE: 248 /* Reject I/O not on sector boundary */ 249 if (bp->bio_offset % pp->sectorsize) 250 return (EINVAL); 251 /* Reject I/O not integral sector long */ 252 if (bp->bio_length % pp->sectorsize) 253 return (EINVAL); 254 /* Reject requests past the end of media. */ 255 if (bp->bio_offset > pp->mediasize) 256 return (EIO); 257 break; 258 default: 259 break; 260 } 261 return (0); 262} 263 264void 265g_io_request(struct bio *bp, struct g_consumer *cp) 266{ 267 struct g_provider *pp; 268 struct bintime bt; 269 270 pp = cp->provider; 271 KASSERT(cp != NULL, ("NULL cp in g_io_request")); 272 KASSERT(bp != NULL, ("NULL bp in g_io_request")); 273 KASSERT(bp->bio_data != NULL, ("NULL bp->data in g_io_request")); 274 KASSERT(pp != NULL, ("consumer not attached in g_io_request")); 275 276 bp->bio_from = cp; 277 bp->bio_to = pp; 278 bp->bio_error = 0; 279 bp->bio_completed = 0; 280 281 if (g_collectstats) { 282 binuptime(&bt); 283 bp->bio_t0 = bt; 284 if (cp->nstart == cp->nend) 285 cp->stat->wentbusy = bt; /* Consumer is idle */ 286 if (pp->nstart == pp->nend) 287 pp->stat->wentbusy = bt; /* Provider is idle */ 288 cp->stat->nop++; 289 pp->stat->nop++; 290 } 291 cp->nstart++; 292 pp->nstart++; 293 294 /* Pass it on down. 
*/ 295 g_trace(G_T_BIO, "bio_request(%p) from %p(%s) to %p(%s) cmd %d", 296 bp, cp, cp->geom->name, pp, pp->name, bp->bio_cmd); 297 g_bioq_enqueue_tail(bp, &g_bio_run_down); 298 wakeup(&g_wait_down); 299} 300 301void 302g_io_deliver(struct bio *bp, int error) 303{ 304 struct g_consumer *cp; 305 struct g_provider *pp; 306 struct bintime t1, dt; 307 int idx; 308 309 cp = bp->bio_from; 310 pp = bp->bio_to; 311 KASSERT(bp != NULL, ("NULL bp in g_io_deliver")); 312 KASSERT(cp != NULL, ("NULL bio_from in g_io_deliver")); 313 KASSERT(cp->geom != NULL, ("NULL bio_from->geom in g_io_deliver")); 314 KASSERT(pp != NULL, ("NULL bio_to in g_io_deliver")); 315 316 g_trace(G_T_BIO, 317"g_io_deliver(%p) from %p(%s) to %p(%s) cmd %d error %d off %jd len %jd", 318 bp, cp, cp->geom->name, pp, pp->name, bp->bio_cmd, error, 319 (intmax_t)bp->bio_offset, (intmax_t)bp->bio_length); 320 321 if (g_collectstats) { 322 switch (bp->bio_cmd) { 323 case BIO_READ: idx = G_STAT_IDX_READ; break; 324 case BIO_WRITE: idx = G_STAT_IDX_WRITE; break; 325 case BIO_DELETE: idx = G_STAT_IDX_DELETE; break; 326 case BIO_GETATTR: idx = -1; break; 327 case BIO_SETATTR: idx = -1; break; 328 default: 329 panic("unknown bio_cmd in g_io_deliver"); 330 break; 331 } 332 binuptime(&t1); 333 /* Raise the "inconsistent" flag for userland */ 334 atomic_add_acq_int(&cp->stat->seq0, 1); 335 atomic_add_acq_int(&pp->stat->seq0, 1); 336 if (idx >= 0) { 337 /* Account the service time */ 338 dt = t1; 339 bintime_sub(&dt, &bp->bio_t0); 340 bintime_add(&cp->stat->ops[idx].dt, &dt); 341 bintime_add(&pp->stat->ops[idx].dt, &dt); 342 /* ... and the metrics */ 343 pp->stat->ops[idx].nbyte += bp->bio_completed; 344 cp->stat->ops[idx].nbyte += bp->bio_completed; 345 pp->stat->ops[idx].nop++; 346 cp->stat->ops[idx].nop++; 347 /* ... 
and any errors */ 348 if (error == ENOMEM) { 349 cp->stat->ops[idx].nmem++; 350 pp->stat->ops[idx].nmem++; 351 } else if (error != 0) { 352 cp->stat->ops[idx].nerr++; 353 pp->stat->ops[idx].nerr++; 354 } 355 } 356 /* Account for busy time on the consumer */ 357 dt = t1; 358 bintime_sub(&dt, &cp->stat->wentbusy); 359 bintime_add(&cp->stat->bt, &dt); 360 cp->stat->wentbusy = t1; 361 /* Account for busy time on the provider */ 362 dt = t1; 363 bintime_sub(&dt, &pp->stat->wentbusy); 364 bintime_add(&pp->stat->bt, &dt); 365 pp->stat->wentbusy = t1; 366 /* Mark the structures as consistent again */ 367 atomic_add_acq_int(&cp->stat->seq1, 1); 368 atomic_add_acq_int(&pp->stat->seq1, 1); 369 cp->stat->nend++; 370 pp->stat->nend++; 371 } 372 cp->nend++; 373 pp->nend++; 374 375 if (error == ENOMEM) { 376 printf("ENOMEM %p on %p(%s)\n", bp, pp, pp->name); 377 g_io_request(bp, cp); 378 pace++; 379 return; 380 } 381 bp->bio_error = error; 382 g_bioq_enqueue_tail(bp, &g_bio_run_up); 383 wakeup(&g_wait_up); 384} 385 386void 387g_io_schedule_down(struct thread *tp __unused) 388{ 389 struct bio *bp; 390 off_t excess; 391 int error; 392 struct mtx mymutex; 393 394 bzero(&mymutex, sizeof mymutex); 395 mtx_init(&mymutex, "g_xdown", MTX_DEF, 0); 396 397 for(;;) { 398 g_bioq_lock(&g_bio_run_down); 399 bp = g_bioq_first(&g_bio_run_down); 400 if (bp == NULL) { 401 msleep(&g_wait_down, &g_bio_run_down.bio_queue_lock, 402 PRIBIO | PDROP, "g_down", hz/10); 403 continue; 404 } 405 g_bioq_unlock(&g_bio_run_down); 406 error = g_io_check(bp); 407 if (error) { 408 g_io_deliver(bp, error); 409 continue; 410 } 411 switch (bp->bio_cmd) { 412 case BIO_READ: 413 case BIO_WRITE: 414 case BIO_DELETE: 415 /* Truncate requests to the end of providers media. */ 416 excess = bp->bio_offset + bp->bio_length; 417 if (excess > bp->bio_to->mediasize) { 418 excess -= bp->bio_to->mediasize; 419 bp->bio_length -= excess; 420 } 421 /* Deliver zero length transfers right here. 
*/ 422 if (bp->bio_length == 0) { 423 g_io_deliver(bp, 0); 424 continue; 425 } 426 break; 427 default: 428 break; 429 } 430 mtx_lock(&mymutex); 431 bp->bio_to->geom->start(bp); 432 mtx_unlock(&mymutex); 433 if (pace) { 434 pace--; 435 break; 436 } 437 } 438} 439 440void 441bio_taskqueue(struct bio *bp, bio_task_t *func, void *arg) 442{ 443 bp->bio_task = func; 444 bp->bio_task_arg = arg; 445 /* 446 * The taskqueue is actually just a second queue off the "up" 447 * queue, so we use the same lock. 448 */ 449 g_bioq_lock(&g_bio_run_up); 450 TAILQ_INSERT_TAIL(&g_bio_run_task.bio_queue, bp, bio_queue); 451 g_bio_run_task.bio_queue_length++; 452 wakeup(&g_wait_up); 453 g_bioq_unlock(&g_bio_run_up); 454} 455 456 457void 458g_io_schedule_up(struct thread *tp __unused) 459{ 460 struct bio *bp; 461 struct mtx mymutex; 462 463 bzero(&mymutex, sizeof mymutex); 464 mtx_init(&mymutex, "g_xup", MTX_DEF, 0); 465 for(;;) { 466 g_bioq_lock(&g_bio_run_up); 467 bp = g_bioq_first(&g_bio_run_task); 468 if (bp != NULL) { 469 g_bioq_unlock(&g_bio_run_up); 470 mtx_lock(&mymutex); 471 bp->bio_task(bp, bp->bio_task_arg); 472 mtx_unlock(&mymutex); 473 continue; 474 } 475 bp = g_bioq_first(&g_bio_run_up); 476 if (bp != NULL) { 477 g_bioq_unlock(&g_bio_run_up); 478 mtx_lock(&mymutex); 479 biodone(bp); 480 mtx_unlock(&mymutex); 481 continue; 482 } 483 msleep(&g_wait_up, &g_bio_run_up.bio_queue_lock, 484 PRIBIO | PDROP, "g_up", hz/10); 485 } 486} 487 488void * 489g_read_data(struct g_consumer *cp, off_t offset, off_t length, int *error) 490{ 491 struct bio *bp; 492 void *ptr; 493 int errorc; 494 495 bp = g_new_bio(); 496 bp->bio_cmd = BIO_READ; 497 bp->bio_done = NULL; 498 bp->bio_offset = offset; 499 bp->bio_length = length; 500 ptr = g_malloc(length, M_WAITOK); 501 bp->bio_data = ptr; 502 g_io_request(bp, cp); 503 errorc = biowait(bp, "gread"); 504 if (error != NULL) 505 *error = errorc; 506 g_destroy_bio(bp); 507 if (errorc) { 508 g_free(ptr); 509 ptr = NULL; 510 } 511 return (ptr); 512} 513 
514int 515g_write_data(struct g_consumer *cp, off_t offset, void *ptr, off_t length) 516{ 517 struct bio *bp; 518 int error; 519 520 bp = g_new_bio(); 521 bp->bio_cmd = BIO_WRITE; 522 bp->bio_done = NULL; 523 bp->bio_offset = offset; 524 bp->bio_length = length; 525 bp->bio_data = ptr; 526 g_io_request(bp, cp); 527 error = biowait(bp, "gwrite"); 528 g_destroy_bio(bp); 529 return (error); 530} 531