/* geom_io.c — FreeBSD revision 112367 */
1/*- 2 * Copyright (c) 2002 Poul-Henning Kamp 3 * Copyright (c) 2002 Networks Associates Technology, Inc. 4 * All rights reserved. 5 * 6 * This software was developed for the FreeBSD Project by Poul-Henning Kamp 7 * and NAI Labs, the Security Research Division of Network Associates, Inc. 8 * under DARPA/SPAWAR contract N66001-01-C-8035 ("CBOSS"), as part of the 9 * DARPA CHATS research program. 10 * 11 * Redistribution and use in source and binary forms, with or without 12 * modification, are permitted provided that the following conditions 13 * are met: 14 * 1. Redistributions of source code must retain the above copyright 15 * notice, this list of conditions and the following disclaimer. 16 * 2. Redistributions in binary form must reproduce the above copyright 17 * notice, this list of conditions and the following disclaimer in the 18 * documentation and/or other materials provided with the distribution. 19 * 3. The names of the authors may not be used to endorse or promote 20 * products derived from this software without specific prior written 21 * permission. 22 * 23 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 24 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 25 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 26 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 27 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 28 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 29 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 30 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 31 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 32 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 33 * SUCH DAMAGE. 
34 * 35 * $FreeBSD: head/sys/geom/geom_io.c 112367 2003-03-18 08:45:25Z phk $ 36 */ 37 38 39#include <sys/param.h> 40#ifndef _KERNEL 41#include <stdio.h> 42#include <string.h> 43#include <stdlib.h> 44#include <signal.h> 45#include <err.h> 46#include <sched.h> 47#else 48#include <sys/systm.h> 49#include <sys/kernel.h> 50#include <sys/malloc.h> 51#include <sys/bio.h> 52#endif 53 54#include <sys/errno.h> 55#include <geom/geom.h> 56#include <geom/geom_int.h> 57#include <geom/geom_stats.h> 58 59static struct g_bioq g_bio_run_down; 60static struct g_bioq g_bio_run_up; 61static struct g_bioq g_bio_run_task; 62static struct g_bioq g_bio_idle; 63 64static u_int pace; 65 66#include <machine/atomic.h> 67 68static void 69g_bioq_lock(struct g_bioq *bq) 70{ 71 72 mtx_lock(&bq->bio_queue_lock); 73} 74 75static void 76g_bioq_unlock(struct g_bioq *bq) 77{ 78 79 mtx_unlock(&bq->bio_queue_lock); 80} 81 82#if 0 83static void 84g_bioq_destroy(struct g_bioq *bq) 85{ 86 87 mtx_destroy(&bq->bio_queue_lock); 88} 89#endif 90 91static void 92g_bioq_init(struct g_bioq *bq) 93{ 94 95 TAILQ_INIT(&bq->bio_queue); 96 mtx_init(&bq->bio_queue_lock, "bio queue", NULL, MTX_DEF); 97} 98 99static struct bio * 100g_bioq_first(struct g_bioq *bq) 101{ 102 struct bio *bp; 103 104 bp = TAILQ_FIRST(&bq->bio_queue); 105 if (bp != NULL) { 106 TAILQ_REMOVE(&bq->bio_queue, bp, bio_queue); 107 bq->bio_queue_length--; 108 } 109 return (bp); 110} 111 112static void 113g_bioq_enqueue_tail(struct bio *bp, struct g_bioq *rq) 114{ 115 116 g_bioq_lock(rq); 117 TAILQ_INSERT_TAIL(&rq->bio_queue, bp, bio_queue); 118 rq->bio_queue_length++; 119 g_bioq_unlock(rq); 120} 121 122struct bio * 123g_new_bio(void) 124{ 125 struct bio *bp; 126 127 g_bioq_lock(&g_bio_idle); 128 bp = g_bioq_first(&g_bio_idle); 129 g_bioq_unlock(&g_bio_idle); 130 if (bp == NULL) 131 bp = g_malloc(sizeof *bp, M_NOWAIT | M_ZERO); 132 /* g_trace(G_T_BIO, "g_new_bio() = %p", bp); */ 133 return (bp); 134} 135 136void 137g_destroy_bio(struct bio *bp) 138{ 
139 140 /* g_trace(G_T_BIO, "g_destroy_bio(%p)", bp); */ 141 bzero(bp, sizeof *bp); 142 g_bioq_enqueue_tail(bp, &g_bio_idle); 143} 144 145struct bio * 146g_clone_bio(struct bio *bp) 147{ 148 struct bio *bp2; 149 150 bp2 = g_new_bio(); 151 if (bp2 != NULL) { 152 bp2->bio_parent = bp; 153 bp2->bio_cmd = bp->bio_cmd; 154 bp2->bio_length = bp->bio_length; 155 bp2->bio_offset = bp->bio_offset; 156 bp2->bio_data = bp->bio_data; 157 bp2->bio_attribute = bp->bio_attribute; 158 bp->bio_children++; 159 } 160 /* g_trace(G_T_BIO, "g_clone_bio(%p) = %p", bp, bp2); */ 161 return(bp2); 162} 163 164void 165g_io_init() 166{ 167 168 g_bioq_init(&g_bio_run_down); 169 g_bioq_init(&g_bio_run_up); 170 g_bioq_init(&g_bio_run_task); 171 g_bioq_init(&g_bio_idle); 172} 173 174int 175g_io_setattr(const char *attr, struct g_consumer *cp, int len, void *ptr) 176{ 177 struct bio *bp; 178 int error; 179 180 g_trace(G_T_BIO, "bio_setattr(%s)", attr); 181 bp = g_new_bio(); 182 bp->bio_cmd = BIO_SETATTR; 183 bp->bio_done = NULL; 184 bp->bio_attribute = attr; 185 bp->bio_length = len; 186 bp->bio_data = ptr; 187 g_io_request(bp, cp); 188 error = biowait(bp, "gsetattr"); 189 g_destroy_bio(bp); 190 return (error); 191} 192 193 194int 195g_io_getattr(const char *attr, struct g_consumer *cp, int *len, void *ptr) 196{ 197 struct bio *bp; 198 int error; 199 200 g_trace(G_T_BIO, "bio_getattr(%s)", attr); 201 bp = g_new_bio(); 202 bp->bio_cmd = BIO_GETATTR; 203 bp->bio_done = NULL; 204 bp->bio_attribute = attr; 205 bp->bio_length = *len; 206 bp->bio_data = ptr; 207 g_io_request(bp, cp); 208 error = biowait(bp, "ggetattr"); 209 *len = bp->bio_completed; 210 g_destroy_bio(bp); 211 return (error); 212} 213 214static int 215g_io_check(struct bio *bp) 216{ 217 struct g_consumer *cp; 218 struct g_provider *pp; 219 220 cp = bp->bio_from; 221 pp = bp->bio_to; 222 223 /* Fail if access counters dont allow the operation */ 224 switch(bp->bio_cmd) { 225 case BIO_READ: 226 case BIO_GETATTR: 227 if (cp->acr == 0) 228 
return (EPERM); 229 break; 230 case BIO_WRITE: 231 case BIO_DELETE: 232 case BIO_SETATTR: 233 if (cp->acw == 0) 234 return (EPERM); 235 break; 236 default: 237 return (EPERM); 238 } 239 /* if provider is marked for error, don't disturb. */ 240 if (pp->error) 241 return (pp->error); 242 243 switch(bp->bio_cmd) { 244 case BIO_READ: 245 case BIO_WRITE: 246 case BIO_DELETE: 247 /* Reject I/O not on sector boundary */ 248 if (bp->bio_offset % pp->sectorsize) 249 return (EINVAL); 250 /* Reject I/O not integral sector long */ 251 if (bp->bio_length % pp->sectorsize) 252 return (EINVAL); 253 /* Reject requests past the end of media. */ 254 if (bp->bio_offset > pp->mediasize) 255 return (EIO); 256 break; 257 default: 258 break; 259 } 260 return (0); 261} 262 263void 264g_io_request(struct bio *bp, struct g_consumer *cp) 265{ 266 struct g_provider *pp; 267 struct bintime bt; 268 269 pp = cp->provider; 270 KASSERT(cp != NULL, ("NULL cp in g_io_request")); 271 KASSERT(bp != NULL, ("NULL bp in g_io_request")); 272 KASSERT(bp->bio_data != NULL, ("NULL bp->data in g_io_request")); 273 KASSERT(pp != NULL, ("consumer not attached in g_io_request")); 274 275 bp->bio_from = cp; 276 bp->bio_to = pp; 277 bp->bio_error = 0; 278 bp->bio_completed = 0; 279 280 if (g_collectstats) { 281 binuptime(&bt); 282 bp->bio_t0 = bt; 283 if (cp->nstart == cp->nend) 284 cp->stat->wentbusy = bt; /* Consumer is idle */ 285 if (pp->nstart == pp->nend) 286 pp->stat->wentbusy = bt; /* Provider is idle */ 287 cp->stat->nop++; 288 pp->stat->nop++; 289 } 290 cp->nstart++; 291 pp->nstart++; 292 293 /* Pass it on down. 
*/ 294 g_trace(G_T_BIO, "bio_request(%p) from %p(%s) to %p(%s) cmd %d", 295 bp, cp, cp->geom->name, pp, pp->name, bp->bio_cmd); 296 g_bioq_enqueue_tail(bp, &g_bio_run_down); 297 wakeup(&g_wait_down); 298} 299 300void 301g_io_deliver(struct bio *bp, int error) 302{ 303 struct g_consumer *cp; 304 struct g_provider *pp; 305 struct bintime t1, dt; 306 int idx; 307 308 cp = bp->bio_from; 309 pp = bp->bio_to; 310 KASSERT(bp != NULL, ("NULL bp in g_io_deliver")); 311 KASSERT(cp != NULL, ("NULL bio_from in g_io_deliver")); 312 KASSERT(cp->geom != NULL, ("NULL bio_from->geom in g_io_deliver")); 313 KASSERT(pp != NULL, ("NULL bio_to in g_io_deliver")); 314 315 g_trace(G_T_BIO, 316"g_io_deliver(%p) from %p(%s) to %p(%s) cmd %d error %d off %jd len %jd", 317 bp, cp, cp->geom->name, pp, pp->name, bp->bio_cmd, error, 318 (intmax_t)bp->bio_offset, (intmax_t)bp->bio_length); 319 320 if (g_collectstats) { 321 switch (bp->bio_cmd) { 322 case BIO_READ: idx = G_STAT_IDX_READ; break; 323 case BIO_WRITE: idx = G_STAT_IDX_WRITE; break; 324 case BIO_DELETE: idx = G_STAT_IDX_DELETE; break; 325 case BIO_GETATTR: idx = -1; break; 326 case BIO_SETATTR: idx = -1; break; 327 default: 328 panic("unknown bio_cmd in g_io_deliver"); 329 break; 330 } 331 binuptime(&t1); 332 /* Raise the "inconsistent" flag for userland */ 333 atomic_add_acq_int(&cp->stat->seq0, 1); 334 atomic_add_acq_int(&pp->stat->seq0, 1); 335 if (idx >= 0) { 336 /* Account the service time */ 337 dt = t1; 338 bintime_sub(&dt, &bp->bio_t0); 339 bintime_add(&cp->stat->ops[idx].dt, &dt); 340 bintime_add(&pp->stat->ops[idx].dt, &dt); 341 /* ... and the metrics */ 342 pp->stat->ops[idx].nbyte += bp->bio_completed; 343 cp->stat->ops[idx].nbyte += bp->bio_completed; 344 pp->stat->ops[idx].nop++; 345 cp->stat->ops[idx].nop++; 346 /* ... 
and any errors */ 347 if (error == ENOMEM) { 348 cp->stat->ops[idx].nmem++; 349 pp->stat->ops[idx].nmem++; 350 } else if (error != 0) { 351 cp->stat->ops[idx].nerr++; 352 pp->stat->ops[idx].nerr++; 353 } 354 } 355 /* Account for busy time on the consumer */ 356 dt = t1; 357 bintime_sub(&dt, &cp->stat->wentbusy); 358 bintime_add(&cp->stat->bt, &dt); 359 cp->stat->wentbusy = t1; 360 /* Account for busy time on the provider */ 361 dt = t1; 362 bintime_sub(&dt, &pp->stat->wentbusy); 363 bintime_add(&pp->stat->bt, &dt); 364 pp->stat->wentbusy = t1; 365 /* Mark the structures as consistent again */ 366 atomic_add_acq_int(&cp->stat->seq1, 1); 367 atomic_add_acq_int(&pp->stat->seq1, 1); 368 cp->stat->nend++; 369 pp->stat->nend++; 370 } 371 cp->nend++; 372 pp->nend++; 373 374 if (error == ENOMEM) { 375 printf("ENOMEM %p on %p(%s)\n", bp, pp, pp->name); 376 g_io_request(bp, cp); 377 pace++; 378 return; 379 } 380 bp->bio_error = error; 381 g_bioq_enqueue_tail(bp, &g_bio_run_up); 382 wakeup(&g_wait_up); 383} 384 385void 386g_io_schedule_down(struct thread *tp __unused) 387{ 388 struct bio *bp; 389 off_t excess; 390 int error; 391 struct mtx mymutex; 392 393 bzero(&mymutex, sizeof mymutex); 394 mtx_init(&mymutex, "g_xdown", MTX_DEF, 0); 395 396 for(;;) { 397 g_bioq_lock(&g_bio_run_down); 398 bp = g_bioq_first(&g_bio_run_down); 399 if (bp == NULL) { 400 msleep(&g_wait_down, &g_bio_run_down.bio_queue_lock, 401 PRIBIO | PDROP, "g_down", hz/10); 402 continue; 403 } 404 g_bioq_unlock(&g_bio_run_down); 405 error = g_io_check(bp); 406 if (error) { 407 g_io_deliver(bp, error); 408 continue; 409 } 410 switch (bp->bio_cmd) { 411 case BIO_READ: 412 case BIO_WRITE: 413 case BIO_DELETE: 414 /* Truncate requests to the end of providers media. */ 415 excess = bp->bio_offset + bp->bio_length; 416 if (excess > bp->bio_to->mediasize) { 417 excess -= bp->bio_to->mediasize; 418 bp->bio_length -= excess; 419 } 420 /* Deliver zero length transfers right here. 
*/ 421 if (bp->bio_length == 0) { 422 g_io_deliver(bp, 0); 423 continue; 424 } 425 break; 426 default: 427 break; 428 } 429 mtx_lock(&mymutex); 430 bp->bio_to->geom->start(bp); 431 mtx_unlock(&mymutex); 432 if (pace) { 433 pace--; 434 break; 435 } 436 } 437} 438 439void 440bio_taskqueue(struct bio *bp, bio_task_t *func, void *arg) 441{ 442 bp->bio_task = func; 443 bp->bio_task_arg = arg; 444 /* 445 * The taskqueue is actually just a second queue off the "up" 446 * queue, so we use the same lock. 447 */ 448 g_bioq_lock(&g_bio_run_up); 449 TAILQ_INSERT_TAIL(&g_bio_run_task.bio_queue, bp, bio_queue); 450 g_bio_run_task.bio_queue_length++; 451 wakeup(&g_wait_up); 452 g_bioq_unlock(&g_bio_run_up); 453} 454 455 456void 457g_io_schedule_up(struct thread *tp __unused) 458{ 459 struct bio *bp; 460 struct mtx mymutex; 461 462 bzero(&mymutex, sizeof mymutex); 463 mtx_init(&mymutex, "g_xup", MTX_DEF, 0); 464 for(;;) { 465 g_bioq_lock(&g_bio_run_up); 466 bp = g_bioq_first(&g_bio_run_task); 467 if (bp != NULL) { 468 g_bioq_unlock(&g_bio_run_up); 469 mtx_lock(&mymutex); 470 bp->bio_task(bp, bp->bio_task_arg); 471 mtx_unlock(&mymutex); 472 continue; 473 } 474 bp = g_bioq_first(&g_bio_run_up); 475 if (bp != NULL) { 476 g_bioq_unlock(&g_bio_run_up); 477 mtx_lock(&mymutex); 478 biodone(bp); 479 mtx_unlock(&mymutex); 480 continue; 481 } 482 msleep(&g_wait_up, &g_bio_run_up.bio_queue_lock, 483 PRIBIO | PDROP, "g_up", hz/10); 484 } 485} 486 487void * 488g_read_data(struct g_consumer *cp, off_t offset, off_t length, int *error) 489{ 490 struct bio *bp; 491 void *ptr; 492 int errorc; 493 494 bp = g_new_bio(); 495 bp->bio_cmd = BIO_READ; 496 bp->bio_done = NULL; 497 bp->bio_offset = offset; 498 bp->bio_length = length; 499 ptr = g_malloc(length, M_WAITOK); 500 bp->bio_data = ptr; 501 g_io_request(bp, cp); 502 errorc = biowait(bp, "gread"); 503 if (error != NULL) 504 *error = errorc; 505 g_destroy_bio(bp); 506 if (errorc) { 507 g_free(ptr); 508 ptr = NULL; 509 } 510 return (ptr); 511} 512 
513int 514g_write_data(struct g_consumer *cp, off_t offset, void *ptr, off_t length) 515{ 516 struct bio *bp; 517 int error; 518 519 bp = g_new_bio(); 520 bp->bio_cmd = BIO_WRITE; 521 bp->bio_done = NULL; 522 bp->bio_offset = offset; 523 bp->bio_length = length; 524 bp->bio_data = ptr; 525 g_io_request(bp, cp); 526 error = biowait(bp, "gwrite"); 527 g_destroy_bio(bp); 528 return (error); 529} 530