/* geom_io.c revision 110541 */
1/*- 2 * Copyright (c) 2002 Poul-Henning Kamp 3 * Copyright (c) 2002 Networks Associates Technology, Inc. 4 * All rights reserved. 5 * 6 * This software was developed for the FreeBSD Project by Poul-Henning Kamp 7 * and NAI Labs, the Security Research Division of Network Associates, Inc. 8 * under DARPA/SPAWAR contract N66001-01-C-8035 ("CBOSS"), as part of the 9 * DARPA CHATS research program. 10 * 11 * Redistribution and use in source and binary forms, with or without 12 * modification, are permitted provided that the following conditions 13 * are met: 14 * 1. Redistributions of source code must retain the above copyright 15 * notice, this list of conditions and the following disclaimer. 16 * 2. Redistributions in binary form must reproduce the above copyright 17 * notice, this list of conditions and the following disclaimer in the 18 * documentation and/or other materials provided with the distribution. 19 * 3. The names of the authors may not be used to endorse or promote 20 * products derived from this software without specific prior written 21 * permission. 22 * 23 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 24 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 25 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 26 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 27 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 28 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 29 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 30 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 31 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 32 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 33 * SUCH DAMAGE. 
34 * 35 * $FreeBSD: head/sys/geom/geom_io.c 110541 2003-02-08 13:03:57Z phk $ 36 */ 37 38 39#include <sys/param.h> 40#include <sys/stdint.h> 41#ifndef _KERNEL 42#include <stdio.h> 43#include <string.h> 44#include <stdlib.h> 45#include <signal.h> 46#include <err.h> 47#include <sched.h> 48#else 49#include <sys/systm.h> 50#include <sys/kernel.h> 51#include <sys/malloc.h> 52#include <sys/bio.h> 53#endif 54 55#include <sys/errno.h> 56#include <geom/geom.h> 57#include <geom/geom_int.h> 58#include <geom/geom_stats.h> 59 60static struct g_bioq g_bio_run_down; 61static struct g_bioq g_bio_run_up; 62static struct g_bioq g_bio_idle; 63 64static u_int pace; 65 66#include <machine/atomic.h> 67 68static void 69g_bioq_lock(struct g_bioq *bq) 70{ 71 72 mtx_lock(&bq->bio_queue_lock); 73} 74 75static void 76g_bioq_unlock(struct g_bioq *bq) 77{ 78 79 mtx_unlock(&bq->bio_queue_lock); 80} 81 82#if 0 83static void 84g_bioq_destroy(struct g_bioq *bq) 85{ 86 87 mtx_destroy(&bq->bio_queue_lock); 88} 89#endif 90 91static void 92g_bioq_init(struct g_bioq *bq) 93{ 94 95 TAILQ_INIT(&bq->bio_queue); 96 mtx_init(&bq->bio_queue_lock, "bio queue", NULL, MTX_DEF); 97} 98 99static struct bio * 100g_bioq_first(struct g_bioq *bq) 101{ 102 struct bio *bp; 103 104 g_bioq_lock(bq); 105 bp = TAILQ_FIRST(&bq->bio_queue); 106 if (bp != NULL) { 107 TAILQ_REMOVE(&bq->bio_queue, bp, bio_queue); 108 bq->bio_queue_length--; 109 } 110 g_bioq_unlock(bq); 111 return (bp); 112} 113 114static void 115g_bioq_enqueue_tail(struct bio *bp, struct g_bioq *rq) 116{ 117 118 g_bioq_lock(rq); 119 TAILQ_INSERT_TAIL(&rq->bio_queue, bp, bio_queue); 120 rq->bio_queue_length++; 121 g_bioq_unlock(rq); 122} 123 124struct bio * 125g_new_bio(void) 126{ 127 struct bio *bp; 128 129 bp = g_bioq_first(&g_bio_idle); 130 if (bp == NULL) 131 bp = g_malloc(sizeof *bp, M_NOWAIT | M_ZERO); 132 /* g_trace(G_T_BIO, "g_new_bio() = %p", bp); */ 133 return (bp); 134} 135 136void 137g_destroy_bio(struct bio *bp) 138{ 139 140 /* g_trace(G_T_BIO, 
"g_destroy_bio(%p)", bp); */ 141 bzero(bp, sizeof *bp); 142 g_bioq_enqueue_tail(bp, &g_bio_idle); 143} 144 145struct bio * 146g_clone_bio(struct bio *bp) 147{ 148 struct bio *bp2; 149 150 bp2 = g_new_bio(); 151 if (bp2 != NULL) { 152 bp2->bio_parent = bp; 153 bp2->bio_cmd = bp->bio_cmd; 154 bp2->bio_length = bp->bio_length; 155 bp2->bio_offset = bp->bio_offset; 156 bp2->bio_data = bp->bio_data; 157 bp2->bio_attribute = bp->bio_attribute; 158 bp->bio_children++; 159 } 160 /* g_trace(G_T_BIO, "g_clone_bio(%p) = %p", bp, bp2); */ 161 return(bp2); 162} 163 164void 165g_io_init() 166{ 167 168 g_bioq_init(&g_bio_run_down); 169 g_bioq_init(&g_bio_run_up); 170 g_bioq_init(&g_bio_idle); 171} 172 173int 174g_io_setattr(const char *attr, struct g_consumer *cp, int len, void *ptr) 175{ 176 struct bio *bp; 177 int error; 178 179 g_trace(G_T_BIO, "bio_setattr(%s)", attr); 180 bp = g_new_bio(); 181 bp->bio_cmd = BIO_SETATTR; 182 bp->bio_done = NULL; 183 bp->bio_attribute = attr; 184 bp->bio_length = len; 185 bp->bio_data = ptr; 186 g_io_request(bp, cp); 187 error = biowait(bp, "gsetattr"); 188 g_destroy_bio(bp); 189 return (error); 190} 191 192 193int 194g_io_getattr(const char *attr, struct g_consumer *cp, int *len, void *ptr) 195{ 196 struct bio *bp; 197 int error; 198 199 g_trace(G_T_BIO, "bio_getattr(%s)", attr); 200 bp = g_new_bio(); 201 bp->bio_cmd = BIO_GETATTR; 202 bp->bio_done = NULL; 203 bp->bio_attribute = attr; 204 bp->bio_length = *len; 205 bp->bio_data = ptr; 206 g_io_request(bp, cp); 207 error = biowait(bp, "ggetattr"); 208 *len = bp->bio_completed; 209 g_destroy_bio(bp); 210 return (error); 211} 212 213static int 214g_io_check(struct bio *bp) 215{ 216 struct g_consumer *cp; 217 struct g_provider *pp; 218 219 cp = bp->bio_from; 220 pp = bp->bio_to; 221 222 /* Fail if access counters dont allow the operation */ 223 switch(bp->bio_cmd) { 224 case BIO_READ: 225 case BIO_GETATTR: 226 if (cp->acr == 0) 227 return (EPERM); 228 break; 229 case BIO_WRITE: 230 case 
BIO_DELETE: 231 case BIO_SETATTR: 232 if (cp->acw == 0) 233 return (EPERM); 234 break; 235 default: 236 return (EPERM); 237 } 238 /* if provider is marked for error, don't disturb. */ 239 if (pp->error) 240 return (pp->error); 241 242 switch(bp->bio_cmd) { 243 case BIO_READ: 244 case BIO_WRITE: 245 case BIO_DELETE: 246 /* Reject I/O not on sector boundary */ 247 if (bp->bio_offset % pp->sectorsize) 248 return (EINVAL); 249 /* Reject I/O not integral sector long */ 250 if (bp->bio_length % pp->sectorsize) 251 return (EINVAL); 252 /* Reject requests past the end of media. */ 253 if (bp->bio_offset > pp->mediasize) 254 return (EIO); 255 break; 256 default: 257 break; 258 } 259 return (0); 260} 261 262void 263g_io_request(struct bio *bp, struct g_consumer *cp) 264{ 265 struct g_provider *pp; 266 struct bintime bt; 267 268 pp = cp->provider; 269 KASSERT(cp != NULL, ("NULL cp in g_io_request")); 270 KASSERT(bp != NULL, ("NULL bp in g_io_request")); 271 KASSERT(bp->bio_data != NULL, ("NULL bp->data in g_io_request")); 272 KASSERT(pp != NULL, ("consumer not attached in g_io_request")); 273 274 bp->bio_from = cp; 275 bp->bio_to = pp; 276 bp->bio_error = 0; 277 bp->bio_completed = 0; 278 279 if (g_collectstats) { 280 /* Collect statistics */ 281 binuptime(&bp->bio_t0); 282 if (cp->stat->nop == cp->stat->nend) { 283 /* Consumer is idle */ 284 bt = bp->bio_t0; 285 bintime_sub(&bt, &cp->stat->wentidle); 286 bintime_add(&cp->stat->it, &bt); 287 if (pp->stat->nop == pp->stat->nend) { 288 /* 289 * NB: Provider can only be idle if the 290 * consumer is but we cannot trust them 291 * to have gone idle at the same time. 292 */ 293 bt = bp->bio_t0; 294 bintime_sub(&bt, &pp->stat->wentidle); 295 bintime_add(&pp->stat->it, &bt); 296 } 297 } 298 } 299 cp->stat->nop++; 300 pp->stat->nop++; 301 302 /* Pass it on down. 
*/ 303 g_trace(G_T_BIO, "bio_request(%p) from %p(%s) to %p(%s) cmd %d", 304 bp, cp, cp->geom->name, pp, pp->name, bp->bio_cmd); 305 g_bioq_enqueue_tail(bp, &g_bio_run_down); 306 wakeup(&g_wait_down); 307} 308 309void 310g_io_deliver(struct bio *bp, int error) 311{ 312 struct g_consumer *cp; 313 struct g_provider *pp; 314 struct bintime t1; 315 int idx; 316 317 cp = bp->bio_from; 318 pp = bp->bio_to; 319 KASSERT(bp != NULL, ("NULL bp in g_io_deliver")); 320 KASSERT(cp != NULL, ("NULL bio_from in g_io_deliver")); 321 KASSERT(cp->geom != NULL, ("NULL bio_from->geom in g_io_deliver")); 322 KASSERT(pp != NULL, ("NULL bio_to in g_io_deliver")); 323 324 g_trace(G_T_BIO, 325"g_io_deliver(%p) from %p(%s) to %p(%s) cmd %d error %d off %jd len %jd", 326 bp, cp, cp->geom->name, pp, pp->name, bp->bio_cmd, error, 327 (intmax_t)bp->bio_offset, (intmax_t)bp->bio_length); 328 329 switch (bp->bio_cmd) { 330 case BIO_READ: idx = G_STAT_IDX_READ; break; 331 case BIO_WRITE: idx = G_STAT_IDX_WRITE; break; 332 case BIO_DELETE: idx = G_STAT_IDX_DELETE; break; 333 case BIO_GETATTR: idx = -1; break; 334 case BIO_SETATTR: idx = -1; break; 335 default: 336 panic("unknown bio_cmd in g_io_deliver"); 337 break; 338 } 339 340 /* Collect statistics */ 341 if (g_collectstats) { 342 binuptime(&t1); 343 pp->stat->wentidle = t1; 344 cp->stat->wentidle = t1; 345 346 if (idx >= 0) { 347 bintime_sub(&t1, &bp->bio_t0); 348 bintime_add(&cp->stat->ops[idx].dt, &t1); 349 bintime_add(&pp->stat->ops[idx].dt, &t1); 350 pp->stat->ops[idx].nbyte += bp->bio_completed; 351 cp->stat->ops[idx].nbyte += bp->bio_completed; 352 pp->stat->ops[idx].nop++; 353 cp->stat->ops[idx].nop++; 354 if (error == ENOMEM) { 355 cp->stat->ops[idx].nmem++; 356 pp->stat->ops[idx].nmem++; 357 } else if (error != 0) { 358 cp->stat->ops[idx].nerr++; 359 pp->stat->ops[idx].nerr++; 360 } 361 } 362 } 363 364 pp->stat->nend++; /* In reverse order of g_io_request() */ 365 cp->stat->nend++; 366 367 if (error == ENOMEM) { 368 printf("ENOMEM %p on 
%p(%s)\n", bp, pp, pp->name); 369 g_io_request(bp, cp); 370 pace++; 371 return; 372 } 373 bp->bio_error = error; 374 g_bioq_enqueue_tail(bp, &g_bio_run_up); 375 wakeup(&g_wait_up); 376} 377 378void 379g_io_schedule_down(struct thread *tp __unused) 380{ 381 struct bio *bp; 382 off_t excess; 383 int error; 384 385 for(;;) { 386 bp = g_bioq_first(&g_bio_run_down); 387 if (bp == NULL) 388 break; 389 error = g_io_check(bp); 390 if (error) { 391 g_io_deliver(bp, error); 392 continue; 393 } 394 /* Truncate requests to the end of providers media. */ 395 excess = bp->bio_offset + bp->bio_length; 396 if (excess > bp->bio_to->mediasize) { 397 excess -= bp->bio_to->mediasize; 398 bp->bio_length -= excess; 399 } 400 /* Deliver zero length transfers right here. */ 401 if (bp->bio_length == 0) { 402 g_io_deliver(bp, 0); 403 continue; 404 } 405 bp->bio_to->geom->start(bp); 406 if (pace) { 407 pace--; 408 break; 409 } 410 } 411} 412 413void 414g_io_schedule_up(struct thread *tp __unused) 415{ 416 struct bio *bp; 417 struct g_consumer *cp; 418 419 for(;;) { 420 bp = g_bioq_first(&g_bio_run_up); 421 if (bp == NULL) 422 break; 423 424 cp = bp->bio_from; 425 biodone(bp); 426 } 427} 428 429void * 430g_read_data(struct g_consumer *cp, off_t offset, off_t length, int *error) 431{ 432 struct bio *bp; 433 void *ptr; 434 int errorc; 435 436 bp = g_new_bio(); 437 bp->bio_cmd = BIO_READ; 438 bp->bio_done = NULL; 439 bp->bio_offset = offset; 440 bp->bio_length = length; 441 ptr = g_malloc(length, 0); 442 bp->bio_data = ptr; 443 g_io_request(bp, cp); 444 errorc = biowait(bp, "gread"); 445 if (error != NULL) 446 *error = errorc; 447 g_destroy_bio(bp); 448 if (errorc) { 449 g_free(ptr); 450 ptr = NULL; 451 } 452 return (ptr); 453} 454 455int 456g_write_data(struct g_consumer *cp, off_t offset, void *ptr, off_t length) 457{ 458 struct bio *bp; 459 int error; 460 461 bp = g_new_bio(); 462 bp->bio_cmd = BIO_WRITE; 463 bp->bio_done = NULL; 464 bp->bio_offset = offset; 465 bp->bio_length = length; 466 
bp->bio_data = ptr; 467 g_io_request(bp, cp); 468 error = biowait(bp, "gwrite"); 469 g_destroy_bio(bp); 470 return (error); 471} 472