/* geom_io.c revision 110523 */
1/*- 2 * Copyright (c) 2002 Poul-Henning Kamp 3 * Copyright (c) 2002 Networks Associates Technology, Inc. 4 * All rights reserved. 5 * 6 * This software was developed for the FreeBSD Project by Poul-Henning Kamp 7 * and NAI Labs, the Security Research Division of Network Associates, Inc. 8 * under DARPA/SPAWAR contract N66001-01-C-8035 ("CBOSS"), as part of the 9 * DARPA CHATS research program. 10 * 11 * Redistribution and use in source and binary forms, with or without 12 * modification, are permitted provided that the following conditions 13 * are met: 14 * 1. Redistributions of source code must retain the above copyright 15 * notice, this list of conditions and the following disclaimer. 16 * 2. Redistributions in binary form must reproduce the above copyright 17 * notice, this list of conditions and the following disclaimer in the 18 * documentation and/or other materials provided with the distribution. 19 * 3. The names of the authors may not be used to endorse or promote 20 * products derived from this software without specific prior written 21 * permission. 22 * 23 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 24 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 25 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 26 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 27 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 28 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 29 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 30 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 31 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 32 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 33 * SUCH DAMAGE. 
34 * 35 * $FreeBSD: head/sys/geom/geom_io.c 110523 2003-02-07 23:08:24Z phk $ 36 */ 37 38 39#include <sys/param.h> 40#include <sys/stdint.h> 41#ifndef _KERNEL 42#include <stdio.h> 43#include <string.h> 44#include <stdlib.h> 45#include <signal.h> 46#include <err.h> 47#include <sched.h> 48#else 49#include <sys/systm.h> 50#include <sys/kernel.h> 51#include <sys/malloc.h> 52#include <sys/bio.h> 53#endif 54 55#include <sys/errno.h> 56#include <geom/geom.h> 57#include <geom/geom_int.h> 58 59static struct g_bioq g_bio_run_down; 60static struct g_bioq g_bio_run_up; 61static struct g_bioq g_bio_idle; 62 63static u_int pace; 64 65#include <machine/atomic.h> 66 67static void 68g_bioq_lock(struct g_bioq *bq) 69{ 70 71 mtx_lock(&bq->bio_queue_lock); 72} 73 74static void 75g_bioq_unlock(struct g_bioq *bq) 76{ 77 78 mtx_unlock(&bq->bio_queue_lock); 79} 80 81#if 0 82static void 83g_bioq_destroy(struct g_bioq *bq) 84{ 85 86 mtx_destroy(&bq->bio_queue_lock); 87} 88#endif 89 90static void 91g_bioq_init(struct g_bioq *bq) 92{ 93 94 TAILQ_INIT(&bq->bio_queue); 95 mtx_init(&bq->bio_queue_lock, "bio queue", NULL, MTX_DEF); 96} 97 98static struct bio * 99g_bioq_first(struct g_bioq *bq) 100{ 101 struct bio *bp; 102 103 g_bioq_lock(bq); 104 bp = TAILQ_FIRST(&bq->bio_queue); 105 if (bp != NULL) { 106 TAILQ_REMOVE(&bq->bio_queue, bp, bio_queue); 107 bq->bio_queue_length--; 108 } 109 g_bioq_unlock(bq); 110 return (bp); 111} 112 113static void 114g_bioq_enqueue_tail(struct bio *bp, struct g_bioq *rq) 115{ 116 117 g_bioq_lock(rq); 118 TAILQ_INSERT_TAIL(&rq->bio_queue, bp, bio_queue); 119 rq->bio_queue_length++; 120 g_bioq_unlock(rq); 121} 122 123struct bio * 124g_new_bio(void) 125{ 126 struct bio *bp; 127 128 bp = g_bioq_first(&g_bio_idle); 129 if (bp == NULL) 130 bp = g_malloc(sizeof *bp, M_NOWAIT | M_ZERO); 131 /* g_trace(G_T_BIO, "g_new_bio() = %p", bp); */ 132 return (bp); 133} 134 135void 136g_destroy_bio(struct bio *bp) 137{ 138 139 /* g_trace(G_T_BIO, "g_destroy_bio(%p)", bp); */ 140 
bzero(bp, sizeof *bp); 141 g_bioq_enqueue_tail(bp, &g_bio_idle); 142} 143 144struct bio * 145g_clone_bio(struct bio *bp) 146{ 147 struct bio *bp2; 148 149 bp2 = g_new_bio(); 150 if (bp2 != NULL) { 151 bp2->bio_parent = bp; 152 bp2->bio_cmd = bp->bio_cmd; 153 bp2->bio_length = bp->bio_length; 154 bp2->bio_offset = bp->bio_offset; 155 bp2->bio_data = bp->bio_data; 156 bp2->bio_attribute = bp->bio_attribute; 157 bp->bio_children++; 158 } 159 /* g_trace(G_T_BIO, "g_clone_bio(%p) = %p", bp, bp2); */ 160 return(bp2); 161} 162 163void 164g_io_init() 165{ 166 167 g_bioq_init(&g_bio_run_down); 168 g_bioq_init(&g_bio_run_up); 169 g_bioq_init(&g_bio_idle); 170} 171 172int 173g_io_setattr(const char *attr, struct g_consumer *cp, int len, void *ptr) 174{ 175 struct bio *bp; 176 int error; 177 178 g_trace(G_T_BIO, "bio_setattr(%s)", attr); 179 bp = g_new_bio(); 180 bp->bio_cmd = BIO_SETATTR; 181 bp->bio_done = NULL; 182 bp->bio_attribute = attr; 183 bp->bio_length = len; 184 bp->bio_data = ptr; 185 g_io_request(bp, cp); 186 error = biowait(bp, "gsetattr"); 187 g_destroy_bio(bp); 188 return (error); 189} 190 191 192int 193g_io_getattr(const char *attr, struct g_consumer *cp, int *len, void *ptr) 194{ 195 struct bio *bp; 196 int error; 197 198 g_trace(G_T_BIO, "bio_getattr(%s)", attr); 199 bp = g_new_bio(); 200 bp->bio_cmd = BIO_GETATTR; 201 bp->bio_done = NULL; 202 bp->bio_attribute = attr; 203 bp->bio_length = *len; 204 bp->bio_data = ptr; 205 g_io_request(bp, cp); 206 error = biowait(bp, "ggetattr"); 207 *len = bp->bio_completed; 208 g_destroy_bio(bp); 209 return (error); 210} 211 212static int 213g_io_check(struct bio *bp) 214{ 215 struct g_consumer *cp; 216 struct g_provider *pp; 217 218 cp = bp->bio_from; 219 pp = bp->bio_to; 220 221 /* Fail if access counters dont allow the operation */ 222 switch(bp->bio_cmd) { 223 case BIO_READ: 224 case BIO_GETATTR: 225 if (cp->acr == 0) 226 return (EPERM); 227 break; 228 case BIO_WRITE: 229 case BIO_DELETE: 230 case BIO_SETATTR: 231 if 
(cp->acw == 0) 232 return (EPERM); 233 break; 234 default: 235 return (EPERM); 236 } 237 /* if provider is marked for error, don't disturb. */ 238 if (pp->error) 239 return (pp->error); 240 241 switch(bp->bio_cmd) { 242 case BIO_READ: 243 case BIO_WRITE: 244 case BIO_DELETE: 245 /* Reject I/O not on sector boundary */ 246 if (bp->bio_offset % pp->sectorsize) 247 return (EINVAL); 248 /* Reject I/O not integral sector long */ 249 if (bp->bio_length % pp->sectorsize) 250 return (EINVAL); 251 /* Reject requests past the end of media. */ 252 if (bp->bio_offset > pp->mediasize) 253 return (EIO); 254 break; 255 default: 256 break; 257 } 258 return (0); 259} 260 261void 262g_io_request(struct bio *bp, struct g_consumer *cp) 263{ 264 struct g_provider *pp; 265 struct bintime bt; 266 267 pp = cp->provider; 268 KASSERT(cp != NULL, ("NULL cp in g_io_request")); 269 KASSERT(bp != NULL, ("NULL bp in g_io_request")); 270 KASSERT(bp->bio_data != NULL, ("NULL bp->data in g_io_request")); 271 KASSERT(pp != NULL, ("consumer not attached in g_io_request")); 272 273 bp->bio_from = cp; 274 bp->bio_to = pp; 275 bp->bio_error = 0; 276 bp->bio_completed = 0; 277 278 if (g_collectstats) { 279 /* Collect statistics */ 280 binuptime(&bp->bio_t0); 281 if (cp->stat.nop == cp->stat.nend) { 282 /* Consumer is idle */ 283 bt = bp->bio_t0; 284 bintime_sub(&bt, &cp->stat.wentidle); 285 bintime_add(&cp->stat.it, &bt); 286 if (pp->stat.nop == pp->stat.nend) { 287 /* 288 * NB: Provider can only be idle if the 289 * consumer is but we cannot trust them 290 * to have gone idle at the same time. 291 */ 292 bt = bp->bio_t0; 293 bintime_sub(&bt, &pp->stat.wentidle); 294 bintime_add(&pp->stat.it, &bt); 295 } 296 } 297 } 298 cp->stat.nop++; 299 pp->stat.nop++; 300 301 /* Pass it on down. 
*/ 302 g_trace(G_T_BIO, "bio_request(%p) from %p(%s) to %p(%s) cmd %d", 303 bp, cp, cp->geom->name, pp, pp->name, bp->bio_cmd); 304 g_bioq_enqueue_tail(bp, &g_bio_run_down); 305 wakeup(&g_wait_down); 306} 307 308void 309g_io_deliver(struct bio *bp, int error) 310{ 311 struct g_consumer *cp; 312 struct g_provider *pp; 313 struct bintime t1; 314 int idx; 315 316 cp = bp->bio_from; 317 pp = bp->bio_to; 318 KASSERT(bp != NULL, ("NULL bp in g_io_deliver")); 319 KASSERT(cp != NULL, ("NULL bio_from in g_io_deliver")); 320 KASSERT(cp->geom != NULL, ("NULL bio_from->geom in g_io_deliver")); 321 KASSERT(pp != NULL, ("NULL bio_to in g_io_deliver")); 322 323 g_trace(G_T_BIO, 324"g_io_deliver(%p) from %p(%s) to %p(%s) cmd %d error %d off %jd len %jd", 325 bp, cp, cp->geom->name, pp, pp->name, bp->bio_cmd, error, 326 (intmax_t)bp->bio_offset, (intmax_t)bp->bio_length); 327 328 switch (bp->bio_cmd) { 329 case BIO_READ: idx = G_STAT_IDX_READ; break; 330 case BIO_WRITE: idx = G_STAT_IDX_WRITE; break; 331 case BIO_DELETE: idx = G_STAT_IDX_DELETE; break; 332 case BIO_GETATTR: idx = -1; break; 333 case BIO_SETATTR: idx = -1; break; 334 default: 335 panic("unknown bio_cmd in g_io_deliver"); 336 break; 337 } 338 339 /* Collect statistics */ 340 if (g_collectstats) { 341 binuptime(&t1); 342 pp->stat.wentidle = t1; 343 cp->stat.wentidle = t1; 344 345 if (idx >= 0) { 346 bintime_sub(&t1, &bp->bio_t0); 347 bintime_add(&cp->stat.ops[idx].dt, &t1); 348 bintime_add(&pp->stat.ops[idx].dt, &t1); 349 pp->stat.ops[idx].nbyte += bp->bio_completed; 350 cp->stat.ops[idx].nbyte += bp->bio_completed; 351 pp->stat.ops[idx].nop++; 352 cp->stat.ops[idx].nop++; 353 if (error == ENOMEM) { 354 cp->stat.ops[idx].nmem++; 355 pp->stat.ops[idx].nmem++; 356 } else if (error != 0) { 357 cp->stat.ops[idx].nerr++; 358 pp->stat.ops[idx].nerr++; 359 } 360 } 361 } 362 363 pp->stat.nend++; /* In reverse order of g_io_request() */ 364 cp->stat.nend++; 365 366 if (error == ENOMEM) { 367 printf("ENOMEM %p on %p(%s)\n", bp, 
pp, pp->name); 368 g_io_request(bp, cp); 369 pace++; 370 return; 371 } 372 bp->bio_error = error; 373 g_bioq_enqueue_tail(bp, &g_bio_run_up); 374 wakeup(&g_wait_up); 375} 376 377void 378g_io_schedule_down(struct thread *tp __unused) 379{ 380 struct bio *bp; 381 off_t excess; 382 int error; 383 384 for(;;) { 385 bp = g_bioq_first(&g_bio_run_down); 386 if (bp == NULL) 387 break; 388 error = g_io_check(bp); 389 if (error) { 390 g_io_deliver(bp, error); 391 continue; 392 } 393 /* Truncate requests to the end of providers media. */ 394 excess = bp->bio_offset + bp->bio_length; 395 if (excess > bp->bio_to->mediasize) { 396 excess -= bp->bio_to->mediasize; 397 bp->bio_length -= excess; 398 } 399 /* Deliver zero length transfers right here. */ 400 if (bp->bio_length == 0) { 401 g_io_deliver(bp, 0); 402 continue; 403 } 404 bp->bio_to->geom->start(bp); 405 if (pace) { 406 pace--; 407 break; 408 } 409 } 410} 411 412void 413g_io_schedule_up(struct thread *tp __unused) 414{ 415 struct bio *bp; 416 struct g_consumer *cp; 417 418 for(;;) { 419 bp = g_bioq_first(&g_bio_run_up); 420 if (bp == NULL) 421 break; 422 423 cp = bp->bio_from; 424 biodone(bp); 425 } 426} 427 428void * 429g_read_data(struct g_consumer *cp, off_t offset, off_t length, int *error) 430{ 431 struct bio *bp; 432 void *ptr; 433 int errorc; 434 435 bp = g_new_bio(); 436 bp->bio_cmd = BIO_READ; 437 bp->bio_done = NULL; 438 bp->bio_offset = offset; 439 bp->bio_length = length; 440 ptr = g_malloc(length, 0); 441 bp->bio_data = ptr; 442 g_io_request(bp, cp); 443 errorc = biowait(bp, "gread"); 444 if (error != NULL) 445 *error = errorc; 446 g_destroy_bio(bp); 447 if (errorc) { 448 g_free(ptr); 449 ptr = NULL; 450 } 451 return (ptr); 452} 453 454int 455g_write_data(struct g_consumer *cp, off_t offset, void *ptr, off_t length) 456{ 457 struct bio *bp; 458 int error; 459 460 bp = g_new_bio(); 461 bp->bio_cmd = BIO_WRITE; 462 bp->bio_done = NULL; 463 bp->bio_offset = offset; 464 bp->bio_length = length; 465 bp->bio_data = 
ptr; 466 g_io_request(bp, cp); 467 error = biowait(bp, "gwrite"); 468 g_destroy_bio(bp); 469 return (error); 470} 471