/* geom_io.c — revision 110592 */
1/*- 2 * Copyright (c) 2002 Poul-Henning Kamp 3 * Copyright (c) 2002 Networks Associates Technology, Inc. 4 * All rights reserved. 5 * 6 * This software was developed for the FreeBSD Project by Poul-Henning Kamp 7 * and NAI Labs, the Security Research Division of Network Associates, Inc. 8 * under DARPA/SPAWAR contract N66001-01-C-8035 ("CBOSS"), as part of the 9 * DARPA CHATS research program. 10 * 11 * Redistribution and use in source and binary forms, with or without 12 * modification, are permitted provided that the following conditions 13 * are met: 14 * 1. Redistributions of source code must retain the above copyright 15 * notice, this list of conditions and the following disclaimer. 16 * 2. Redistributions in binary form must reproduce the above copyright 17 * notice, this list of conditions and the following disclaimer in the 18 * documentation and/or other materials provided with the distribution. 19 * 3. The names of the authors may not be used to endorse or promote 20 * products derived from this software without specific prior written 21 * permission. 22 * 23 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 24 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 25 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 26 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 27 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 28 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 29 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 30 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 31 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 32 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 33 * SUCH DAMAGE. 
34 * 35 * $FreeBSD: head/sys/geom/geom_io.c 110592 2003-02-09 17:04:57Z phk $ 36 */ 37 38 39#include <sys/param.h> 40#include <sys/stdint.h> 41#ifndef _KERNEL 42#include <stdio.h> 43#include <string.h> 44#include <stdlib.h> 45#include <signal.h> 46#include <err.h> 47#include <sched.h> 48#else 49#include <sys/systm.h> 50#include <sys/kernel.h> 51#include <sys/malloc.h> 52#include <sys/bio.h> 53#endif 54 55#include <sys/errno.h> 56#include <geom/geom.h> 57#include <geom/geom_int.h> 58#include <geom/geom_stats.h> 59 60static struct g_bioq g_bio_run_down; 61static struct g_bioq g_bio_run_up; 62static struct g_bioq g_bio_idle; 63 64static u_int pace; 65 66#include <machine/atomic.h> 67 68static void 69g_bioq_lock(struct g_bioq *bq) 70{ 71 72 mtx_lock(&bq->bio_queue_lock); 73} 74 75static void 76g_bioq_unlock(struct g_bioq *bq) 77{ 78 79 mtx_unlock(&bq->bio_queue_lock); 80} 81 82#if 0 83static void 84g_bioq_destroy(struct g_bioq *bq) 85{ 86 87 mtx_destroy(&bq->bio_queue_lock); 88} 89#endif 90 91static void 92g_bioq_init(struct g_bioq *bq) 93{ 94 95 TAILQ_INIT(&bq->bio_queue); 96 mtx_init(&bq->bio_queue_lock, "bio queue", NULL, MTX_DEF); 97} 98 99static struct bio * 100g_bioq_first(struct g_bioq *bq) 101{ 102 struct bio *bp; 103 104 g_bioq_lock(bq); 105 bp = TAILQ_FIRST(&bq->bio_queue); 106 if (bp != NULL) { 107 TAILQ_REMOVE(&bq->bio_queue, bp, bio_queue); 108 bq->bio_queue_length--; 109 } 110 g_bioq_unlock(bq); 111 return (bp); 112} 113 114static void 115g_bioq_enqueue_tail(struct bio *bp, struct g_bioq *rq) 116{ 117 118 g_bioq_lock(rq); 119 TAILQ_INSERT_TAIL(&rq->bio_queue, bp, bio_queue); 120 rq->bio_queue_length++; 121 g_bioq_unlock(rq); 122} 123 124struct bio * 125g_new_bio(void) 126{ 127 struct bio *bp; 128 129 bp = g_bioq_first(&g_bio_idle); 130 if (bp == NULL) 131 bp = g_malloc(sizeof *bp, M_NOWAIT | M_ZERO); 132 /* g_trace(G_T_BIO, "g_new_bio() = %p", bp); */ 133 return (bp); 134} 135 136void 137g_destroy_bio(struct bio *bp) 138{ 139 140 /* g_trace(G_T_BIO, 
"g_destroy_bio(%p)", bp); */ 141 bzero(bp, sizeof *bp); 142 g_bioq_enqueue_tail(bp, &g_bio_idle); 143} 144 145struct bio * 146g_clone_bio(struct bio *bp) 147{ 148 struct bio *bp2; 149 150 bp2 = g_new_bio(); 151 if (bp2 != NULL) { 152 bp2->bio_parent = bp; 153 bp2->bio_cmd = bp->bio_cmd; 154 bp2->bio_length = bp->bio_length; 155 bp2->bio_offset = bp->bio_offset; 156 bp2->bio_data = bp->bio_data; 157 bp2->bio_attribute = bp->bio_attribute; 158 bp->bio_children++; 159 } 160 /* g_trace(G_T_BIO, "g_clone_bio(%p) = %p", bp, bp2); */ 161 return(bp2); 162} 163 164void 165g_io_init() 166{ 167 168 g_bioq_init(&g_bio_run_down); 169 g_bioq_init(&g_bio_run_up); 170 g_bioq_init(&g_bio_idle); 171} 172 173int 174g_io_setattr(const char *attr, struct g_consumer *cp, int len, void *ptr) 175{ 176 struct bio *bp; 177 int error; 178 179 g_trace(G_T_BIO, "bio_setattr(%s)", attr); 180 bp = g_new_bio(); 181 bp->bio_cmd = BIO_SETATTR; 182 bp->bio_done = NULL; 183 bp->bio_attribute = attr; 184 bp->bio_length = len; 185 bp->bio_data = ptr; 186 g_io_request(bp, cp); 187 error = biowait(bp, "gsetattr"); 188 g_destroy_bio(bp); 189 return (error); 190} 191 192 193int 194g_io_getattr(const char *attr, struct g_consumer *cp, int *len, void *ptr) 195{ 196 struct bio *bp; 197 int error; 198 199 g_trace(G_T_BIO, "bio_getattr(%s)", attr); 200 bp = g_new_bio(); 201 bp->bio_cmd = BIO_GETATTR; 202 bp->bio_done = NULL; 203 bp->bio_attribute = attr; 204 bp->bio_length = *len; 205 bp->bio_data = ptr; 206 g_io_request(bp, cp); 207 error = biowait(bp, "ggetattr"); 208 *len = bp->bio_completed; 209 g_destroy_bio(bp); 210 return (error); 211} 212 213static int 214g_io_check(struct bio *bp) 215{ 216 struct g_consumer *cp; 217 struct g_provider *pp; 218 219 cp = bp->bio_from; 220 pp = bp->bio_to; 221 222 /* Fail if access counters dont allow the operation */ 223 switch(bp->bio_cmd) { 224 case BIO_READ: 225 case BIO_GETATTR: 226 if (cp->acr == 0) 227 return (EPERM); 228 break; 229 case BIO_WRITE: 230 case 
BIO_DELETE: 231 case BIO_SETATTR: 232 if (cp->acw == 0) 233 return (EPERM); 234 break; 235 default: 236 return (EPERM); 237 } 238 /* if provider is marked for error, don't disturb. */ 239 if (pp->error) 240 return (pp->error); 241 242 switch(bp->bio_cmd) { 243 case BIO_READ: 244 case BIO_WRITE: 245 case BIO_DELETE: 246 /* Reject I/O not on sector boundary */ 247 if (bp->bio_offset % pp->sectorsize) 248 return (EINVAL); 249 /* Reject I/O not integral sector long */ 250 if (bp->bio_length % pp->sectorsize) 251 return (EINVAL); 252 /* Reject requests past the end of media. */ 253 if (bp->bio_offset > pp->mediasize) 254 return (EIO); 255 break; 256 default: 257 break; 258 } 259 return (0); 260} 261 262void 263g_io_request(struct bio *bp, struct g_consumer *cp) 264{ 265 struct g_provider *pp; 266 struct bintime bt; 267 268 pp = cp->provider; 269 KASSERT(cp != NULL, ("NULL cp in g_io_request")); 270 KASSERT(bp != NULL, ("NULL bp in g_io_request")); 271 KASSERT(bp->bio_data != NULL, ("NULL bp->data in g_io_request")); 272 KASSERT(pp != NULL, ("consumer not attached in g_io_request")); 273 274 bp->bio_from = cp; 275 bp->bio_to = pp; 276 bp->bio_error = 0; 277 bp->bio_completed = 0; 278 279 if (g_collectstats) { 280 binuptime(&bt); 281 bp->bio_t0 = bt; 282 if (cp->stat->nop == cp->stat->nend) 283 cp->stat->wentbusy = bt; /* Consumer is idle */ 284 if (pp->stat->nop == pp->stat->nend) 285 pp->stat->wentbusy = bt; /* Provider is idle */ 286 } 287 cp->stat->nop++; 288 pp->stat->nop++; 289 290 /* Pass it on down. 
*/ 291 g_trace(G_T_BIO, "bio_request(%p) from %p(%s) to %p(%s) cmd %d", 292 bp, cp, cp->geom->name, pp, pp->name, bp->bio_cmd); 293 g_bioq_enqueue_tail(bp, &g_bio_run_down); 294 wakeup(&g_wait_down); 295} 296 297void 298g_io_deliver(struct bio *bp, int error) 299{ 300 struct g_consumer *cp; 301 struct g_provider *pp; 302 struct bintime t1, dt; 303 int idx; 304 305 cp = bp->bio_from; 306 pp = bp->bio_to; 307 KASSERT(bp != NULL, ("NULL bp in g_io_deliver")); 308 KASSERT(cp != NULL, ("NULL bio_from in g_io_deliver")); 309 KASSERT(cp->geom != NULL, ("NULL bio_from->geom in g_io_deliver")); 310 KASSERT(pp != NULL, ("NULL bio_to in g_io_deliver")); 311 312 g_trace(G_T_BIO, 313"g_io_deliver(%p) from %p(%s) to %p(%s) cmd %d error %d off %jd len %jd", 314 bp, cp, cp->geom->name, pp, pp->name, bp->bio_cmd, error, 315 (intmax_t)bp->bio_offset, (intmax_t)bp->bio_length); 316 317 if (g_collectstats) { 318 switch (bp->bio_cmd) { 319 case BIO_READ: idx = G_STAT_IDX_READ; break; 320 case BIO_WRITE: idx = G_STAT_IDX_WRITE; break; 321 case BIO_DELETE: idx = G_STAT_IDX_DELETE; break; 322 case BIO_GETATTR: idx = -1; break; 323 case BIO_SETATTR: idx = -1; break; 324 default: 325 panic("unknown bio_cmd in g_io_deliver"); 326 break; 327 } 328 binuptime(&t1); 329 /* Raise the "inconsistent" flag for userland */ 330 atomic_set_acq_int(&cp->stat->updating, 1); 331 atomic_set_acq_int(&pp->stat->updating, 1); 332 if (idx >= 0) { 333 /* Account the service time */ 334 dt = t1; 335 bintime_sub(&dt, &bp->bio_t0); 336 bintime_add(&cp->stat->ops[idx].dt, &dt); 337 bintime_add(&pp->stat->ops[idx].dt, &dt); 338 /* ... and the metrics */ 339 pp->stat->ops[idx].nbyte += bp->bio_completed; 340 cp->stat->ops[idx].nbyte += bp->bio_completed; 341 pp->stat->ops[idx].nop++; 342 cp->stat->ops[idx].nop++; 343 /* ... 
and any errors */ 344 if (error == ENOMEM) { 345 cp->stat->ops[idx].nmem++; 346 pp->stat->ops[idx].nmem++; 347 } else if (error != 0) { 348 cp->stat->ops[idx].nerr++; 349 pp->stat->ops[idx].nerr++; 350 } 351 } 352 /* Account for busy time on the consumer */ 353 dt = t1; 354 bintime_sub(&dt, &cp->stat->wentbusy); 355 bintime_add(&cp->stat->bt, &dt); 356 cp->stat->wentbusy = t1; 357 /* Account for busy time on the provider */ 358 dt = t1; 359 bintime_sub(&dt, &pp->stat->wentbusy); 360 bintime_add(&pp->stat->bt, &dt); 361 pp->stat->wentbusy = t1; 362 /* Mark the structures as consistent again */ 363 atomic_store_rel_int(&cp->stat->updating, 0); 364 atomic_store_rel_int(&pp->stat->updating, 0); 365 } 366 cp->stat->nend++; 367 pp->stat->nend++; 368 369 if (error == ENOMEM) { 370 printf("ENOMEM %p on %p(%s)\n", bp, pp, pp->name); 371 g_io_request(bp, cp); 372 pace++; 373 return; 374 } 375 bp->bio_error = error; 376 g_bioq_enqueue_tail(bp, &g_bio_run_up); 377 wakeup(&g_wait_up); 378} 379 380void 381g_io_schedule_down(struct thread *tp __unused) 382{ 383 struct bio *bp; 384 off_t excess; 385 int error; 386 387 for(;;) { 388 bp = g_bioq_first(&g_bio_run_down); 389 if (bp == NULL) 390 break; 391 error = g_io_check(bp); 392 if (error) { 393 g_io_deliver(bp, error); 394 continue; 395 } 396 /* Truncate requests to the end of providers media. */ 397 excess = bp->bio_offset + bp->bio_length; 398 if (excess > bp->bio_to->mediasize) { 399 excess -= bp->bio_to->mediasize; 400 bp->bio_length -= excess; 401 } 402 /* Deliver zero length transfers right here. 
*/ 403 if (bp->bio_length == 0) { 404 g_io_deliver(bp, 0); 405 continue; 406 } 407 bp->bio_to->geom->start(bp); 408 if (pace) { 409 pace--; 410 break; 411 } 412 } 413} 414 415void 416g_io_schedule_up(struct thread *tp __unused) 417{ 418 struct bio *bp; 419 struct g_consumer *cp; 420 421 for(;;) { 422 bp = g_bioq_first(&g_bio_run_up); 423 if (bp == NULL) 424 break; 425 426 cp = bp->bio_from; 427 biodone(bp); 428 } 429} 430 431void * 432g_read_data(struct g_consumer *cp, off_t offset, off_t length, int *error) 433{ 434 struct bio *bp; 435 void *ptr; 436 int errorc; 437 438 bp = g_new_bio(); 439 bp->bio_cmd = BIO_READ; 440 bp->bio_done = NULL; 441 bp->bio_offset = offset; 442 bp->bio_length = length; 443 ptr = g_malloc(length, 0); 444 bp->bio_data = ptr; 445 g_io_request(bp, cp); 446 errorc = biowait(bp, "gread"); 447 if (error != NULL) 448 *error = errorc; 449 g_destroy_bio(bp); 450 if (errorc) { 451 g_free(ptr); 452 ptr = NULL; 453 } 454 return (ptr); 455} 456 457int 458g_write_data(struct g_consumer *cp, off_t offset, void *ptr, off_t length) 459{ 460 struct bio *bp; 461 int error; 462 463 bp = g_new_bio(); 464 bp->bio_cmd = BIO_WRITE; 465 bp->bio_done = NULL; 466 bp->bio_offset = offset; 467 bp->bio_length = length; 468 bp->bio_data = ptr; 469 g_io_request(bp, cp); 470 error = biowait(bp, "gwrite"); 471 g_destroy_bio(bp); 472 return (error); 473} 474