/* geom_io.c — FreeBSD revision 113032 */
1/*- 2 * Copyright (c) 2002 Poul-Henning Kamp 3 * Copyright (c) 2002 Networks Associates Technology, Inc. 4 * All rights reserved. 5 * 6 * This software was developed for the FreeBSD Project by Poul-Henning Kamp 7 * and NAI Labs, the Security Research Division of Network Associates, Inc. 8 * under DARPA/SPAWAR contract N66001-01-C-8035 ("CBOSS"), as part of the 9 * DARPA CHATS research program. 10 * 11 * Redistribution and use in source and binary forms, with or without 12 * modification, are permitted provided that the following conditions 13 * are met: 14 * 1. Redistributions of source code must retain the above copyright 15 * notice, this list of conditions and the following disclaimer. 16 * 2. Redistributions in binary form must reproduce the above copyright 17 * notice, this list of conditions and the following disclaimer in the 18 * documentation and/or other materials provided with the distribution. 19 * 3. The names of the authors may not be used to endorse or promote 20 * products derived from this software without specific prior written 21 * permission. 22 * 23 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 24 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 25 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 26 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 27 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 28 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 29 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 30 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 31 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 32 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 33 * SUCH DAMAGE. 
34 * 35 * $FreeBSD: head/sys/geom/geom_io.c 113032 2003-04-03 19:19:36Z phk $ 36 */ 37 38 39#include <sys/param.h> 40#ifndef _KERNEL 41#include <stdio.h> 42#include <string.h> 43#include <stdlib.h> 44#include <signal.h> 45#include <err.h> 46#include <sched.h> 47#else 48#include <sys/systm.h> 49#include <sys/kernel.h> 50#include <sys/malloc.h> 51#include <sys/bio.h> 52#endif 53 54#include <sys/errno.h> 55#include <geom/geom.h> 56#include <geom/geom_int.h> 57#include <sys/devicestat.h> 58 59static struct g_bioq g_bio_run_down; 60static struct g_bioq g_bio_run_up; 61static struct g_bioq g_bio_run_task; 62static struct g_bioq g_bio_idle; 63 64static u_int pace; 65 66#include <machine/atomic.h> 67 68static void 69g_bioq_lock(struct g_bioq *bq) 70{ 71 72 mtx_lock(&bq->bio_queue_lock); 73} 74 75static void 76g_bioq_unlock(struct g_bioq *bq) 77{ 78 79 mtx_unlock(&bq->bio_queue_lock); 80} 81 82#if 0 83static void 84g_bioq_destroy(struct g_bioq *bq) 85{ 86 87 mtx_destroy(&bq->bio_queue_lock); 88} 89#endif 90 91static void 92g_bioq_init(struct g_bioq *bq) 93{ 94 95 TAILQ_INIT(&bq->bio_queue); 96 mtx_init(&bq->bio_queue_lock, "bio queue", NULL, MTX_DEF); 97} 98 99static struct bio * 100g_bioq_first(struct g_bioq *bq) 101{ 102 struct bio *bp; 103 104 bp = TAILQ_FIRST(&bq->bio_queue); 105 if (bp != NULL) { 106 TAILQ_REMOVE(&bq->bio_queue, bp, bio_queue); 107 bq->bio_queue_length--; 108 } 109 return (bp); 110} 111 112static void 113g_bioq_enqueue_tail(struct bio *bp, struct g_bioq *rq) 114{ 115 116 g_bioq_lock(rq); 117 TAILQ_INSERT_TAIL(&rq->bio_queue, bp, bio_queue); 118 rq->bio_queue_length++; 119 g_bioq_unlock(rq); 120} 121 122struct bio * 123g_new_bio(void) 124{ 125 struct bio *bp; 126 127 g_bioq_lock(&g_bio_idle); 128 bp = g_bioq_first(&g_bio_idle); 129 g_bioq_unlock(&g_bio_idle); 130 if (bp == NULL) 131 bp = g_malloc(sizeof *bp, M_NOWAIT | M_ZERO); 132 /* g_trace(G_T_BIO, "g_new_bio() = %p", bp); */ 133 return (bp); 134} 135 136void 137g_destroy_bio(struct bio *bp) 138{ 139 
140 /* g_trace(G_T_BIO, "g_destroy_bio(%p)", bp); */ 141 bzero(bp, sizeof *bp); 142 g_bioq_enqueue_tail(bp, &g_bio_idle); 143} 144 145struct bio * 146g_clone_bio(struct bio *bp) 147{ 148 struct bio *bp2; 149 150 bp2 = g_new_bio(); 151 if (bp2 != NULL) { 152 bp2->bio_parent = bp; 153 bp2->bio_cmd = bp->bio_cmd; 154 bp2->bio_length = bp->bio_length; 155 bp2->bio_offset = bp->bio_offset; 156 bp2->bio_data = bp->bio_data; 157 bp2->bio_attribute = bp->bio_attribute; 158 bp->bio_children++; 159 } 160 /* g_trace(G_T_BIO, "g_clone_bio(%p) = %p", bp, bp2); */ 161 return(bp2); 162} 163 164void 165g_io_init() 166{ 167 168 g_bioq_init(&g_bio_run_down); 169 g_bioq_init(&g_bio_run_up); 170 g_bioq_init(&g_bio_run_task); 171 g_bioq_init(&g_bio_idle); 172} 173 174int 175g_io_getattr(const char *attr, struct g_consumer *cp, int *len, void *ptr) 176{ 177 struct bio *bp; 178 int error; 179 180 g_trace(G_T_BIO, "bio_getattr(%s)", attr); 181 bp = g_new_bio(); 182 bp->bio_cmd = BIO_GETATTR; 183 bp->bio_done = NULL; 184 bp->bio_attribute = attr; 185 bp->bio_length = *len; 186 bp->bio_data = ptr; 187 g_io_request(bp, cp); 188 error = biowait(bp, "ggetattr"); 189 *len = bp->bio_completed; 190 g_destroy_bio(bp); 191 return (error); 192} 193 194static int 195g_io_check(struct bio *bp) 196{ 197 struct g_consumer *cp; 198 struct g_provider *pp; 199 200 cp = bp->bio_from; 201 pp = bp->bio_to; 202 203 /* Fail if access counters dont allow the operation */ 204 switch(bp->bio_cmd) { 205 case BIO_READ: 206 case BIO_GETATTR: 207 if (cp->acr == 0) 208 return (EPERM); 209 break; 210 case BIO_WRITE: 211 case BIO_DELETE: 212 if (cp->acw == 0) 213 return (EPERM); 214 break; 215 default: 216 return (EPERM); 217 } 218 /* if provider is marked for error, don't disturb. 
*/ 219 if (pp->error) 220 return (pp->error); 221 222 switch(bp->bio_cmd) { 223 case BIO_READ: 224 case BIO_WRITE: 225 case BIO_DELETE: 226 /* Reject I/O not on sector boundary */ 227 if (bp->bio_offset % pp->sectorsize) 228 return (EINVAL); 229 /* Reject I/O not integral sector long */ 230 if (bp->bio_length % pp->sectorsize) 231 return (EINVAL); 232 /* Reject requests past the end of media. */ 233 if (bp->bio_offset > pp->mediasize) 234 return (EIO); 235 break; 236 default: 237 break; 238 } 239 return (0); 240} 241 242void 243g_io_request(struct bio *bp, struct g_consumer *cp) 244{ 245 struct g_provider *pp; 246 247 pp = cp->provider; 248 KASSERT(cp != NULL, ("NULL cp in g_io_request")); 249 KASSERT(bp != NULL, ("NULL bp in g_io_request")); 250 KASSERT(bp->bio_data != NULL, ("NULL bp->data in g_io_request")); 251 KASSERT(pp != NULL, ("consumer not attached in g_io_request")); 252 253 bp->bio_from = cp; 254 bp->bio_to = pp; 255 bp->bio_error = 0; 256 bp->bio_completed = 0; 257 258 if (g_collectstats) { 259 devstat_start_transaction_bio(cp->stat, bp); 260 devstat_start_transaction_bio(pp->stat, bp); 261 } 262 cp->nstart++; 263 pp->nstart++; 264 265 /* Pass it on down. 
*/ 266 g_trace(G_T_BIO, "bio_request(%p) from %p(%s) to %p(%s) cmd %d", 267 bp, cp, cp->geom->name, pp, pp->name, bp->bio_cmd); 268 g_bioq_enqueue_tail(bp, &g_bio_run_down); 269 wakeup(&g_wait_down); 270} 271 272void 273g_io_deliver(struct bio *bp, int error) 274{ 275 struct g_consumer *cp; 276 struct g_provider *pp; 277 278 cp = bp->bio_from; 279 pp = bp->bio_to; 280 KASSERT(bp != NULL, ("NULL bp in g_io_deliver")); 281 KASSERT(cp != NULL, ("NULL bio_from in g_io_deliver")); 282 KASSERT(cp->geom != NULL, ("NULL bio_from->geom in g_io_deliver")); 283 KASSERT(pp != NULL, ("NULL bio_to in g_io_deliver")); 284 285 g_trace(G_T_BIO, 286"g_io_deliver(%p) from %p(%s) to %p(%s) cmd %d error %d off %jd len %jd", 287 bp, cp, cp->geom->name, pp, pp->name, bp->bio_cmd, error, 288 (intmax_t)bp->bio_offset, (intmax_t)bp->bio_length); 289 290 bp->bio_bcount = bp->bio_length; 291 if (g_collectstats) { 292 bp->bio_resid = bp->bio_bcount - bp->bio_completed; 293 devstat_end_transaction_bio(cp->stat, bp); 294 devstat_end_transaction_bio(pp->stat, bp); 295 } 296 cp->nend++; 297 pp->nend++; 298 299 if (error == ENOMEM) { 300 printf("ENOMEM %p on %p(%s)\n", bp, pp, pp->name); 301 g_io_request(bp, cp); 302 pace++; 303 return; 304 } 305 bp->bio_error = error; 306 g_bioq_enqueue_tail(bp, &g_bio_run_up); 307 wakeup(&g_wait_up); 308} 309 310void 311g_io_schedule_down(struct thread *tp __unused) 312{ 313 struct bio *bp; 314 off_t excess; 315 int error; 316 struct mtx mymutex; 317 318 bzero(&mymutex, sizeof mymutex); 319 mtx_init(&mymutex, "g_xdown", MTX_DEF, 0); 320 321 for(;;) { 322 g_bioq_lock(&g_bio_run_down); 323 bp = g_bioq_first(&g_bio_run_down); 324 if (bp == NULL) { 325 msleep(&g_wait_down, &g_bio_run_down.bio_queue_lock, 326 PRIBIO | PDROP, "g_down", hz/10); 327 continue; 328 } 329 g_bioq_unlock(&g_bio_run_down); 330 if (pace > 0) { 331 msleep(&error, NULL, PRIBIO, "g_down", hz/10); 332 pace--; 333 } 334 error = g_io_check(bp); 335 if (error) { 336 g_io_deliver(bp, error); 337 
continue; 338 } 339 switch (bp->bio_cmd) { 340 case BIO_READ: 341 case BIO_WRITE: 342 case BIO_DELETE: 343 /* Truncate requests to the end of providers media. */ 344 excess = bp->bio_offset + bp->bio_length; 345 if (excess > bp->bio_to->mediasize) { 346 excess -= bp->bio_to->mediasize; 347 bp->bio_length -= excess; 348 } 349 /* Deliver zero length transfers right here. */ 350 if (bp->bio_length == 0) { 351 g_io_deliver(bp, 0); 352 continue; 353 } 354 break; 355 default: 356 break; 357 } 358 mtx_lock(&mymutex); 359 bp->bio_to->geom->start(bp); 360 mtx_unlock(&mymutex); 361 } 362} 363 364void 365bio_taskqueue(struct bio *bp, bio_task_t *func, void *arg) 366{ 367 bp->bio_task = func; 368 bp->bio_task_arg = arg; 369 /* 370 * The taskqueue is actually just a second queue off the "up" 371 * queue, so we use the same lock. 372 */ 373 g_bioq_lock(&g_bio_run_up); 374 TAILQ_INSERT_TAIL(&g_bio_run_task.bio_queue, bp, bio_queue); 375 g_bio_run_task.bio_queue_length++; 376 wakeup(&g_wait_up); 377 g_bioq_unlock(&g_bio_run_up); 378} 379 380 381void 382g_io_schedule_up(struct thread *tp __unused) 383{ 384 struct bio *bp; 385 struct mtx mymutex; 386 387 bzero(&mymutex, sizeof mymutex); 388 mtx_init(&mymutex, "g_xup", MTX_DEF, 0); 389 for(;;) { 390 g_bioq_lock(&g_bio_run_up); 391 bp = g_bioq_first(&g_bio_run_task); 392 if (bp != NULL) { 393 g_bioq_unlock(&g_bio_run_up); 394 mtx_lock(&mymutex); 395 bp->bio_task(bp, bp->bio_task_arg); 396 mtx_unlock(&mymutex); 397 continue; 398 } 399 bp = g_bioq_first(&g_bio_run_up); 400 if (bp != NULL) { 401 g_bioq_unlock(&g_bio_run_up); 402 mtx_lock(&mymutex); 403 biodone(bp); 404 mtx_unlock(&mymutex); 405 continue; 406 } 407 msleep(&g_wait_up, &g_bio_run_up.bio_queue_lock, 408 PRIBIO | PDROP, "g_up", hz/10); 409 } 410} 411 412void * 413g_read_data(struct g_consumer *cp, off_t offset, off_t length, int *error) 414{ 415 struct bio *bp; 416 void *ptr; 417 int errorc; 418 419 bp = g_new_bio(); 420 bp->bio_cmd = BIO_READ; 421 bp->bio_done = NULL; 422 
bp->bio_offset = offset; 423 bp->bio_length = length; 424 ptr = g_malloc(length, M_WAITOK); 425 bp->bio_data = ptr; 426 g_io_request(bp, cp); 427 errorc = biowait(bp, "gread"); 428 if (error != NULL) 429 *error = errorc; 430 g_destroy_bio(bp); 431 if (errorc) { 432 g_free(ptr); 433 ptr = NULL; 434 } 435 return (ptr); 436} 437 438int 439g_write_data(struct g_consumer *cp, off_t offset, void *ptr, off_t length) 440{ 441 struct bio *bp; 442 int error; 443 444 bp = g_new_bio(); 445 bp->bio_cmd = BIO_WRITE; 446 bp->bio_done = NULL; 447 bp->bio_offset = offset; 448 bp->bio_length = length; 449 bp->bio_data = ptr; 450 g_io_request(bp, cp); 451 error = biowait(bp, "gwrite"); 452 g_destroy_bio(bp); 453 return (error); 454} 455