geom_subr.c revision 94284
/*-
 * Copyright (c) 2002 Poul-Henning Kamp
 * Copyright (c) 2002 Networks Associates Technology, Inc.
 * All rights reserved.
 *
 * This software was developed for the FreeBSD Project by Poul-Henning Kamp
 * and NAI Labs, the Security Research Division of Network Associates, Inc.
 * under DARPA/SPAWAR contract N66001-01-C-8035 ("CBOSS"), as part of the
 * DARPA CHATS research program.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. The names of the authors may not be used to endorse or promote
 *    products derived from this software without specific prior written
 *    permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
34 * 35 * $FreeBSD: head/sys/geom/geom_subr.c 94284 2002-04-09 15:13:42Z phk $ 36 */ 37 38 39#include <sys/param.h> 40#ifndef _KERNEL 41#include <stdio.h> 42#include <unistd.h> 43#include <stdlib.h> 44#include <signal.h> 45#include <string.h> 46#include <err.h> 47#else 48#include <sys/systm.h> 49#include <sys/kernel.h> 50#include <sys/malloc.h> 51#include <sys/bio.h> 52#include <sys/sysctl.h> 53#include <sys/proc.h> 54#include <sys/kthread.h> 55#include <sys/lock.h> 56#include <sys/mutex.h> 57#endif 58#include <sys/errno.h> 59#include <sys/sbuf.h> 60#include <geom/geom.h> 61#include <geom/geom_int.h> 62#include <machine/stdarg.h> 63 64struct class_list_head g_classes = LIST_HEAD_INITIALIZER(g_classes); 65static struct g_tailq_head geoms = TAILQ_HEAD_INITIALIZER(geoms); 66static int g_nproviders; 67char *g_wait_event, *g_wait_up, *g_wait_down, *g_wait_sim; 68 69static int g_ignition; 70 71void 72g_add_class(struct g_class *mp) 73{ 74 75 if (!g_ignition) { 76 g_ignition++; 77 g_init(); 78 } 79 g_topology_lock(); 80 g_trace(G_T_TOPOLOGY, "g_add_class(%s)", mp->name); 81 LIST_INIT(&mp->geom); 82 LIST_INSERT_HEAD(&g_classes, mp, class); 83 if (g_nproviders > 0) 84 g_post_event(EV_NEW_CLASS, mp, NULL, NULL, NULL); 85 g_topology_unlock(); 86} 87 88struct g_geom * 89g_new_geomf(struct g_class *mp, char *fmt, ...) 
90{ 91 struct g_geom *gp; 92 va_list ap; 93 struct sbuf *sb; 94 95 g_topology_assert(); 96 va_start(ap, fmt); 97 mtx_lock(&Giant); 98 sb = sbuf_new(NULL, NULL, 0, SBUF_AUTOEXTEND); 99 sbuf_vprintf(sb, fmt, ap); 100 sbuf_finish(sb); 101 mtx_unlock(&Giant); 102 gp = g_malloc(sizeof *gp + sbuf_len(sb) + 1, M_WAITOK | M_ZERO); 103 gp->name = (char *)(gp + 1); 104 gp->class = mp; 105 gp->rank = 1; 106 LIST_INIT(&gp->consumer); 107 LIST_INIT(&gp->provider); 108 LIST_INSERT_HEAD(&mp->geom, gp, geom); 109 TAILQ_INSERT_HEAD(&geoms, gp, geoms); 110 strcpy(gp->name, sbuf_data(sb)); 111 sbuf_delete(sb); 112 return (gp); 113} 114 115void 116g_destroy_geom(struct g_geom *gp) 117{ 118 119 g_trace(G_T_TOPOLOGY, "g_destroy_geom(%p(%s))", gp, gp->name); 120 g_topology_assert(); 121 KASSERT(gp->event == NULL, ("g_destroy_geom() with event")); 122 KASSERT(LIST_EMPTY(&gp->consumer), 123 ("g_destroy_geom(%s) with consumer(s) [%p]", 124 gp->name, LIST_FIRST(&gp->consumer))); 125 KASSERT(LIST_EMPTY(&gp->provider), 126 ("g_destroy_geom(%s) with provider(s) [%p]", 127 gp->name, LIST_FIRST(&gp->consumer))); 128 LIST_REMOVE(gp, geom); 129 TAILQ_REMOVE(&geoms, gp, geoms); 130 g_free(gp); 131} 132 133struct g_consumer * 134g_new_consumer(struct g_geom *gp) 135{ 136 struct g_consumer *cp; 137 138 g_topology_assert(); 139 KASSERT(gp->orphan != NULL, 140 ("g_new_consumer on geom(%s) (class %s) without orphan", 141 gp->name, gp->class->name)); 142 143 cp = g_malloc(sizeof *cp, M_WAITOK | M_ZERO); 144 cp->geom = gp; 145 LIST_INSERT_HEAD(&gp->consumer, cp, consumer); 146 return(cp); 147} 148 149void 150g_destroy_consumer(struct g_consumer *cp) 151{ 152 153 g_trace(G_T_TOPOLOGY, "g_destroy_consumer(%p)", cp); 154 g_topology_assert(); 155 KASSERT(cp->event == NULL, ("g_destroy_consumer() with event")); 156 KASSERT (cp->provider == NULL, ("g_destroy_consumer but attached")); 157 KASSERT (cp->acr == 0, ("g_destroy_consumer with acr")); 158 KASSERT (cp->acw == 0, ("g_destroy_consumer with acw")); 159 
KASSERT (cp->ace == 0, ("g_destroy_consumer with ace")); 160 LIST_REMOVE(cp, consumer); 161 g_free(cp); 162} 163 164struct g_provider * 165g_new_providerf(struct g_geom *gp, char *fmt, ...) 166{ 167 struct g_provider *pp; 168 struct sbuf *sb; 169 va_list ap; 170 171 g_topology_assert(); 172 va_start(ap, fmt); 173 mtx_lock(&Giant); 174 sb = sbuf_new(NULL, NULL, 0, SBUF_AUTOEXTEND); 175 sbuf_vprintf(sb, fmt, ap); 176 sbuf_finish(sb); 177 mtx_unlock(&Giant); 178 pp = g_malloc(sizeof *pp + sbuf_len(sb) + 1, M_WAITOK | M_ZERO); 179 pp->name = (char *)(pp + 1); 180 strcpy(pp->name, sbuf_data(sb)); 181 sbuf_delete(sb); 182 LIST_INIT(&pp->consumers); 183 pp->error = ENXIO; 184 pp->geom = gp; 185 LIST_INSERT_HEAD(&gp->provider, pp, provider); 186 g_nproviders++; 187 g_post_event(EV_NEW_PROVIDER, NULL, NULL, pp, NULL); 188 return (pp); 189} 190 191void 192g_error_provider(struct g_provider *pp, int error) 193{ 194 195 pp->error = error; 196} 197 198 199void 200g_destroy_provider(struct g_provider *pp) 201{ 202 struct g_geom *gp; 203 struct g_consumer *cp; 204 205 g_topology_assert(); 206 KASSERT(pp->event == NULL, ("g_destroy_provider() with event")); 207 KASSERT(LIST_EMPTY(&pp->consumers), 208 ("g_destroy_provider but attached")); 209 KASSERT (pp->acr == 0, ("g_destroy_provider with acr")); 210 KASSERT (pp->acw == 0, ("g_destroy_provider with acw")); 211 KASSERT (pp->acw == 0, ("g_destroy_provider with ace")); 212 g_nproviders--; 213 LIST_REMOVE(pp, provider); 214 gp = pp->geom; 215 g_free(pp); 216 if (!(gp->flags & G_GEOM_WITHER)) 217 return; 218 if (!LIST_EMPTY(&gp->provider)) 219 return; 220 for (;;) { 221 cp = LIST_FIRST(&gp->consumer); 222 if (cp == NULL) 223 break; 224 g_dettach(cp); 225 g_destroy_consumer(cp); 226 } 227 g_destroy_geom(gp); 228} 229 230/* 231 * We keep the "geoms" list sorted by topological order (== increasing 232 * numerical rank) at all times. 
233 * When an attach is done, the attaching geoms rank is invalidated 234 * and it is moved to the tail of the list. 235 * All geoms later in the sequence has their ranks reevaluated in 236 * sequence. If we cannot assign rank to a geom because it's 237 * prerequisites do not have rank, we move that element to the tail 238 * of the sequence with invalid rank as well. 239 * At some point we encounter our original geom and if we stil fail 240 * to assign it a rank, there must be a loop and we fail back to 241 * g_attach() which dettach again and calls redo_rank again 242 * to fix up the damage. 243 * It would be much simpler code wise to do it recursively, but we 244 * can't risk that on the kernel stack. 245 */ 246 247static int 248redo_rank(struct g_geom *gp) 249{ 250 struct g_consumer *cp; 251 struct g_geom *gp1, *gp2; 252 int n, m; 253 254 g_topology_assert(); 255 256 /* Invalidate this geoms rank and move it to the tail */ 257 gp1 = TAILQ_NEXT(gp, geoms); 258 if (gp1 != NULL) { 259 gp->rank = 0; 260 TAILQ_REMOVE(&geoms, gp, geoms); 261 TAILQ_INSERT_TAIL(&geoms, gp, geoms); 262 } else { 263 gp1 = gp; 264 } 265 266 /* re-rank the rest of the sequence */ 267 for (; gp1 != NULL; gp1 = gp2) { 268 gp1->rank = 0; 269 m = 1; 270 LIST_FOREACH(cp, &gp1->consumer, consumer) { 271 if (cp->provider == NULL) 272 continue; 273 n = cp->provider->geom->rank; 274 if (n == 0) { 275 m = 0; 276 break; 277 } else if (n >= m) 278 m = n + 1; 279 } 280 gp1->rank = m; 281 gp2 = TAILQ_NEXT(gp1, geoms); 282 283 /* got a rank, moving on */ 284 if (m != 0) 285 continue; 286 287 /* no rank to original geom means loop */ 288 if (gp == gp1) { 289 return (ELOOP); 290 291 /* no rank, put it at the end move on */ 292 TAILQ_REMOVE(&geoms, gp1, geoms); 293 TAILQ_INSERT_TAIL(&geoms, gp1, geoms); 294 } 295 } 296 return (0); 297} 298 299int 300g_attach(struct g_consumer *cp, struct g_provider *pp) 301{ 302 int error; 303 304 g_topology_assert(); 305 KASSERT(cp->provider == NULL, ("attach but 
attached")); 306 cp->provider = pp; 307 LIST_INSERT_HEAD(&pp->consumers, cp, consumers); 308 error = redo_rank(cp->geom); 309 if (error) { 310 LIST_REMOVE(cp, consumers); 311 cp->provider = NULL; 312 redo_rank(cp->geom); 313 } 314 return (error); 315} 316 317void 318g_dettach(struct g_consumer *cp) 319{ 320 struct g_provider *pp; 321 322 g_trace(G_T_TOPOLOGY, "g_dettach(%p)", cp); 323 KASSERT(cp != (void*)0xd0d0d0d0, ("ARGH!")); 324 g_topology_assert(); 325 KASSERT(cp->provider != NULL, ("dettach but not attached")); 326 KASSERT(cp->acr == 0, ("dettach but nonzero acr")); 327 KASSERT(cp->acw == 0, ("dettach but nonzero acw")); 328 KASSERT(cp->ace == 0, ("dettach but nonzero ace")); 329 KASSERT(cp->biocount == 0, ("dettach but nonzero biocount")); 330 pp = cp->provider; 331 LIST_REMOVE(cp, consumers); 332 cp->provider = NULL; 333 if (LIST_EMPTY(&pp->consumers)) { 334 if (pp->geom->flags & G_GEOM_WITHER) 335 g_destroy_provider(pp); 336 } 337 redo_rank(cp->geom); 338} 339 340 341/* 342 * g_access_abs() 343 * 344 * Access-check with absolute new values: Just fall through 345 * and use the relative version. 346 */ 347int 348g_access_abs(struct g_consumer *cp, int acr, int acw, int ace) 349{ 350 351 g_topology_assert(); 352 return(g_access_rel(cp, 353 acr - cp->acr, 354 acw - cp->acw, 355 ace - cp->ace)); 356} 357 358/* 359 * g_access_rel() 360 * 361 * Access-check with delta values. The question asked is "can provider 362 * "cp" change the access counters by the relative amounts dc[rwe] ?" 
363 */ 364 365int 366g_access_rel(struct g_consumer *cp, int dcr, int dcw, int dce) 367{ 368 struct g_provider *pp; 369 int pr,pw,pe; 370 int error; 371 372 pp = cp->provider; 373 374 g_trace(G_T_ACCESS, "g_access_rel(%p(%s), %d, %d, %d)", 375 cp, pp->name, dcr, dcw, dce); 376 377 g_topology_assert(); 378 KASSERT(cp->provider != NULL, ("access but not attached")); 379 KASSERT(cp->acr + dcr >= 0, ("access resulting in negative acr")); 380 KASSERT(cp->acw + dcw >= 0, ("access resulting in negative acw")); 381 KASSERT(cp->ace + dce >= 0, ("access resulting in negative ace")); 382 KASSERT(pp->geom->access != NULL, ("NULL geom->access")); 383 384 /* 385 * If our class cares about being spoiled, and we have been, we 386 * are probably just ahead of the event telling us that. Fail 387 * now rather than having to unravel this later. 388 */ 389 if (cp->geom->spoiled != NULL && cp->spoiled) { 390 KASSERT(dcr >= 0, ("spoiled but dcr = %d", dcr)); 391 KASSERT(dcw >= 0, ("spoiled but dce = %d", dcw)); 392 KASSERT(dce >= 0, ("spoiled but dcw = %d", dce)); 393 KASSERT(cp->acr == 0, ("spoiled but cp->acr = %d", cp->acr)); 394 KASSERT(cp->acw == 0, ("spoiled but cp->acw = %d", cp->acw)); 395 KASSERT(cp->ace == 0, ("spoiled but cp->ace = %d", cp->ace)); 396 return(ENXIO); 397 } 398 399 /* 400 * Figure out what counts the provider would have had, if this 401 * consumer had (r0w0e0) at this time. 
402 */ 403 pr = pp->acr - cp->acr; 404 pw = pp->acw - cp->acw; 405 pe = pp->ace - cp->ace; 406 407 g_trace(G_T_ACCESS, 408 "open delta:[r%dw%de%d] old:[r%dw%de%d] provider:[r%dw%de%d] %p(%s)", 409 dcr, dcw, dce, 410 cp->acr, cp->acw, cp->ace, 411 pp->acr, pp->acw, pp->ace, 412 pp, pp->name); 413 414 /* If we try exclusive but already write: fail */ 415 if (dce > 0 && pw > 0) 416 return (EPERM); 417 /* If we try write but already exclusive: fail */ 418 if (dcw > 0 && pe > 0) 419 return (EPERM); 420 /* If we try to open more but provider is error'ed: fail */ 421 if ((dcr > 0 || dcw > 0 || dce > 0) && pp->error != 0) 422 return (pp->error); 423 424 /* Ok then... */ 425 426 /* 427 * If we open first write, spoil any partner consumers. 428 * If we close last write, trigger re-taste. 429 */ 430 if (pp->acw == 0 && dcw != 0) 431 g_spoil(pp, cp); 432 else if (pp->acw != 0 && pp->acw == -dcw && !(pp->geom->flags & G_GEOM_WITHER)) 433 g_post_event(EV_NEW_PROVIDER, NULL, NULL, pp, NULL); 434 435 error = pp->geom->access(pp, dcr, dcw, dce); 436 if (!error) { 437 pp->acr += dcr; 438 pp->acw += dcw; 439 pp->ace += dce; 440 cp->acr += dcr; 441 cp->acw += dcw; 442 cp->ace += dce; 443 } 444 return (error); 445} 446 447int 448g_haveattr_int(struct bio *bp, char *attribute, int val) 449{ 450 451 return (g_haveattr(bp, attribute, &val, sizeof val)); 452} 453 454int 455g_haveattr_off_t(struct bio *bp, char *attribute, off_t val) 456{ 457 458 return (g_haveattr(bp, attribute, &val, sizeof val)); 459} 460 461 462int 463g_haveattr(struct bio *bp, char *attribute, void *val, int len) 464{ 465 int error; 466 467 if (strcmp(bp->bio_attribute, attribute)) 468 return (0); 469 if (bp->bio_length != len) { 470 printf("bio_length %lld len %d -> EFAULT\n", 471 (long long)bp->bio_length, len); 472 error = EFAULT; 473 } else { 474 error = 0; 475 bcopy(val, bp->bio_data, len); 476 bp->bio_completed = len; 477 } 478 bp->bio_error = error; 479 g_io_deliver(bp); 480 return (1); 481} 482 483int 
484g_std_access(struct g_provider *pp __unused, 485 int dr __unused, int dw __unused, int de __unused) 486{ 487 488 return (0); 489} 490 491void 492g_std_done(struct bio *bp) 493{ 494 struct bio *bp2; 495 496 bp2 = bp->bio_linkage; 497 bp2->bio_error = bp->bio_error; 498 bp2->bio_completed = bp->bio_completed; 499 g_destroy_bio(bp); 500 g_io_deliver(bp2); 501} 502 503/* XXX: maybe this is only g_slice_spoiled */ 504 505void 506g_std_spoiled(struct g_consumer *cp) 507{ 508 struct g_geom *gp; 509 struct g_provider *pp; 510 511 g_trace(G_T_TOPOLOGY, "g_std_spoiled(%p)", cp); 512 g_topology_assert(); 513 g_dettach(cp); 514 gp = cp->geom; 515 LIST_FOREACH(pp, &gp->provider, provider) 516 g_orphan_provider(pp, ENXIO); 517 g_destroy_consumer(cp); 518 if (LIST_EMPTY(&gp->provider) && LIST_EMPTY(&gp->consumer)) 519 g_destroy_geom(gp); 520 else 521 gp->flags |= G_GEOM_WITHER; 522} 523 524/* 525 * Spoiling happens when a provider is opened for writing, but consumers 526 * which are configured by in-band data are attached (slicers for instance). 527 * Since the write might potentially change the in-band data, such consumers 528 * need to re-evaluate their existence after the writing session closes. 529 * We do this by (offering to) tear them down when the open for write happens 530 * in return for a re-taste when it closes again. 531 * Together with the fact that such consumers grab an 'e' bit whenever they 532 * are open, regardless of mode, this ends up DTRT. 
533 */ 534 535void 536g_spoil(struct g_provider *pp, struct g_consumer *cp) 537{ 538 struct g_consumer *cp2; 539 540 g_topology_assert(); 541 542 LIST_FOREACH(cp2, &pp->consumers, consumers) { 543 if (cp2 == cp) 544 continue; 545/* 546 KASSERT(cp2->acr == 0, ("spoiling cp->acr = %d", cp2->acr)); 547 KASSERT(cp2->acw == 0, ("spoiling cp->acw = %d", cp2->acw)); 548*/ 549 KASSERT(cp2->ace == 0, ("spoiling cp->ace = %d", cp2->ace)); 550 cp2->spoiled++; 551 } 552 g_post_event(EV_SPOILED, NULL, NULL, pp, cp); 553} 554 555static struct g_class * 556g_class_by_name(char *name) 557{ 558 struct g_class *mp; 559 560 g_trace(G_T_TOPOLOGY, "g_class_by_name(%s)", name); 561 g_topology_assert(); 562 LIST_FOREACH(mp, &g_classes, class) 563 if (!strcmp(mp->name, name)) 564 return (mp); 565 return (NULL); 566} 567 568struct g_geom * 569g_create_geomf(char *class, struct g_provider *pp, char *fmt, ...) 570{ 571 va_list ap; 572 struct sbuf *sb; 573 char *s; 574 struct g_class *mp; 575 struct g_geom *gp; 576 577 g_trace(G_T_TOPOLOGY, "g_create_geom(%s, %p(%s))", class, 578 pp, pp == NULL ? 
"" : pp->name); 579 g_topology_assert(); 580 gp = NULL; 581 mp = g_class_by_name(class); 582 if (mp == NULL) 583 return (NULL); 584 if (fmt != NULL) { 585 va_start(ap, fmt); 586 mtx_lock(&Giant); 587 sb = sbuf_new(NULL, NULL, 0, SBUF_AUTOEXTEND); 588 sbuf_vprintf(sb, fmt, ap); 589 sbuf_finish(sb); 590 mtx_unlock(&Giant); 591 s = sbuf_data(sb); 592 } else { 593 s = NULL; 594 } 595 if (pp != NULL) 596 gp = mp->taste(mp, pp, G_TF_INSIST); 597 if (gp == NULL && mp->create_geom == NULL) 598 return (NULL); 599 if (gp == NULL) 600 gp = mp->create_geom(mp, pp, s); 601 /* XXX: delete sbuf */ 602 return (gp); 603} 604 605struct g_geom * 606g_insert_geom(char *class, struct g_consumer *cp) 607{ 608 struct g_class *mp; 609 struct g_geom *gp; 610 struct g_provider *pp, *pp2; 611 struct g_consumer *cp2; 612 int error; 613 614 g_trace(G_T_TOPOLOGY, "g_insert_geomf(%s, %p)", class, cp); 615 g_topology_assert(); 616 KASSERT(cp->provider != NULL, ("g_insert_geomf but not attached")); 617 /* XXX: check for events ?? */ 618 mp = g_class_by_name(class); 619 if (mp == NULL) 620 return (NULL); 621 if (mp->create_geom == NULL) 622 return (NULL); 623 pp = cp->provider; 624 gp = mp->taste(mp, pp, G_TF_TRANSPARENT); 625 if (gp == NULL) 626 return (NULL); 627 pp2 = LIST_FIRST(&gp->provider); 628 cp2 = LIST_FIRST(&gp->consumer); 629 cp2->acr += pp->acr; 630 cp2->acw += pp->acw; 631 cp2->ace += pp->ace; 632 pp2->acr += pp->acr; 633 pp2->acw += pp->acw; 634 pp2->ace += pp->ace; 635 LIST_REMOVE(cp, consumers); 636 LIST_INSERT_HEAD(&pp2->consumers, cp, consumers); 637 cp->provider = pp2; 638 error = redo_rank(gp); 639 KASSERT(error == 0, ("redo_rank failed in g_insert_geom")); 640 return (gp); 641} 642 643int 644g_getattr__(const char *attr, struct g_consumer *cp, void *var, int len) 645{ 646 int error, i; 647 648 i = len; 649 error = g_io_getattr(attr, cp, &i, var); 650 if (error) 651 return (error); 652 if (i != len) 653 return (EINVAL); 654 return (0); 655} 656