geom_subr.c revision 93774
1/*- 2 * Copyright (c) 2002 Poul-Henning Kamp 3 * Copyright (c) 2002 Networks Associates Technology, Inc. 4 * All rights reserved. 5 * 6 * This software was developed for the FreeBSD Project by Poul-Henning Kamp 7 * and NAI Labs, the Security Research Division of Network Associates, Inc. 8 * under DARPA/SPAWAR contract N66001-01-C-8035 ("CBOSS"), as part of the 9 * DARPA CHATS research program. 10 * 11 * Redistribution and use in source and binary forms, with or without 12 * modification, are permitted provided that the following conditions 13 * are met: 14 * 1. Redistributions of source code must retain the above copyright 15 * notice, this list of conditions and the following disclaimer. 16 * 2. Redistributions in binary form must reproduce the above copyright 17 * notice, this list of conditions and the following disclaimer in the 18 * documentation and/or other materials provided with the distribution. 19 * 3. The names of the authors may not be used to endorse or promote 20 * products derived from this software without specific prior written 21 * permission. 22 * 23 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 24 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 25 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 26 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 27 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 28 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 29 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 30 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 31 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 32 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 33 * SUCH DAMAGE. 34 * 35 * $FreeBSD: head/sys/geom/geom_subr.c 93774 2002-04-04 09:41:47Z phk $ 36 */ 37 38 39#include <sys/param.h> 40#ifndef _KERNEL 41#include <stdio.h> 42#include <unistd.h> 43#include <stdlib.h> 44#include <signal.h> 45#include <string.h> 46#include <err.h> 47#else 48#include <sys/systm.h> 49#include <sys/kernel.h> 50#include <sys/malloc.h> 51#include <sys/bio.h> 52#include <sys/sysctl.h> 53#include <sys/proc.h> 54#include <sys/kthread.h> 55#include <sys/lock.h> 56#include <sys/mutex.h> 57#endif 58#include <sys/errno.h> 59#include <sys/sbuf.h> 60#include <geom/geom.h> 61#include <geom/geom_int.h> 62#include <machine/stdarg.h> 63 64struct class_list_head g_classes = LIST_HEAD_INITIALIZER(g_classes); 65static struct g_tailq_head geoms = TAILQ_HEAD_INITIALIZER(geoms); 66static int g_nproviders; 67char *g_wait_event, *g_wait_up, *g_wait_down, *g_wait_sim; 68 69static int g_ignition; 70 71void 72g_add_class(struct g_class *mp) 73{ 74 75 if (!g_ignition) { 76 g_ignition++; 77 g_init(); 78 } 79 g_topology_lock(); 80 g_trace(G_T_TOPOLOGY, "g_add_class(%s)", mp->name); 81 LIST_INIT(&mp->geom); 82 LIST_INSERT_HEAD(&g_classes, mp, class); 83 if (g_nproviders > 0) 84 g_post_event(EV_NEW_CLASS, mp, NULL, NULL, NULL); 85 g_topology_unlock(); 86} 87 88struct g_geom * 89g_new_geomf(struct g_class *mp, char *fmt, ...) 90{ 91 struct g_geom *gp; 92 va_list ap; 93 struct sbuf *sb; 94 95 g_topology_assert(); 96 va_start(ap, fmt); 97 mtx_lock(&Giant); 98 sb = sbuf_new(NULL, NULL, 0, SBUF_AUTOEXTEND); 99 sbuf_vprintf(sb, fmt, ap); 100 sbuf_finish(sb); 101 mtx_unlock(&Giant); 102 gp = g_malloc(sizeof *gp + sbuf_len(sb) + 1, M_WAITOK | M_ZERO); 103 gp->name = (char *)(gp + 1); 104 gp->class = mp; 105 gp->rank = 1; 106 LIST_INIT(&gp->consumer); 107 LIST_INIT(&gp->provider); 108 LIST_INSERT_HEAD(&mp->geom, gp, geom); 109 TAILQ_INSERT_HEAD(&geoms, gp, geoms); 110 strcpy(gp->name, sbuf_data(sb)); 111 sbuf_delete(sb); 112 return (gp); 113} 114 115void 116g_destroy_geom(struct g_geom *gp) 117{ 118 119 g_trace(G_T_TOPOLOGY, "g_destroy_geom(%p(%s))", gp, gp->name); 120 g_topology_assert(); 121 KASSERT(gp->event == NULL, ("g_destroy_geom() with event")); 122 KASSERT(LIST_EMPTY(&gp->consumer), 123 ("g_destroy_geom(%s) with consumer(s) [%p]", 124 gp->name, LIST_FIRST(&gp->consumer))); 125 KASSERT(LIST_EMPTY(&gp->provider), 126 ("g_destroy_geom(%s) with provider(s) [%p]", 127 gp->name, LIST_FIRST(&gp->consumer))); 128 LIST_REMOVE(gp, geom); 129 TAILQ_REMOVE(&geoms, gp, geoms); 130 g_free(gp); 131} 132 133struct g_consumer * 134g_new_consumer(struct g_geom *gp) 135{ 136 struct g_consumer *cp; 137 138 g_topology_assert(); 139 KASSERT(gp->class->orphan != NULL, 140 ("g_new_consumer on class(%s) without orphan", gp->class->name)); 141 142 cp = g_malloc(sizeof *cp, M_WAITOK | M_ZERO); 143 cp->geom = gp; 144 LIST_INSERT_HEAD(&gp->consumer, cp, consumer); 145 return(cp); 146} 147 148void 149g_destroy_consumer(struct g_consumer *cp) 150{ 151 152 g_trace(G_T_TOPOLOGY, "g_destroy_consumer(%p)", cp); 153 g_topology_assert(); 154 KASSERT(cp->event == NULL, ("g_destroy_consumer() with event")); 155 KASSERT (cp->provider == NULL, ("g_destroy_consumer but attached")); 156 KASSERT (cp->acr == 0, ("g_destroy_consumer with acr")); 157 KASSERT (cp->acw == 0, ("g_destroy_consumer with acw")); 158 KASSERT (cp->ace == 0, ("g_destroy_consumer with ace")); 159 LIST_REMOVE(cp, consumer); 160 g_free(cp); 161} 162 163struct g_provider * 164g_new_providerf(struct g_geom *gp, char *fmt, ...) 165{ 166 struct g_provider *pp; 167 struct sbuf *sb; 168 va_list ap; 169 170 g_topology_assert(); 171 va_start(ap, fmt); 172 mtx_lock(&Giant); 173 sb = sbuf_new(NULL, NULL, 0, SBUF_AUTOEXTEND); 174 sbuf_vprintf(sb, fmt, ap); 175 sbuf_finish(sb); 176 mtx_unlock(&Giant); 177 pp = g_malloc(sizeof *pp + sbuf_len(sb) + 1, M_WAITOK | M_ZERO); 178 pp->name = (char *)(pp + 1); 179 strcpy(pp->name, sbuf_data(sb)); 180 sbuf_delete(sb); 181 LIST_INIT(&pp->consumers); 182 pp->error = ENXIO; 183 pp->geom = gp; 184 LIST_INSERT_HEAD(&gp->provider, pp, provider); 185 g_nproviders++; 186 g_post_event(EV_NEW_PROVIDER, NULL, NULL, pp, NULL); 187 return (pp); 188} 189 190void 191g_error_provider(struct g_provider *pp, int error) 192{ 193 194 pp->error = error; 195} 196 197 198void 199g_destroy_provider(struct g_provider *pp) 200{ 201 struct g_geom *gp; 202 struct g_consumer *cp; 203 204 g_topology_assert(); 205 KASSERT(pp->event == NULL, ("g_destroy_provider() with event")); 206 KASSERT(LIST_EMPTY(&pp->consumers), 207 ("g_destroy_provider but attached")); 208 KASSERT (pp->acr == 0, ("g_destroy_provider with acr")); 209 KASSERT (pp->acw == 0, ("g_destroy_provider with acw")); 210 KASSERT (pp->acw == 0, ("g_destroy_provider with ace")); 211 g_nproviders--; 212 LIST_REMOVE(pp, provider); 213 gp = pp->geom; 214 g_free(pp); 215 if (!(gp->flags & G_GEOM_WITHER)) 216 return; 217 if (!LIST_EMPTY(&gp->provider)) 218 return; 219 for (;;) { 220 cp = LIST_FIRST(&gp->consumer); 221 if (cp == NULL) 222 break; 223 g_dettach(cp); 224 g_destroy_consumer(cp); 225 } 226 g_destroy_geom(gp); 227} 228 229/* 230 * We keep the "geoms" list sorted by topological order (== increasing 231 * numerical rank) at all times. 232 * When an attach is done, the attaching geoms rank is invalidated 233 * and it is moved to the tail of the list. 234 * All geoms later in the sequence has their ranks reevaluated in 235 * sequence. If we cannot assign rank to a geom because it's 236 * prerequisites do not have rank, we move that element to the tail 237 * of the sequence with invalid rank as well. 238 * At some point we encounter our original geom and if we stil fail 239 * to assign it a rank, there must be a loop and we fail back to 240 * g_attach() which dettach again and calls redo_rank again 241 * to fix up the damage. 242 * It would be much simpler code wise to do it recursively, but we 243 * can't risk that on the kernel stack. 244 */ 245 246static int 247redo_rank(struct g_geom *gp) 248{ 249 struct g_consumer *cp; 250 struct g_geom *gp1, *gp2; 251 int n, m; 252 253 g_topology_assert(); 254 255 /* Invalidate this geoms rank and move it to the tail */ 256 gp1 = TAILQ_NEXT(gp, geoms); 257 if (gp1 != NULL) { 258 gp->rank = 0; 259 TAILQ_REMOVE(&geoms, gp, geoms); 260 TAILQ_INSERT_TAIL(&geoms, gp, geoms); 261 } else { 262 gp1 = gp; 263 } 264 265 /* re-rank the rest of the sequence */ 266 for (; gp1 != NULL; gp1 = gp2) { 267 gp1->rank = 0; 268 m = 1; 269 LIST_FOREACH(cp, &gp1->consumer, consumer) { 270 if (cp->provider == NULL) 271 continue; 272 n = cp->provider->geom->rank; 273 if (n == 0) { 274 m = 0; 275 break; 276 } else if (n >= m) 277 m = n + 1; 278 } 279 gp1->rank = m; 280 gp2 = TAILQ_NEXT(gp1, geoms); 281 282 /* got a rank, moving on */ 283 if (m != 0) 284 continue; 285 286 /* no rank to original geom means loop */ 287 if (gp == gp1) { 288 return (ELOOP); 289 290 /* no rank, put it at the end move on */ 291 TAILQ_REMOVE(&geoms, gp1, geoms); 292 TAILQ_INSERT_TAIL(&geoms, gp1, geoms); 293 } 294 } 295 return (0); 296} 297 298int 299g_attach(struct g_consumer *cp, struct g_provider *pp) 300{ 301 int error; 302 303 g_topology_assert(); 304 KASSERT(cp->provider == NULL, ("attach but attached")); 305 cp->provider = pp; 306 LIST_INSERT_HEAD(&pp->consumers, cp, consumers); 307 error = redo_rank(cp->geom); 308 if (error) { 309 LIST_REMOVE(cp, consumers); 310 cp->provider = NULL; 311 redo_rank(cp->geom); 312 } 313 return (error); 314} 315 316void 317g_dettach(struct g_consumer *cp) 318{ 319 struct g_provider *pp; 320 321 g_trace(G_T_TOPOLOGY, "g_dettach(%p)", cp); 322 KASSERT(cp != (void*)0xd0d0d0d0, ("ARGH!")); 323 g_topology_assert(); 324 KASSERT(cp->provider != NULL, ("dettach but not attached")); 325 KASSERT(cp->acr == 0, ("dettach but nonzero acr")); 326 KASSERT(cp->acw == 0, ("dettach but nonzero acw")); 327 KASSERT(cp->ace == 0, ("dettach but nonzero ace")); 328 KASSERT(cp->biocount == 0, ("dettach but nonzero biocount")); 329 pp = cp->provider; 330 LIST_REMOVE(cp, consumers); 331 cp->provider = NULL; 332 if (LIST_EMPTY(&pp->consumers)) { 333 if (pp->geom->flags & G_GEOM_WITHER) 334 g_destroy_provider(pp); 335 } 336 redo_rank(cp->geom); 337} 338 339 340/* 341 * g_access_abs() 342 * 343 * Access-check with absolute new values: Just fall through 344 * and use the relative version. 345 */ 346int 347g_access_abs(struct g_consumer *cp, int acr, int acw, int ace) 348{ 349 350 g_topology_assert(); 351 return(g_access_rel(cp, 352 acr - cp->acr, 353 acw - cp->acw, 354 ace - cp->ace)); 355} 356 357/* 358 * g_access_rel() 359 * 360 * Access-check with delta values. The question asked is "can provider 361 * "cp" change the access counters by the relative amounts dc[rwe] ?" 362 */ 363 364int 365g_access_rel(struct g_consumer *cp, int dcr, int dcw, int dce) 366{ 367 struct g_provider *pp; 368 int pr,pw,pe; 369 int error; 370 371 pp = cp->provider; 372 373 g_trace(G_T_ACCESS, "g_access_rel(%p(%s), %d, %d, %d)", 374 cp, pp->name, dcr, dcw, dce); 375 376 g_topology_assert(); 377 KASSERT(cp->provider != NULL, ("access but not attached")); 378 KASSERT(cp->acr + dcr >= 0, ("access resulting in negative acr")); 379 KASSERT(cp->acw + dcw >= 0, ("access resulting in negative acw")); 380 KASSERT(cp->ace + dce >= 0, ("access resulting in negative ace")); 381 KASSERT(pp->geom->class->access != NULL, ("NULL class->access")); 382 383 /* 384 * If our class cares about being spoiled, and we have been, we 385 * are probably just ahead of the event telling us that. Fail 386 * now rather than having to unravel this later. 387 */ 388 if (cp->geom->spoiled != NULL && cp->spoiled) { 389 KASSERT(dcr >= 0, ("spoiled but dcr = %d", dcr)); 390 KASSERT(dcw >= 0, ("spoiled but dce = %d", dcw)); 391 KASSERT(dce >= 0, ("spoiled but dcw = %d", dce)); 392 KASSERT(cp->acr == 0, ("spoiled but cp->acr = %d", cp->acr)); 393 KASSERT(cp->acw == 0, ("spoiled but cp->acw = %d", cp->acw)); 394 KASSERT(cp->ace == 0, ("spoiled but cp->ace = %d", cp->ace)); 395 return(ENXIO); 396 } 397 398 /* 399 * Figure out what counts the provider would have had, if this 400 * consumer had (r0w0e0) at this time. 401 */ 402 pr = pp->acr - cp->acr; 403 pw = pp->acw - cp->acw; 404 pe = pp->ace - cp->ace; 405 406 g_trace(G_T_ACCESS, 407 "open delta:[r%dw%de%d] old:[r%dw%de%d] provider:[r%dw%de%d] %p(%s)", 408 dcr, dcw, dce, 409 cp->acr, cp->acw, cp->ace, 410 pp->acr, pp->acw, pp->ace, 411 pp, pp->name); 412 413 /* If we try exclusive but already write: fail */ 414 if (dce > 0 && pw > 0) 415 return (EPERM); 416 /* If we try write but already exclusive: fail */ 417 if (dcw > 0 && pe > 0) 418 return (EPERM); 419 /* If we try to open more but provider is error'ed: fail */ 420 if ((dcr > 0 || dcw > 0 || dce > 0) && pp->error != 0) 421 return (pp->error); 422 423 /* Ok then... */ 424 425 /* 426 * If we open first write, spoil any partner consumers. 427 * If we close last write, trigger re-taste. 428 */ 429 if (pp->acw == 0 && dcw != 0) 430 g_spoil(pp, cp); 431 else if (pp->acw != 0 && pp->acw == -dcw && !(pp->geom->flags & G_GEOM_WITHER)) 432 g_post_event(EV_NEW_PROVIDER, NULL, NULL, pp, NULL); 433 434 error = pp->geom->class->access(pp, dcr, dcw, dce); 435 if (!error) { 436 pp->acr += dcr; 437 pp->acw += dcw; 438 pp->ace += dce; 439 cp->acr += dcr; 440 cp->acw += dcw; 441 cp->ace += dce; 442 } 443 return (error); 444} 445 446int 447g_haveattr_int(struct bio *bp, char *attribute, int val) 448{ 449 450 return (g_haveattr(bp, attribute, &val, sizeof val)); 451} 452 453int 454g_haveattr_off_t(struct bio *bp, char *attribute, off_t val) 455{ 456 457 return (g_haveattr(bp, attribute, &val, sizeof val)); 458} 459 460 461int 462g_haveattr(struct bio *bp, char *attribute, void *val, int len) 463{ 464 int error; 465 466 if (strcmp(bp->bio_attribute, attribute)) 467 return (0); 468 if (bp->bio_length != len) { 469 printf("bio_length %lld len %d -> EFAULT\n", 470 (long long)bp->bio_length, len); 471 error = EFAULT; 472 } else { 473 error = 0; 474 bcopy(val, bp->bio_data, len); 475 bp->bio_completed = len; 476 } 477 bp->bio_error = error; 478 g_io_deliver(bp); 479 return (1); 480} 481 482int 483g_std_access(struct g_provider *pp __unused, 484 int dr __unused, int dw __unused, int de __unused) 485{ 486 487 return (0); 488} 489 490void 491g_std_done(struct bio *bp) 492{ 493 struct bio *bp2; 494 495 bp2 = bp->bio_linkage; 496 bp2->bio_error = bp->bio_error; 497 bp2->bio_completed = bp->bio_completed; 498 g_destroy_bio(bp); 499 g_io_deliver(bp2); 500} 501 502/* XXX: maybe this is only g_slice_spoiled */ 503 504void 505g_std_spoiled(struct g_consumer *cp) 506{ 507 struct g_geom *gp; 508 struct g_provider *pp; 509 510 g_trace(G_T_TOPOLOGY, "g_std_spoiled(%p)", cp); 511 g_topology_assert(); 512 g_dettach(cp); 513 gp = cp->geom; 514 LIST_FOREACH(pp, &gp->provider, provider) 515 g_orphan_provider(pp, ENXIO); 516 g_destroy_consumer(cp); 517 if (LIST_EMPTY(&gp->provider) && LIST_EMPTY(&gp->consumer)) 518 g_destroy_geom(gp); 519 else 520 gp->flags |= G_GEOM_WITHER; 521} 522 523/* 524 * Spoiling happens when a provider is opened for writing, but consumers 525 * which are configured by in-band data are attached (slicers for instance). 526 * Since the write might potentially change the in-band data, such consumers 527 * need to re-evaluate their existence after the writing session closes. 528 * We do this by (offering to) tear them down when the open for write happens 529 * in return for a re-taste when it closes again. 530 * Together with the fact that such consumers grab an 'e' bit whenever they 531 * are open, regardless of mode, this ends up DTRT. 532 */ 533 534void 535g_spoil(struct g_provider *pp, struct g_consumer *cp) 536{ 537 struct g_consumer *cp2; 538 539 g_topology_assert(); 540 541 LIST_FOREACH(cp2, &pp->consumers, consumers) { 542 if (cp2 == cp) 543 continue; 544/* 545 KASSERT(cp2->acr == 0, ("spoiling cp->acr = %d", cp2->acr)); 546 KASSERT(cp2->acw == 0, ("spoiling cp->acw = %d", cp2->acw)); 547*/ 548 KASSERT(cp2->ace == 0, ("spoiling cp->ace = %d", cp2->ace)); 549 cp2->spoiled++; 550 } 551 g_post_event(EV_SPOILED, NULL, NULL, pp, cp); 552} 553 554static struct g_class * 555g_class_by_name(char *name) 556{ 557 struct g_class *mp; 558 559 g_trace(G_T_TOPOLOGY, "g_class_by_name(%s)", name); 560 g_topology_assert(); 561 LIST_FOREACH(mp, &g_classes, class) 562 if (!strcmp(mp->name, name)) 563 return (mp); 564 return (NULL); 565} 566 567struct g_geom * 568g_create_geomf(char *class, struct g_provider *pp, char *fmt, ...) 569{ 570 va_list ap; 571 struct sbuf *sb; 572 char *s; 573 struct g_class *mp; 574 struct g_geom *gp; 575 576 g_trace(G_T_TOPOLOGY, "g_create_geom(%s, %p(%s))", class, 577 pp, pp == NULL ? "" : pp->name); 578 g_topology_assert(); 579 gp = NULL; 580 mp = g_class_by_name(class); 581 if (mp == NULL) 582 return (NULL); 583 if (fmt != NULL) { 584 va_start(ap, fmt); 585 mtx_lock(&Giant); 586 sb = sbuf_new(NULL, NULL, 0, SBUF_AUTOEXTEND); 587 sbuf_vprintf(sb, fmt, ap); 588 sbuf_finish(sb); 589 mtx_unlock(&Giant); 590 s = sbuf_data(sb); 591 } else { 592 s = NULL; 593 } 594 if (pp != NULL) 595 gp = mp->taste(mp, pp, G_TF_INSIST); 596 if (gp == NULL && mp->create_geom == NULL) 597 return (NULL); 598 if (gp == NULL) 599 gp = mp->create_geom(mp, pp, s); 600 /* XXX: delete sbuf */ 601 return (gp); 602} 603 604struct g_geom * 605g_insert_geom(char *class, struct g_consumer *cp) 606{ 607 struct g_class *mp; 608 struct g_geom *gp; 609 struct g_provider *pp, *pp2; 610 struct g_consumer *cp2; 611 int error; 612 613 g_trace(G_T_TOPOLOGY, "g_insert_geomf(%s, %p)", class, cp); 614 g_topology_assert(); 615 KASSERT(cp->provider != NULL, ("g_insert_geomf but not attached")); 616 /* XXX: check for events ?? */ 617 mp = g_class_by_name(class); 618 if (mp == NULL) 619 return (NULL); 620 if (mp->create_geom == NULL) 621 return (NULL); 622 pp = cp->provider; 623 gp = mp->taste(mp, pp, G_TF_TRANSPARENT); 624 if (gp == NULL) 625 return (NULL); 626 pp2 = LIST_FIRST(&gp->provider); 627 cp2 = LIST_FIRST(&gp->consumer); 628 cp2->acr += pp->acr; 629 cp2->acw += pp->acw; 630 cp2->ace += pp->ace; 631 pp2->acr += pp->acr; 632 pp2->acw += pp->acw; 633 pp2->ace += pp->ace; 634 LIST_REMOVE(cp, consumers); 635 LIST_INSERT_HEAD(&pp2->consumers, cp, consumers); 636 cp->provider = pp2; 637 error = redo_rank(gp); 638 KASSERT(error == 0, ("redo_rank failed in g_insert_geom")); 639 return (gp); 640} 641 642