/* geom_bsd.c revision 139778 */
1/*- 2 * Copyright (c) 2002 Poul-Henning Kamp 3 * Copyright (c) 2002 Networks Associates Technology, Inc. 4 * All rights reserved. 5 * 6 * This software was developed for the FreeBSD Project by Poul-Henning Kamp 7 * and NAI Labs, the Security Research Division of Network Associates, Inc. 8 * under DARPA/SPAWAR contract N66001-01-C-8035 ("CBOSS"), as part of the 9 * DARPA CHATS research program. 10 * 11 * Redistribution and use in source and binary forms, with or without 12 * modification, are permitted provided that the following conditions 13 * are met: 14 * 1. Redistributions of source code must retain the above copyright 15 * notice, this list of conditions and the following disclaimer. 16 * 2. Redistributions in binary form must reproduce the above copyright 17 * notice, this list of conditions and the following disclaimer in the 18 * documentation and/or other materials provided with the distribution. 19 * 3. The names of the authors may not be used to endorse or promote 20 * products derived from this software without specific prior written 21 * permission. 22 * 23 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 24 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 25 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 26 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 27 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 28 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 29 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 30 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 31 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 32 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 33 * SUCH DAMAGE. 34 */ 35 36/* 37 * This is the method for dealing with BSD disklabels. 
It has been 38 * extensively (by my standards at least) commented, in the vain hope that 39 * it will serve as the source in future copy&paste operations. 40 */ 41 42#include <sys/cdefs.h> 43__FBSDID("$FreeBSD: head/sys/geom/geom_bsd.c 139778 2005-01-06 18:27:30Z imp $"); 44 45#include <sys/param.h> 46#include <sys/endian.h> 47#include <sys/systm.h> 48#include <sys/kernel.h> 49#include <sys/fcntl.h> 50#include <sys/conf.h> 51#include <sys/bio.h> 52#include <sys/malloc.h> 53#include <sys/lock.h> 54#include <sys/mutex.h> 55#include <sys/md5.h> 56#include <sys/errno.h> 57#include <sys/disklabel.h> 58#include <geom/geom.h> 59#include <geom/geom_slice.h> 60 61#define BSD_CLASS_NAME "BSD" 62 63#define ALPHA_LABEL_OFFSET 64 64 65#define LABELSIZE (148 + 16 * MAXPARTITIONS) 66 67static void g_bsd_hotwrite(void *arg, int flag); 68/* 69 * Our private data about one instance. All the rest is handled by the 70 * slice code and stored in its softc, so this is just the stuff 71 * specific to BSD disklabels. 72 */ 73struct g_bsd_softc { 74 off_t labeloffset; 75 off_t mbroffset; 76 off_t rawoffset; 77 struct disklabel ondisk; 78 u_char label[LABELSIZE]; 79 u_char labelsum[16]; 80}; 81 82/* 83 * Modify our slicer to match proposed disklabel, if possible. 84 * This is where we make sure we don't do something stupid. 85 */ 86static int 87g_bsd_modify(struct g_geom *gp, u_char *label) 88{ 89 int i, error; 90 struct partition *ppp; 91 struct g_slicer *gsp; 92 struct g_consumer *cp; 93 struct g_bsd_softc *ms; 94 u_int secsize, u; 95 off_t rawoffset, o; 96 struct disklabel dl; 97 MD5_CTX md5sum; 98 99 g_topology_assert(); 100 gsp = gp->softc; 101 ms = gsp->softc; 102 103 error = bsd_disklabel_le_dec(label, &dl, MAXPARTITIONS); 104 if (error) { 105 return (error); 106 } 107 108 /* Get dimensions of our device. */ 109 cp = LIST_FIRST(&gp->consumer); 110 secsize = cp->provider->sectorsize; 111 112 /* ... or a smaller sector size. 
*/ 113 if (dl.d_secsize < secsize) { 114 return (EINVAL); 115 } 116 117 /* ... or a non-multiple sector size. */ 118 if (dl.d_secsize % secsize != 0) { 119 return (EINVAL); 120 } 121 122 /* Historical braindamage... */ 123 rawoffset = (off_t)dl.d_partitions[RAW_PART].p_offset * dl.d_secsize; 124 125 for (i = 0; i < dl.d_npartitions; i++) { 126 ppp = &dl.d_partitions[i]; 127 if (ppp->p_size == 0) 128 continue; 129 o = (off_t)ppp->p_offset * dl.d_secsize; 130 131 if (o < rawoffset) 132 rawoffset = 0; 133 } 134 135 if (rawoffset != 0 && (off_t)rawoffset != ms->mbroffset) 136 printf("WARNING: Expected rawoffset %jd, found %jd\n", 137 (intmax_t)ms->mbroffset/dl.d_secsize, 138 (intmax_t)rawoffset/dl.d_secsize); 139 140 /* Don't munge open partitions. */ 141 for (i = 0; i < dl.d_npartitions; i++) { 142 ppp = &dl.d_partitions[i]; 143 144 o = (off_t)ppp->p_offset * dl.d_secsize; 145 if (o == 0) 146 o = rawoffset; 147 error = g_slice_config(gp, i, G_SLICE_CONFIG_CHECK, 148 o - rawoffset, 149 (off_t)ppp->p_size * dl.d_secsize, 150 dl.d_secsize, 151 "%s%c", gp->name, 'a' + i); 152 if (error) 153 return (error); 154 } 155 156 /* Look good, go for it... */ 157 for (u = 0; u < gsp->nslice; u++) { 158 ppp = &dl.d_partitions[u]; 159 o = (off_t)ppp->p_offset * dl.d_secsize; 160 if (o == 0) 161 o = rawoffset; 162 g_slice_config(gp, u, G_SLICE_CONFIG_SET, 163 o - rawoffset, 164 (off_t)ppp->p_size * dl.d_secsize, 165 dl.d_secsize, 166 "%s%c", gp->name, 'a' + u); 167 } 168 169 /* Update our softc */ 170 ms->ondisk = dl; 171 if (label != ms->label) 172 bcopy(label, ms->label, LABELSIZE); 173 ms->rawoffset = rawoffset; 174 175 /* 176 * In order to avoid recursively attaching to the same 177 * on-disk label (it's usually visible through the 'c' 178 * partition) we calculate an MD5 and ask if other BSD's 179 * below us love that label. If they do, we don't. 
180 */ 181 MD5Init(&md5sum); 182 MD5Update(&md5sum, ms->label, sizeof(ms->label)); 183 MD5Final(ms->labelsum, &md5sum); 184 185 return (0); 186} 187 188/* 189 * This is an internal helper function, called multiple times from the taste 190 * function to try to locate a disklabel on the disk. More civilized formats 191 * will not need this, as there is only one possible place on disk to look 192 * for the magic spot. 193 */ 194 195static int 196g_bsd_try(struct g_geom *gp, struct g_slicer *gsp, struct g_consumer *cp, int secsize, struct g_bsd_softc *ms, off_t offset) 197{ 198 int error; 199 u_char *buf; 200 struct disklabel *dl; 201 off_t secoff; 202 203 /* 204 * We need to read entire aligned sectors, and we assume that the 205 * disklabel does not span sectors, so one sector is enough. 206 */ 207 error = 0; 208 secoff = offset % secsize; 209 buf = g_read_data(cp, offset - secoff, secsize, &error); 210 if (buf == NULL || error != 0) 211 return (ENOENT); 212 213 /* Decode into our native format. */ 214 dl = &ms->ondisk; 215 error = bsd_disklabel_le_dec(buf + secoff, dl, MAXPARTITIONS); 216 if (!error) 217 bcopy(buf + secoff, ms->label, LABELSIZE); 218 219 /* Remember to free the buffer g_read_data() gave us. */ 220 g_free(buf); 221 222 ms->labeloffset = offset; 223 return (error); 224} 225 226/* 227 * This function writes the current label to disk, possibly updating 228 * the alpha SRM checksum. 229 */ 230 231static int 232g_bsd_writelabel(struct g_geom *gp, u_char *bootcode) 233{ 234 off_t secoff; 235 u_int secsize; 236 struct g_consumer *cp; 237 struct g_slicer *gsp; 238 struct g_bsd_softc *ms; 239 u_char *buf; 240 uint64_t sum; 241 int error, i; 242 243 gsp = gp->softc; 244 ms = gsp->softc; 245 cp = LIST_FIRST(&gp->consumer); 246 /* Get sector size, we need it to read data. 
*/ 247 secsize = cp->provider->sectorsize; 248 secoff = ms->labeloffset % secsize; 249 if (bootcode == NULL) { 250 buf = g_read_data(cp, ms->labeloffset - secoff, secsize, &error); 251 if (buf == NULL || error != 0) 252 return (error); 253 bcopy(ms->label, buf + secoff, sizeof(ms->label)); 254 } else { 255 buf = bootcode; 256 bcopy(ms->label, buf + ms->labeloffset, sizeof(ms->label)); 257 } 258 if (ms->labeloffset == ALPHA_LABEL_OFFSET) { 259 sum = 0; 260 for (i = 0; i < 63; i++) 261 sum += le64dec(buf + i * 8); 262 le64enc(buf + 504, sum); 263 } 264 if (bootcode == NULL) { 265 error = g_write_data(cp, ms->labeloffset - secoff, buf, secsize); 266 g_free(buf); 267 } else { 268 error = g_write_data(cp, 0, bootcode, BBSIZE); 269 } 270 return(error); 271} 272 273/* 274 * If the user tries to overwrite our disklabel through an open partition 275 * or via a magicwrite config call, we end up here and try to prevent 276 * footshooting as best we can. 277 */ 278static void 279g_bsd_hotwrite(void *arg, int flag) 280{ 281 struct bio *bp; 282 struct g_geom *gp; 283 struct g_slicer *gsp; 284 struct g_slice *gsl; 285 struct g_bsd_softc *ms; 286 u_char *p; 287 int error; 288 289 g_topology_assert(); 290 /* 291 * We should never get canceled, because that would amount to a removal 292 * of the geom while there was outstanding I/O requests. 293 */ 294 KASSERT(flag != EV_CANCEL, ("g_bsd_hotwrite cancelled")); 295 bp = arg; 296 gp = bp->bio_to->geom; 297 gsp = gp->softc; 298 ms = gsp->softc; 299 gsl = &gsp->slices[bp->bio_to->index]; 300 p = (u_char*)bp->bio_data + ms->labeloffset 301 - (bp->bio_offset + gsl->offset); 302 error = g_bsd_modify(gp, p); 303 if (error) { 304 g_io_deliver(bp, EPERM); 305 return; 306 } 307 g_slice_finish_hot(bp); 308} 309 310/*- 311 * This start routine is only called for non-trivial requests, all the 312 * trivial ones are handled autonomously by the slice code. 
313 * For requests we handle here, we must call the g_io_deliver() on the 314 * bio, and return non-zero to indicate to the slice code that we did so. 315 * This code executes in the "DOWN" I/O path, this means: 316 * * No sleeping. 317 * * Don't grab the topology lock. 318 * * Don't call biowait, g_getattr(), g_setattr() or g_read_data() 319 */ 320static int 321g_bsd_ioctl(struct g_provider *pp, u_long cmd, void *data, int fflag, struct thread *td) 322{ 323 struct g_geom *gp; 324 struct g_bsd_softc *ms; 325 struct g_slicer *gsp; 326 u_char *label; 327 int error; 328 329 gp = pp->geom; 330 gsp = gp->softc; 331 ms = gsp->softc; 332 333 switch(cmd) { 334 case DIOCGDINFO: 335 /* Return a copy of the disklabel to userland. */ 336 bsd_disklabel_le_dec(ms->label, data, MAXPARTITIONS); 337 return(0); 338 case DIOCBSDBB: { 339 struct g_consumer *cp; 340 u_char *buf; 341 void *p; 342 int error, i; 343 uint64_t sum; 344 345 if (!(fflag & FWRITE)) 346 return (EPERM); 347 /* The disklabel to set is the ioctl argument. */ 348 buf = g_malloc(BBSIZE, M_WAITOK); 349 p = *(void **)data; 350 error = copyin(p, buf, BBSIZE); 351 if (!error) { 352 /* XXX: Rude, but supposedly safe */ 353 DROP_GIANT(); 354 g_topology_lock(); 355 /* Validate and modify our slice instance to match. */ 356 error = g_bsd_modify(gp, buf + ms->labeloffset); 357 if (!error) { 358 cp = LIST_FIRST(&gp->consumer); 359 if (ms->labeloffset == ALPHA_LABEL_OFFSET) { 360 sum = 0; 361 for (i = 0; i < 63; i++) 362 sum += le64dec(buf + i * 8); 363 le64enc(buf + 504, sum); 364 } 365 error = g_write_data(cp, 0, buf, BBSIZE); 366 } 367 g_topology_unlock(); 368 PICKUP_GIANT(); 369 } 370 g_free(buf); 371 return (error); 372 } 373 case DIOCSDINFO: 374 case DIOCWDINFO: { 375 label = g_malloc(LABELSIZE, M_WAITOK); 376 377 if (!(fflag & FWRITE)) 378 return (EPERM); 379 /* The disklabel to set is the ioctl argument. 
*/ 380 bsd_disklabel_le_enc(label, data); 381 382 DROP_GIANT(); 383 g_topology_lock(); 384 /* Validate and modify our slice instance to match. */ 385 error = g_bsd_modify(gp, label); 386 if (error == 0 && cmd == DIOCWDINFO) 387 error = g_bsd_writelabel(gp, NULL); 388 g_topology_unlock(); 389 PICKUP_GIANT(); 390 g_free(label); 391 return(error); 392 } 393 default: 394 return (ENOIOCTL); 395 } 396} 397 398static int 399g_bsd_start(struct bio *bp) 400{ 401 struct g_geom *gp; 402 struct g_bsd_softc *ms; 403 struct g_slicer *gsp; 404 405 gp = bp->bio_to->geom; 406 gsp = gp->softc; 407 ms = gsp->softc; 408 if (bp->bio_cmd == BIO_GETATTR) { 409 if (g_handleattr(bp, "BSD::labelsum", ms->labelsum, 410 sizeof(ms->labelsum))) 411 return (1); 412 } 413 return (0); 414} 415 416/* 417 * Dump configuration information in XML format. 418 * Notice that the function is called once for the geom and once for each 419 * consumer and provider. We let g_slice_dumpconf() do most of the work. 420 */ 421static void 422g_bsd_dumpconf(struct sbuf *sb, const char *indent, struct g_geom *gp, struct g_consumer *cp, struct g_provider *pp) 423{ 424 struct g_bsd_softc *ms; 425 struct g_slicer *gsp; 426 427 gsp = gp->softc; 428 ms = gsp->softc; 429 g_slice_dumpconf(sb, indent, gp, cp, pp); 430 if (indent != NULL && pp == NULL && cp == NULL) { 431 sbuf_printf(sb, "%s<labeloffset>%jd</labeloffset>\n", 432 indent, (intmax_t)ms->labeloffset); 433 sbuf_printf(sb, "%s<rawoffset>%jd</rawoffset>\n", 434 indent, (intmax_t)ms->rawoffset); 435 sbuf_printf(sb, "%s<mbroffset>%jd</mbroffset>\n", 436 indent, (intmax_t)ms->mbroffset); 437 } else if (pp != NULL) { 438 if (indent == NULL) 439 sbuf_printf(sb, " ty %d", 440 ms->ondisk.d_partitions[pp->index].p_fstype); 441 else 442 sbuf_printf(sb, "%s<type>%d</type>\n", indent, 443 ms->ondisk.d_partitions[pp->index].p_fstype); 444 } 445} 446 447/* 448 * The taste function is called from the event-handler, with the topology 449 * lock already held and a provider to 
examine. The flags are unused. 450 * 451 * If flags == G_TF_NORMAL, the idea is to take a bite of the provider and 452 * if we find valid, consistent magic on it, build a geom on it. 453 * any magic bits which indicate that we should automatically put a BSD 454 * geom on it. 455 * 456 * There may be cases where the operator would like to put a BSD-geom on 457 * providers which do not meet all of the requirements. This can be done 458 * by instead passing the G_TF_INSIST flag, which will override these 459 * checks. 460 * 461 * The final flags value is G_TF_TRANSPARENT, which instructs the method 462 * to put a geom on top of the provider and configure it to be as transparent 463 * as possible. This is not really relevant to the BSD method and therefore 464 * not implemented here. 465 */ 466 467static struct g_geom * 468g_bsd_taste(struct g_class *mp, struct g_provider *pp, int flags) 469{ 470 struct g_geom *gp; 471 struct g_consumer *cp; 472 int error, i; 473 struct g_bsd_softc *ms; 474 u_int secsize; 475 struct g_slicer *gsp; 476 u_char hash[16]; 477 MD5_CTX md5sum; 478 479 g_trace(G_T_TOPOLOGY, "bsd_taste(%s,%s)", mp->name, pp->name); 480 g_topology_assert(); 481 482 /* We don't implement transparent inserts. */ 483 if (flags == G_TF_TRANSPARENT) 484 return (NULL); 485 486 /* 487 * BSD labels are a subclass of the general "slicing" topology so 488 * a lot of the work can be done by the common "slice" code. 489 * Create a geom with space for MAXPARTITIONS providers, one consumer 490 * and a softc structure for us. Specify the provider to attach 491 * the consumer to and our "start" routine for special requests. 492 * The provider is opened with mode (1,0,0) so we can do reads 493 * from it. 494 */ 495 gp = g_slice_new(mp, MAXPARTITIONS, pp, &cp, &ms, 496 sizeof(*ms), g_bsd_start); 497 if (gp == NULL) 498 return (NULL); 499 500 /* Get the geom_slicer softc from the geom. 
*/ 501 gsp = gp->softc; 502 503 /* 504 * The do...while loop here allows us to have multiple escapes 505 * using a simple "break". This improves code clarity without 506 * ending up in deep nesting and without using goto or come from. 507 */ 508 do { 509 /* 510 * If the provider is an MBR we will only auto attach 511 * to type 165 slices in the G_TF_NORMAL case. We will 512 * attach to any other type. 513 */ 514 error = g_getattr("MBR::type", cp, &i); 515 if (!error) { 516 if (i != 165 && flags == G_TF_NORMAL) 517 break; 518 error = g_getattr("MBR::offset", cp, &ms->mbroffset); 519 if (error) 520 break; 521 } 522 523 /* Same thing if we are inside a PC98 */ 524 error = g_getattr("PC98::type", cp, &i); 525 if (!error) { 526 if (i != 0xc494 && flags == G_TF_NORMAL) 527 break; 528 error = g_getattr("PC98::offset", cp, &ms->mbroffset); 529 if (error) 530 break; 531 } 532 533 /* Get sector size, we need it to read data. */ 534 secsize = cp->provider->sectorsize; 535 if (secsize < 512) 536 break; 537 538 /* First look for a label at the start of the second sector. */ 539 error = g_bsd_try(gp, gsp, cp, secsize, ms, secsize); 540 541 /* Next, look for alpha labels */ 542 if (error) 543 error = g_bsd_try(gp, gsp, cp, secsize, ms, 544 ALPHA_LABEL_OFFSET); 545 546 /* If we didn't find a label, punt. */ 547 if (error) 548 break; 549 550 /* 551 * In order to avoid recursively attaching to the same 552 * on-disk label (it's usually visible through the 'c' 553 * partition) we calculate an MD5 and ask if other BSD's 554 * below us love that label. If they do, we don't. 555 */ 556 MD5Init(&md5sum); 557 MD5Update(&md5sum, ms->label, sizeof(ms->label)); 558 MD5Final(ms->labelsum, &md5sum); 559 560 error = g_getattr("BSD::labelsum", cp, &hash); 561 if (!error && !bcmp(ms->labelsum, hash, sizeof(hash))) 562 break; 563 564 /* 565 * Process the found disklabel, and modify our "slice" 566 * instance to match it, if possible. 
567 */ 568 error = g_bsd_modify(gp, ms->label); 569 } while (0); 570 571 /* Success or failure, we can close our provider now. */ 572 g_access(cp, -1, 0, 0); 573 574 /* If we have configured any providers, return the new geom. */ 575 if (gsp->nprovider > 0) { 576 g_slice_conf_hot(gp, 0, ms->labeloffset, LABELSIZE, 577 G_SLICE_HOT_ALLOW, G_SLICE_HOT_DENY, G_SLICE_HOT_CALL); 578 gsp->hot = g_bsd_hotwrite; 579 return (gp); 580 } 581 /* 582 * ...else push the "self-destruct" button, by spoiling our own 583 * consumer. This triggers a call to g_slice_spoiled which will 584 * dismantle what was setup. 585 */ 586 g_slice_spoiled(cp); 587 return (NULL); 588} 589 590struct h0h0 { 591 struct g_geom *gp; 592 struct g_bsd_softc *ms; 593 u_char *label; 594 int error; 595}; 596 597static void 598g_bsd_callconfig(void *arg, int flag) 599{ 600 struct h0h0 *hp; 601 602 hp = arg; 603 hp->error = g_bsd_modify(hp->gp, hp->label); 604 if (!hp->error) 605 hp->error = g_bsd_writelabel(hp->gp, NULL); 606} 607 608/* 609 * NB! curthread is user process which GCTL'ed. 610 */ 611static void 612g_bsd_config(struct gctl_req *req, struct g_class *mp, char const *verb) 613{ 614 u_char *label; 615 int error; 616 struct h0h0 h0h0; 617 struct g_geom *gp; 618 struct g_slicer *gsp; 619 struct g_consumer *cp; 620 struct g_bsd_softc *ms; 621 622 g_topology_assert(); 623 gp = gctl_get_geom(req, mp, "geom"); 624 if (gp == NULL) 625 return; 626 cp = LIST_FIRST(&gp->consumer); 627 gsp = gp->softc; 628 ms = gsp->softc; 629 if (!strcmp(verb, "read mbroffset")) { 630 gctl_set_param(req, "mbroffset", 631 &ms->mbroffset, sizeof(ms->mbroffset)); 632 return; 633 } else if (!strcmp(verb, "write label")) { 634 label = gctl_get_paraml(req, "label", LABELSIZE); 635 if (label == NULL) 636 return; 637 h0h0.gp = gp; 638 h0h0.ms = gsp->softc; 639 h0h0.label = label; 640 h0h0.error = -1; 641 /* XXX: Does this reference register with our selfdestruct code ? 
*/ 642 error = g_access(cp, 1, 1, 1); 643 if (error) { 644 gctl_error(req, "could not access consumer"); 645 return; 646 } 647 g_bsd_callconfig(&h0h0, 0); 648 error = h0h0.error; 649 g_access(cp, -1, -1, -1); 650 } else if (!strcmp(verb, "write bootcode")) { 651 label = gctl_get_paraml(req, "bootcode", BBSIZE); 652 if (label == NULL) 653 return; 654 /* XXX: Does this reference register with our selfdestruct code ? */ 655 error = g_access(cp, 1, 1, 1); 656 if (error) { 657 gctl_error(req, "could not access consumer"); 658 return; 659 } 660 error = g_bsd_writelabel(gp, label); 661 g_access(cp, -1, -1, -1); 662 } else { 663 gctl_error(req, "Unknown verb parameter"); 664 } 665 666 return; 667} 668 669/* Finally, register with GEOM infrastructure. */ 670static struct g_class g_bsd_class = { 671 .name = BSD_CLASS_NAME, 672 .version = G_VERSION, 673 .taste = g_bsd_taste, 674 .ctlreq = g_bsd_config, 675 .dumpconf = g_bsd_dumpconf, 676 .ioctl = g_bsd_ioctl, 677}; 678 679DECLARE_GEOM_CLASS(g_bsd_class, g_bsd); 680