/* geom_bsd.c revision 112988 */
1/*- 2 * Copyright (c) 2002 Poul-Henning Kamp 3 * Copyright (c) 2002 Networks Associates Technology, Inc. 4 * All rights reserved. 5 * 6 * This software was developed for the FreeBSD Project by Poul-Henning Kamp 7 * and NAI Labs, the Security Research Division of Network Associates, Inc. 8 * under DARPA/SPAWAR contract N66001-01-C-8035 ("CBOSS"), as part of the 9 * DARPA CHATS research program. 10 * 11 * Redistribution and use in source and binary forms, with or without 12 * modification, are permitted provided that the following conditions 13 * are met: 14 * 1. Redistributions of source code must retain the above copyright 15 * notice, this list of conditions and the following disclaimer. 16 * 2. Redistributions in binary form must reproduce the above copyright 17 * notice, this list of conditions and the following disclaimer in the 18 * documentation and/or other materials provided with the distribution. 19 * 3. The names of the authors may not be used to endorse or promote 20 * products derived from this software without specific prior written 21 * permission. 22 * 23 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 24 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 25 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 26 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 27 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 28 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 29 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 30 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 31 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 32 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 33 * SUCH DAMAGE. 
34 * 35 * $FreeBSD: head/sys/geom/geom_bsd.c 112988 2003-04-02 20:41:18Z phk $ 36 * 37 * This is the method for dealing with BSD disklabels. It has been 38 * extensively (by my standards at least) commented, in the vain hope that 39 * it will serve as the source in future copy&paste operations. 40 */ 41 42#include <sys/param.h> 43#ifndef _KERNEL 44#include <stdio.h> 45#include <string.h> 46#include <stdlib.h> 47#include <signal.h> 48#include <err.h> 49#else 50#include <sys/systm.h> 51#include <sys/kernel.h> 52#include <sys/conf.h> 53#include <sys/bio.h> 54#include <sys/malloc.h> 55#include <sys/lock.h> 56#include <sys/mutex.h> 57#endif 58#include <sys/md5.h> 59#include <sys/errno.h> 60#include <sys/disklabel.h> 61#include <geom/geom.h> 62#include <geom/geom_slice.h> 63 64#define BSD_CLASS_NAME "BSD" 65 66#define ALPHA_LABEL_OFFSET 64 67 68/* 69 * Our private data about one instance. All the rest is handled by the 70 * slice code and stored in its softc, so this is just the stuff 71 * specific to BSD disklabels. 72 */ 73struct g_bsd_softc { 74 off_t labeloffset; 75 off_t mbroffset; 76 off_t rawoffset; 77 struct disklabel ondisk; 78 struct disklabel inram; 79 u_char labelsum[16]; 80}; 81 82/* 83 * The next 4 functions isolate us from how the compiler lays out and pads 84 * "struct disklabel". We treat what we read from disk as a bytestream and 85 * explicitly convert it into a struct disklabel. This makes us compiler- 86 * endianness- and wordsize- agnostic. 87 * For now we only have little-endian formats to deal with. 
88 */ 89 90static void 91g_bsd_ledec_partition(u_char *ptr, struct partition *d) 92{ 93 d->p_size = g_dec_le4(ptr + 0); 94 d->p_offset = g_dec_le4(ptr + 4); 95 d->p_fsize = g_dec_le4(ptr + 8); 96 d->p_fstype = ptr[12]; 97 d->p_frag = ptr[13]; 98 d->p_cpg = g_dec_le2(ptr + 14); 99} 100 101static void 102g_bsd_ledec_disklabel(u_char *ptr, struct disklabel *d) 103{ 104 int i; 105 106 d->d_magic = g_dec_le4(ptr + 0); 107 d->d_type = g_dec_le2(ptr + 4); 108 d->d_subtype = g_dec_le2(ptr + 6); 109 bcopy(ptr + 8, d->d_typename, 16); 110 bcopy(ptr + 24, d->d_packname, 16); 111 d->d_secsize = g_dec_le4(ptr + 40); 112 d->d_nsectors = g_dec_le4(ptr + 44); 113 d->d_ntracks = g_dec_le4(ptr + 48); 114 d->d_ncylinders = g_dec_le4(ptr + 52); 115 d->d_secpercyl = g_dec_le4(ptr + 56); 116 d->d_secperunit = g_dec_le4(ptr + 60); 117 d->d_sparespertrack = g_dec_le2(ptr + 64); 118 d->d_sparespercyl = g_dec_le2(ptr + 66); 119 d->d_acylinders = g_dec_le4(ptr + 68); 120 d->d_rpm = g_dec_le2(ptr + 72); 121 d->d_interleave = g_dec_le2(ptr + 74); 122 d->d_trackskew = g_dec_le2(ptr + 76); 123 d->d_cylskew = g_dec_le2(ptr + 78); 124 d->d_headswitch = g_dec_le4(ptr + 80); 125 d->d_trkseek = g_dec_le4(ptr + 84); 126 d->d_flags = g_dec_le4(ptr + 88); 127 d->d_drivedata[0] = g_dec_le4(ptr + 92); 128 d->d_drivedata[1] = g_dec_le4(ptr + 96); 129 d->d_drivedata[2] = g_dec_le4(ptr + 100); 130 d->d_drivedata[3] = g_dec_le4(ptr + 104); 131 d->d_drivedata[4] = g_dec_le4(ptr + 108); 132 d->d_spare[0] = g_dec_le4(ptr + 112); 133 d->d_spare[1] = g_dec_le4(ptr + 116); 134 d->d_spare[2] = g_dec_le4(ptr + 120); 135 d->d_spare[3] = g_dec_le4(ptr + 124); 136 d->d_spare[4] = g_dec_le4(ptr + 128); 137 d->d_magic2 = g_dec_le4(ptr + 132); 138 d->d_checksum = g_dec_le2(ptr + 136); 139 d->d_npartitions = g_dec_le2(ptr + 138); 140 d->d_bbsize = g_dec_le4(ptr + 140); 141 d->d_sbsize = g_dec_le4(ptr + 144); 142 for (i = 0; i < MAXPARTITIONS; i++) 143 g_bsd_ledec_partition(ptr + 148 + 16 * i, &d->d_partitions[i]); 144} 145 
146static void 147g_bsd_leenc_partition(u_char *ptr, struct partition *d) 148{ 149 g_enc_le4(ptr + 0, d->p_size); 150 g_enc_le4(ptr + 4, d->p_offset); 151 g_enc_le4(ptr + 8, d->p_fsize); 152 ptr[12] = d->p_fstype; 153 ptr[13] = d->p_frag; 154 g_enc_le2(ptr + 14, d->p_cpg); 155} 156 157static void 158g_bsd_leenc_disklabel(u_char *ptr, struct disklabel *d) 159{ 160 int i; 161 162 g_enc_le4(ptr + 0, d->d_magic); 163 g_enc_le2(ptr + 4, d->d_type); 164 g_enc_le2(ptr + 6, d->d_subtype); 165 bcopy(d->d_typename, ptr + 8, 16); 166 bcopy(d->d_packname, ptr + 24, 16); 167 g_enc_le4(ptr + 40, d->d_secsize); 168 g_enc_le4(ptr + 44, d->d_nsectors); 169 g_enc_le4(ptr + 48, d->d_ntracks); 170 g_enc_le4(ptr + 52, d->d_ncylinders); 171 g_enc_le4(ptr + 56, d->d_secpercyl); 172 g_enc_le4(ptr + 60, d->d_secperunit); 173 g_enc_le2(ptr + 64, d->d_sparespertrack); 174 g_enc_le2(ptr + 66, d->d_sparespercyl); 175 g_enc_le4(ptr + 68, d->d_acylinders); 176 g_enc_le2(ptr + 72, d->d_rpm); 177 g_enc_le2(ptr + 74, d->d_interleave); 178 g_enc_le2(ptr + 76, d->d_trackskew); 179 g_enc_le2(ptr + 78, d->d_cylskew); 180 g_enc_le4(ptr + 80, d->d_headswitch); 181 g_enc_le4(ptr + 84, d->d_trkseek); 182 g_enc_le4(ptr + 88, d->d_flags); 183 g_enc_le4(ptr + 92, d->d_drivedata[0]); 184 g_enc_le4(ptr + 96, d->d_drivedata[1]); 185 g_enc_le4(ptr + 100, d->d_drivedata[2]); 186 g_enc_le4(ptr + 104, d->d_drivedata[3]); 187 g_enc_le4(ptr + 108, d->d_drivedata[4]); 188 g_enc_le4(ptr + 112, d->d_spare[0]); 189 g_enc_le4(ptr + 116, d->d_spare[1]); 190 g_enc_le4(ptr + 120, d->d_spare[2]); 191 g_enc_le4(ptr + 124, d->d_spare[3]); 192 g_enc_le4(ptr + 128, d->d_spare[4]); 193 g_enc_le4(ptr + 132, d->d_magic2); 194 g_enc_le2(ptr + 136, d->d_checksum); 195 g_enc_le2(ptr + 138, d->d_npartitions); 196 g_enc_le4(ptr + 140, d->d_bbsize); 197 g_enc_le4(ptr + 144, d->d_sbsize); 198 for (i = 0; i < MAXPARTITIONS; i++) 199 g_bsd_leenc_partition(ptr + 148 + 16 * i, &d->d_partitions[i]); 200} 201 202static int 
203g_bsd_ondisk_size(void) 204{ 205 return (148 + 16 * MAXPARTITIONS); 206} 207 208/* 209 * For reasons which were valid and just in their days, FreeBSD/i386 uses 210 * absolute disk-addresses in disklabels. The way it works is that the 211 * p_offset field of all partitions have the first sector number of the 212 * disk slice added to them. This was hidden kernel-magic, userland did 213 * not see these offsets. These two functions subtract and add them 214 * while converting from the "ondisk" to the "inram" labels and vice 215 * versa. 216 */ 217static void 218ondisk2inram(struct g_bsd_softc *sc) 219{ 220 struct partition *ppp; 221 struct disklabel *dl; 222 int i; 223 224 sc->inram = sc->ondisk; 225 dl = &sc->inram; 226 227 /* Basic sanity-check needed to avoid mistakes. */ 228 if (dl->d_magic != DISKMAGIC || dl->d_magic2 != DISKMAGIC) 229 return; 230 if (dl->d_npartitions > MAXPARTITIONS) 231 return; 232 233 sc->rawoffset = dl->d_partitions[RAW_PART].p_offset; 234 for (i = 0; i < dl->d_npartitions; i++) { 235 ppp = &dl->d_partitions[i]; 236 if (ppp->p_size != 0 && ppp->p_offset < sc->rawoffset) 237 sc->rawoffset = 0; 238 } 239 if (sc->rawoffset > 0) { 240 for (i = 0; i < dl->d_npartitions; i++) { 241 ppp = &dl->d_partitions[i]; 242 if (ppp->p_offset != 0) 243 ppp->p_offset -= sc->rawoffset; 244 } 245 } 246 dl->d_checksum = 0; 247 dl->d_checksum = dkcksum(&sc->inram); 248} 249 250static void 251inram2ondisk(struct g_bsd_softc *sc) 252{ 253 struct partition *ppp; 254 int i; 255 256 sc->ondisk = sc->inram; 257 if (sc->mbroffset != 0) 258 sc->rawoffset = sc->mbroffset / sc->inram.d_secsize; 259 if (sc->rawoffset != 0) { 260 for (i = 0; i < sc->inram.d_npartitions; i++) { 261 ppp = &sc->ondisk.d_partitions[i]; 262 if (ppp->p_size > 0) 263 ppp->p_offset += sc->rawoffset; 264 else 265 ppp->p_offset = 0; 266 } 267 } 268 sc->ondisk.d_checksum = 0; 269 sc->ondisk.d_checksum = dkcksum(&sc->ondisk); 270} 271 272/* 273 * Check that this looks like a valid disklabel, but be 
prepared 274 * to get any kind of junk. The checksum must be checked only 275 * after this function returns success to prevent a bogus d_npartitions 276 * value from tripping us up. 277 */ 278static int 279g_bsd_checklabel(struct disklabel *dl) 280{ 281 struct partition *ppp; 282 int i; 283 284 if (dl->d_magic != DISKMAGIC || dl->d_magic2 != DISKMAGIC) 285 return (EINVAL); 286 /* 287 * If the label specifies more partitions than we can handle 288 * we have to reject it: If people updated the label they would 289 * trash it, and that would break the checksum. 290 */ 291 if (dl->d_npartitions > MAXPARTITIONS) 292 return (EINVAL); 293 294 for (i = 0; i < dl->d_npartitions; i++) { 295 ppp = &dl->d_partitions[i]; 296 /* Cannot extend past unit. */ 297 if (ppp->p_size != 0 && 298 ppp->p_offset + ppp->p_size > dl->d_secperunit) { 299 return (EINVAL); 300 } 301 } 302 return (0); 303} 304 305/* 306 * Modify our slicer to match proposed disklabel, if possible. 307 * First carry out all the simple checks, then lock topology 308 * and check that no open providers are affected negatively 309 * then carry out all the changes. 310 * 311 * NB: Returns with topology held only if successful return. 312 */ 313static int 314g_bsd_modify(struct g_geom *gp, struct disklabel *dl) 315{ 316 int i, error; 317 struct partition *ppp; 318 struct g_slicer *gsp; 319 struct g_consumer *cp; 320 u_int secsize, u; 321 off_t mediasize; 322 323 /* Basic check that this is indeed a disklabel. */ 324 error = g_bsd_checklabel(dl); 325 if (error) 326 return (error); 327 328 /* Make sure the checksum is OK. */ 329 if (dkcksum(dl) != 0) 330 return (EINVAL); 331 332 /* Get dimensions of our device. */ 333 cp = LIST_FIRST(&gp->consumer); 334 secsize = cp->provider->sectorsize; 335 mediasize = cp->provider->mediasize; 336 337#ifdef nolonger 338 /* 339 * The raw-partition must start at zero. We do not check that the 340 * size == mediasize because this is overly restrictive. 
We have 341 * already tested in g_bsd_checklabel() that it is not longer. 342 * XXX: RAW_PART is archaic anyway, and we should drop it. 343 */ 344 if (dl->d_partitions[RAW_PART].p_offset != 0) 345 return (EINVAL); 346#endif 347 348#ifdef notyet 349 /* 350 * Indications are that the d_secperunit is not correctly 351 * initialized in many cases, and since we don't need it 352 * for anything, we dont strictly need this test. 353 * Preemptive action to avoid confusing people in disklabel(8) 354 * may be in order. 355 */ 356 /* The label cannot claim a larger size than the media. */ 357 if ((off_t)dl->d_secperunit * dl->d_secsize > mediasize) 358 return (EINVAL); 359#endif 360 361 362 /* ... or a smaller sector size. */ 363 if (dl->d_secsize < secsize) 364 return (EINVAL); 365 366 /* ... or a non-multiple sector size. */ 367 if (dl->d_secsize % secsize != 0) 368 return (EINVAL); 369 370 g_topology_lock(); 371 372 /* Don't munge open partitions. */ 373 gsp = gp->softc; 374 for (i = 0; i < dl->d_npartitions; i++) { 375 ppp = &dl->d_partitions[i]; 376 377 error = g_slice_config(gp, i, G_SLICE_CONFIG_CHECK, 378 (off_t)ppp->p_offset * dl->d_secsize, 379 (off_t)ppp->p_size * dl->d_secsize, 380 dl->d_secsize, 381 "%s%c", gp->name, 'a' + i); 382 if (error) { 383 g_topology_unlock(); 384 return (error); 385 } 386 } 387 388 /* Look good, go for it... */ 389 for (u = 0; u < gsp->nslice; u++) { 390 ppp = &dl->d_partitions[u]; 391 g_slice_config(gp, u, G_SLICE_CONFIG_SET, 392 (off_t)ppp->p_offset * dl->d_secsize, 393 (off_t)ppp->p_size * dl->d_secsize, 394 dl->d_secsize, 395 "%s%c", gp->name, 'a' + u); 396 } 397 return (0); 398} 399 400/* 401 * Calculate a disklabel checksum for a little-endian byte-stream. 402 * We need access to the decoded disklabel because the checksum only 403 * covers the partition data for the first d_npartitions. 
404 */ 405static int 406g_bsd_lesum(struct disklabel *dl, u_char *p) 407{ 408 u_char *pe; 409 uint16_t sum; 410 411 pe = p + 148 + 16 * dl->d_npartitions; 412 sum = 0; 413 while (p < pe) { 414 sum ^= g_dec_le2(p); 415 p += 2; 416 } 417 return (sum); 418} 419 420/* 421 * This is an internal helper function, called multiple times from the taste 422 * function to try to locate a disklabel on the disk. More civilized formats 423 * will not need this, as there is only one possible place on disk to look 424 * for the magic spot. 425 */ 426 427static int 428g_bsd_try(struct g_geom *gp, struct g_slicer *gsp, struct g_consumer *cp, int secsize, struct g_bsd_softc *ms, off_t offset) 429{ 430 int error; 431 u_char *buf; 432 struct disklabel *dl; 433 off_t secoff; 434 435 /* 436 * We need to read entire aligned sectors, and we assume that the 437 * disklabel does not span sectors, so one sector is enough. 438 */ 439 error = 0; 440 secoff = offset % secsize; 441 buf = g_read_data(cp, offset - secoff, secsize, &error); 442 if (buf == NULL || error != 0) 443 return (ENOENT); 444 445 /* Decode into our native format. */ 446 dl = &ms->ondisk; 447 g_bsd_ledec_disklabel(buf + secoff, dl); 448 449 ondisk2inram(ms); 450 451 dl = &ms->inram; 452 /* Does it look like a label at all? */ 453 if (g_bsd_checklabel(dl)) 454 error = ENOENT; 455 /* ... and does the raw data have a good checksum? */ 456 if (error == 0 && g_bsd_lesum(dl, buf + secoff) != 0) 457 error = ENOENT; 458 459 /* Remember to free the buffer g_read_data() gave us. */ 460 g_free(buf); 461 462 /* If we had a label, record it properly. */ 463 if (error == 0) { 464 gsp->frontstuff = 16 * secsize; /* XXX */ 465 ms->labeloffset = offset; 466 g_topology_lock(); 467 g_slice_conf_hot(gp, 0, offset, g_bsd_ondisk_size()); 468 g_topology_unlock(); 469 } 470 return (error); 471} 472 473/* 474 * Implement certain ioctls to modify disklabels with. 
This function 475 * is called by the event handler thread with topology locked as result 476 * of the g_call_me() in g_bsd_start(). It is not necessary to keep 477 * topology locked all the time but make sure to return with topology 478 * locked as well. 479 */ 480 481static void 482g_bsd_ioctl(void *arg, int flag __unused) 483{ 484 struct bio *bp; 485 struct g_geom *gp; 486 struct g_slicer *gsp; 487 struct g_bsd_softc *ms; 488 struct disklabel *dl; 489 struct g_ioctl *gio; 490 struct g_consumer *cp; 491 u_char *buf; 492 off_t secoff; 493 u_int secsize; 494 int error, i; 495 uint64_t sum; 496 497 /* We don't need topology for now. */ 498 g_topology_unlock(); 499 500 /* Get hold of the interesting bits from the bio. */ 501 bp = arg; 502 gp = bp->bio_to->geom; 503 gsp = gp->softc; 504 ms = gsp->softc; 505 gio = (struct g_ioctl *)bp->bio_data; 506 507 /* The disklabel to set is the ioctl argument. */ 508 dl = gio->data; 509 510 /* Validate and modify our slice instance to match. */ 511 error = g_bsd_modify(gp, dl); /* Picks up topology lock on success. */ 512 if (error) { 513 g_topology_lock(); 514 g_io_deliver(bp, error); 515 return; 516 } 517 /* Update our copy of the disklabel. */ 518 ms->inram = *dl; 519 inram2ondisk(ms); 520 521 if (gio->cmd == DIOCSDINFO) { 522 g_io_deliver(bp, 0); 523 return; 524 } 525 KASSERT(gio->cmd == DIOCWDINFO, ("Unknown ioctl in g_bsd_ioctl")); 526 cp = LIST_FIRST(&gp->consumer); 527 /* Get sector size, we need it to read data. 
*/ 528 secsize = cp->provider->sectorsize; 529 secoff = ms->labeloffset % secsize; 530 buf = g_read_data(cp, ms->labeloffset - secoff, secsize, &error); 531 if (buf == NULL || error != 0) { 532 g_io_deliver(bp, error); 533 return; 534 } 535 dl = &ms->ondisk; 536 g_bsd_leenc_disklabel(buf + secoff, dl); 537 if (ms->labeloffset == ALPHA_LABEL_OFFSET) { 538 sum = 0; 539 for (i = 0; i < 63; i++) 540 sum += g_dec_le8(buf + i * 8); 541 g_enc_le8(buf + 504, sum); 542 } 543 error = g_write_data(cp, ms->labeloffset - secoff, buf, secsize); 544 g_free(buf); 545 g_io_deliver(bp, error); 546} 547 548/* 549 * Rewrite the bootblock, which is BBSIZE bytes from the start of the disk. 550 * We punch down the disklabel where we expect it to be before writing. 551 */ 552static int 553g_bsd_diocbsdbb(dev_t dev, u_long cmd __unused, caddr_t data, int fflag __unused, struct thread *td __unused) 554{ 555 struct g_geom *gp; 556 struct g_slicer *gsp; 557 struct g_bsd_softc *ms; 558 struct disklabel *dl; 559 struct g_consumer *cp; 560 u_char *buf; 561 void *p; 562 u_int secsize; 563 int error, i; 564 uint64_t sum; 565 566 /* Get hold of the interesting bits from the bio. */ 567 gp = (void *)dev; 568 gsp = gp->softc; 569 ms = gsp->softc; 570 571 /* The disklabel to set is the ioctl argument. */ 572 buf = g_malloc(BBSIZE, M_WAITOK); 573 p = *(void **)data; 574 error = copyin(p, buf, BBSIZE); 575 if (error) { 576 g_free(buf); 577 return (error); 578 } 579 /* The disklabel to set is the ioctl argument. */ 580 dl = (void *)(buf + ms->labeloffset); 581 582 DROP_GIANT(); 583 584 /* Validate and modify our slice instance to match. */ 585 error = g_bsd_modify(gp, dl); /* Picks up topology lock on success. 
*/ 586 if (!error) { 587 cp = LIST_FIRST(&gp->consumer); 588 secsize = cp->provider->sectorsize; 589 dl = &ms->ondisk; 590 g_bsd_leenc_disklabel(buf + ms->labeloffset, dl); 591 if (ms->labeloffset == ALPHA_LABEL_OFFSET) { 592 sum = 0; 593 for (i = 0; i < 63; i++) 594 sum += g_dec_le8(buf + i * 8); 595 g_enc_le8(buf + 504, sum); 596 } 597 error = g_write_data(cp, 0, buf, BBSIZE); 598 g_topology_unlock(); 599 } 600 g_free(buf); 601 PICKUP_GIANT(); 602 return (error); 603} 604 605/* 606 * If the user tries to overwrite our disklabel through an open partition 607 * or via a magicwrite config call, we end up here and try to prevent 608 * footshooting as best we can. 609 */ 610static void 611g_bsd_hotwrite(void *arg, int flag __unused) 612{ 613 struct bio *bp; 614 struct g_geom *gp; 615 struct g_slicer *gsp; 616 struct g_slice *gsl; 617 struct g_bsd_softc *ms; 618 struct g_bsd_softc fake; 619 u_char *p; 620 int error; 621 622 bp = arg; 623 gp = bp->bio_to->geom; 624 gsp = gp->softc; 625 ms = gsp->softc; 626 gsl = &gsp->slices[bp->bio_to->index]; 627 p = (u_char*)bp->bio_data + ms->labeloffset 628 - (bp->bio_offset + gsl->offset); 629 g_bsd_ledec_disklabel(p, &fake.ondisk); 630 631 ondisk2inram(&fake); 632 if (g_bsd_checklabel(&fake.inram)) { 633 g_io_deliver(bp, EPERM); 634 return; 635 } 636 if (g_bsd_lesum(&fake.ondisk, p) != 0) { 637 g_io_deliver(bp, EPERM); 638 return; 639 } 640 g_topology_unlock(); 641 error = g_bsd_modify(gp, &fake.inram); /* May pick up topology. */ 642 if (error) { 643 g_io_deliver(bp, EPERM); 644 g_topology_lock(); 645 return; 646 } 647 /* Update our copy of the disklabel. */ 648 ms->inram = fake.inram; 649 inram2ondisk(ms); 650 g_bsd_leenc_disklabel(p, &ms->ondisk); 651 g_slice_finish_hot(bp); 652} 653 654/*- 655 * This start routine is only called for non-trivial requests, all the 656 * trivial ones are handled autonomously by the slice code. 
657 * For requests we handle here, we must call the g_io_deliver() on the 658 * bio, and return non-zero to indicate to the slice code that we did so. 659 * This code executes in the "DOWN" I/O path, this means: 660 * * No sleeping. 661 * * Don't grab the topology lock. 662 * * Don't call biowait, g_getattr(), g_setattr() or g_read_data() 663 */ 664 665static int 666g_bsd_start(struct bio *bp) 667{ 668 struct g_geom *gp; 669 struct g_bsd_softc *ms; 670 struct g_slicer *gsp; 671 struct g_ioctl *gio; 672 int error; 673 674 gp = bp->bio_to->geom; 675 gsp = gp->softc; 676 ms = gsp->softc; 677 switch(bp->bio_cmd) { 678 case BIO_READ: 679 /* We allow reading of our hot spots */ 680 return (0); 681 case BIO_DELETE: 682 /* We do not allow deleting our hot spots */ 683 return (EPERM); 684 case BIO_WRITE: 685 g_call_me(g_bsd_hotwrite, bp, gp, NULL); 686 return (EJUSTRETURN); 687 case BIO_GETATTR: 688 if (g_handleattr(bp, "BSD::labelsum", ms->labelsum, 689 sizeof(ms->labelsum))) 690 return (1); 691 break; 692 case BIO_SETATTR: 693 break; 694 default: 695 KASSERT(0 == 1, ("Unknown bio_cmd in g_bsd_start (%d)", 696 bp->bio_cmd)); 697 } 698 699 /* We only handle ioctl(2) requests of the right format. */ 700 if (strcmp(bp->bio_attribute, "GEOM::ioctl")) 701 return (0); 702 else if (bp->bio_length != sizeof(*gio)) 703 return (0); 704 705 /* Get hold of the ioctl parameters. */ 706 gio = (struct g_ioctl *)bp->bio_data; 707 708 switch (gio->cmd) { 709 case DIOCGDINFO: 710 /* Return a copy of the disklabel to userland. */ 711 bcopy(&ms->inram, gio->data, sizeof(ms->inram)); 712 g_io_deliver(bp, 0); 713 return (1); 714 case DIOCBSDBB: 715 gio->func = g_bsd_diocbsdbb; 716 gio->dev = (void *)gp; 717 g_io_deliver(bp, EDIRIOCTL); 718 return (1); 719 case DIOCSDINFO: 720 case DIOCWDINFO: 721 /* 722 * These we cannot do without the topology lock and some 723 * some I/O requests. Ask the event-handler to schedule 724 * us in a less restricted environment. 
725 */ 726 error = g_call_me(g_bsd_ioctl, bp, gp, NULL); 727 if (error) 728 g_io_deliver(bp, error); 729 /* 730 * We must return non-zero to indicate that we will deal 731 * with this bio, even though we have not done so yet. 732 */ 733 return (1); 734 default: 735 return (0); 736 } 737} 738 739/* 740 * Dump configuration information in XML format. 741 * Notice that the function is called once for the geom and once for each 742 * consumer and provider. We let g_slice_dumpconf() do most of the work. 743 */ 744static void 745g_bsd_dumpconf(struct sbuf *sb, const char *indent, struct g_geom *gp, struct g_consumer *cp, struct g_provider *pp) 746{ 747 struct g_bsd_softc *ms; 748 struct g_slicer *gsp; 749 750 gsp = gp->softc; 751 ms = gsp->softc; 752 g_slice_dumpconf(sb, indent, gp, cp, pp); 753 if (indent != NULL && pp == NULL && cp == NULL) { 754 sbuf_printf(sb, "%s<labeloffset>%jd</labeloffset>\n", 755 indent, (intmax_t)ms->labeloffset); 756 sbuf_printf(sb, "%s<rawoffset>%jd</rawoffset>\n", 757 indent, (intmax_t)ms->rawoffset); 758 sbuf_printf(sb, "%s<mbroffset>%jd</mbroffset>\n", 759 indent, (intmax_t)ms->mbroffset); 760 } else if (pp != NULL) { 761 if (indent == NULL) 762 sbuf_printf(sb, " ty %d", 763 ms->inram.d_partitions[pp->index].p_fstype); 764 else 765 sbuf_printf(sb, "%s<type>%d</type>\n", indent, 766 ms->inram.d_partitions[pp->index].p_fstype); 767 } 768} 769 770/* 771 * The taste function is called from the event-handler, with the topology 772 * lock already held and a provider to examine. The flags are unused. 773 * 774 * If flags == G_TF_NORMAL, the idea is to take a bite of the provider and 775 * if we find valid, consistent magic on it, build a geom on it. 776 * any magic bits which indicate that we should automatically put a BSD 777 * geom on it. 778 * 779 * There may be cases where the operator would like to put a BSD-geom on 780 * providers which do not meet all of the requirements. 
This can be done 781 * by instead passing the G_TF_INSIST flag, which will override these 782 * checks. 783 * 784 * The final flags value is G_TF_TRANSPARENT, which instructs the method 785 * to put a geom on top of the provider and configure it to be as transparent 786 * as possible. This is not really relevant to the BSD method and therefore 787 * not implemented here. 788 */ 789 790static struct g_geom * 791g_bsd_taste(struct g_class *mp, struct g_provider *pp, int flags) 792{ 793 struct g_geom *gp; 794 struct g_consumer *cp; 795 int error, i; 796 struct g_bsd_softc *ms; 797 struct disklabel *dl; 798 u_int secsize; 799 struct g_slicer *gsp; 800 MD5_CTX md5sum; 801 u_char hash[16]; 802 803 g_trace(G_T_TOPOLOGY, "bsd_taste(%s,%s)", mp->name, pp->name); 804 g_topology_assert(); 805 806 /* We don't implement transparent inserts. */ 807 if (flags == G_TF_TRANSPARENT) 808 return (NULL); 809 810 /* 811 * BSD labels are a subclass of the general "slicing" topology so 812 * a lot of the work can be done by the common "slice" code. 813 * Create a geom with space for MAXPARTITIONS providers, one consumer 814 * and a softc structure for us. Specify the provider to attach 815 * the consumer to and our "start" routine for special requests. 816 * The provider is opened with mode (1,0,0) so we can do reads 817 * from it. 818 */ 819 gp = g_slice_new(mp, MAXPARTITIONS, pp, &cp, &ms, 820 sizeof(*ms), g_bsd_start); 821 if (gp == NULL) 822 return (NULL); 823 824 /* 825 * Now that we have attached to and opened our provider, we do 826 * not need the topology lock until we change the topology again 827 * next time. 828 */ 829 g_topology_unlock(); 830 831 /* 832 * Fill in the optional details, in our case we have a dumpconf 833 * routine which the "slice" code should call at the right time 834 */ 835 gp->dumpconf = g_bsd_dumpconf; 836 837 /* Get the geom_slicer softc from the geom. 
*/ 838 gsp = gp->softc; 839 840 /* 841 * The do...while loop here allows us to have multiple escapes 842 * using a simple "break". This improves code clarity without 843 * ending up in deep nesting and without using goto or come from. 844 */ 845 do { 846 /* 847 * If the provider is an MBR we will only auto attach 848 * to type 165 slices in the G_TF_NORMAL case. We will 849 * attach to any other type. 850 */ 851 error = g_getattr("MBR::type", cp, &i); 852 if (!error) { 853 if (i != 165 && flags == G_TF_NORMAL) 854 break; 855 error = g_getattr("MBR::offset", cp, &ms->mbroffset); 856 if (error) 857 break; 858 } 859 860 /* Same thing if we are inside a PC98 */ 861 error = g_getattr("PC98::type", cp, &i); 862 if (!error) { 863 if (i != 0xc494 && flags == G_TF_NORMAL) 864 break; 865 error = g_getattr("PC98::offset", cp, &ms->mbroffset); 866 if (error) 867 break; 868 } 869 870 /* Get sector size, we need it to read data. */ 871 secsize = cp->provider->sectorsize; 872 if (secsize < 512) 873 break; 874 875 /* First look for a label at the start of the second sector. */ 876 error = g_bsd_try(gp, gsp, cp, secsize, ms, secsize); 877 878 /* Next, look for alpha labels */ 879 if (error) 880 error = g_bsd_try(gp, gsp, cp, secsize, ms, 881 ALPHA_LABEL_OFFSET); 882 883 /* If we didn't find a label, punt. */ 884 if (error) 885 break; 886 887 /* 888 * In order to avoid recursively attaching to the same 889 * on-disk label (it's usually visible through the 'c' 890 * partition) we calculate an MD5 and ask if other BSD's 891 * below us love that label. If they do, we don't. 892 */ 893 894 dl = &ms->inram; 895 MD5Init(&md5sum); 896 MD5Update(&md5sum, (u_char *)dl, sizeof(dl)); 897 MD5Final(ms->labelsum, &md5sum); 898 899 error = g_getattr("BSD::labelsum", cp, &hash); 900 if (!error && !strncmp(ms->labelsum, hash, sizeof(hash))) 901 break; 902 903 /* 904 * Process the found disklabel, and modify our "slice" 905 * instance to match it, if possible. 
906 */ 907 error = g_bsd_modify(gp, dl); /* Picks up topology lock. */ 908 if (!error) 909 g_topology_unlock(); 910 break; 911 } while (0); 912 913 /* Success or failure, we can close our provider now. */ 914 g_topology_lock(); 915 error = g_access_rel(cp, -1, 0, 0); 916 917 /* If we have configured any providers, return the new geom. */ 918 if (gsp->nprovider > 0) 919 return (gp); 920 /* 921 * ...else push the "self-destruct" button, by spoiling our own 922 * consumer. This triggers a call to g_std_spoiled which will 923 * dismantle what was setup. 924 */ 925 g_std_spoiled(cp); 926 return (NULL); 927} 928 929/* Finally, register with GEOM infrastructure. */ 930static struct g_class g_bsd_class = { 931 .name = BSD_CLASS_NAME, 932 .taste = g_bsd_taste, 933 G_CLASS_INITIALIZER 934}; 935 936DECLARE_GEOM_CLASS(g_bsd_class, g_bsd); 937