geom_bsd.c revision 109900
1163516Simp/*- 2163516Simp * Copyright (c) 2002 Poul-Henning Kamp 3163516Simp * Copyright (c) 2002 Networks Associates Technology, Inc. 4163516Simp * All rights reserved. 5163516Simp * 6163516Simp * This software was developed for the FreeBSD Project by Poul-Henning Kamp 7163516Simp * and NAI Labs, the Security Research Division of Network Associates, Inc. 8163516Simp * under DARPA/SPAWAR contract N66001-01-C-8035 ("CBOSS"), as part of the 9163516Simp * DARPA CHATS research program. 10163516Simp * 11163516Simp * Redistribution and use in source and binary forms, with or without 12163516Simp * modification, are permitted provided that the following conditions 13163516Simp * are met: 14163516Simp * 1. Redistributions of source code must retain the above copyright 15163516Simp * notice, this list of conditions and the following disclaimer. 16163516Simp * 2. Redistributions in binary form must reproduce the above copyright 17163516Simp * notice, this list of conditions and the following disclaimer in the 18163516Simp * documentation and/or other materials provided with the distribution. 19163516Simp * 3. The names of the authors may not be used to endorse or promote 20163516Simp * products derived from this software without specific prior written 21163516Simp * permission. 22163516Simp * 23163516Simp * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 24170002Simp * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 25170002Simp * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 26170002Simp * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 27170002Simp * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 28170002Simp * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 29170002Simp * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 30170002Simp * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 31170002Simp * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 32170002Simp * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 33170002Simp * SUCH DAMAGE. 34170002Simp * 35170002Simp * $FreeBSD: head/sys/geom/geom_bsd.c 109900 2003-01-26 21:54:36Z phk $ 36170002Simp * 37170002Simp * This is the method for dealing with BSD disklabels. It has been 38170002Simp * extensively (by my standards at least) commented, in the vain hope that 39170002Simp * it will serve as the source in future copy&paste operations. 40170002Simp */ 41170002Simp 42170002Simp#include <sys/param.h> 43170002Simp#ifndef _KERNEL 44170002Simp#include <stdio.h> 45170002Simp#include <string.h> 46170002Simp#include <stdlib.h> 47170002Simp#include <signal.h> 48170002Simp#include <err.h> 49170002Simp#else 50170002Simp#include <sys/systm.h> 51163516Simp#include <sys/kernel.h> 52163516Simp#include <sys/conf.h> 53163516Simp#include <sys/bio.h> 54163516Simp#include <sys/malloc.h> 55163516Simp#include <sys/lock.h> 56163516Simp#include <sys/mutex.h> 57163516Simp#endif 58163516Simp#include <sys/stdint.h> 59163516Simp#include <sys/md5.h> 60163516Simp#include <sys/errno.h> 61163516Simp#include <sys/disklabel.h> 62163516Simp#include <geom/geom.h> 63163516Simp#include <geom/geom_slice.h> 64163516Simp 65163516Simp#define BSD_CLASS_NAME "BSD" 66163516Simp 67163516Simp#define ALPHA_LABEL_OFFSET 64 68163516Simp 69163516Simp/* 70163516Simp * Our private data about one instance. All the rest is handled by the 71163516Simp * slice code and stored in its softc, so this is just the stuff 72163516Simp * specific to BSD disklabels. 73163516Simp */ 74163516Simpstruct g_bsd_softc { 75163516Simp off_t labeloffset; 76163516Simp off_t mbroffset; 77163516Simp off_t rawoffset; 78163516Simp struct disklabel ondisk; 79163516Simp struct disklabel inram; 80169567Simp u_char labelsum[16]; 81163516Simp}; 82163516Simp 83183480Simp/* 84163516Simp * The next 4 functions isolate us from how the compiler lays out and pads 85163516Simp * "struct disklabel". We treat what we read from disk as a bytestream and 86163516Simp * explicitly convert it into a struct disklabel. This makes us compiler- 87163516Simp * endianness- and wordsize- agnostic. 88163516Simp * For now we only have little-endian formats to deal with. 89163516Simp */ 90163516Simp 91163516Simpstatic void 92163516Simpg_bsd_ledec_partition(u_char *ptr, struct partition *d) 93163516Simp{ 94163516Simp d->p_size = g_dec_le4(ptr + 0); 95163516Simp d->p_offset = g_dec_le4(ptr + 4); 96183774Simp d->p_fsize = g_dec_le4(ptr + 8); 97183774Simp d->p_fstype = ptr[12]; 98183774Simp d->p_frag = ptr[13]; 99163516Simp d->p_cpg = g_dec_le2(ptr + 14); 100163516Simp} 101163516Simp 102163516Simpstatic void 103163516Simpg_bsd_ledec_disklabel(u_char *ptr, struct disklabel *d) 104163516Simp{ 105163516Simp int i; 106163516Simp 107163516Simp d->d_magic = g_dec_le4(ptr + 0); 108163516Simp d->d_type = g_dec_le2(ptr + 4); 109163516Simp d->d_subtype = g_dec_le2(ptr + 6); 110163516Simp bcopy(ptr + 8, d->d_typename, 16); 111163516Simp bcopy(ptr + 24, d->d_packname, 16); 112183704Smav d->d_secsize = g_dec_le4(ptr + 40); 113183480Simp d->d_nsectors = g_dec_le4(ptr + 44); 114163516Simp d->d_ntracks = g_dec_le4(ptr + 48); 115163516Simp d->d_ncylinders = g_dec_le4(ptr + 52); 116163516Simp d->d_secpercyl = g_dec_le4(ptr + 56); 117163516Simp d->d_secperunit = g_dec_le4(ptr + 60); 118163516Simp d->d_sparespertrack = g_dec_le2(ptr + 64); 119163516Simp d->d_sparespercyl = g_dec_le2(ptr + 66); 120163516Simp d->d_acylinders = g_dec_le4(ptr + 68); 121183774Simp d->d_rpm = g_dec_le2(ptr + 72); 122183774Simp d->d_interleave = g_dec_le2(ptr + 74); 123183774Simp d->d_trackskew = g_dec_le2(ptr + 76); 124163516Simp d->d_cylskew = g_dec_le2(ptr + 78); 125163516Simp d->d_headswitch = g_dec_le4(ptr + 80); 126163516Simp d->d_trkseek = g_dec_le4(ptr + 84); 127163516Simp d->d_flags = g_dec_le4(ptr + 88); 128163516Simp d->d_drivedata[0] = g_dec_le4(ptr + 92); 129183774Simp d->d_drivedata[1] = g_dec_le4(ptr + 96); 130183774Simp d->d_drivedata[2] = g_dec_le4(ptr + 100); 131183774Simp d->d_drivedata[3] = g_dec_le4(ptr + 104); 132183774Simp d->d_drivedata[4] = g_dec_le4(ptr + 108); 133183774Simp d->d_spare[0] = g_dec_le4(ptr + 112); 134183774Simp d->d_spare[1] = g_dec_le4(ptr + 116); 135183774Simp d->d_spare[2] = g_dec_le4(ptr + 120); 136184033Smav d->d_spare[3] = g_dec_le4(ptr + 124); 137183774Simp d->d_spare[4] = g_dec_le4(ptr + 128); 138183774Simp d->d_magic2 = g_dec_le4(ptr + 132); 139183774Simp d->d_checksum = g_dec_le2(ptr + 136); 140184033Smav d->d_npartitions = g_dec_le2(ptr + 138); 141183774Simp d->d_bbsize = g_dec_le4(ptr + 140); 142183774Simp d->d_sbsize = g_dec_le4(ptr + 144); 143183774Simp for (i = 0; i < MAXPARTITIONS; i++) 144183774Simp g_bsd_ledec_partition(ptr + 148 + 16 * i, &d->d_partitions[i]); 145183774Simp} 146183774Simp 147183774Simpstatic void 148183774Simpg_bsd_leenc_partition(u_char *ptr, struct partition *d) 149183774Simp{ 150183774Simp g_enc_le4(ptr + 0, d->p_size); 151183774Simp g_enc_le4(ptr + 4, d->p_offset); 152183805Smav g_enc_le4(ptr + 8, d->p_fsize); 153183774Simp ptr[12] = d->p_fstype; 154183774Simp ptr[13] = d->p_frag; 155183774Simp g_enc_le2(ptr + 14, d->p_cpg); 156183774Simp} 157183774Simp 158183774Simpstatic void 159183774Simpg_bsd_leenc_disklabel(u_char *ptr, struct disklabel *d) 160183774Simp{ 161183774Simp int i; 162163516Simp 163169567Simp g_enc_le4(ptr + 0, d->d_magic); 164169567Simp g_enc_le2(ptr + 4, d->d_type); 165172836Sjulian g_enc_le2(ptr + 6, d->d_subtype); 166163516Simp bcopy(d->d_typename, ptr + 8, 16); 167163516Simp bcopy(d->d_packname, ptr + 24, 16); 168163516Simp g_enc_le4(ptr + 40, d->d_secsize); 169163516Simp g_enc_le4(ptr + 44, d->d_nsectors); 170163516Simp g_enc_le4(ptr + 48, d->d_ntracks); 171163516Simp g_enc_le4(ptr + 52, d->d_ncylinders); 172163516Simp g_enc_le4(ptr + 56, d->d_secpercyl); 173169567Simp g_enc_le4(ptr + 60, d->d_secperunit); 174169567Simp g_enc_le2(ptr + 64, d->d_sparespertrack); 175169567Simp g_enc_le2(ptr + 66, d->d_sparespercyl); 176169567Simp g_enc_le4(ptr + 68, d->d_acylinders); 177169567Simp g_enc_le2(ptr + 72, d->d_rpm); 178169567Simp g_enc_le2(ptr + 74, d->d_interleave); 179169567Simp g_enc_le2(ptr + 76, d->d_trackskew); 180169567Simp g_enc_le2(ptr + 78, d->d_cylskew); 181169567Simp g_enc_le4(ptr + 80, d->d_headswitch); 182169567Simp g_enc_le4(ptr + 84, d->d_trkseek); 183169567Simp g_enc_le4(ptr + 88, d->d_flags); 184169567Simp g_enc_le4(ptr + 92, d->d_drivedata[0]); 185169567Simp g_enc_le4(ptr + 96, d->d_drivedata[1]); 186169567Simp g_enc_le4(ptr + 100, d->d_drivedata[2]); 187169567Simp g_enc_le4(ptr + 104, d->d_drivedata[3]); 188169567Simp g_enc_le4(ptr + 108, d->d_drivedata[4]); 189169567Simp g_enc_le4(ptr + 112, d->d_spare[0]); 190169567Simp g_enc_le4(ptr + 116, d->d_spare[1]); 191169567Simp g_enc_le4(ptr + 120, d->d_spare[2]); 192169567Simp g_enc_le4(ptr + 124, d->d_spare[3]); 193183467Simp g_enc_le4(ptr + 128, d->d_spare[4]); 194163516Simp g_enc_le4(ptr + 132, d->d_magic2); 195163516Simp g_enc_le2(ptr + 136, d->d_checksum); 196163516Simp g_enc_le2(ptr + 138, d->d_npartitions); 197163516Simp g_enc_le4(ptr + 140, d->d_bbsize); 198163516Simp g_enc_le4(ptr + 144, d->d_sbsize); 199183467Simp for (i = 0; i < MAXPARTITIONS; i++) 200163516Simp g_bsd_leenc_partition(ptr + 148 + 16 * i, &d->d_partitions[i]); 201163516Simp} 202163516Simp 203163516Simpstatic int 204163516Simpg_bsd_ondisk_size(void) 205183467Simp{ 206163516Simp return (148 + 16 * MAXPARTITIONS); 207163516Simp} 208163516Simp 209163516Simp/* 210163516Simp * For reasons which were valid and just in their days, FreeBSD/i386 uses 211163516Simp * absolute disk-addresses in disklabels. The way it works is that the 212163516Simp * p_offset field of all partitions have the first sector number of the 213163516Simp * disk slice added to them. This was hidden kernel-magic, userland did 214163516Simp * not see these offsets. These two functions subtract and add them 215163516Simp * while converting from the "ondisk" to the "inram" labels and vice 216163516Simp * versa. 217163516Simp */ 218163516Simpstatic void 219163516Simpondisk2inram(struct g_bsd_softc *sc) 220184033Smav{ 221184033Smav struct partition *ppp; 222184033Smav struct disklabel *dl; 223184033Smav int i; 224184033Smav 225184033Smav sc->inram = sc->ondisk; 226184033Smav dl = &sc->inram; 227184033Smav 228184033Smav /* Basic sanity-check needed to avoid mistakes. */ 229184033Smav if (dl->d_magic != DISKMAGIC || dl->d_magic2 != DISKMAGIC) 230184033Smav return; 231184033Smav if (dl->d_npartitions > MAXPARTITIONS) 232184033Smav return; 233184033Smav 234184033Smav sc->rawoffset = dl->d_partitions[RAW_PART].p_offset; 235184033Smav for (i = 0; i < dl->d_npartitions; i++) { 236184033Smav ppp = &dl->d_partitions[i]; 237184033Smav if (ppp->p_size != 0 && ppp->p_offset < sc->rawoffset) 238184033Smav sc->rawoffset = 0; 239184033Smav } 240184033Smav if (sc->rawoffset > 0) { 241184033Smav for (i = 0; i < dl->d_npartitions; i++) { 242184033Smav ppp = &dl->d_partitions[i]; 243184033Smav if (ppp->p_offset != 0) 244184033Smav ppp->p_offset -= sc->rawoffset; 245184033Smav } 246184033Smav } 247184033Smav dl->d_checksum = 0; 248184033Smav dl->d_checksum = dkcksum(&sc->inram); 249184033Smav} 250184033Smav 251184033Smavstatic void 252184033Smavinram2ondisk(struct g_bsd_softc *sc) 253184033Smav{ 254184033Smav struct partition *ppp; 255184033Smav int i; 256184033Smav 257184033Smav sc->ondisk = sc->inram; 258184033Smav if (sc->mbroffset != 0) 259184033Smav sc->rawoffset = sc->mbroffset / sc->inram.d_secsize; 260184033Smav if (sc->rawoffset != 0) { 261184033Smav for (i = 0; i < sc->inram.d_npartitions; i++) { 262184033Smav ppp = &sc->ondisk.d_partitions[i]; 263184033Smav if (ppp->p_size > 0) 264184033Smav ppp->p_offset += sc->rawoffset; 265184033Smav else 266184033Smav ppp->p_offset = 0; 267184033Smav } 268184033Smav } 269184033Smav sc->ondisk.d_checksum = 0; 270184033Smav sc->ondisk.d_checksum = dkcksum(&sc->ondisk); 271184033Smav} 272184033Smav 273184033Smav/* 274184033Smav * Check that this looks like a valid disklabel, but be prepared 275184033Smav * to get any kind of junk. The checksum must be checked only 276184033Smav * after this function returns success to prevent a bogus d_npartitions 277184033Smav * value from tripping us up. 278184033Smav */ 279184033Smavstatic int 280184033Smavg_bsd_checklabel(struct disklabel *dl) 281184033Smav{ 282184033Smav struct partition *ppp; 283184033Smav int i; 284184033Smav 285184033Smav if (dl->d_magic != DISKMAGIC || dl->d_magic2 != DISKMAGIC) 286184033Smav return (EINVAL); 287184033Smav /* 288184033Smav * If the label specifies more partitions than we can handle 289184033Smav * we have to reject it: If people updated the label they would 290184033Smav * trash it, and that would break the checksum. 291184033Smav */ 292184033Smav if (dl->d_npartitions > MAXPARTITIONS) 293184033Smav return (EINVAL); 294184033Smav 295184033Smav for (i = 0; i < dl->d_npartitions; i++) { 296184033Smav ppp = &dl->d_partitions[i]; 297184033Smav /* Cannot extend past unit. */ 298184033Smav if (ppp->p_size != 0 && 299184033Smav ppp->p_offset + ppp->p_size > dl->d_secperunit) { 300184033Smav return (EINVAL); 301184033Smav } 302184033Smav } 303184033Smav return (0); 304184033Smav} 305184033Smav 306184033Smav/* 307184033Smav * Modify our slicer to match proposed disklabel, if possible. 308184033Smav * First carry out all the simple checks, then lock topology 309184033Smav * and check that no open providers are affected negatively 310184033Smav * then carry out all the changes. 311184033Smav * 312184033Smav * NB: Returns with topology held only if successful return. 313184033Smav */ 314184033Smavstatic int 315184033Smavg_bsd_modify(struct g_geom *gp, struct disklabel *dl) 316184033Smav{ 317184033Smav int i, error; 318184033Smav struct partition *ppp; 319184033Smav struct g_slicer *gsp; 320184033Smav struct g_consumer *cp; 321184033Smav u_int secsize, u; 322184033Smav off_t mediasize; 323184033Smav 324184033Smav /* Basic check that this is indeed a disklabel. */ 325184033Smav error = g_bsd_checklabel(dl); 326184033Smav if (error) 327184033Smav return (error); 328184033Smav 329184033Smav /* Make sure the checksum is OK. */ 330184033Smav if (dkcksum(dl) != 0) 331184033Smav return (EINVAL); 332184033Smav 333184033Smav /* Get dimensions of our device. */ 334184033Smav cp = LIST_FIRST(&gp->consumer); 335184033Smav secsize = cp->provider->sectorsize; 336184033Smav mediasize = cp->provider->mediasize; 337184033Smav 338184033Smav#ifdef nolonger 339184033Smav /* 340184033Smav * The raw-partition must start at zero. We do not check that the 341184033Smav * size == mediasize because this is overly restrictive. We have 342184033Smav * already tested in g_bsd_checklabel() that it is not longer. 343184033Smav * XXX: RAW_PART is archaic anyway, and we should drop it. 344184033Smav */ 345184033Smav if (dl->d_partitions[RAW_PART].p_offset != 0) 346184033Smav return (EINVAL); 347184033Smav#endif 348184033Smav 349184033Smav#ifdef notyet 350184033Smav /* 351184033Smav * Indications are that the d_secperunit is not correctly 352184033Smav * initialized in many cases, and since we don't need it 353184033Smav * for anything, we dont strictly need this test. 354184033Smav * Preemptive action to avoid confusing people in disklabel(8) 355184033Smav * may be in order. 356184033Smav */ 357184033Smav /* The label cannot claim a larger size than the media. */ 358184033Smav if ((off_t)dl->d_secperunit * dl->d_secsize > mediasize) 359184033Smav return (EINVAL); 360184033Smav#endif 361184033Smav 362184033Smav 363184033Smav /* ... or a smaller sector size. */ 364184033Smav if (dl->d_secsize < secsize) 365163516Simp return (EINVAL); 366163516Simp 367163516Simp /* ... or a non-multiple sector size. */ 368163516Simp if (dl->d_secsize % secsize != 0) 369163516Simp return (EINVAL); 370163516Simp 371163516Simp g_topology_lock(); 372163516Simp 373163516Simp /* Don't munge open partitions. */ 374163516Simp gsp = gp->softc; 375169567Simp for (i = 0; i < dl->d_npartitions; i++) { 376163516Simp ppp = &dl->d_partitions[i]; 377163516Simp 378163516Simp error = g_slice_config(gp, i, G_SLICE_CONFIG_CHECK, 379163516Simp (off_t)ppp->p_offset * dl->d_secsize, 380163516Simp (off_t)ppp->p_size * dl->d_secsize, 381169567Simp dl->d_secsize, 382169567Simp "%s%c", gp->name, 'a' + i); 383169567Simp if (error) { 384163516Simp g_topology_unlock(); 385169567Simp return (error); 386169567Simp } 387183448Simp } 388183448Simp 389183448Simp /* Look good, go for it... */ 390183448Simp for (u = 0; u < gsp->nslice; u++) { 391183448Simp ppp = &dl->d_partitions[u]; 392183448Simp g_slice_config(gp, u, G_SLICE_CONFIG_SET, 393183448Simp (off_t)ppp->p_offset * dl->d_secsize, 394163516Simp (off_t)ppp->p_size * dl->d_secsize, 395163516Simp dl->d_secsize, 396184033Smav "%s%c", gp->name, 'a' + u); 397163516Simp } 398184033Smav return (0); 399184033Smav} 400184033Smav 401184033Smav/* 402184033Smav * Calculate a disklabel checksum for a little-endian byte-stream. 403184033Smav * We need access to the decoded disklabel because the checksum only 404184033Smav * covers the partition data for the first d_npartitions. 405163516Simp */ 406163516Simpstatic int 407183480Simpg_bsd_lesum(struct disklabel *dl, u_char *p) 408183480Simp{ 409183480Simp u_char *pe; 410183480Simp uint16_t sum; 411183480Simp 412163516Simp pe = p + 148 + 16 * dl->d_npartitions; 413163516Simp sum = 0; 414169567Simp while (p < pe) { 415169567Simp sum ^= g_dec_le2(p); 416169567Simp p += 2; 417169567Simp } 418169567Simp return (sum); 419169567Simp} 420169567Simp 421172836Sjulian/* 422163516Simp * This is an internal helper function, called multiple times from the taste 423163516Simp * function to try to locate a disklabel on the disk. More civilized formats 424183774Simp * will not need this, as there is only one possible place on disk to look 425183774Simp * for the magic spot. 426183774Simp */ 427183774Simp 428183774Simpstatic int 429183774Simpg_bsd_try(struct g_geom *gp, struct g_slicer *gsp, struct g_consumer *cp, int secsize, struct g_bsd_softc *ms, off_t offset) 430183774Simp{ 431183774Simp int error; 432183774Simp u_char *buf; 433183774Simp struct disklabel *dl; 434183774Simp off_t secoff; 435183774Simp 436183774Simp /* 437183774Simp * We need to read entire aligned sectors, and we assume that the 438183774Simp * disklabel does not span sectors, so one sector is enough. 439183774Simp */ 440183774Simp error = 0; 441183774Simp secoff = offset % secsize; 442183774Simp buf = g_read_data(cp, offset - secoff, secsize, &error); 443183774Simp if (buf == NULL || error != 0) 444163516Simp return (ENOENT); 445163516Simp 446163516Simp /* Decode into our native format. */ 447163516Simp dl = &ms->ondisk; 448163516Simp g_bsd_ledec_disklabel(buf + secoff, dl); 449163516Simp 450163516Simp ondisk2inram(ms); 451163516Simp 452163516Simp dl = &ms->inram; 453163516Simp /* Does it look like a label at all? */ 454163516Simp if (g_bsd_checklabel(dl)) 455163516Simp error = ENOENT; 456163516Simp /* ... and does the raw data have a good checksum? */ 457163516Simp if (error == 0 && g_bsd_lesum(dl, buf + secoff) != 0) 458163516Simp error = ENOENT; 459 460 /* Remember to free the buffer g_read_data() gave us. */ 461 g_free(buf); 462 463 /* If we had a label, record it properly. */ 464 if (error == 0) { 465 gsp->frontstuff = 16 * secsize; /* XXX */ 466 ms->labeloffset = offset; 467 g_topology_lock(); 468 g_slice_conf_hot(gp, 0, offset, g_bsd_ondisk_size()); 469 g_topology_unlock(); 470 } 471 return (error); 472} 473 474/* 475 * Implement certain ioctls to modify disklabels with. This function 476 * is called by the event handler thread with topology locked as result 477 * of the g_call_me() in g_bsd_start(). It is not necessary to keep 478 * topology locked all the time but make sure to return with topology 479 * locked as well. 480 */ 481 482static void 483g_bsd_ioctl(void *arg) 484{ 485 struct bio *bp; 486 struct g_geom *gp; 487 struct g_slicer *gsp; 488 struct g_bsd_softc *ms; 489 struct disklabel *dl; 490 struct g_ioctl *gio; 491 struct g_consumer *cp; 492 u_char *buf; 493 off_t secoff; 494 u_int secsize; 495 int error, i; 496 uint64_t sum; 497 498 /* We don't need topology for now. */ 499 g_topology_unlock(); 500 501 /* Get hold of the interesting bits from the bio. */ 502 bp = arg; 503 gp = bp->bio_to->geom; 504 gsp = gp->softc; 505 ms = gsp->softc; 506 gio = (struct g_ioctl *)bp->bio_data; 507 508 /* The disklabel to set is the ioctl argument. */ 509 dl = gio->data; 510 511 /* Validate and modify our slice instance to match. */ 512 error = g_bsd_modify(gp, dl); /* Picks up topology lock on success. */ 513 if (error) { 514 g_topology_lock(); 515 g_io_deliver(bp, error); 516 return; 517 } 518 /* Update our copy of the disklabel. */ 519 ms->inram = *dl; 520 inram2ondisk(ms); 521 522 if (gio->cmd == DIOCSDINFO) { 523 g_io_deliver(bp, 0); 524 return; 525 } 526 KASSERT(gio->cmd == DIOCWDINFO, ("Unknown ioctl in g_bsd_ioctl")); 527 cp = LIST_FIRST(&gp->consumer); 528 /* Get sector size, we need it to read data. */ 529 secsize = cp->provider->sectorsize; 530 secoff = ms->labeloffset % secsize; 531 buf = g_read_data(cp, ms->labeloffset - secoff, secsize, &error); 532 if (buf == NULL || error != 0) { 533 g_io_deliver(bp, error); 534 return; 535 } 536 dl = &ms->ondisk; 537 g_bsd_leenc_disklabel(buf + secoff, dl); 538 if (ms->labeloffset == ALPHA_LABEL_OFFSET) { 539 sum = 0; 540 for (i = 0; i < 63; i++) 541 sum += g_dec_le8(buf + i * 8); 542 g_enc_le8(buf + 504, sum); 543 } 544 error = g_write_data(cp, ms->labeloffset - secoff, buf, secsize); 545 g_free(buf); 546 g_io_deliver(bp, error); 547} 548 549/* 550 * Rewrite the bootblock, which is BBSIZE bytes from the start of the disk. 551 * We punch down the disklabel where we expect it to be before writing. 552 */ 553static int 554g_bsd_diocbsdbb(dev_t dev, u_long cmd, caddr_t data, int fflag, struct thread *td) 555{ 556 struct g_geom *gp; 557 struct g_slicer *gsp; 558 struct g_bsd_softc *ms; 559 struct disklabel *dl; 560 struct g_consumer *cp; 561 u_char *buf; 562 void *p; 563 u_int secsize; 564 int error, i; 565 uint64_t sum; 566 567 /* Get hold of the interesting bits from the bio. */ 568 gp = (void *)dev; 569 gsp = gp->softc; 570 ms = gsp->softc; 571 572 /* The disklabel to set is the ioctl argument. */ 573 buf = g_malloc(BBSIZE, 0); 574 p = *(void **)data; 575 error = copyin(p, buf, BBSIZE); 576 if (error) { 577 g_free(buf); 578 return (error); 579 } 580 /* The disklabel to set is the ioctl argument. */ 581 dl = (void *)(buf + ms->labeloffset); 582 583 DROP_GIANT(); 584 585 /* Validate and modify our slice instance to match. */ 586 error = g_bsd_modify(gp, dl); /* Picks up topology lock on success. */ 587 if (!error) { 588 cp = LIST_FIRST(&gp->consumer); 589 secsize = cp->provider->sectorsize; 590 dl = &ms->ondisk; 591 g_bsd_leenc_disklabel(buf + ms->labeloffset, dl); 592 if (ms->labeloffset == ALPHA_LABEL_OFFSET) { 593 sum = 0; 594 for (i = 0; i < 63; i++) 595 sum += g_dec_le8(buf + i * 8); 596 g_enc_le8(buf + 504, sum); 597 } 598 error = g_write_data(cp, 0, buf, BBSIZE); 599 g_topology_unlock(); 600 } 601 g_free(buf); 602 PICKUP_GIANT(); 603 return (error); 604} 605 606/* 607 * If the user tries to overwrite our disklabel through an open partition 608 * or via a magicwrite config call, we end up here and try to prevent 609 * footshooting as best we can. 610 */ 611static void 612g_bsd_hotwrite(void *arg) 613{ 614 struct bio *bp; 615 struct g_geom *gp; 616 struct g_slicer *gsp; 617 struct g_slice *gsl; 618 struct g_bsd_softc *ms; 619 struct g_bsd_softc fake; 620 u_char *p; 621 int error; 622 623 bp = arg; 624 gp = bp->bio_to->geom; 625 gsp = gp->softc; 626 ms = gsp->softc; 627 gsl = &gsp->slices[bp->bio_to->index]; 628 p = (u_char*)bp->bio_data + ms->labeloffset 629 - (bp->bio_offset + gsl->offset); 630 g_bsd_ledec_disklabel(p, &fake.ondisk); 631 632 ondisk2inram(&fake); 633 if (g_bsd_checklabel(&fake.inram)) { 634 g_io_deliver(bp, EPERM); 635 return; 636 } 637 if (g_bsd_lesum(&fake.ondisk, p) != 0) { 638 g_io_deliver(bp, EPERM); 639 return; 640 } 641 g_topology_unlock(); 642 error = g_bsd_modify(gp, &fake.inram); /* May pick up topology. */ 643 if (error) { 644 g_io_deliver(bp, EPERM); 645 g_topology_lock(); 646 return; 647 } 648 /* Update our copy of the disklabel. */ 649 ms->inram = fake.inram; 650 inram2ondisk(ms); 651 g_bsd_leenc_disklabel(p, &ms->ondisk); 652 g_slice_finish_hot(bp); 653} 654 655/*- 656 * This start routine is only called for non-trivial requests, all the 657 * trivial ones are handled autonomously by the slice code. 658 * For requests we handle here, we must call the g_io_deliver() on the 659 * bio, and return non-zero to indicate to the slice code that we did so. 660 * This code executes in the "DOWN" I/O path, this means: 661 * * No sleeping. 662 * * Don't grab the topology lock. 663 * * Don't call biowait, g_getattr(), g_setattr() or g_read_data() 664 */ 665 666static int 667g_bsd_start(struct bio *bp) 668{ 669 struct g_geom *gp; 670 struct g_bsd_softc *ms; 671 struct g_slicer *gsp; 672 struct g_ioctl *gio; 673 int error; 674 675 gp = bp->bio_to->geom; 676 gsp = gp->softc; 677 ms = gsp->softc; 678 switch(bp->bio_cmd) { 679 case BIO_READ: 680 /* We allow reading of our hot spots */ 681 return (0); 682 case BIO_DELETE: 683 /* We do not allow deleting our hot spots */ 684 return (EPERM); 685 case BIO_WRITE: 686 g_call_me(g_bsd_hotwrite, bp); 687 return (EJUSTRETURN); 688 case BIO_GETATTR: 689 if (g_handleattr(bp, "BSD::labelsum", ms->labelsum, 690 sizeof(ms->labelsum))) 691 return (1); 692 break; 693 case BIO_SETATTR: 694 break; 695 default: 696 KASSERT(0 == 1, ("Unknown bio_cmd in g_bsd_start (%d)", 697 bp->bio_cmd)); 698 } 699 700 /* We only handle ioctl(2) requests of the right format. */ 701 if (strcmp(bp->bio_attribute, "GEOM::ioctl")) 702 return (0); 703 else if (bp->bio_length != sizeof(*gio)) 704 return (0); 705 706 /* Get hold of the ioctl parameters. */ 707 gio = (struct g_ioctl *)bp->bio_data; 708 709 switch (gio->cmd) { 710 case DIOCGDINFO: 711 /* Return a copy of the disklabel to userland. */ 712 bcopy(&ms->inram, gio->data, sizeof(ms->inram)); 713 g_io_deliver(bp, 0); 714 return (1); 715 case DIOCBSDBB: 716 gio->func = g_bsd_diocbsdbb; 717 gio->dev = (void *)gp; 718 g_io_deliver(bp, EDIRIOCTL); 719 return (1); 720 case DIOCSDINFO: 721 case DIOCWDINFO: 722 /* 723 * These we cannot do without the topology lock and some 724 * some I/O requests. Ask the event-handler to schedule 725 * us in a less restricted environment. 726 */ 727 error = g_call_me(g_bsd_ioctl, bp); 728 if (error) 729 g_io_deliver(bp, error); 730 /* 731 * We must return non-zero to indicate that we will deal 732 * with this bio, even though we have not done so yet. 733 */ 734 return (1); 735 default: 736 return (0); 737 } 738} 739 740/* 741 * Dump configuration information in XML format. 742 * Notice that the function is called once for the geom and once for each 743 * consumer and provider. We let g_slice_dumpconf() do most of the work. 744 */ 745static void 746g_bsd_dumpconf(struct sbuf *sb, const char *indent, struct g_geom *gp, struct g_consumer *cp, struct g_provider *pp) 747{ 748 struct g_bsd_softc *ms; 749 struct g_slicer *gsp; 750 751 gsp = gp->softc; 752 ms = gsp->softc; 753 g_slice_dumpconf(sb, indent, gp, cp, pp); 754 if (indent != NULL && pp == NULL && cp == NULL) { 755 sbuf_printf(sb, "%s<labeloffset>%jd</labeloffset>\n", 756 indent, (intmax_t)ms->labeloffset); 757 sbuf_printf(sb, "%s<rawoffset>%jd</rawoffset>\n", 758 indent, (intmax_t)ms->rawoffset); 759 sbuf_printf(sb, "%s<mbroffset>%jd</mbroffset>\n", 760 indent, (intmax_t)ms->mbroffset); 761 } else if (pp != NULL) { 762 if (indent == NULL) 763 sbuf_printf(sb, " ty %d", 764 ms->inram.d_partitions[pp->index].p_fstype); 765 else 766 sbuf_printf(sb, "%s<type>%d</type>\n", indent, 767 ms->inram.d_partitions[pp->index].p_fstype); 768 } 769} 770 771/* 772 * The taste function is called from the event-handler, with the topology 773 * lock already held and a provider to examine. The flags are unused. 774 * 775 * If flags == G_TF_NORMAL, the idea is to take a bite of the provider and 776 * if we find valid, consistent magic on it, build a geom on it. 777 * any magic bits which indicate that we should automatically put a BSD 778 * geom on it. 779 * 780 * There may be cases where the operator would like to put a BSD-geom on 781 * providers which do not meet all of the requirements. This can be done 782 * by instead passing the G_TF_INSIST flag, which will override these 783 * checks. 784 * 785 * The final flags value is G_TF_TRANSPARENT, which instructs the method 786 * to put a geom on top of the provider and configure it to be as transparent 787 * as possible. This is not really relevant to the BSD method and therefore 788 * not implemented here. 789 */ 790 791static struct g_geom * 792g_bsd_taste(struct g_class *mp, struct g_provider *pp, int flags) 793{ 794 struct g_geom *gp; 795 struct g_consumer *cp; 796 int error, i; 797 struct g_bsd_softc *ms; 798 struct disklabel *dl; 799 u_int secsize; 800 struct g_slicer *gsp; 801 MD5_CTX md5sum; 802 u_char hash[16]; 803 804 g_trace(G_T_TOPOLOGY, "bsd_taste(%s,%s)", mp->name, pp->name); 805 g_topology_assert(); 806 807 /* We don't implement transparent inserts. */ 808 if (flags == G_TF_TRANSPARENT) 809 return (NULL); 810 811 /* 812 * BSD labels are a subclass of the general "slicing" topology so 813 * a lot of the work can be done by the common "slice" code. 814 * Create a geom with space for MAXPARTITIONS providers, one consumer 815 * and a softc structure for us. Specify the provider to attach 816 * the consumer to and our "start" routine for special requests. 817 * The provider is opened with mode (1,0,0) so we can do reads 818 * from it. 819 */ 820 gp = g_slice_new(mp, MAXPARTITIONS, pp, &cp, &ms, 821 sizeof(*ms), g_bsd_start); 822 if (gp == NULL) 823 return (NULL); 824 825 /* 826 * Now that we have attached to and opened our provider, we do 827 * not need the topology lock until we change the topology again 828 * next time. 829 */ 830 g_topology_unlock(); 831 832 /* 833 * Fill in the optional details, in our case we have a dumpconf 834 * routine which the "slice" code should call at the right time 835 */ 836 gp->dumpconf = g_bsd_dumpconf; 837 838 /* Get the geom_slicer softc from the geom. */ 839 gsp = gp->softc; 840 841 /* 842 * The do...while loop here allows us to have multiple escapes 843 * using a simple "break". This improves code clarity without 844 * ending up in deep nesting and without using goto or come from. 845 */ 846 do { 847 /* 848 * If the provider is an MBR we will only auto attach 849 * to type 165 slices in the G_TF_NORMAL case. We will 850 * attach to any other type. 851 */ 852 error = g_getattr("MBR::type", cp, &i); 853 if (!error) { 854 if (i != 165 && flags == G_TF_NORMAL) 855 break; 856 error = g_getattr("MBR::offset", cp, &ms->mbroffset); 857 if (error) 858 break; 859 } 860 861 /* Same thing if we are inside a PC98 */ 862 error = g_getattr("PC98::type", cp, &i); 863 if (!error) { 864 if (i != 0xc494 && flags == G_TF_NORMAL) 865 break; 866 error = g_getattr("PC98::offset", cp, &ms->mbroffset); 867 if (error) 868 break; 869 } 870 871 /* Get sector size, we need it to read data. */ 872 secsize = cp->provider->sectorsize; 873 if (secsize < 512) 874 break; 875 876 /* First look for a label at the start of the second sector. */ 877 error = g_bsd_try(gp, gsp, cp, secsize, ms, secsize); 878 879 /* Next, look for alpha labels */ 880 if (error) 881 error = g_bsd_try(gp, gsp, cp, secsize, ms, 882 ALPHA_LABEL_OFFSET); 883 884 /* If we didn't find a label, punt. */ 885 if (error) 886 break; 887 888 /* 889 * In order to avoid recursively attaching to the same 890 * on-disk label (it's usually visible through the 'c' 891 * partition) we calculate an MD5 and ask if other BSD's 892 * below us love that label. If they do, we don't. 893 */ 894 895 dl = &ms->inram; 896 MD5Init(&md5sum); 897 MD5Update(&md5sum, (u_char *)dl, sizeof(dl)); 898 MD5Final(ms->labelsum, &md5sum); 899 900 error = g_getattr("BSD::labelsum", cp, &hash); 901 if (!error && !strncmp(ms->labelsum, hash, sizeof(hash))) 902 break; 903 904 /* 905 * Process the found disklabel, and modify our "slice" 906 * instance to match it, if possible. 907 */ 908 error = g_bsd_modify(gp, dl); /* Picks up topology lock. */ 909 if (!error) 910 g_topology_unlock(); 911 break; 912 } while (0); 913 914 /* Success or failure, we can close our provider now. */ 915 g_topology_lock(); 916 error = g_access_rel(cp, -1, 0, 0); 917 918 /* If we have configured any providers, return the new geom. */ 919 if (gsp->nprovider > 0) 920 return (gp); 921 /* 922 * ...else push the "self-destruct" button, by spoiling our own 923 * consumer. This triggers a call to g_std_spoiled which will 924 * dismantle what was setup. 925 */ 926 g_std_spoiled(cp); 927 return (NULL); 928} 929 930/* Finally, register with GEOM infrastructure. */ 931static struct g_class g_bsd_class = { 932 BSD_CLASS_NAME, 933 g_bsd_taste, 934 NULL, 935 G_CLASS_INITIALIZER 936}; 937 938DECLARE_GEOM_CLASS(g_bsd_class, g_bsd); 939