geom_vinum_drive.c revision 184292
1130389Sle/*- 2142020Sle * Copyright (c) 2004, 2005 Lukas Ertl 3130389Sle * All rights reserved. 4130389Sle * 5130389Sle * Redistribution and use in source and binary forms, with or without 6130389Sle * modification, are permitted provided that the following conditions 7130389Sle * are met: 8130389Sle * 1. Redistributions of source code must retain the above copyright 9130389Sle * notice, this list of conditions and the following disclaimer. 10130389Sle * 2. Redistributions in binary form must reproduce the above copyright 11130389Sle * notice, this list of conditions and the following disclaimer in the 12130389Sle * documentation and/or other materials provided with the distribution. 13130389Sle * 14130389Sle * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 15130389Sle * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 16130389Sle * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 17130389Sle * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 18130389Sle * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 19130389Sle * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 20130389Sle * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 21130389Sle * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 22130389Sle * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 23130389Sle * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 24130389Sle * SUCH DAMAGE. 25130389Sle */ 26130389Sle 27130389Sle#include <sys/cdefs.h> 28130389Sle__FBSDID("$FreeBSD: head/sys/geom/vinum/geom_vinum_drive.c 184292 2008-10-26 17:20:37Z lulf $"); 29130389Sle 30130389Sle#include <sys/param.h> 31130389Sle#include <sys/bio.h> 32130389Sle#include <sys/errno.h> 33183514Slulf#include <sys/endian.h> 34130389Sle#include <sys/conf.h> 35130389Sle#include <sys/kernel.h> 36130389Sle#include <sys/kthread.h> 37130389Sle#include <sys/libkern.h> 38130389Sle#include <sys/lock.h> 39130389Sle#include <sys/malloc.h> 40130389Sle#include <sys/module.h> 41130389Sle#include <sys/mutex.h> 42130389Sle#include <sys/sbuf.h> 43130389Sle#include <sys/systm.h> 44130389Sle#include <sys/time.h> 45181803Sbz#include <sys/vimage.h> 46130389Sle 47130389Sle#include <geom/geom.h> 48130389Sle#include <geom/vinum/geom_vinum_var.h> 49130389Sle#include <geom/vinum/geom_vinum.h> 50130389Sle#include <geom/vinum/geom_vinum_share.h> 51130389Sle 52183514Slulf#define GV_LEGACY_I386 0 53183514Slulf#define GV_LEGACY_AMD64 1 54183514Slulf#define GV_LEGACY_SPARC64 2 55183514Slulf#define GV_LEGACY_POWERPC 3 56183514Slulf 57146325Slestatic void gv_drive_dead(void *, int); 58135173Slestatic void gv_drive_worker(void *); 59183514Slulfstatic int gv_legacy_header_type(uint8_t *, int); 60130389Sle 61183514Slulf/* 62183514Slulf * Here are the "offset (size)" for the various struct gv_hdr fields, 63183514Slulf * for the legacy i386 (or 32-bit powerpc), legacy amd64 (or sparc64), and 64183514Slulf * current (cpu & endian agnostic) versions of the on-disk format of the vinum 65183514Slulf * header structure: 66183514Slulf * 67183514Slulf * i386 amd64 current field 68183514Slulf * -------- -------- -------- ----- 69183514Slulf * 0 ( 8) 0 ( 8) 0 ( 8) magic 70183514Slulf * 8 ( 4) 8 ( 8) 8 ( 8) config_length 71183514Slulf * 12 (32) 16 (32) 16 (32) label.sysname 72183514Slulf * 44 (32) 48 (32) 48 (32) label.name 73183514Slulf * 76 ( 4) 80 ( 8) 80 ( 8) label.date_of_birth.tv_sec 74183514Slulf * 80 ( 4) 88 ( 8) 88 ( 8) label.date_of_birth.tv_usec 75183514Slulf * 84 ( 4) 96 ( 8) 96 ( 8) label.last_update.tv_sec 76183514Slulf * 88 ( 4) 104 ( 8) 104 ( 8) label.last_update.tv_usec 77183514Slulf * 92 ( 8) 112 ( 8) 112 ( 8) label.drive_size 78183514Slulf * ======== ======== ======== 79183514Slulf * 100 120 120 total size 80183514Slulf * 81183514Slulf * NOTE: i386 and amd64 formats are stored as little-endian; the current 82183514Slulf * format uses big-endian (network order). 83183514Slulf */ 84183514Slulf 85183514Slulf 86183514Slulf/* Checks for legacy format depending on platform. */ 87183514Slulfstatic int 88183514Slulfgv_legacy_header_type(uint8_t *hdr, int bigendian) 89183514Slulf{ 90183514Slulf uint32_t *i32; 91183514Slulf int arch_32, arch_64, i; 92183514Slulf 93183514Slulf /* Set arch according to endianess. */ 94183514Slulf if (bigendian) { 95183514Slulf arch_32 = GV_LEGACY_POWERPC; 96183514Slulf arch_64 = GV_LEGACY_SPARC64; 97183514Slulf } else { 98183514Slulf arch_32 = GV_LEGACY_I386; 99183514Slulf arch_64 = GV_LEGACY_AMD64; 100183514Slulf } 101183514Slulf 102183514Slulf /* if non-empty hostname overlaps 64-bit config_length */ 103183514Slulf i32 = (uint32_t *)(hdr + 12); 104183514Slulf if (*i32 != 0) 105183514Slulf return (arch_32); 106183514Slulf /* check for non-empty hostname */ 107183514Slulf if (hdr[16] != 0) 108183514Slulf return (arch_64); 109183514Slulf /* check bytes past 32-bit structure */ 110183514Slulf for (i = 100; i < 120; i++) 111183514Slulf if (hdr[i] != 0) 112183514Slulf return (arch_32); 113183514Slulf /* check for overlapping timestamp */ 114183514Slulf i32 = (uint32_t *)(hdr + 84); 115183514Slulf 116183514Slulf if (*i32 == 0) 117183514Slulf return (arch_64); 118183514Slulf return (arch_32); 119183514Slulf} 120183514Slulf 121183514Slulf/* 122183514Slulf * Read the header while taking magic number into account, and write it to 123183514Slulf * destination pointer. 124183514Slulf */ 125183514Slulfint 126183514Slulfgv_read_header(struct g_consumer *cp, struct gv_hdr *m_hdr) 127183514Slulf{ 128183514Slulf struct g_provider *pp; 129183514Slulf uint64_t magic_machdep; 130183514Slulf uint8_t *d_hdr; 131183514Slulf int be, off; 132183514Slulf 133183514Slulf#define GV_GET32(endian) \ 134183514Slulf endian##32toh(*((uint32_t *)&d_hdr[off])); \ 135183514Slulf off += 4 136183514Slulf#define GV_GET64(endian) \ 137183514Slulf endian##64toh(*((uint64_t *)&d_hdr[off])); \ 138183514Slulf off += 8 139183514Slulf 140183514Slulf KASSERT(m_hdr != NULL, ("gv_read_header: null m_hdr")); 141183514Slulf KASSERT(cp != NULL, ("gv_read_header: null cp")); 142183514Slulf pp = cp->provider; 143183514Slulf KASSERT(pp != NULL, ("gv_read_header: null pp")); 144183514Slulf 145183514Slulf d_hdr = g_read_data(cp, GV_HDR_OFFSET, pp->sectorsize, NULL); 146183514Slulf if (d_hdr == NULL) 147183514Slulf return (-1); 148183514Slulf off = 0; 149183514Slulf m_hdr->magic = GV_GET64(be); 150183514Slulf magic_machdep = *((uint64_t *)&d_hdr[0]); 151183514Slulf /* 152183514Slulf * The big endian machines will have a reverse of GV_OLD_MAGIC, so we 153183514Slulf * need to decide if we are running on a big endian machine as well as 154183514Slulf * checking the magic against the reverse of GV_OLD_MAGIC. 155183514Slulf */ 156183514Slulf be = (m_hdr->magic == magic_machdep); 157183514Slulf if (m_hdr->magic == GV_MAGIC) { 158183514Slulf m_hdr->config_length = GV_GET64(be); 159183514Slulf off = 16; 160183514Slulf bcopy(d_hdr + off, m_hdr->label.sysname, GV_HOSTNAME_LEN); 161183514Slulf off += GV_HOSTNAME_LEN; 162183514Slulf bcopy(d_hdr + off, m_hdr->label.name, GV_MAXDRIVENAME); 163183514Slulf off += GV_MAXDRIVENAME; 164183514Slulf m_hdr->label.date_of_birth.tv_sec = GV_GET64(be); 165183514Slulf m_hdr->label.date_of_birth.tv_usec = GV_GET64(be); 166183514Slulf m_hdr->label.last_update.tv_sec = GV_GET64(be); 167183514Slulf m_hdr->label.last_update.tv_usec = GV_GET64(be); 168183514Slulf m_hdr->label.drive_size = GV_GET64(be); 169183514Slulf } else if (m_hdr->magic != GV_OLD_MAGIC && 170183514Slulf m_hdr->magic != le64toh(GV_OLD_MAGIC)) { 171183514Slulf /* Not a gvinum drive. */ 172183514Slulf g_free(d_hdr); 173183514Slulf return (-1); 174183514Slulf } else if (gv_legacy_header_type(d_hdr, be) == GV_LEGACY_SPARC64) { 175184292Slulf G_VINUM_DEBUG(1, "detected legacy sparc64 header"); 176183514Slulf m_hdr->magic = GV_MAGIC; 177183514Slulf /* Legacy sparc64 on-disk header */ 178183514Slulf m_hdr->config_length = GV_GET64(be); 179183514Slulf bcopy(d_hdr + 16, m_hdr->label.sysname, GV_HOSTNAME_LEN); 180183514Slulf off += GV_HOSTNAME_LEN; 181183514Slulf bcopy(d_hdr + 48, m_hdr->label.name, GV_MAXDRIVENAME); 182183514Slulf off += GV_MAXDRIVENAME; 183183514Slulf m_hdr->label.date_of_birth.tv_sec = GV_GET64(be); 184183514Slulf m_hdr->label.date_of_birth.tv_usec = GV_GET64(be); 185183514Slulf m_hdr->label.last_update.tv_sec = GV_GET64(be); 186183514Slulf m_hdr->label.last_update.tv_usec = GV_GET64(be); 187183514Slulf m_hdr->label.drive_size = GV_GET64(be); 188183514Slulf } else if (gv_legacy_header_type(d_hdr, be) == GV_LEGACY_POWERPC) { 189184292Slulf G_VINUM_DEBUG(1, "detected legacy PowerPC header"); 190183514Slulf m_hdr->magic = GV_MAGIC; 191183514Slulf /* legacy 32-bit big endian on-disk header */ 192183514Slulf m_hdr->config_length = GV_GET32(be); 193183514Slulf bcopy(d_hdr + off, m_hdr->label.sysname, GV_HOSTNAME_LEN); 194183514Slulf off += GV_HOSTNAME_LEN; 195183514Slulf bcopy(d_hdr + off, m_hdr->label.name, GV_MAXDRIVENAME); 196183514Slulf off += GV_MAXDRIVENAME; 197183514Slulf m_hdr->label.date_of_birth.tv_sec = GV_GET32(be); 198183514Slulf m_hdr->label.date_of_birth.tv_usec = GV_GET32(be); 199183514Slulf m_hdr->label.last_update.tv_sec = GV_GET32(be); 200183514Slulf m_hdr->label.last_update.tv_usec = GV_GET32(be); 201183514Slulf m_hdr->label.drive_size = GV_GET64(be); 202183514Slulf } else if (gv_legacy_header_type(d_hdr, be) == GV_LEGACY_I386) { 203184292Slulf G_VINUM_DEBUG(1, "detected legacy i386 header"); 204183514Slulf m_hdr->magic = GV_MAGIC; 205183514Slulf /* legacy i386 on-disk header */ 206183514Slulf m_hdr->config_length = GV_GET32(le); 207183514Slulf bcopy(d_hdr + off, m_hdr->label.sysname, GV_HOSTNAME_LEN); 208183514Slulf off += GV_HOSTNAME_LEN; 209183514Slulf bcopy(d_hdr + off, m_hdr->label.name, GV_MAXDRIVENAME); 210183514Slulf off += GV_MAXDRIVENAME; 211183514Slulf m_hdr->label.date_of_birth.tv_sec = GV_GET32(le); 212183514Slulf m_hdr->label.date_of_birth.tv_usec = GV_GET32(le); 213183514Slulf m_hdr->label.last_update.tv_sec = GV_GET32(le); 214183514Slulf m_hdr->label.last_update.tv_usec = GV_GET32(le); 215183514Slulf m_hdr->label.drive_size = GV_GET64(le); 216183514Slulf } else { 217184292Slulf G_VINUM_DEBUG(1, "detected legacy amd64 header"); 218183514Slulf m_hdr->magic = GV_MAGIC; 219183514Slulf /* legacy amd64 on-disk header */ 220183514Slulf m_hdr->config_length = GV_GET64(le); 221183514Slulf bcopy(d_hdr + 16, m_hdr->label.sysname, GV_HOSTNAME_LEN); 222183514Slulf off += GV_HOSTNAME_LEN; 223183514Slulf bcopy(d_hdr + 48, m_hdr->label.name, GV_MAXDRIVENAME); 224183514Slulf off += GV_MAXDRIVENAME; 225183514Slulf m_hdr->label.date_of_birth.tv_sec = GV_GET64(le); 226183514Slulf m_hdr->label.date_of_birth.tv_usec = GV_GET64(le); 227183514Slulf m_hdr->label.last_update.tv_sec = GV_GET64(le); 228183514Slulf m_hdr->label.last_update.tv_usec = GV_GET64(le); 229183514Slulf m_hdr->label.drive_size = GV_GET64(le); 230183514Slulf } 231183514Slulf 232183514Slulf g_free(d_hdr); 233183514Slulf return (0); 234183514Slulf} 235183514Slulf 236183514Slulf/* Write out the gvinum header. */ 237183514Slulfint 238183514Slulfgv_write_header(struct g_consumer *cp, struct gv_hdr *m_hdr) 239183514Slulf{ 240183514Slulf uint8_t d_hdr[GV_HDR_LEN]; 241183514Slulf int off, ret; 242183514Slulf 243183514Slulf#define GV_SET64BE(field) \ 244183514Slulf do { \ 245183514Slulf *((uint64_t *)&d_hdr[off]) = htobe64(field); \ 246183514Slulf off += 8; \ 247183514Slulf } while (0) 248183514Slulf 249183514Slulf KASSERT(m_hdr != NULL, ("gv_write_header: null m_hdr")); 250183514Slulf 251183514Slulf off = 0; 252183514Slulf memset(d_hdr, 0, GV_HDR_LEN); 253183514Slulf GV_SET64BE(m_hdr->magic); 254183514Slulf GV_SET64BE(m_hdr->config_length); 255183514Slulf off = 16; 256183514Slulf bcopy(m_hdr->label.sysname, d_hdr + off, GV_HOSTNAME_LEN); 257183514Slulf off += GV_HOSTNAME_LEN; 258183514Slulf bcopy(m_hdr->label.name, d_hdr + off, GV_MAXDRIVENAME); 259183514Slulf off += GV_MAXDRIVENAME; 260183514Slulf GV_SET64BE(m_hdr->label.date_of_birth.tv_sec); 261183514Slulf GV_SET64BE(m_hdr->label.date_of_birth.tv_usec); 262183514Slulf GV_SET64BE(m_hdr->label.last_update.tv_sec); 263183514Slulf GV_SET64BE(m_hdr->label.last_update.tv_usec); 264183514Slulf GV_SET64BE(m_hdr->label.drive_size); 265183514Slulf 266183514Slulf ret = g_write_data(cp, GV_HDR_OFFSET, d_hdr, GV_HDR_LEN); 267183514Slulf return (ret); 268183514Slulf} 269183514Slulf 270130389Slevoid 271134407Slegv_config_new_drive(struct gv_drive *d) 272134407Sle{ 273134407Sle struct gv_hdr *vhdr; 274134407Sle struct gv_freelist *fl; 275134407Sle 276134407Sle KASSERT(d != NULL, ("config_new_drive: NULL d")); 277134407Sle 278134407Sle vhdr = g_malloc(sizeof(*vhdr), M_WAITOK | M_ZERO); 279134407Sle vhdr->magic = GV_MAGIC; 280134407Sle vhdr->config_length = GV_CFG_LEN; 281134407Sle 282180291Srwatson mtx_lock(&hostname_mtx); 283181803Sbz bcopy(G_hostname, vhdr->label.sysname, GV_HOSTNAME_LEN); 284180291Srwatson mtx_unlock(&hostname_mtx); 285134407Sle strncpy(vhdr->label.name, d->name, GV_MAXDRIVENAME); 286134407Sle microtime(&vhdr->label.date_of_birth); 287134407Sle 288134407Sle d->hdr = vhdr; 289134407Sle 290134407Sle LIST_INIT(&d->subdisks); 291134407Sle LIST_INIT(&d->freelist); 292134407Sle 293134407Sle fl = g_malloc(sizeof(struct gv_freelist), M_WAITOK | M_ZERO); 294134407Sle fl->offset = GV_DATA_START; 295134407Sle fl->size = d->avail; 296134407Sle LIST_INSERT_HEAD(&d->freelist, fl, freelist); 297134407Sle d->freelist_entries = 1; 298135173Sle 299154075Sle d->bqueue = g_malloc(sizeof(struct bio_queue_head), M_WAITOK | M_ZERO); 300154075Sle bioq_init(d->bqueue); 301135173Sle mtx_init(&d->bqueue_mtx, "gv_drive", NULL, MTX_DEF); 302172836Sjulian kproc_create(gv_drive_worker, d, NULL, 0, 0, "gv_d %s", d->name); 303135173Sle d->flags |= GV_DRIVE_THREAD_ACTIVE; 304134407Sle} 305134407Sle 306134407Slevoid 307130389Slegv_save_config_all(struct gv_softc *sc) 308130389Sle{ 309130389Sle struct gv_drive *d; 310130389Sle 311130389Sle g_topology_assert(); 312130389Sle 313130389Sle LIST_FOREACH(d, &sc->drives, drive) { 314130389Sle if (d->geom == NULL) 315130389Sle continue; 316130389Sle gv_save_config(NULL, d, sc); 317130389Sle } 318130389Sle} 319130389Sle 320130389Sle/* Save the vinum configuration back to disk. */ 321130389Slevoid 322130389Slegv_save_config(struct g_consumer *cp, struct gv_drive *d, struct gv_softc *sc) 323130389Sle{ 324130389Sle struct g_geom *gp; 325130389Sle struct g_consumer *cp2; 326130389Sle struct gv_hdr *vhdr, *hdr; 327130389Sle struct sbuf *sb; 328130389Sle int error; 329130389Sle 330130389Sle g_topology_assert(); 331130389Sle 332130389Sle KASSERT(d != NULL, ("gv_save_config: null d")); 333130389Sle KASSERT(sc != NULL, ("gv_save_config: null sc")); 334130389Sle 335149094Sle /* 336149094Sle * We can't save the config on a drive that isn't up, but drives that 337149094Sle * were just created aren't officially up yet, so we check a special 338149094Sle * flag. 339149094Sle */ 340149094Sle if ((d->state != GV_DRIVE_UP) && !(d->flags && GV_DRIVE_NEWBORN)) 341146325Sle return; 342146325Sle 343130389Sle if (cp == NULL) { 344130389Sle gp = d->geom; 345130389Sle KASSERT(gp != NULL, ("gv_save_config: null gp")); 346130389Sle cp2 = LIST_FIRST(&gp->consumer); 347130389Sle KASSERT(cp2 != NULL, ("gv_save_config: null cp2")); 348130389Sle } else 349130389Sle cp2 = cp; 350130389Sle 351130389Sle vhdr = g_malloc(GV_HDR_LEN, M_WAITOK | M_ZERO); 352130389Sle vhdr->magic = GV_MAGIC; 353130389Sle vhdr->config_length = GV_CFG_LEN; 354130389Sle 355130389Sle hdr = d->hdr; 356130389Sle if (hdr == NULL) { 357184292Slulf G_VINUM_DEBUG(0, "drive %s has NULL hdr", d->name); 358130389Sle g_free(vhdr); 359130389Sle return; 360130389Sle } 361130389Sle microtime(&hdr->label.last_update); 362130389Sle bcopy(&hdr->label, &vhdr->label, sizeof(struct gv_label)); 363130389Sle 364130389Sle sb = sbuf_new(NULL, NULL, GV_CFG_LEN, SBUF_FIXEDLEN); 365130389Sle gv_format_config(sc, sb, 1, NULL); 366130389Sle sbuf_finish(sb); 367130389Sle 368130389Sle error = g_access(cp2, 0, 1, 0); 369130389Sle if (error) { 370184292Slulf G_VINUM_DEBUG(0, "g_access failed on drive %s, errno %d", 371146325Sle d->name, error); 372130389Sle sbuf_delete(sb); 373146325Sle g_free(vhdr); 374130389Sle return; 375130389Sle } 376130389Sle g_topology_unlock(); 377130389Sle 378130389Sle do { 379183514Slulf error = gv_write_header(cp2, vhdr); 380130389Sle if (error) { 381184292Slulf G_VINUM_DEBUG(0, "writing vhdr failed on drive %s, " 382146325Sle "errno %d", d->name, error); 383130389Sle break; 384130389Sle } 385130389Sle 386130389Sle error = g_write_data(cp2, GV_CFG_OFFSET, sbuf_data(sb), 387130389Sle GV_CFG_LEN); 388130389Sle if (error) { 389184292Slulf G_VINUM_DEBUG(0, "writing first config copy failed " 390146325Sle "on drive %s, errno %d", d->name, error); 391130389Sle break; 392130389Sle } 393130389Sle 394130389Sle error = g_write_data(cp2, GV_CFG_OFFSET + GV_CFG_LEN, 395130389Sle sbuf_data(sb), GV_CFG_LEN); 396130389Sle if (error) 397184292Slulf G_VINUM_DEBUG(0, "writing second config copy failed " 398146325Sle "on drive %s, errno %d", d->name, error); 399130389Sle } while (0); 400130389Sle 401130389Sle g_topology_lock(); 402130389Sle g_access(cp2, 0, -1, 0); 403130389Sle sbuf_delete(sb); 404130389Sle g_free(vhdr); 405130389Sle 406130389Sle if (d->geom != NULL) 407130389Sle gv_drive_modify(d); 408130389Sle} 409130389Sle 410130389Sle/* This resembles g_slice_access(). */ 411130389Slestatic int 412130389Slegv_drive_access(struct g_provider *pp, int dr, int dw, int de) 413130389Sle{ 414130389Sle struct g_geom *gp; 415130389Sle struct g_consumer *cp; 416130389Sle struct g_provider *pp2; 417130389Sle struct gv_drive *d; 418130389Sle struct gv_sd *s, *s2; 419130389Sle int error; 420130389Sle 421130389Sle gp = pp->geom; 422130389Sle cp = LIST_FIRST(&gp->consumer); 423135173Sle if (cp == NULL) 424135173Sle return (0); 425130389Sle 426130389Sle d = gp->softc; 427146325Sle if (d == NULL) 428146325Sle return (0); 429130389Sle 430130389Sle s = pp->private; 431130389Sle KASSERT(s != NULL, ("gv_drive_access: NULL s")); 432130389Sle 433130389Sle LIST_FOREACH(s2, &d->subdisks, from_drive) { 434130389Sle if (s == s2) 435130389Sle continue; 436130389Sle if (s->drive_offset + s->size <= s2->drive_offset) 437130389Sle continue; 438130389Sle if (s2->drive_offset + s2->size <= s->drive_offset) 439130389Sle continue; 440130389Sle 441130389Sle /* Overlap. */ 442130389Sle pp2 = s2->provider; 443130389Sle KASSERT(s2 != NULL, ("gv_drive_access: NULL s2")); 444146325Sle if ((pp->acw + dw) > 0 && pp2->ace > 0) 445130389Sle return (EPERM); 446146325Sle if ((pp->ace + de) > 0 && pp2->acw > 0) 447130389Sle return (EPERM); 448130389Sle } 449130389Sle 450130389Sle error = g_access(cp, dr, dw, de); 451130389Sle return (error); 452130389Sle} 453130389Sle 454130389Slestatic void 455135173Slegv_drive_done(struct bio *bp) 456135173Sle{ 457135173Sle struct gv_drive *d; 458135173Sle 459135173Sle /* Put the BIO on the worker queue again. */ 460135173Sle d = bp->bio_from->geom->softc; 461135173Sle bp->bio_cflags |= GV_BIO_DONE; 462135173Sle mtx_lock(&d->bqueue_mtx); 463154075Sle bioq_insert_tail(d->bqueue, bp); 464135173Sle wakeup(d); 465135173Sle mtx_unlock(&d->bqueue_mtx); 466135173Sle} 467135173Sle 468135173Sle 469135173Slestatic void 470130389Slegv_drive_start(struct bio *bp) 471130389Sle{ 472130389Sle struct gv_drive *d; 473130389Sle struct gv_sd *s; 474130389Sle 475135173Sle switch (bp->bio_cmd) { 476135173Sle case BIO_READ: 477135173Sle case BIO_WRITE: 478135173Sle case BIO_DELETE: 479135173Sle break; 480135173Sle case BIO_GETATTR: 481135173Sle default: 482135173Sle g_io_deliver(bp, EOPNOTSUPP); 483135173Sle return; 484135173Sle } 485130389Sle 486135173Sle s = bp->bio_to->private; 487130389Sle if ((s->state == GV_SD_DOWN) || (s->state == GV_SD_STALE)) { 488130389Sle g_io_deliver(bp, ENXIO); 489130389Sle return; 490130389Sle } 491130389Sle 492135173Sle d = bp->bio_to->geom->softc; 493130389Sle 494135173Sle /* 495135173Sle * Put the BIO on the worker queue, where the worker thread will pick 496135173Sle * it up. 497135173Sle */ 498135173Sle mtx_lock(&d->bqueue_mtx); 499154075Sle bioq_disksort(d->bqueue, bp); 500135173Sle wakeup(d); 501135173Sle mtx_unlock(&d->bqueue_mtx); 502130389Sle 503135173Sle} 504135173Sle 505135173Slestatic void 506135173Slegv_drive_worker(void *arg) 507135173Sle{ 508135173Sle struct bio *bp, *cbp; 509135173Sle struct g_geom *gp; 510135173Sle struct g_provider *pp; 511135173Sle struct gv_drive *d; 512135173Sle struct gv_sd *s; 513135173Sle int error; 514135173Sle 515135173Sle d = arg; 516135173Sle 517135173Sle mtx_lock(&d->bqueue_mtx); 518135173Sle for (;;) { 519135173Sle /* We were signaled to exit. */ 520135173Sle if (d->flags & GV_DRIVE_THREAD_DIE) 521135173Sle break; 522135173Sle 523135173Sle /* Take the first BIO from out queue. */ 524154075Sle bp = bioq_takefirst(d->bqueue); 525154075Sle if (bp == NULL) { 526135173Sle msleep(d, &d->bqueue_mtx, PRIBIO, "-", hz/10); 527135173Sle continue; 528135173Sle } 529135173Sle mtx_unlock(&d->bqueue_mtx); 530135173Sle 531135173Sle pp = bp->bio_to; 532135173Sle gp = pp->geom; 533135173Sle 534135173Sle /* Completed request. */ 535135173Sle if (bp->bio_cflags & GV_BIO_DONE) { 536135173Sle error = bp->bio_error; 537135173Sle 538135173Sle /* Deliver the original request. */ 539135173Sle g_std_done(bp); 540135173Sle 541135173Sle /* The request had an error, we need to clean up. */ 542135173Sle if (error != 0) { 543135173Sle g_topology_lock(); 544135173Sle gv_set_drive_state(d, GV_DRIVE_DOWN, 545135173Sle GV_SETSTATE_FORCE | GV_SETSTATE_CONFIG); 546135173Sle g_topology_unlock(); 547146325Sle g_post_event(gv_drive_dead, d, M_WAITOK, d, 548146325Sle NULL); 549135173Sle } 550135173Sle 551135173Sle /* New request, needs to be sent downwards. */ 552135173Sle } else { 553135173Sle s = pp->private; 554135173Sle 555135173Sle if ((s->state == GV_SD_DOWN) || 556135173Sle (s->state == GV_SD_STALE)) { 557135173Sle g_io_deliver(bp, ENXIO); 558135173Sle mtx_lock(&d->bqueue_mtx); 559135173Sle continue; 560135173Sle } 561135173Sle if (bp->bio_offset > s->size) { 562135173Sle g_io_deliver(bp, EINVAL); 563135173Sle mtx_lock(&d->bqueue_mtx); 564135173Sle continue; 565135173Sle } 566135173Sle 567135173Sle cbp = g_clone_bio(bp); 568135173Sle if (cbp == NULL) { 569135173Sle g_io_deliver(bp, ENOMEM); 570135173Sle mtx_lock(&d->bqueue_mtx); 571135173Sle continue; 572135173Sle } 573135173Sle if (cbp->bio_offset + cbp->bio_length > s->size) 574135173Sle cbp->bio_length = s->size - 575135173Sle cbp->bio_offset; 576135173Sle cbp->bio_done = gv_drive_done; 577135173Sle cbp->bio_offset += s->drive_offset; 578135173Sle g_io_request(cbp, LIST_FIRST(&gp->consumer)); 579130389Sle } 580130389Sle 581135173Sle mtx_lock(&d->bqueue_mtx); 582130389Sle } 583135173Sle 584154075Sle while ((bp = bioq_takefirst(d->bqueue)) != NULL) { 585135173Sle mtx_unlock(&d->bqueue_mtx); 586135173Sle if (bp->bio_cflags & GV_BIO_DONE) 587135173Sle g_std_done(bp); 588135173Sle else 589135173Sle g_io_deliver(bp, ENXIO); 590135173Sle mtx_lock(&d->bqueue_mtx); 591135173Sle } 592135173Sle mtx_unlock(&d->bqueue_mtx); 593135173Sle d->flags |= GV_DRIVE_THREAD_DEAD; 594135173Sle 595172836Sjulian kproc_exit(ENXIO); 596130389Sle} 597130389Sle 598135173Sle 599130389Slestatic void 600130389Slegv_drive_orphan(struct g_consumer *cp) 601130389Sle{ 602130389Sle struct g_geom *gp; 603130597Sle struct gv_drive *d; 604130389Sle 605130389Sle g_topology_assert(); 606130389Sle gp = cp->geom; 607130389Sle g_trace(G_T_TOPOLOGY, "gv_drive_orphan(%s)", gp->name); 608130597Sle d = gp->softc; 609130697Sle if (d != NULL) { 610135162Sle gv_set_drive_state(d, GV_DRIVE_DOWN, 611135162Sle GV_SETSTATE_FORCE | GV_SETSTATE_CONFIG); 612146325Sle g_post_event(gv_drive_dead, d, M_WAITOK, d, NULL); 613146325Sle } else 614146325Sle g_wither_geom(gp, ENXIO); 615130389Sle} 616130389Sle 617130389Slestatic struct g_geom * 618130389Slegv_drive_taste(struct g_class *mp, struct g_provider *pp, int flags __unused) 619130389Sle{ 620130389Sle struct g_geom *gp, *gp2; 621130389Sle struct g_consumer *cp; 622130389Sle struct gv_drive *d; 623130389Sle struct gv_sd *s; 624130389Sle struct gv_softc *sc; 625130389Sle struct gv_freelist *fl; 626130389Sle struct gv_hdr *vhdr; 627130389Sle int error; 628152773Sle char *buf, errstr[ERRBUFSIZ]; 629130389Sle 630130389Sle vhdr = NULL; 631130389Sle d = NULL; 632130389Sle 633130389Sle g_trace(G_T_TOPOLOGY, "gv_drive_taste(%s, %s)", mp->name, pp->name); 634130389Sle g_topology_assert(); 635130389Sle 636130389Sle /* Find the VINUM class and its associated geom. */ 637130389Sle gp2 = find_vinum_geom(); 638130389Sle if (gp2 == NULL) 639130389Sle return (NULL); 640130389Sle sc = gp2->softc; 641130389Sle 642130389Sle gp = g_new_geomf(mp, "%s.vinumdrive", pp->name); 643133983Sle gp->start = gv_drive_start; 644133983Sle gp->orphan = gv_drive_orphan; 645133983Sle gp->access = gv_drive_access; 646133983Sle gp->start = gv_drive_start; 647130389Sle 648130389Sle cp = g_new_consumer(gp); 649130389Sle g_attach(cp, pp); 650130389Sle error = g_access(cp, 1, 0, 0); 651130389Sle if (error) { 652130389Sle g_detach(cp); 653130389Sle g_destroy_consumer(cp); 654130389Sle g_destroy_geom(gp); 655130389Sle return (NULL); 656130389Sle } 657130389Sle 658130389Sle g_topology_unlock(); 659130389Sle 660130389Sle /* Now check if the provided slice is a valid vinum drive. */ 661130389Sle do { 662183514Slulf vhdr = g_malloc(GV_HDR_LEN, M_WAITOK | M_ZERO); 663183514Slulf error = gv_read_header(cp, vhdr); 664183514Slulf if (error) { 665130389Sle g_free(vhdr); 666130389Sle break; 667130389Sle } 668130389Sle 669152773Sle /* A valid vinum drive, let's parse the on-disk information. */ 670152971Ssobomax buf = g_read_data(cp, GV_CFG_OFFSET, GV_CFG_LEN, NULL); 671152971Ssobomax if (buf == NULL) { 672152773Sle g_free(vhdr); 673152773Sle break; 674152773Sle } 675149501Sle g_topology_lock(); 676152773Sle gv_parse_config(sc, buf, 1); 677152773Sle g_free(buf); 678149501Sle 679130389Sle /* 680152773Sle * Let's see if this drive is already known in the 681152773Sle * configuration. 682130389Sle */ 683130389Sle d = gv_find_drive(sc, vhdr->label.name); 684130389Sle 685130389Sle /* We already know about this drive. */ 686130389Sle if (d != NULL) { 687133983Sle /* Check if this drive already has a geom. */ 688133983Sle if (d->geom != NULL) { 689133983Sle g_topology_unlock(); 690177345Slulf g_free(vhdr); 691133983Sle break; 692133983Sle } 693130389Sle bcopy(vhdr, d->hdr, sizeof(*vhdr)); 694177345Slulf g_free(vhdr); 695130389Sle 696130389Sle /* This is a new drive. */ 697130389Sle } else { 698130389Sle d = g_malloc(sizeof(*d), M_WAITOK | M_ZERO); 699130389Sle 700130389Sle /* Initialize all needed variables. */ 701130389Sle d->size = pp->mediasize - GV_DATA_START; 702130389Sle d->avail = d->size; 703130389Sle d->hdr = vhdr; 704130389Sle strncpy(d->name, vhdr->label.name, GV_MAXDRIVENAME); 705130389Sle LIST_INIT(&d->subdisks); 706130389Sle LIST_INIT(&d->freelist); 707130389Sle 708130389Sle /* We also need a freelist entry. */ 709130389Sle fl = g_malloc(sizeof(*fl), M_WAITOK | M_ZERO); 710130389Sle fl->offset = GV_DATA_START; 711130389Sle fl->size = d->avail; 712130389Sle LIST_INSERT_HEAD(&d->freelist, fl, freelist); 713130389Sle d->freelist_entries = 1; 714130389Sle 715140475Sle /* Save it into the main configuration. */ 716140475Sle LIST_INSERT_HEAD(&sc->drives, d, drive); 717140475Sle } 718140475Sle 719140475Sle /* 720154075Sle * Create bio queue, queue mutex and a worker thread, if 721154075Sle * necessary. 722140475Sle */ 723154075Sle if (d->bqueue == NULL) { 724154075Sle d->bqueue = g_malloc(sizeof(struct bio_queue_head), 725154075Sle M_WAITOK | M_ZERO); 726154075Sle bioq_init(d->bqueue); 727154075Sle } 728140475Sle if (mtx_initialized(&d->bqueue_mtx) == 0) 729135173Sle mtx_init(&d->bqueue_mtx, "gv_drive", NULL, MTX_DEF); 730140475Sle 731140475Sle if (!(d->flags & GV_DRIVE_THREAD_ACTIVE)) { 732172836Sjulian kproc_create(gv_drive_worker, d, NULL, 0, 0, 733135173Sle "gv_d %s", d->name); 734135173Sle d->flags |= GV_DRIVE_THREAD_ACTIVE; 735130389Sle } 736130389Sle 737133983Sle g_access(cp, -1, 0, 0); 738132617Sle 739130389Sle gp->softc = d; 740130389Sle d->geom = gp; 741135173Sle d->vinumconf = sc; 742130389Sle strncpy(d->device, pp->name, GV_MAXDRIVENAME); 743130389Sle 744130389Sle /* 745130389Sle * Find out which subdisks belong to this drive and crosslink 746130389Sle * them. 747130389Sle */ 748130389Sle LIST_FOREACH(s, &sc->subdisks, sd) { 749130389Sle if (!strncmp(s->drive, d->name, GV_MAXDRIVENAME)) 750130389Sle /* XXX: errors ignored */ 751130389Sle gv_sd_to_drive(sc, d, s, errstr, 752130389Sle sizeof(errstr)); 753130389Sle } 754130389Sle 755130389Sle /* This drive is now up for sure. */ 756130389Sle gv_set_drive_state(d, GV_DRIVE_UP, 0); 757130389Sle 758130389Sle /* 759130389Sle * If there are subdisks on this drive, we need to create 760130389Sle * providers for them. 761130389Sle */ 762130389Sle if (d->sdcount) 763130389Sle gv_drive_modify(d); 764130389Sle 765130389Sle return (gp); 766130389Sle 767130389Sle } while (0); 768130389Sle 769130389Sle g_topology_lock(); 770130389Sle g_access(cp, -1, 0, 0); 771130389Sle 772130389Sle g_detach(cp); 773130389Sle g_destroy_consumer(cp); 774130389Sle g_destroy_geom(gp); 775130389Sle return (NULL); 776130389Sle} 777130389Sle 778130389Sle/* 779130389Sle * Modify the providers for the given drive 'd'. It is assumed that the 780130389Sle * subdisk list of 'd' is already correctly set up. 781130389Sle */ 782130389Slevoid 783130389Slegv_drive_modify(struct gv_drive *d) 784130389Sle{ 785130389Sle struct g_geom *gp; 786130389Sle struct g_consumer *cp; 787130389Sle struct g_provider *pp, *pp2; 788130389Sle struct gv_sd *s; 789130389Sle 790130389Sle KASSERT(d != NULL, ("gv_drive_modify: null d")); 791130389Sle gp = d->geom; 792130389Sle KASSERT(gp != NULL, ("gv_drive_modify: null gp")); 793130389Sle cp = LIST_FIRST(&gp->consumer); 794130389Sle KASSERT(cp != NULL, ("gv_drive_modify: null cp")); 795130389Sle pp = cp->provider; 796130389Sle KASSERT(pp != NULL, ("gv_drive_modify: null pp")); 797130389Sle 798130389Sle g_topology_assert(); 799130389Sle 800130389Sle LIST_FOREACH(s, &d->subdisks, from_drive) { 801130389Sle /* This subdisk already has a provider. */ 802130389Sle if (s->provider != NULL) 803130389Sle continue; 804130389Sle pp2 = g_new_providerf(gp, "gvinum/sd/%s", s->name); 805130389Sle pp2->mediasize = s->size; 806130389Sle pp2->sectorsize = pp->sectorsize; 807130389Sle g_error_provider(pp2, 0); 808130389Sle s->provider = pp2; 809130389Sle pp2->private = s; 810130389Sle } 811130389Sle} 812130389Sle 813146325Slestatic void 814146325Slegv_drive_dead(void *arg, int flag) 815146325Sle{ 816146325Sle struct g_geom *gp; 817146325Sle struct g_consumer *cp; 818146325Sle struct gv_drive *d; 819146325Sle struct gv_sd *s; 820146325Sle 821146325Sle g_topology_assert(); 822146325Sle KASSERT(arg != NULL, ("gv_drive_dead: NULL arg")); 823146325Sle 824146325Sle if (flag == EV_CANCEL) 825146325Sle return; 826146325Sle 827146325Sle d = arg; 828146325Sle if (d->state != GV_DRIVE_DOWN) 829146325Sle return; 830146325Sle 831146325Sle g_trace(G_T_TOPOLOGY, "gv_drive_dead(%s)", d->name); 832146325Sle 833146325Sle gp = d->geom; 834146325Sle if (gp == NULL) 835146325Sle return; 836146325Sle 837146325Sle LIST_FOREACH(cp, &gp->consumer, consumer) { 838146325Sle if (cp->nstart != cp->nend) { 839184292Slulf G_VINUM_DEBUG(0, "dead drive '%s' still has " 840184292Slulf "active requests, cannot detach consumer", 841146325Sle d->name); 842146325Sle g_post_event(gv_drive_dead, d, M_WAITOK, d, 843146325Sle NULL); 844146325Sle return; 845146325Sle } 846146325Sle if (cp->acr != 0 || cp->acw != 0 || cp->ace != 0) 847146325Sle g_access(cp, -cp->acr, -cp->acw, -cp->ace); 848146325Sle } 849146325Sle 850184292Slulf G_VINUM_DEBUG(1, "lost drive '%s'", d->name); 851146325Sle d->geom = NULL; 852146325Sle LIST_FOREACH(s, &d->subdisks, from_drive) { 853146325Sle s->provider = NULL; 854146325Sle s->consumer = NULL; 855146325Sle } 856146325Sle gv_kill_drive_thread(d); 857146325Sle gp->softc = NULL; 858146325Sle g_wither_geom(gp, ENXIO); 859146325Sle} 860146325Sle 861130389Slestatic int 862130389Slegv_drive_destroy_geom(struct gctl_req *req, struct g_class *mp, 863130389Sle struct g_geom *gp) 864130389Sle{ 865135173Sle struct gv_drive *d; 866135173Sle 867130389Sle g_trace(G_T_TOPOLOGY, "gv_drive_destroy_geom: %s", gp->name); 868130389Sle g_topology_assert(); 869130389Sle 870135173Sle d = gp->softc; 871135173Sle gv_kill_drive_thread(d); 872135173Sle 873130389Sle g_wither_geom(gp, ENXIO); 874130389Sle return (0); 875130389Sle} 876130389Sle 877130389Sle#define VINUMDRIVE_CLASS_NAME "VINUMDRIVE" 878130389Sle 879130389Slestatic struct g_class g_vinum_drive_class = { 880130389Sle .name = VINUMDRIVE_CLASS_NAME, 881133318Sphk .version = G_VERSION, 882130389Sle .taste = gv_drive_taste, 883130389Sle .destroy_geom = gv_drive_destroy_geom 884130389Sle}; 885130389Sle 886130389SleDECLARE_GEOM_CLASS(g_vinum_drive_class, g_vinum_drive); 887