/* md_promise.c, revision 220209 */
/*-
 * Copyright (c) 2011 Alexander Motin <mav@FreeBSD.org>
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHORS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
25 */ 26 27#include <sys/cdefs.h> 28__FBSDID("$FreeBSD: head/sys/geom/raid/md_promise.c 220209 2011-03-31 16:14:35Z mav $"); 29 30#include <sys/param.h> 31#include <sys/bio.h> 32#include <sys/endian.h> 33#include <sys/kernel.h> 34#include <sys/kobj.h> 35#include <sys/limits.h> 36#include <sys/lock.h> 37#include <sys/malloc.h> 38#include <sys/mutex.h> 39#include <sys/systm.h> 40#include <geom/geom.h> 41#include "geom/raid/g_raid.h" 42#include "g_raid_md_if.h" 43 44static MALLOC_DEFINE(M_MD_PROMISE, "md_promise_data", "GEOM_RAID Promise metadata"); 45 46#define PROMISE_MAX_DISKS 8 47#define PROMISE_MAX_SUBDISKS 2 48#define PROMISE_META_OFFSET 14 49 50struct promise_raid_disk { 51 uint8_t flags; /* Subdisk status. */ 52#define PROMISE_F_VALID 0x01 53#define PROMISE_F_ONLINE 0x02 54#define PROMISE_F_ASSIGNED 0x04 55#define PROMISE_F_SPARE 0x08 56#define PROMISE_F_DUPLICATE 0x10 57#define PROMISE_F_REDIR 0x20 58#define PROMISE_F_DOWN 0x40 59#define PROMISE_F_READY 0x80 60 61 uint8_t number; /* Position in a volume. */ 62 uint8_t channel; /* ATA channel number. */ 63 uint8_t device; /* ATA device number. */ 64 uint64_t id __packed; /* Subdisk ID. */ 65} __packed; 66 67struct promise_raid_conf { 68 char promise_id[24]; 69#define PROMISE_MAGIC "Promise Technology, Inc." 70#define FREEBSD_MAGIC "FreeBSD ATA driver RAID " 71 72 uint32_t dummy_0; 73 uint64_t magic_0; 74#define PROMISE_MAGIC0(x) (((uint64_t)(x.channel) << 48) | \ 75 ((uint64_t)(x.device != 0) << 56)) 76 uint16_t magic_1; 77 uint32_t magic_2; 78 uint8_t filler1[470]; 79 80 uint32_t integrity; 81#define PROMISE_I_VALID 0x00000080 82 83 struct promise_raid_disk disk; /* This subdisk info. */ 84 uint32_t disk_offset; /* Subdisk offset. */ 85 uint32_t disk_sectors; /* Subdisk size */ 86 uint32_t rebuild_lba; /* Rebuild position. */ 87 uint16_t generation; /* Generation number. */ 88 uint8_t status; /* Volume status. 
*/ 89#define PROMISE_S_VALID 0x01 90#define PROMISE_S_ONLINE 0x02 91#define PROMISE_S_INITED 0x04 92#define PROMISE_S_READY 0x08 93#define PROMISE_S_DEGRADED 0x10 94#define PROMISE_S_MARKED 0x20 95#define PROMISE_S_MIGRATING 0x40 96#define PROMISE_S_FUNCTIONAL 0x80 97 98 uint8_t type; /* Voluem type. */ 99#define PROMISE_T_RAID0 0x00 100#define PROMISE_T_RAID1 0x01 101#define PROMISE_T_RAID3 0x02 102#define PROMISE_T_RAID5 0x04 103#define PROMISE_T_SPAN 0x08 104#define PROMISE_T_JBOD 0x10 105 106 uint8_t total_disks; /* Disks in this volume. */ 107 uint8_t stripe_shift; /* Strip size. */ 108 uint8_t array_width; /* Number of RAID0 stripes. */ 109 uint8_t array_number; /* Global volume number. */ 110 uint32_t total_sectors; /* Volume size. */ 111 uint16_t cylinders; /* Volume geometry: C. */ 112 uint8_t heads; /* Volume geometry: H. */ 113 uint8_t sectors; /* Volume geometry: S. */ 114 uint64_t volume_id __packed; /* Volume ID, */ 115 struct promise_raid_disk disks[PROMISE_MAX_DISKS]; 116 /* Subdisks in this volume. */ 117 char name[32]; /* Volume label. */ 118 119 uint32_t filler2[8]; 120 uint32_t magic_3; /* Something related to rebuild. */ 121 uint64_t rebuild_lba64; /* Per-volume rebuild position. */ 122 uint32_t magic_4; 123 uint32_t magic_5; 124 uint32_t filler3[325]; 125 uint32_t checksum; 126} __packed; 127 128struct g_raid_md_promise_perdisk { 129 int pd_updated; 130 int pd_subdisks; 131 struct promise_raid_conf *pd_meta[PROMISE_MAX_SUBDISKS]; 132}; 133 134struct g_raid_md_promise_pervolume { 135 struct promise_raid_conf *pv_meta; 136 uint64_t pv_id; 137 uint16_t pv_generation; 138 int pv_disks_present; 139 int pv_started; 140 struct callout pv_start_co; /* STARTING state timer. 
*/ 141}; 142 143static g_raid_md_create_t g_raid_md_create_promise; 144static g_raid_md_taste_t g_raid_md_taste_promise; 145static g_raid_md_event_t g_raid_md_event_promise; 146static g_raid_md_volume_event_t g_raid_md_volume_event_promise; 147static g_raid_md_ctl_t g_raid_md_ctl_promise; 148static g_raid_md_write_t g_raid_md_write_promise; 149static g_raid_md_fail_disk_t g_raid_md_fail_disk_promise; 150static g_raid_md_free_disk_t g_raid_md_free_disk_promise; 151static g_raid_md_free_volume_t g_raid_md_free_volume_promise; 152static g_raid_md_free_t g_raid_md_free_promise; 153 154static kobj_method_t g_raid_md_promise_methods[] = { 155 KOBJMETHOD(g_raid_md_create, g_raid_md_create_promise), 156 KOBJMETHOD(g_raid_md_taste, g_raid_md_taste_promise), 157 KOBJMETHOD(g_raid_md_event, g_raid_md_event_promise), 158 KOBJMETHOD(g_raid_md_volume_event, g_raid_md_volume_event_promise), 159 KOBJMETHOD(g_raid_md_ctl, g_raid_md_ctl_promise), 160 KOBJMETHOD(g_raid_md_write, g_raid_md_write_promise), 161 KOBJMETHOD(g_raid_md_fail_disk, g_raid_md_fail_disk_promise), 162 KOBJMETHOD(g_raid_md_free_disk, g_raid_md_free_disk_promise), 163 KOBJMETHOD(g_raid_md_free_volume, g_raid_md_free_volume_promise), 164 KOBJMETHOD(g_raid_md_free, g_raid_md_free_promise), 165 { 0, 0 } 166}; 167 168static struct g_raid_md_class g_raid_md_promise_class = { 169 "Promise", 170 g_raid_md_promise_methods, 171 sizeof(struct g_raid_md_object), 172 .mdc_priority = 100 173}; 174 175 176static void 177g_raid_md_promise_print(struct promise_raid_conf *meta) 178{ 179 int i; 180 181 if (g_raid_debug < 1) 182 return; 183 184 printf("********* ATA Promise Metadata *********\n"); 185 printf("promise_id <%.24s>\n", meta->promise_id); 186 printf("disk %02x %02x %02x %02x %016jx\n", 187 meta->disk.flags, meta->disk.number, meta->disk.channel, 188 meta->disk.device, meta->disk.id); 189 printf("disk_offset %u\n", meta->disk_offset); 190 printf("disk_sectors %u\n", meta->disk_sectors); 191 printf("rebuild_lba %u\n", 
meta->rebuild_lba); 192 printf("generation %u\n", meta->generation); 193 printf("status 0x%02x\n", meta->status); 194 printf("type %u\n", meta->type); 195 printf("total_disks %u\n", meta->total_disks); 196 printf("stripe_shift %u\n", meta->stripe_shift); 197 printf("array_width %u\n", meta->array_width); 198 printf("array_number %u\n", meta->array_number); 199 printf("total_sectors %u\n", meta->total_sectors); 200 printf("cylinders %u\n", meta->cylinders); 201 printf("heads %u\n", meta->heads); 202 printf("sectors %u\n", meta->sectors); 203 printf("volume_id 0x%016jx\n", meta->volume_id); 204 printf("disks:\n"); 205 for (i = 0; i < PROMISE_MAX_DISKS; i++ ) { 206 printf(" %02x %02x %02x %02x %016jx\n", 207 meta->disks[i].flags, meta->disks[i].number, 208 meta->disks[i].channel, meta->disks[i].device, 209 meta->disks[i].id); 210 } 211 printf("name <%.32s>\n", meta->name); 212 printf("magic_3 0x%08x\n", meta->magic_3); 213 printf("rebuild_lba64 %ju\n", meta->rebuild_lba64); 214 printf("magic_4 0x%08x\n", meta->magic_4); 215 printf("magic_5 0x%08x\n", meta->magic_5); 216 printf("=================================================\n"); 217} 218 219static struct promise_raid_conf * 220promise_meta_copy(struct promise_raid_conf *meta) 221{ 222 struct promise_raid_conf *nmeta; 223 224 nmeta = malloc(sizeof(*nmeta), M_MD_PROMISE, M_WAITOK); 225 memcpy(nmeta, meta, sizeof(*nmeta)); 226 return (nmeta); 227} 228 229static int 230promise_meta_find_disk(struct promise_raid_conf *meta, uint64_t id) 231{ 232 int pos; 233 234 for (pos = 0; pos < meta->total_disks; pos++) { 235 if (meta->disks[pos].id == id) 236 return (pos); 237 } 238 return (-1); 239} 240 241static int 242promise_meta_unused_range(struct promise_raid_conf **metaarr, int nsd, 243 uint32_t sectors, uint32_t *off, uint32_t *size) 244{ 245 uint32_t coff, csize; 246 int i, j; 247 248 sectors -= 131072; 249 *off = 0; 250 *size = 0; 251 coff = 0; 252 csize = sectors; 253 i = 0; 254 while (1) { 255 for (j = 0; j < nsd; j++) 
{ 256 if (metaarr[j]->disk_offset >= coff) { 257 csize = MIN(csize, 258 metaarr[j]->disk_offset - coff); 259 } 260 } 261 if (csize > *size) { 262 *off = coff; 263 *size = csize; 264 } 265 if (i >= nsd) 266 break; 267 coff = metaarr[i]->disk_offset + metaarr[i]->disk_sectors; 268 csize = sectors - coff; 269 i++; 270 }; 271 return ((*size > 0) ? 1 : 0); 272} 273 274static int 275promise_meta_translate_disk(struct g_raid_volume *vol, int md_disk_pos) 276{ 277 int disk_pos, width; 278 279 if (md_disk_pos >= 0 && vol->v_raid_level == G_RAID_VOLUME_RL_RAID1E) { 280 width = vol->v_disks_count / 2; 281 disk_pos = (md_disk_pos / width) + 282 (md_disk_pos % width) * width; 283 } else 284 disk_pos = md_disk_pos; 285 return (disk_pos); 286} 287 288static void 289promise_meta_get_name(struct promise_raid_conf *meta, char *buf) 290{ 291 int i; 292 293 strncpy(buf, meta->name, 32); 294 buf[32] = 0; 295 for (i = 31; i >= 0; i--) { 296 if (buf[i] > 0x20) 297 break; 298 buf[i] = 0; 299 } 300} 301 302static void 303promise_meta_put_name(struct promise_raid_conf *meta, char *buf) 304{ 305 306 memset(meta->name, 0x20, 32); 307 memcpy(meta->name, buf, MIN(strlen(buf), 32)); 308} 309 310static int 311promise_meta_read(struct g_consumer *cp, struct promise_raid_conf **metaarr) 312{ 313 struct g_provider *pp; 314 struct promise_raid_conf *meta; 315 char *buf; 316 int error, i, subdisks; 317 uint32_t checksum, *ptr; 318 319 pp = cp->provider; 320 subdisks = 0; 321next: 322 /* Read metadata block. 
*/ 323 buf = g_read_data(cp, pp->mediasize - pp->sectorsize * 324 (63 - subdisks * PROMISE_META_OFFSET), 325 pp->sectorsize * 4, &error); 326 if (buf == NULL) { 327 G_RAID_DEBUG(1, "Cannot read metadata from %s (error=%d).", 328 pp->name, error); 329 return (subdisks); 330 } 331 meta = (struct promise_raid_conf *)buf; 332 333 /* Check if this is an Promise RAID struct */ 334 if (strncmp(meta->promise_id, PROMISE_MAGIC, strlen(PROMISE_MAGIC)) && 335 strncmp(meta->promise_id, FREEBSD_MAGIC, strlen(FREEBSD_MAGIC))) { 336 if (subdisks == 0) 337 G_RAID_DEBUG(1, 338 "Promise signature check failed on %s", pp->name); 339 g_free(buf); 340 return (subdisks); 341 } 342 meta = malloc(sizeof(*meta), M_MD_PROMISE, M_WAITOK); 343 memcpy(meta, buf, MIN(sizeof(*meta), pp->sectorsize * 4)); 344 g_free(buf); 345 346 /* Check metadata checksum. */ 347 for (checksum = 0, ptr = (uint32_t *)meta, i = 0; i < 511; i++) 348 checksum += *ptr++; 349 if (checksum != meta->checksum) { 350 G_RAID_DEBUG(1, "Promise checksum check failed on %s", pp->name); 351 free(meta, M_MD_PROMISE); 352 return (subdisks); 353 } 354 355 if ((meta->integrity & PROMISE_I_VALID) == 0) { 356 G_RAID_DEBUG(1, "Promise metadata is invalid on %s", pp->name); 357 free(meta, M_MD_PROMISE); 358 return (subdisks); 359 } 360 361 if (meta->total_disks > PROMISE_MAX_DISKS) { 362 G_RAID_DEBUG(1, "Wrong number of disks on %s (%d)", 363 pp->name, meta->total_disks); 364 free(meta, M_MD_PROMISE); 365 return (subdisks); 366 } 367 368 /* Save this part and look for next. 
*/ 369 *metaarr = meta; 370 metaarr++; 371 subdisks++; 372 if (subdisks < PROMISE_MAX_SUBDISKS) 373 goto next; 374 375 return (subdisks); 376} 377 378static int 379promise_meta_write(struct g_consumer *cp, 380 struct promise_raid_conf **metaarr, int nsd) 381{ 382 struct g_provider *pp; 383 struct promise_raid_conf *meta; 384 char *buf; 385 int error, i, subdisk, fake; 386 uint32_t checksum, *ptr, off, size; 387 388 pp = cp->provider; 389 subdisk = 0; 390 fake = 0; 391next: 392 buf = malloc(pp->sectorsize * 4, M_MD_PROMISE, M_WAITOK | M_ZERO); 393 meta = NULL; 394 if (subdisk < nsd) { 395 meta = metaarr[subdisk]; 396 } else if (!fake && promise_meta_unused_range(metaarr, nsd, 397 cp->provider->mediasize / cp->provider->sectorsize, 398 &off, &size)) { 399 /* Optionally add record for unused space. */ 400 meta = (struct promise_raid_conf *)buf; 401 memcpy(&meta->promise_id[0], PROMISE_MAGIC, 402 sizeof(PROMISE_MAGIC) - 1); 403 meta->dummy_0 = 0x00020000; 404 meta->integrity = PROMISE_I_VALID; 405 meta->disk.flags = PROMISE_F_ONLINE | PROMISE_F_VALID; 406 meta->disk.number = 0xff; 407 arc4rand(&meta->disk.id, sizeof(meta->disk.id), 0); 408 meta->disk_offset = off; 409 meta->disk_sectors = size; 410 meta->rebuild_lba = UINT32_MAX; 411 fake = 1; 412 } 413 if (meta != NULL) { 414 /* Recalculate checksum for case if metadata were changed. 
*/ 415 meta->checksum = 0; 416 for (checksum = 0, ptr = (uint32_t *)meta, i = 0; i < 511; i++) 417 checksum += *ptr++; 418 meta->checksum = checksum; 419 memcpy(buf, meta, MIN(pp->sectorsize * 4, sizeof(*meta))); 420 } 421 error = g_write_data(cp, pp->mediasize - pp->sectorsize * 422 (63 - subdisk * PROMISE_META_OFFSET), 423 buf, pp->sectorsize * 4); 424 if (error != 0) { 425 G_RAID_DEBUG(1, "Cannot write metadata to %s (error=%d).", 426 pp->name, error); 427 } 428 free(buf, M_MD_PROMISE); 429 430 subdisk++; 431 if (subdisk < PROMISE_MAX_SUBDISKS) 432 goto next; 433 434 return (error); 435} 436 437static int 438promise_meta_erase(struct g_consumer *cp) 439{ 440 struct g_provider *pp; 441 char *buf; 442 int error, subdisk; 443 444 pp = cp->provider; 445 buf = malloc(4 * pp->sectorsize, M_MD_PROMISE, M_WAITOK | M_ZERO); 446 for (subdisk = 0; subdisk < PROMISE_MAX_SUBDISKS; subdisk++) { 447 error = g_write_data(cp, pp->mediasize - pp->sectorsize * 448 (63 - subdisk * PROMISE_META_OFFSET), 449 buf, 4 * pp->sectorsize); 450 if (error != 0) { 451 G_RAID_DEBUG(1, "Cannot erase metadata on %s (error=%d).", 452 pp->name, error); 453 } 454 } 455 free(buf, M_MD_PROMISE); 456 return (error); 457} 458 459static int 460promise_meta_write_spare(struct g_consumer *cp) 461{ 462 struct promise_raid_conf *meta; 463 int error; 464 465 meta = malloc(sizeof(*meta), M_MD_PROMISE, M_WAITOK | M_ZERO); 466 memcpy(&meta->promise_id[0], PROMISE_MAGIC, sizeof(PROMISE_MAGIC) - 1); 467 meta->dummy_0 = 0x00020000; 468 meta->integrity = PROMISE_I_VALID; 469 meta->disk.flags = PROMISE_F_SPARE | PROMISE_F_ONLINE | PROMISE_F_VALID; 470 meta->disk.number = 0xff; 471 arc4rand(&meta->disk.id, sizeof(meta->disk.id), 0); 472 meta->disk_sectors = cp->provider->mediasize / cp->provider->sectorsize; 473 meta->disk_sectors -= 131072; 474 meta->rebuild_lba = UINT32_MAX; 475 error = promise_meta_write(cp, &meta, 1); 476 free(meta, M_MD_PROMISE); 477 return (error); 478} 479 480static struct g_raid_volume * 
481g_raid_md_promise_get_volume(struct g_raid_softc *sc, uint64_t id) 482{ 483 struct g_raid_volume *vol; 484 struct g_raid_md_promise_pervolume *pv; 485 486 TAILQ_FOREACH(vol, &sc->sc_volumes, v_next) { 487 pv = vol->v_md_data; 488 if (pv->pv_id == id) 489 break; 490 } 491 return (vol); 492} 493 494static int 495g_raid_md_promise_purge_volumes(struct g_raid_softc *sc) 496{ 497 struct g_raid_volume *vol, *tvol; 498 struct g_raid_md_promise_pervolume *pv; 499 int i, res; 500 501 res = 0; 502 TAILQ_FOREACH_SAFE(vol, &sc->sc_volumes, v_next, tvol) { 503 pv = vol->v_md_data; 504 if (!pv->pv_started || vol->v_stopping) 505 continue; 506 for (i = 0; i < vol->v_disks_count; i++) { 507 if (vol->v_subdisks[i].sd_state != G_RAID_SUBDISK_S_NONE) 508 break; 509 } 510 if (i >= vol->v_disks_count) { 511 g_raid_destroy_volume(vol); 512 res = 1; 513 } 514 } 515 return (res); 516} 517 518static int 519g_raid_md_promise_purge_disks(struct g_raid_softc *sc) 520{ 521 struct g_raid_disk *disk, *tdisk; 522 struct g_raid_volume *vol; 523 struct g_raid_md_promise_perdisk *pd; 524 int i, j, res; 525 526 res = 0; 527 TAILQ_FOREACH_SAFE(disk, &sc->sc_disks, d_next, tdisk) { 528 if (disk->d_state == G_RAID_DISK_S_SPARE) 529 continue; 530 pd = (struct g_raid_md_promise_perdisk *)disk->d_md_data; 531 532 /* Scan for deleted volumes. */ 533 for (i = 0; i < pd->pd_subdisks; ) { 534 vol = g_raid_md_promise_get_volume(sc, 535 pd->pd_meta[i]->volume_id); 536 if (vol != NULL && !vol->v_stopping) { 537 i++; 538 continue; 539 } 540 free(pd->pd_meta[i], M_MD_PROMISE); 541 for (j = i; j < pd->pd_subdisks - 1; j++) 542 pd->pd_meta[j] = pd->pd_meta[j + 1]; 543 pd->pd_meta[PROMISE_MAX_SUBDISKS - 1] = NULL; 544 pd->pd_subdisks--; 545 pd->pd_updated = 1; 546 } 547 548 /* If there is no metadata left - erase and delete disk. 
*/ 549 if (pd->pd_subdisks == 0) { 550 promise_meta_erase(disk->d_consumer); 551 g_raid_destroy_disk(disk); 552 res = 1; 553 } 554 } 555 return (res); 556} 557 558static int 559g_raid_md_promise_supported(int level, int qual, int disks, int force) 560{ 561 562 if (disks > PROMISE_MAX_DISKS) 563 return (0); 564 switch (level) { 565 case G_RAID_VOLUME_RL_RAID0: 566 if (disks < 1) 567 return (0); 568 if (!force && disks < 2) 569 return (0); 570 break; 571 case G_RAID_VOLUME_RL_RAID1: 572 if (disks < 1) 573 return (0); 574 if (!force && (disks != 2)) 575 return (0); 576 break; 577 case G_RAID_VOLUME_RL_RAID1E: 578 if (disks < 2) 579 return (0); 580 if (disks % 2 != 0) 581 return (0); 582 if (!force && (disks != 4)) 583 return (0); 584 break; 585 case G_RAID_VOLUME_RL_SINGLE: 586 if (disks != 1) 587 return (0); 588 break; 589 case G_RAID_VOLUME_RL_CONCAT: 590 if (disks < 2) 591 return (0); 592 break; 593 case G_RAID_VOLUME_RL_RAID5: 594 if (disks < 3) 595 return (0); 596 break; 597 default: 598 return (0); 599 } 600 if (qual != G_RAID_VOLUME_RLQ_NONE) 601 return (0); 602 return (1); 603} 604 605static int 606g_raid_md_promise_start_disk(struct g_raid_disk *disk, int sdn, 607 struct g_raid_volume *vol) 608{ 609 struct g_raid_softc *sc; 610 struct g_raid_subdisk *sd; 611 struct g_raid_md_promise_perdisk *pd; 612 struct g_raid_md_promise_pervolume *pv; 613 struct promise_raid_conf *meta; 614 off_t size; 615 int disk_pos, md_disk_pos, i, resurrection = 0; 616 uint32_t eoff, esize; 617 618 sc = disk->d_softc; 619 pd = (struct g_raid_md_promise_perdisk *)disk->d_md_data; 620 621 pv = vol->v_md_data; 622 meta = pv->pv_meta; 623 624 if (sdn >= 0) { 625 /* Find disk position in metadata by it's serial. */ 626 md_disk_pos = promise_meta_find_disk(meta, pd->pd_meta[sdn]->disk.id); 627 /* For RAID0+1 we need to translate order. 
*/ 628 disk_pos = promise_meta_translate_disk(vol, md_disk_pos); 629 } else { 630 md_disk_pos = -1; 631 disk_pos = -1; 632 } 633 if (disk_pos < 0) { 634 G_RAID_DEBUG1(1, sc, "Disk %s is not part of the volume %s", 635 g_raid_get_diskname(disk), vol->v_name); 636 /* Failed stale disk is useless for us. */ 637 if (sdn >= 0 && 638 pd->pd_meta[sdn]->disk.flags & PROMISE_F_DOWN) { 639 g_raid_change_disk_state(disk, G_RAID_DISK_S_STALE_FAILED); 640 return (0); 641 } 642 /* If we were given specific metadata subdisk - erase it. */ 643 if (sdn >= 0) { 644 free(pd->pd_meta[sdn], M_MD_PROMISE); 645 for (i = sdn; i < pd->pd_subdisks - 1; i++) 646 pd->pd_meta[i] = pd->pd_meta[i + 1]; 647 pd->pd_meta[PROMISE_MAX_SUBDISKS - 1] = NULL; 648 pd->pd_subdisks--; 649 } 650 /* If we are in the start process, that's all for now. */ 651 if (!pv->pv_started) 652 goto nofit; 653 /* 654 * If we have already started - try to get use of the disk. 655 * Try to replace OFFLINE disks first, then FAILED. 656 */ 657 promise_meta_unused_range(pd->pd_meta, pd->pd_subdisks, 658 disk->d_consumer->provider->mediasize / 659 disk->d_consumer->provider->sectorsize, 660 &eoff, &esize); 661 if (esize == 0) { 662 G_RAID_DEBUG1(1, sc, "No free space on disk %s", 663 g_raid_get_diskname(disk)); 664 goto nofit; 665 } 666 size = INT64_MAX; 667 for (i = 0; i < vol->v_disks_count; i++) { 668 sd = &vol->v_subdisks[i]; 669 if (sd->sd_state != G_RAID_SUBDISK_S_NONE) 670 size = sd->sd_size; 671 if (sd->sd_state <= G_RAID_SUBDISK_S_FAILED && 672 (disk_pos < 0 || 673 vol->v_subdisks[i].sd_state < sd->sd_state)) 674 disk_pos = i; 675 } 676 if (disk_pos >= 0 && 677 vol->v_raid_level != G_RAID_VOLUME_RL_CONCAT && 678 (off_t)esize * 512 < size) { 679 G_RAID_DEBUG1(1, sc, "Disk %s free space " 680 "is too small (%ju < %ju)", 681 g_raid_get_diskname(disk), 682 (off_t)esize * 512, size); 683 disk_pos = -1; 684 } 685 if (disk_pos >= 0) { 686 if (vol->v_raid_level != G_RAID_VOLUME_RL_CONCAT) 687 esize = size / 512; 688 /* For 
RAID0+1 we need to translate order. */ 689 md_disk_pos = promise_meta_translate_disk(vol, disk_pos); 690 } else { 691nofit: 692 if (pd->pd_subdisks == 0) { 693 g_raid_change_disk_state(disk, 694 G_RAID_DISK_S_SPARE); 695 } 696 return (0); 697 } 698 G_RAID_DEBUG1(1, sc, "Disk %s takes pos %d in the volume %s", 699 g_raid_get_diskname(disk), disk_pos, vol->v_name); 700 resurrection = 1; 701 } 702 703 sd = &vol->v_subdisks[disk_pos]; 704 705 if (resurrection && sd->sd_disk != NULL) { 706 g_raid_change_disk_state(sd->sd_disk, 707 G_RAID_DISK_S_STALE_FAILED); 708 TAILQ_REMOVE(&sd->sd_disk->d_subdisks, 709 sd, sd_next); 710 } 711 vol->v_subdisks[disk_pos].sd_disk = disk; 712 TAILQ_INSERT_TAIL(&disk->d_subdisks, sd, sd_next); 713 714 /* Welcome the new disk. */ 715 if (resurrection) 716 g_raid_change_disk_state(disk, G_RAID_DISK_S_ACTIVE); 717 else if (meta->disks[md_disk_pos].flags & PROMISE_F_DOWN) 718 g_raid_change_disk_state(disk, G_RAID_DISK_S_FAILED); 719 else 720 g_raid_change_disk_state(disk, G_RAID_DISK_S_ACTIVE); 721 722 if (resurrection) { 723 sd->sd_offset = (off_t)eoff * 512; 724 sd->sd_size = (off_t)esize * 512; 725 } else { 726 sd->sd_offset = (off_t)pd->pd_meta[sdn]->disk_offset * 512; 727 sd->sd_size = (off_t)pd->pd_meta[sdn]->disk_sectors * 512; 728 } 729 730 if (resurrection) { 731 /* Stale disk, almost same as new. */ 732 g_raid_change_subdisk_state(sd, 733 G_RAID_SUBDISK_S_NEW); 734 } else if (meta->disks[md_disk_pos].flags & PROMISE_F_DOWN) { 735 /* Failed disk. */ 736 g_raid_change_subdisk_state(sd, 737 G_RAID_SUBDISK_S_FAILED); 738 } else if (meta->disks[md_disk_pos].flags & PROMISE_F_REDIR) { 739 /* Rebuilding disk. 
*/ 740 g_raid_change_subdisk_state(sd, 741 G_RAID_SUBDISK_S_REBUILD); 742 if (pd->pd_meta[sdn]->generation != meta->generation) 743 sd->sd_rebuild_pos = 0; 744 else { 745 sd->sd_rebuild_pos = 746 (off_t)pd->pd_meta[sdn]->rebuild_lba * 512; 747 } 748 } else if (!(meta->disks[md_disk_pos].flags & PROMISE_F_ONLINE)) { 749 /* Rebuilding disk. */ 750 g_raid_change_subdisk_state(sd, 751 G_RAID_SUBDISK_S_NEW); 752 } else if (pd->pd_meta[sdn]->generation != meta->generation || 753 (meta->status & PROMISE_S_MARKED)) { 754 /* Stale disk or dirty volume (unclean shutdown). */ 755 g_raid_change_subdisk_state(sd, 756 G_RAID_SUBDISK_S_STALE); 757 } else { 758 /* Up to date disk. */ 759 g_raid_change_subdisk_state(sd, 760 G_RAID_SUBDISK_S_ACTIVE); 761 } 762 g_raid_event_send(sd, G_RAID_SUBDISK_E_NEW, 763 G_RAID_EVENT_SUBDISK); 764 765 return (resurrection); 766} 767 768static void 769g_raid_md_promise_refill(struct g_raid_softc *sc) 770{ 771 struct g_raid_volume *vol; 772 struct g_raid_subdisk *sd; 773 struct g_raid_disk *disk; 774 struct g_raid_md_object *md; 775 struct g_raid_md_promise_perdisk *pd; 776 struct g_raid_md_promise_pervolume *pv; 777 int update, updated, i, bad; 778 779 md = sc->sc_md; 780restart: 781 updated = 0; 782 TAILQ_FOREACH(vol, &sc->sc_volumes, v_next) { 783 pv = vol->v_md_data; 784 if (!pv->pv_started || vol->v_stopping) 785 continue; 786 787 /* Search for subdisk that needs replacement. */ 788 bad = 0; 789 for (i = 0; i < vol->v_disks_count; i++) { 790 sd = &vol->v_subdisks[i]; 791 if (sd->sd_state == G_RAID_SUBDISK_S_NONE || 792 sd->sd_state == G_RAID_SUBDISK_S_FAILED) 793 bad = 1; 794 } 795 if (!bad) 796 continue; 797 798 G_RAID_DEBUG1(1, sc, "Volume %s is not complete, " 799 "trying to refill.", vol->v_name); 800 801 TAILQ_FOREACH(disk, &sc->sc_disks, d_next) { 802 /* Skip failed. */ 803 if (disk->d_state < G_RAID_DISK_S_SPARE) 804 continue; 805 /* Skip already used by this volume. 
*/ 806 for (i = 0; i < vol->v_disks_count; i++) { 807 sd = &vol->v_subdisks[i]; 808 if (sd->sd_disk == disk) 809 break; 810 } 811 if (i < vol->v_disks_count) 812 continue; 813 814 /* Try to use disk if it has empty extents. */ 815 pd = disk->d_md_data; 816 if (pd->pd_subdisks < PROMISE_MAX_SUBDISKS) { 817 update = 818 g_raid_md_promise_start_disk(disk, -1, vol); 819 } else 820 update = 0; 821 if (update) { 822 updated = 1; 823 g_raid_md_write_promise(md, vol, NULL, disk); 824 break; 825 } 826 } 827 } 828 if (updated) 829 goto restart; 830} 831 832static void 833g_raid_md_promise_start(struct g_raid_volume *vol) 834{ 835 struct g_raid_softc *sc; 836 struct g_raid_subdisk *sd; 837 struct g_raid_disk *disk; 838 struct g_raid_md_object *md; 839 struct g_raid_md_promise_perdisk *pd; 840 struct g_raid_md_promise_pervolume *pv; 841 struct promise_raid_conf *meta; 842 int i; 843 844 sc = vol->v_softc; 845 md = sc->sc_md; 846 pv = vol->v_md_data; 847 meta = pv->pv_meta; 848 849 if (meta->type == PROMISE_T_RAID0) 850 vol->v_raid_level = G_RAID_VOLUME_RL_RAID0; 851 else if (meta->type == PROMISE_T_RAID1) { 852 if (meta->array_width == 1) 853 vol->v_raid_level = G_RAID_VOLUME_RL_RAID1; 854 else 855 vol->v_raid_level = G_RAID_VOLUME_RL_RAID1E; 856 } else if (meta->type == PROMISE_T_RAID3) 857 vol->v_raid_level = G_RAID_VOLUME_RL_RAID3; 858 else if (meta->type == PROMISE_T_RAID5) 859 vol->v_raid_level = G_RAID_VOLUME_RL_RAID5; 860 else if (meta->type == PROMISE_T_SPAN) 861 vol->v_raid_level = G_RAID_VOLUME_RL_CONCAT; 862 else if (meta->type == PROMISE_T_JBOD) 863 vol->v_raid_level = G_RAID_VOLUME_RL_SINGLE; 864 else 865 vol->v_raid_level = G_RAID_VOLUME_RL_UNKNOWN; 866 vol->v_raid_level_qualifier = G_RAID_VOLUME_RLQ_NONE; 867 vol->v_strip_size = 512 << meta->stripe_shift; //ZZZ 868 vol->v_disks_count = meta->total_disks; 869 vol->v_mediasize = (off_t)meta->total_sectors * 512; //ZZZ 870 vol->v_sectorsize = 512; //ZZZ 871 for (i = 0; i < vol->v_disks_count; i++) { 872 sd = 
&vol->v_subdisks[i]; 873 sd->sd_offset = (off_t)meta->disk_offset * 512; //ZZZ 874 sd->sd_size = (off_t)meta->disk_sectors * 512; //ZZZ 875 } 876 g_raid_start_volume(vol); 877 878 /* Make all disks found till the moment take their places. */ 879 TAILQ_FOREACH(disk, &sc->sc_disks, d_next) { 880 pd = disk->d_md_data; 881 for (i = 0; i < pd->pd_subdisks; i++) { 882 if (pd->pd_meta[i]->volume_id == meta->volume_id) 883 g_raid_md_promise_start_disk(disk, i, vol); 884 } 885 } 886 887 pv->pv_started = 1; 888 callout_stop(&pv->pv_start_co); 889 G_RAID_DEBUG1(0, sc, "Volume started."); 890 g_raid_md_write_promise(md, vol, NULL, NULL); 891 892 /* Pickup any STALE/SPARE disks to refill array if needed. */ 893 g_raid_md_promise_refill(sc); 894 895 g_raid_event_send(vol, G_RAID_VOLUME_E_START, G_RAID_EVENT_VOLUME); 896} 897 898static void 899g_raid_promise_go(void *arg) 900{ 901 struct g_raid_volume *vol; 902 struct g_raid_softc *sc; 903 struct g_raid_md_promise_pervolume *pv; 904 905 vol = arg; 906 pv = vol->v_md_data; 907 sc = vol->v_softc; 908 if (!pv->pv_started) { 909 G_RAID_DEBUG1(0, sc, "Force volume start due to timeout."); 910 g_raid_event_send(vol, G_RAID_VOLUME_E_STARTMD, 911 G_RAID_EVENT_VOLUME); 912 } 913} 914 915static void 916g_raid_md_promise_new_disk(struct g_raid_disk *disk) 917{ 918 struct g_raid_softc *sc; 919 struct g_raid_md_object *md; 920 struct promise_raid_conf *pdmeta; 921 struct g_raid_md_promise_perdisk *pd; 922 struct g_raid_md_promise_pervolume *pv; 923 struct g_raid_volume *vol; 924 int i; 925 char buf[33]; 926 927 sc = disk->d_softc; 928 md = sc->sc_md; 929 pd = (struct g_raid_md_promise_perdisk *)disk->d_md_data; 930 931 if (pd->pd_subdisks == 0) { 932 g_raid_change_disk_state(disk, G_RAID_DISK_S_SPARE); 933 g_raid_md_promise_refill(sc); 934 return; 935 } 936 937 for (i = 0; i < pd->pd_subdisks; i++) { 938 pdmeta = pd->pd_meta[i]; 939 940 /* Look for volume with matching ID. 
*/ 941 vol = g_raid_md_promise_get_volume(sc, pdmeta->volume_id); 942 if (vol == NULL) { 943 promise_meta_get_name(pdmeta, buf); 944 vol = g_raid_create_volume(sc, buf, pdmeta->array_number); 945 pv = malloc(sizeof(*pv), M_MD_PROMISE, M_WAITOK | M_ZERO); 946 pv->pv_id = pdmeta->volume_id; 947 vol->v_md_data = pv; 948 callout_init(&pv->pv_start_co, 1); 949 callout_reset(&pv->pv_start_co, 950 g_raid_start_timeout * hz, 951 g_raid_promise_go, vol); 952 } else 953 pv = vol->v_md_data; 954 955 /* If we haven't started yet - check metadata freshness. */ 956 if (pv->pv_meta == NULL || !pv->pv_started) { 957 if (pv->pv_meta == NULL || 958 ((int16_t)(pdmeta->generation - pv->pv_generation)) > 0) { 959 G_RAID_DEBUG1(1, sc, "Newer disk"); 960 if (pv->pv_meta != NULL) 961 free(pv->pv_meta, M_MD_PROMISE); 962 pv->pv_meta = promise_meta_copy(pdmeta); 963 pv->pv_generation = pv->pv_meta->generation; 964 pv->pv_disks_present = 1; 965 } else if (pdmeta->generation == pv->pv_generation) { 966 pv->pv_disks_present++; 967 G_RAID_DEBUG1(1, sc, "Matching disk (%d of %d up)", 968 pv->pv_disks_present, 969 pv->pv_meta->total_disks); 970 } else { 971 G_RAID_DEBUG1(1, sc, "Older disk"); 972 } 973 } 974 } 975 976 for (i = 0; i < pd->pd_subdisks; i++) { 977 pdmeta = pd->pd_meta[i]; 978 979 /* Look for volume with matching ID. */ 980 vol = g_raid_md_promise_get_volume(sc, pdmeta->volume_id); 981 if (vol == NULL) 982 continue; 983 pv = vol->v_md_data; 984 985 if (pv->pv_started) { 986 if (g_raid_md_promise_start_disk(disk, i, vol)) 987 g_raid_md_write_promise(md, vol, NULL, NULL); 988 } else { 989 /* If we collected all needed disks - start array. */ 990 if (pv->pv_disks_present == pv->pv_meta->total_disks) 991 g_raid_md_promise_start(vol); 992 } 993 } 994} 995 996static int 997g_raid_md_create_promise(struct g_raid_md_object *md, struct g_class *mp, 998 struct g_geom **gp) 999{ 1000 struct g_geom *geom; 1001 struct g_raid_softc *sc; 1002 1003 /* Search for existing node. 
*/ 1004 LIST_FOREACH(geom, &mp->geom, geom) { 1005 sc = geom->softc; 1006 if (sc == NULL) 1007 continue; 1008 if (sc->sc_stopping != 0) 1009 continue; 1010 if (sc->sc_md->mdo_class != md->mdo_class) 1011 continue; 1012 break; 1013 } 1014 if (geom != NULL) { 1015 *gp = geom; 1016 return (G_RAID_MD_TASTE_EXISTING); 1017 } 1018 1019 /* Create new one if not found. */ 1020 sc = g_raid_create_node(mp, "Promise", md); 1021 if (sc == NULL) 1022 return (G_RAID_MD_TASTE_FAIL); 1023 md->mdo_softc = sc; 1024 *gp = sc->sc_geom; 1025 return (G_RAID_MD_TASTE_NEW); 1026} 1027 1028static int 1029g_raid_md_taste_promise(struct g_raid_md_object *md, struct g_class *mp, 1030 struct g_consumer *cp, struct g_geom **gp) 1031{ 1032 struct g_consumer *rcp; 1033 struct g_provider *pp; 1034 struct g_raid_softc *sc; 1035 struct g_raid_disk *disk; 1036 struct promise_raid_conf *meta, *metaarr[4]; 1037 struct g_raid_md_promise_perdisk *pd; 1038 struct g_geom *geom; 1039 int error, i, j, result, len, subdisks; 1040 char name[16]; 1041 uint16_t vendor; 1042 1043 G_RAID_DEBUG(1, "Tasting Promise on %s", cp->provider->name); 1044 pp = cp->provider; 1045 1046 /* Read metadata from device. */ 1047 meta = NULL; 1048 vendor = 0xffff; 1049 if (g_access(cp, 1, 0, 0) != 0) 1050 return (G_RAID_MD_TASTE_FAIL); 1051 g_topology_unlock(); 1052 len = 2; 1053 if (pp->geom->rank == 1) 1054 g_io_getattr("GEOM::hba_vendor", cp, &len, &vendor); 1055 subdisks = promise_meta_read(cp, metaarr); 1056 g_topology_lock(); 1057 g_access(cp, -1, 0, 0); 1058 if (subdisks == 0) { 1059 if (g_raid_aggressive_spare) { 1060 if (vendor == 0x105a || vendor == 0x1002) { 1061 G_RAID_DEBUG(1, 1062 "No Promise metadata, forcing spare."); 1063 goto search; 1064 } else { 1065 G_RAID_DEBUG(1, 1066 "Promise/ATI vendor mismatch " 1067 "0x%04x != 0x105a/0x1002", 1068 vendor); 1069 } 1070 } 1071 return (G_RAID_MD_TASTE_FAIL); 1072 } 1073 1074 /* Metadata valid. Print it. 
*/ 1075 for (i = 0; i < subdisks; i++) 1076 g_raid_md_promise_print(metaarr[i]); 1077 1078 /* Purge meaningless (empty/spare) records. */ 1079 for (i = 0; i < subdisks; ) { 1080 if (metaarr[i]->disk.flags & PROMISE_F_ASSIGNED) { 1081 i++; 1082 continue; 1083 } 1084 free(metaarr[i], M_MD_PROMISE); 1085 for (j = i; j < subdisks - 1; j++) 1086 metaarr[i] = metaarr[j + 1]; 1087 metaarr[PROMISE_MAX_SUBDISKS - 1] = NULL; 1088 subdisks--; 1089 } 1090 1091search: 1092 /* Search for matching node. */ 1093 sc = NULL; 1094 LIST_FOREACH(geom, &mp->geom, geom) { 1095 sc = geom->softc; 1096 if (sc == NULL) 1097 continue; 1098 if (sc->sc_stopping != 0) 1099 continue; 1100 if (sc->sc_md->mdo_class != md->mdo_class) 1101 continue; 1102 break; 1103 } 1104 1105 /* Found matching node. */ 1106 if (geom != NULL) { 1107 G_RAID_DEBUG(1, "Found matching array %s", sc->sc_name); 1108 result = G_RAID_MD_TASTE_EXISTING; 1109 1110 } else { /* Not found matching node -- create one. */ 1111 result = G_RAID_MD_TASTE_NEW; 1112 snprintf(name, sizeof(name), "Promise"); 1113 sc = g_raid_create_node(mp, name, md); 1114 md->mdo_softc = sc; 1115 geom = sc->sc_geom; 1116 } 1117 1118 rcp = g_new_consumer(geom); 1119 g_attach(rcp, pp); 1120 if (g_access(rcp, 1, 1, 1) != 0) 1121 ; //goto fail1; 1122 1123 g_topology_unlock(); 1124 sx_xlock(&sc->sc_lock); 1125 1126 pd = malloc(sizeof(*pd), M_MD_PROMISE, M_WAITOK | M_ZERO); 1127 pd->pd_subdisks = subdisks; 1128 for (i = 0; i < subdisks; i++) 1129 pd->pd_meta[i] = metaarr[i]; 1130 disk = g_raid_create_disk(sc); 1131 disk->d_md_data = (void *)pd; 1132 disk->d_consumer = rcp; 1133 rcp->private = disk; 1134 1135 /* Read kernel dumping information. 
*/
	disk->d_kd.offset = 0;
	disk->d_kd.length = OFF_MAX;
	len = sizeof(disk->d_kd);
	error = g_io_getattr("GEOM::kerneldump", rcp, &len, &disk->d_kd);
	if (disk->d_kd.di.dumper == NULL)
		G_RAID_DEBUG1(2, sc, "Dumping not supported by %s: %d.",
		    rcp->provider->name, error);

	/* Process the metadata: attach subdisks, maybe start volumes. */
	g_raid_md_promise_new_disk(disk);

	sx_xunlock(&sc->sc_lock);
	g_topology_lock();
	*gp = geom;
	return (result);
}

/*
 * Disk event handler.  Only disconnect events are handled: the disk is
 * torn down, orphaned volumes are purged and metadata is rewritten on
 * the survivors.  Returns 0 on handled events, negative otherwise.
 */
static int
g_raid_md_event_promise(struct g_raid_md_object *md,
    struct g_raid_disk *disk, u_int event)
{
	struct g_raid_softc *sc;
	struct g_raid_md_promise_perdisk *pd;

	sc = md->mdo_softc;
	if (disk == NULL)
		return (-1);
	/* NOTE(review): 'pd' is fetched but not used in this handler. */
	pd = (struct g_raid_md_promise_perdisk *)disk->d_md_data;
	switch (event) {
	case G_RAID_DISK_E_DISCONNECTED:
		/* Delete disk. */
		g_raid_change_disk_state(disk, G_RAID_DISK_S_NONE);
		g_raid_destroy_disk(disk);
		g_raid_md_promise_purge_volumes(sc);

		/* Write updated metadata to all disks. */
		g_raid_md_write_promise(md, NULL, NULL, NULL);

		/* Check if anything left.
*/
		if (g_raid_ndisks(sc, -1) == 0)
			g_raid_destroy_node(sc, 0);
		else
			g_raid_md_promise_refill(sc);
		return (0);
	}
	return (-2);
}

/*
 * Volume event handler: a STARTMD event starts the volume if the
 * start timeout has not already done so.
 */
static int
g_raid_md_volume_event_promise(struct g_raid_md_object *md,
    struct g_raid_volume *vol, u_int event)
{
	struct g_raid_softc *sc;
	struct g_raid_md_promise_pervolume *pv;

	sc = md->mdo_softc;
	pv = (struct g_raid_md_promise_pervolume *)vol->v_md_data;
	switch (event) {
	case G_RAID_VOLUME_E_STARTMD:
		if (!pv->pv_started)
			g_raid_md_promise_start(vol);
		return (0);
	}
	return (-2);
}

/*
 * Control request handler for the userland graid(8) verbs:
 * "label", "add" (rejected), "delete", "remove", "fail" and "insert".
 * Returns 0 on success or a negative verb-specific error code after
 * reporting the problem via gctl_error().
 */
static int
g_raid_md_ctl_promise(struct g_raid_md_object *md,
    struct gctl_req *req)
{
	struct g_raid_softc *sc;
	struct g_raid_volume *vol, *vol1;
	struct g_raid_subdisk *sd;
	struct g_raid_disk *disk, *disks[PROMISE_MAX_DISKS];
	struct g_raid_md_promise_perdisk *pd;
	struct g_raid_md_promise_pervolume *pv;
	struct g_consumer *cp;
	struct g_provider *pp;
	char arg[16];
	const char *verb, *volname, *levelname, *diskname;
	char *tmp;
	int *nargs, *force;
	off_t size, sectorsize, strip;
	intmax_t *sizearg, *striparg;
	uint32_t offs[PROMISE_MAX_DISKS], esize;
	int numdisks, i, len, level, qual;
	int error;

	sc = md->mdo_softc;
	verb = gctl_get_param(req, "verb", NULL);
	nargs = gctl_get_paraml(req, "nargs", sizeof(*nargs));
	error = 0;
	if (strcmp(verb, "label") == 0) {

		/* label <name> <level> <disk> [disk...] */
		if (*nargs < 4) {
			gctl_error(req, "Invalid number of arguments.");
			return (-1);
		}
		volname = gctl_get_asciiparam(req, "arg1");
		if (volname == NULL) {
			gctl_error(req, "No volume name.");
			return (-2);
		}
		levelname = gctl_get_asciiparam(req, "arg2");
		if (levelname == NULL) {
			gctl_error(req, "No RAID level.");
			return (-3);
		}
		if (g_raid_volume_str2level(levelname, &level, &qual)) {
			gctl_error(req, "Unknown RAID level '%s'.", levelname);
			return (-4);
		}
		numdisks = *nargs - 3;
		force = gctl_get_paraml(req, "force", sizeof(*force));
		if (!g_raid_md_promise_supported(level, qual, numdisks,
		    force ? *force : 0)) {
			gctl_error(req, "Unsupported RAID level "
			    "(0x%02x/0x%02x), or number of disks (%d).",
			    level, qual, numdisks);
			return (-5);
		}

		/*
		 * Search for disks, connect them and probe.  'size' tracks
		 * the smallest usable per-disk extent; 'sectorsize' the
		 * largest sector size among members.
		 */
		size = INT64_MAX;
		sectorsize = 0;
		bzero(disks, sizeof(disks));
		bzero(offs, sizeof(offs));
		for (i = 0; i < numdisks; i++) {
			snprintf(arg, sizeof(arg), "arg%d", i + 3);
			diskname = gctl_get_asciiparam(req, arg);
			if (diskname == NULL) {
				gctl_error(req, "No disk name (%s).", arg);
				error = -6;
				break;
			}
			/* "NONE" leaves this slot absent (degraded create). */
			if (strcmp(diskname, "NONE") == 0)
				continue;

			TAILQ_FOREACH(disk, &sc->sc_disks, d_next) {
				if (disk->d_consumer != NULL &&
				    disk->d_consumer->provider != NULL &&
				    strcmp(disk->d_consumer->provider->name,
				     diskname) == 0)
					break;
			}
			if (disk != NULL) {
				/* Disk already known to this node. */
				if (disk->d_state != G_RAID_DISK_S_ACTIVE) {
					gctl_error(req, "Disk '%s' is in a "
					    "wrong state (%s).", diskname,
					    g_raid_disk_state2str(disk->d_state));
					error = -7;
					break;
				}
				pd = disk->d_md_data;
				if (pd->pd_subdisks >= PROMISE_MAX_SUBDISKS) {
					gctl_error(req, "Disk '%s' already "
					    "used by %d volumes.",
					    diskname, pd->pd_subdisks);
					error = -7;
					break;
				}
				pp = disk->d_consumer->provider;
				disks[i] = disk;
				/* Find free extent between existing subdisks. */
				promise_meta_unused_range(pd->pd_meta,
				    pd->pd_subdisks,
				    pp->mediasize / pp->sectorsize,
				    &offs[i], &esize);
				size = MIN(size, (off_t)esize * pp->sectorsize);
				sectorsize = MAX(sectorsize, pp->sectorsize);
				continue;
			}

			/* New disk: open it and create the disk object. */
			g_topology_lock();
			cp = g_raid_open_consumer(sc, diskname);
			if (cp == NULL) {
				gctl_error(req, "Can't open disk '%s'.",
				    diskname);
				g_topology_unlock();
				error = -8;
				break;
			}
			pp = cp->provider;
			pd = malloc(sizeof(*pd), M_MD_PROMISE, M_WAITOK | M_ZERO);
			disk = g_raid_create_disk(sc);
			disk->d_md_data = (void *)pd;
			disk->d_consumer = cp;
			disks[i] = disk;
			cp->private = disk;
			g_topology_unlock();

			/* Read kernel dumping information. */
			disk->d_kd.offset = 0;
			disk->d_kd.length = OFF_MAX;
			len = sizeof(disk->d_kd);
			g_io_getattr("GEOM::kerneldump", cp, &len, &disk->d_kd);
			if (disk->d_kd.di.dumper == NULL)
				G_RAID_DEBUG1(2, sc,
				    "Dumping not supported by %s.",
				    cp->provider->name);

			/* Reserve some space for metadata. */
			size = MIN(size, pp->mediasize - 131072llu * pp->sectorsize);
			sectorsize = MAX(sectorsize, pp->sectorsize);
		}
		if (error != 0) {
			/* Destroy only the disks created by this request. */
			for (i = 0; i < numdisks; i++) {
				if (disks[i] != NULL &&
				    disks[i]->d_state == G_RAID_DISK_S_NONE)
					g_raid_destroy_disk(disks[i]);
			}
			return (error);
		}

		/* Handle size argument. */
		len = sizeof(*sizearg);
		sizearg = gctl_get_param(req, "size", &len);
		if (sizearg != NULL && len == sizeof(*sizearg) &&
		    *sizearg > 0) {
			if (*sizearg > size) {
				gctl_error(req, "Size too big %lld > %lld.",
				    (long long)*sizearg, (long long)size);
				return (-9);
			}
			size = *sizearg;
		}

		/* Handle strip argument. */
		strip = 131072;
		len = sizeof(*striparg);
		striparg = gctl_get_param(req, "strip", &len);
		if (striparg != NULL && len == sizeof(*striparg) &&
		    *striparg > 0) {
			if (*striparg < sectorsize) {
				gctl_error(req, "Strip size too small.");
				return (-10);
			}
			if (*striparg % sectorsize != 0) {
				gctl_error(req, "Incorrect strip size.");
				return (-11);
			}
			strip = *striparg;
		}

		/* Round size down to strip or sector.
*/
		if (level == G_RAID_VOLUME_RL_RAID1 ||
		    level == G_RAID_VOLUME_RL_SINGLE ||
		    level == G_RAID_VOLUME_RL_CONCAT)
			size -= (size % sectorsize);
		else if (level == G_RAID_VOLUME_RL_RAID1E &&
		    (numdisks & 1) != 0)
			/* Odd-disk RAID1E needs a multiple of two strips. */
			size -= (size % (2 * strip));
		else
			size -= (size % strip);
		if (size <= 0) {
			gctl_error(req, "Size too small.");
			return (-13);
		}
		/* On-disk sector counts are 32-bit. */
		if (size > 0xffffffffllu * sectorsize) {
			gctl_error(req, "Size too big.");
			return (-14);
		}

		/* We have all we need, create things: volume, ... */
		pv = malloc(sizeof(*pv), M_MD_PROMISE, M_WAITOK | M_ZERO);
		arc4rand(&pv->pv_id, sizeof(pv->pv_id), 0);
		pv->pv_generation = 0;
		pv->pv_started = 1;
		vol = g_raid_create_volume(sc, volname, -1);
		vol->v_md_data = pv;
		vol->v_raid_level = level;
		vol->v_raid_level_qualifier = G_RAID_VOLUME_RLQ_NONE;
		vol->v_strip_size = strip;
		vol->v_disks_count = numdisks;
		/* Media size depends on the level's redundancy overhead. */
		if (level == G_RAID_VOLUME_RL_RAID0 ||
		    level == G_RAID_VOLUME_RL_CONCAT ||
		    level == G_RAID_VOLUME_RL_SINGLE)
			vol->v_mediasize = size * numdisks;
		else if (level == G_RAID_VOLUME_RL_RAID1)
			vol->v_mediasize = size;
		else if (level == G_RAID_VOLUME_RL_RAID3 ||
		    level == G_RAID_VOLUME_RL_RAID5)
			vol->v_mediasize = size * (numdisks - 1);
		else { /* RAID1E */
			vol->v_mediasize = ((size * numdisks) / strip / 2) *
			    strip;
		}
		vol->v_sectorsize = sectorsize;
		g_raid_start_volume(vol);

		/* , and subdisks.
*/
		for (i = 0; i < numdisks; i++) {
			disk = disks[i];
			sd = &vol->v_subdisks[i];
			sd->sd_disk = disk;
			/* Subdisk offsets/sizes are kept in 512-byte units. */
			sd->sd_offset = (off_t)offs[i] * 512;
			sd->sd_size = size;
			if (disk == NULL)
				continue;
			TAILQ_INSERT_TAIL(&disk->d_subdisks, sd, sd_next);
			g_raid_change_disk_state(disk,
			    G_RAID_DISK_S_ACTIVE);
			g_raid_change_subdisk_state(sd,
			    G_RAID_SUBDISK_S_ACTIVE);
			g_raid_event_send(sd, G_RAID_SUBDISK_E_NEW,
			    G_RAID_EVENT_SUBDISK);
		}

		/* Write metadata based on created entities. */
		G_RAID_DEBUG1(0, sc, "Array started.");
		g_raid_md_write_promise(md, vol, NULL, NULL);

		/* Pickup any STALE/SPARE disks to refill array if needed. */
		g_raid_md_promise_refill(sc);

		g_raid_event_send(vol, G_RAID_VOLUME_E_START,
		    G_RAID_EVENT_VOLUME);
		return (0);
	}
	if (strcmp(verb, "add") == 0) {

		/* Promise volumes are created whole via "label". */
		gctl_error(req, "`add` command is not applicable, "
		    "use `label` instead.");
		return (-99);
	}
	if (strcmp(verb, "delete") == 0) {

		/* Full node destruction. */
		if (*nargs == 1) {
			/* Check if some volume is still open. */
			force = gctl_get_paraml(req, "force", sizeof(*force));
			if (force != NULL && *force == 0 &&
			    g_raid_nopens(sc) != 0) {
				gctl_error(req, "Some volume is still open.");
				return (-4);
			}

			/* Wipe on-disk metadata before tearing down. */
			TAILQ_FOREACH(disk, &sc->sc_disks, d_next) {
				if (disk->d_consumer)
					promise_meta_erase(disk->d_consumer);
			}
			g_raid_destroy_node(sc, 0);
			return (0);
		}

		/* Destroy specified volume. If it was last - all node. */
		if (*nargs != 2) {
			gctl_error(req, "Invalid number of arguments.");
			return (-1);
		}
		volname = gctl_get_asciiparam(req, "arg1");
		if (volname == NULL) {
			gctl_error(req, "No volume name.");
			return (-2);
		}

		/* Search for volume.
*/
		TAILQ_FOREACH(vol, &sc->sc_volumes, v_next) {
			if (strcmp(vol->v_name, volname) == 0)
				break;
		}
		if (vol == NULL) {
			/* Fall back to matching by numeric global ID. */
			i = strtol(volname, &tmp, 10);
			if (verb != volname && tmp[0] == 0) {
				TAILQ_FOREACH(vol, &sc->sc_volumes, v_next) {
					if (vol->v_global_id == i)
						break;
				}
			}
		}
		if (vol == NULL) {
			gctl_error(req, "Volume '%s' not found.", volname);
			return (-3);
		}

		/* Check if volume is still open. */
		force = gctl_get_paraml(req, "force", sizeof(*force));
		if (force != NULL && *force == 0 &&
		    vol->v_provider_open != 0) {
			gctl_error(req, "Volume is still open.");
			return (-4);
		}

		/* Destroy volume and potentially node. */
		i = 0;
		TAILQ_FOREACH(vol1, &sc->sc_volumes, v_next)
			i++;
		if (i >= 2) {
			/* Other volumes remain: keep the node alive. */
			g_raid_destroy_volume(vol);
			g_raid_md_promise_purge_disks(sc);
			g_raid_md_write_promise(md, NULL, NULL, NULL);
		} else {
			/* Last volume: wipe metadata and drop the node. */
			TAILQ_FOREACH(disk, &sc->sc_disks, d_next) {
				if (disk->d_consumer)
					promise_meta_erase(disk->d_consumer);
			}
			g_raid_destroy_node(sc, 0);
		}
		return (0);
	}
	if (strcmp(verb, "remove") == 0 ||
	    strcmp(verb, "fail") == 0) {
		if (*nargs < 2) {
			gctl_error(req, "Invalid number of arguments.");
			return (-1);
		}
		for (i = 1; i < *nargs; i++) {
			snprintf(arg, sizeof(arg), "arg%d", i);
			diskname = gctl_get_asciiparam(req, arg);
			if (diskname == NULL) {
				gctl_error(req, "No disk name (%s).", arg);
				error = -2;
				break;
			}
			/* Accept both "ada0" and "/dev/ada0" forms. */
			if (strncmp(diskname, "/dev/", 5) == 0)
				diskname += 5;

			TAILQ_FOREACH(disk, &sc->sc_disks, d_next) {
				if (disk->d_consumer != NULL &&
				    disk->d_consumer->provider != NULL &&
				    strcmp(disk->d_consumer->provider->name,
				     diskname) == 0)
					break;
			}
			if (disk == NULL) {
				gctl_error(req, "Disk '%s' not found.",
				    diskname);
				error = -3;
				break;
			}

			if (strcmp(verb, "fail") == 0) {
				/* "fail": mark failed but keep the disk. */
				g_raid_md_fail_disk_promise(md, NULL, disk);
				continue;
			}

			pd = (struct g_raid_md_promise_perdisk *)disk->d_md_data;

			/* Erase metadata on deleting disk and destroy it. */
			promise_meta_erase(disk->d_consumer);
			g_raid_destroy_disk(disk);
		}
		g_raid_md_promise_purge_volumes(sc);

		/* Write updated metadata to remaining disks. */
		g_raid_md_write_promise(md, NULL, NULL, NULL);

		/* Check if anything left. */
		if (g_raid_ndisks(sc, -1) == 0)
			g_raid_destroy_node(sc, 0);
		else
			g_raid_md_promise_refill(sc);
		return (error);
	}
	if (strcmp(verb, "insert") == 0) {
		if (*nargs < 2) {
			gctl_error(req, "Invalid number of arguments.");
			return (-1);
		}
		for (i = 1; i < *nargs; i++) {
			/* Get disk name. */
			snprintf(arg, sizeof(arg), "arg%d", i);
			diskname = gctl_get_asciiparam(req, arg);
			if (diskname == NULL) {
				gctl_error(req, "No disk name (%s).", arg);
				error = -3;
				break;
			}

			/* Try to find provider with specified name. */
			g_topology_lock();
			cp = g_raid_open_consumer(sc, diskname);
			if (cp == NULL) {
				gctl_error(req, "Can't open disk '%s'.",
				    diskname);
				g_topology_unlock();
				error = -4;
				break;
			}
			pp = cp->provider;
			g_topology_unlock();

			pd = malloc(sizeof(*pd), M_MD_PROMISE, M_WAITOK | M_ZERO);

			disk = g_raid_create_disk(sc);
			disk->d_consumer = cp;
			disk->d_consumer->private = disk;
			disk->d_md_data = (void *)pd;
			cp->private = disk;

			/* Read kernel dumping information.
*/
			disk->d_kd.offset = 0;
			disk->d_kd.length = OFF_MAX;
			len = sizeof(disk->d_kd);
			g_io_getattr("GEOM::kerneldump", cp, &len, &disk->d_kd);
			if (disk->d_kd.di.dumper == NULL)
				G_RAID_DEBUG1(2, sc,
				    "Dumping not supported by %s.",
				    cp->provider->name);

			/* Welcome the "new" disk. */
			g_raid_change_disk_state(disk, G_RAID_DISK_S_SPARE);
			promise_meta_write_spare(cp);
			g_raid_md_promise_refill(sc);
		}
		return (error);
	}
	return (-100);
}

/*
 * Regenerate and write on-disk metadata.  With all target pointers NULL
 * every volume is refreshed; otherwise only volumes related to the given
 * volume (tvol), subdisk (tsd) or disk (tdisk) are touched.
 */
static int
g_raid_md_write_promise(struct g_raid_md_object *md, struct g_raid_volume *tvol,
    struct g_raid_subdisk *tsd, struct g_raid_disk *tdisk)
{
	struct g_raid_softc *sc;
	struct g_raid_volume *vol;
	struct g_raid_subdisk *sd;
	struct g_raid_disk *disk;
	struct g_raid_md_promise_perdisk *pd;
	struct g_raid_md_promise_pervolume *pv;
	struct promise_raid_conf *meta;
	off_t rebuild_lba64;
	int i, j, pos, rebuild;

	sc = md->mdo_softc;

	/* Nothing to persist while the node is being hard-destroyed. */
	if (sc->sc_stopping == G_RAID_DESTROY_HARD)
		return (0);

	/* Generate new per-volume metadata for affected volumes. */
	TAILQ_FOREACH(vol, &sc->sc_volumes, v_next) {
		if (vol->v_stopping)
			continue;

		/* Skip volumes not related to specified targets.
*/
		if (tvol != NULL && vol != tvol)
			continue;
		if (tsd != NULL && vol != tsd->sd_volume)
			continue;
		if (tdisk != NULL) {
			/* Volume is related only if tdisk backs a subdisk. */
			for (i = 0; i < vol->v_disks_count; i++) {
				if (vol->v_subdisks[i].sd_disk == tdisk)
					break;
			}
			if (i >= vol->v_disks_count)
				continue;
		}

		pv = (struct g_raid_md_promise_pervolume *)vol->v_md_data;
		pv->pv_generation++;

		/* Start from the previous metadata copy, if any. */
		meta = malloc(sizeof(*meta), M_MD_PROMISE, M_WAITOK | M_ZERO);
		if (pv->pv_meta != NULL)
			memcpy(meta, pv->pv_meta, sizeof(*meta));
		memcpy(meta->promise_id, PROMISE_MAGIC,
		    sizeof(PROMISE_MAGIC) - 1);
		meta->dummy_0 = 0x00020000;
		meta->integrity = PROMISE_I_VALID;

		meta->generation = pv->pv_generation;
		meta->status = PROMISE_S_VALID | PROMISE_S_ONLINE |
		    PROMISE_S_INITED | PROMISE_S_READY;
		if (vol->v_state <= G_RAID_VOLUME_S_DEGRADED)
			meta->status |= PROMISE_S_DEGRADED;
		if (vol->v_dirty)
			meta->status |= PROMISE_S_MARKED; /* XXX: INVENTED!
*/
		/* Map the generic RAID level onto a Promise volume type. */
		if (vol->v_raid_level == G_RAID_VOLUME_RL_RAID0 ||
		    vol->v_raid_level == G_RAID_VOLUME_RL_SINGLE)
			meta->type = PROMISE_T_RAID0;
		else if (vol->v_raid_level == G_RAID_VOLUME_RL_RAID1 ||
		    vol->v_raid_level == G_RAID_VOLUME_RL_RAID1E)
			meta->type = PROMISE_T_RAID1;
		else if (vol->v_raid_level == G_RAID_VOLUME_RL_RAID3)
			meta->type = PROMISE_T_RAID3;
		else if (vol->v_raid_level == G_RAID_VOLUME_RL_RAID5)
			meta->type = PROMISE_T_RAID5;
		else if (vol->v_raid_level == G_RAID_VOLUME_RL_CONCAT)
			meta->type = PROMISE_T_SPAN;
		else
			meta->type = PROMISE_T_JBOD;
		meta->total_disks = vol->v_disks_count;
		/* Strip size is stored as a shift of KB units. */
		meta->stripe_shift = ffs(vol->v_strip_size / 1024);
		meta->array_width = vol->v_disks_count;
		/* Mirrored levels expose half the disks as data width. */
		if (vol->v_raid_level == G_RAID_VOLUME_RL_RAID1 ||
		    vol->v_raid_level == G_RAID_VOLUME_RL_RAID1E)
			meta->array_width /= 2;
		meta->array_number = vol->v_global_id;
		meta->total_sectors = vol->v_mediasize / vol->v_sectorsize;
		/* Synthesized 255x63 CHS-style geometry. */
		meta->cylinders = meta->total_sectors / (255 * 63) - 1;
		meta->heads = 254;
		meta->sectors = 63;
		meta->volume_id = pv->pv_id;
		rebuild_lba64 = UINT64_MAX;
		rebuild = 0;
		for (i = 0; i < vol->v_disks_count; i++) {
			sd = &vol->v_subdisks[i];
			/* For RAID0+1 we need to translate order.
*/
			pos = promise_meta_translate_disk(vol, i);
			meta->disks[pos].flags = PROMISE_F_VALID |
			    PROMISE_F_ASSIGNED;
			if (sd->sd_state == G_RAID_SUBDISK_S_NONE) {
				/* Absent disk: no extra flags. */
				meta->disks[pos].flags |= 0;
			} else if (sd->sd_state == G_RAID_SUBDISK_S_FAILED) {
				meta->disks[pos].flags |=
				    PROMISE_F_DOWN | PROMISE_F_REDIR;
			} else if (sd->sd_state <= G_RAID_SUBDISK_S_REBUILD) {
				/* Rebuilding or worse: track lowest position. */
				meta->disks[pos].flags |=
				    PROMISE_F_ONLINE | PROMISE_F_REDIR;
				if (sd->sd_state == G_RAID_SUBDISK_S_REBUILD) {
					rebuild_lba64 = MIN(rebuild_lba64,
					    sd->sd_rebuild_pos / 512);
				} else
					rebuild_lba64 = 0;
				rebuild = 1;
			} else {
				meta->disks[pos].flags |= PROMISE_F_ONLINE;
				if (sd->sd_state < G_RAID_SUBDISK_S_ACTIVE) {
					meta->status |= PROMISE_S_MARKED;
					if (sd->sd_state == G_RAID_SUBDISK_S_RESYNC) {
						rebuild_lba64 = MIN(rebuild_lba64,
						    sd->sd_rebuild_pos / 512);
					} else
						rebuild_lba64 = 0;
				}
			}
			if (pv->pv_meta != NULL) {
				/* Keep stable subdisk IDs across rewrites. */
				meta->disks[pos].id = pv->pv_meta->disks[pos].id;
			} else {
				meta->disks[pos].number = i * 2;
				arc4rand(&meta->disks[pos].id,
				    sizeof(meta->disks[pos].id), 0);
			}
		}
		promise_meta_put_name(meta, vol->v_name);

		/* Try to mimic AMD BIOS rebuild/resync behavior. */
		if (rebuild_lba64 != UINT64_MAX) {
			if (rebuild)
				meta->magic_3 = 0x03040010UL; /* Rebuild? */
			else
				meta->magic_3 = 0x03040008UL; /* Resync? */
			/* Translate from per-disk to per-volume LBA.
*/
			if (vol->v_raid_level == G_RAID_VOLUME_RL_RAID1 ||
			    vol->v_raid_level == G_RAID_VOLUME_RL_RAID1E) {
				rebuild_lba64 *= meta->array_width;
			} else if (vol->v_raid_level == G_RAID_VOLUME_RL_RAID3 ||
			    vol->v_raid_level == G_RAID_VOLUME_RL_RAID5) {
				rebuild_lba64 *= meta->array_width - 1;
			} else
				rebuild_lba64 = 0;
		} else
			meta->magic_3 = 0x03000000UL;
		meta->rebuild_lba64 = rebuild_lba64;
		meta->magic_4 = 0x04010101UL;

		/* Replace per-volume metadata with new. */
		if (pv->pv_meta != NULL)
			free(pv->pv_meta, M_MD_PROMISE);
		pv->pv_meta = meta;

		/* Copy new metadata to the disks, adding or replacing old. */
		for (i = 0; i < vol->v_disks_count; i++) {
			sd = &vol->v_subdisks[i];
			disk = sd->sd_disk;
			if (disk == NULL)
				continue;
			/* For RAID0+1 we need to translate order. */
			pos = promise_meta_translate_disk(vol, i);
			pd = (struct g_raid_md_promise_perdisk *)disk->d_md_data;
			/* Find this volume's slot on the disk, or a new one. */
			for (j = 0; j < pd->pd_subdisks; j++) {
				if (pd->pd_meta[j]->volume_id == meta->volume_id)
					break;
			}
			if (j == pd->pd_subdisks)
				pd->pd_subdisks++;
			if (pd->pd_meta[j] != NULL)
				free(pd->pd_meta[j], M_MD_PROMISE);
			pd->pd_meta[j] = promise_meta_copy(meta);
			/* Per-disk view: this subdisk's own identity/extent. */
			pd->pd_meta[j]->disk = meta->disks[pos];
			pd->pd_meta[j]->disk.number = pos;
			pd->pd_meta[j]->disk_offset = sd->sd_offset / 512;
			pd->pd_meta[j]->disk_sectors = sd->sd_size / 512;
			if (sd->sd_state == G_RAID_SUBDISK_S_REBUILD) {
				pd->pd_meta[j]->rebuild_lba =
				    sd->sd_rebuild_pos / 512;
			} else if (sd->sd_state < G_RAID_SUBDISK_S_REBUILD)
				pd->pd_meta[j]->rebuild_lba = 0;
			else
				pd->pd_meta[j]->rebuild_lba = UINT32_MAX;
			pd->pd_updated = 1;
		}
	}

	/* Flush metadata of every disk that was touched above. */
	TAILQ_FOREACH(disk, &sc->sc_disks, d_next) {
		pd = (struct g_raid_md_promise_perdisk *)disk->d_md_data;
		if (disk->d_state != G_RAID_DISK_S_ACTIVE)
			continue;
		if (!pd->pd_updated)
			continue;
		G_RAID_DEBUG(1, "Writing Promise metadata to %s",
		    g_raid_get_diskname(disk));
		for (i = 0; i < pd->pd_subdisks; i++)
			g_raid_md_promise_print(pd->pd_meta[i]);
		promise_meta_write(disk->d_consumer,
		    pd->pd_meta, pd->pd_subdisks);
		pd->pd_updated = 0;
	}

	return (0);
}

/*
 * Mark tdisk failed: set DOWN/REDIR in its metadata (written back to the
 * disk itself so it is not later resurrected as STALE), fail all of its
 * subdisks and rewrite metadata on the remaining members.
 */
static int
g_raid_md_fail_disk_promise(struct g_raid_md_object *md,
    struct g_raid_subdisk *tsd, struct g_raid_disk *tdisk)
{
	struct g_raid_softc *sc;
	struct g_raid_md_promise_perdisk *pd;
	struct g_raid_subdisk *sd;
	int i, pos;

	sc = md->mdo_softc;
	pd = (struct g_raid_md_promise_perdisk *)tdisk->d_md_data;

	/* We can't fail disk that is not a part of array now. */
	if (tdisk->d_state != G_RAID_DISK_S_ACTIVE)
		return (-1);

	/*
	 * Mark disk as failed in metadata and try to write that metadata
	 * to the disk itself to prevent it's later resurrection as STALE.
	 */
	if (pd->pd_subdisks > 0 && tdisk->d_consumer != NULL)
		G_RAID_DEBUG(1, "Writing Promise metadata to %s",
		    g_raid_get_diskname(tdisk));
	for (i = 0; i < pd->pd_subdisks; i++) {
		pd->pd_meta[i]->disk.flags |=
		    PROMISE_F_DOWN | PROMISE_F_REDIR;
		pos = pd->pd_meta[i]->disk.number;
		/* Guard against out-of-range disk numbers in metadata. */
		if (pos >= 0 && pos < PROMISE_MAX_DISKS) {
			pd->pd_meta[i]->disks[pos].flags |=
			    PROMISE_F_DOWN | PROMISE_F_REDIR;
		}
		g_raid_md_promise_print(pd->pd_meta[i]);
	}
	if (tdisk->d_consumer != NULL)
		promise_meta_write(tdisk->d_consumer,
		    pd->pd_meta, pd->pd_subdisks);

	/* Change states. */
	g_raid_change_disk_state(tdisk, G_RAID_DISK_S_FAILED);
	TAILQ_FOREACH(sd, &tdisk->d_subdisks, sd_next) {
		g_raid_change_subdisk_state(sd,
		    G_RAID_SUBDISK_S_FAILED);
		g_raid_event_send(sd, G_RAID_SUBDISK_E_FAILED,
		    G_RAID_EVENT_SUBDISK);
	}

	/* Write updated metadata to remaining disks.
*/
	g_raid_md_write_promise(md, NULL, NULL, tdisk);

	/* Try to replace the failed disk from spares. */
	g_raid_md_promise_refill(sc);
	return (0);
}

/*
 * Release per-disk metadata state when a disk object is destroyed.
 */
static int
g_raid_md_free_disk_promise(struct g_raid_md_object *md,
    struct g_raid_disk *disk)
{
	struct g_raid_md_promise_perdisk *pd;
	int i;

	pd = (struct g_raid_md_promise_perdisk *)disk->d_md_data;
	for (i = 0; i < pd->pd_subdisks; i++) {
		if (pd->pd_meta[i] != NULL) {
			free(pd->pd_meta[i], M_MD_PROMISE);
			pd->pd_meta[i] = NULL;
		}
	}
	free(pd, M_MD_PROMISE);
	disk->d_md_data = NULL;
	return (0);
}

/*
 * Release per-volume metadata state when a volume is destroyed.
 * Also cancels the pending start timeout, if it never fired.
 * NOTE: pv itself is not freed here — presumably owned/freed by the
 * generic g_raid volume teardown; confirm against g_raid.c.
 */
static int
g_raid_md_free_volume_promise(struct g_raid_md_object *md,
    struct g_raid_volume *vol)
{
	struct g_raid_md_promise_pervolume *pv;

	pv = (struct g_raid_md_promise_pervolume *)vol->v_md_data;
	if (pv && pv->pv_meta != NULL) {
		free(pv->pv_meta, M_MD_PROMISE);
		pv->pv_meta = NULL;
	}
	if (pv && !pv->pv_started) {
		pv->pv_started = 1;
		callout_stop(&pv->pv_start_co);
	}
	return (0);
}

/*
 * Node-level teardown hook; no node-wide state to release.
 */
static int
g_raid_md_free_promise(struct g_raid_md_object *md)
{

	return (0);
}

G_RAID_MD_DECLARE(g_raid_md_promise);