md_intel.c revision 246168
1/*- 2 * Copyright (c) 2010 Alexander Motin <mav@FreeBSD.org> 3 * Copyright (c) 2000 - 2008 S��ren Schmidt <sos@FreeBSD.org> 4 * All rights reserved. 5 * 6 * Redistribution and use in source and binary forms, with or without 7 * modification, are permitted provided that the following conditions 8 * are met: 9 * 1. Redistributions of source code must retain the above copyright 10 * notice, this list of conditions and the following disclaimer. 11 * 2. Redistributions in binary form must reproduce the above copyright 12 * notice, this list of conditions and the following disclaimer in the 13 * documentation and/or other materials provided with the distribution. 14 * 15 * THIS SOFTWARE IS PROVIDED BY THE AUTHORS AND CONTRIBUTORS ``AS IS'' AND 16 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 17 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 18 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE LIABLE 19 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 20 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 21 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 22 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 23 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 24 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 25 * SUCH DAMAGE. 
26 */ 27 28#include <sys/cdefs.h> 29__FBSDID("$FreeBSD: stable/9/sys/geom/raid/md_intel.c 246168 2013-01-31 22:12:25Z mav $"); 30 31#include <sys/param.h> 32#include <sys/bio.h> 33#include <sys/endian.h> 34#include <sys/kernel.h> 35#include <sys/kobj.h> 36#include <sys/limits.h> 37#include <sys/lock.h> 38#include <sys/malloc.h> 39#include <sys/mutex.h> 40#include <sys/systm.h> 41#include <sys/taskqueue.h> 42#include <geom/geom.h> 43#include "geom/raid/g_raid.h" 44#include "g_raid_md_if.h" 45 46static MALLOC_DEFINE(M_MD_INTEL, "md_intel_data", "GEOM_RAID Intel metadata"); 47 48struct intel_raid_map { 49 uint32_t offset; 50 uint32_t disk_sectors; 51 uint32_t stripe_count; 52 uint16_t strip_sectors; 53 uint8_t status; 54#define INTEL_S_READY 0x00 55#define INTEL_S_UNINITIALIZED 0x01 56#define INTEL_S_DEGRADED 0x02 57#define INTEL_S_FAILURE 0x03 58 59 uint8_t type; 60#define INTEL_T_RAID0 0x00 61#define INTEL_T_RAID1 0x01 62#define INTEL_T_RAID5 0x05 63 64 uint8_t total_disks; 65 uint8_t total_domains; 66 uint8_t failed_disk_num; 67 uint8_t ddf; 68 uint32_t offset_hi; 69 uint32_t disk_sectors_hi; 70 uint32_t stripe_count_hi; 71 uint32_t filler_2[4]; 72 uint32_t disk_idx[1]; /* total_disks entries. 
*/ 73#define INTEL_DI_IDX 0x00ffffff 74#define INTEL_DI_RBLD 0x01000000 75} __packed; 76 77struct intel_raid_vol { 78 uint8_t name[16]; 79 u_int64_t total_sectors __packed; 80 uint32_t state; 81#define INTEL_ST_BOOTABLE 0x00000001 82#define INTEL_ST_BOOT_DEVICE 0x00000002 83#define INTEL_ST_READ_COALESCING 0x00000004 84#define INTEL_ST_WRITE_COALESCING 0x00000008 85#define INTEL_ST_LAST_SHUTDOWN_DIRTY 0x00000010 86#define INTEL_ST_HIDDEN_AT_BOOT 0x00000020 87#define INTEL_ST_CURRENTLY_HIDDEN 0x00000040 88#define INTEL_ST_VERIFY_AND_FIX 0x00000080 89#define INTEL_ST_MAP_STATE_UNINIT 0x00000100 90#define INTEL_ST_NO_AUTO_RECOVERY 0x00000200 91#define INTEL_ST_CLONE_N_GO 0x00000400 92#define INTEL_ST_CLONE_MAN_SYNC 0x00000800 93#define INTEL_ST_CNG_MASTER_DISK_NUM 0x00001000 94 uint32_t reserved; 95 uint8_t migr_priority; 96 uint8_t num_sub_vols; 97 uint8_t tid; 98 uint8_t cng_master_disk; 99 uint16_t cache_policy; 100 uint8_t cng_state; 101#define INTEL_SNGST_NEEDS_UPDATE 1 102#define INTEL_SNGST_MASTER_MISSING 2 103 uint8_t cng_sub_state; 104 uint32_t filler_0[10]; 105 106 uint32_t curr_migr_unit; 107 uint32_t checkpoint_id; 108 uint8_t migr_state; 109 uint8_t migr_type; 110#define INTEL_MT_INIT 0 111#define INTEL_MT_REBUILD 1 112#define INTEL_MT_VERIFY 2 113#define INTEL_MT_GEN_MIGR 3 114#define INTEL_MT_STATE_CHANGE 4 115#define INTEL_MT_REPAIR 5 116 uint8_t dirty; 117 uint8_t fs_state; 118 uint16_t verify_errors; 119 uint16_t bad_blocks; 120 uint32_t curr_migr_unit_hi; 121 uint32_t filler_1[3]; 122 struct intel_raid_map map[1]; /* 2 entries if migr_state != 0. 
*/ 123} __packed; 124 125struct intel_raid_disk { 126#define INTEL_SERIAL_LEN 16 127 uint8_t serial[INTEL_SERIAL_LEN]; 128 uint32_t sectors; 129 uint32_t id; 130 uint32_t flags; 131#define INTEL_F_SPARE 0x01 132#define INTEL_F_ASSIGNED 0x02 133#define INTEL_F_FAILED 0x04 134#define INTEL_F_ONLINE 0x08 135#define INTEL_F_DISABLED 0x80 136 uint32_t owner_cfg_num; 137 uint32_t sectors_hi; 138 uint32_t filler[3]; 139} __packed; 140 141struct intel_raid_conf { 142 uint8_t intel_id[24]; 143#define INTEL_MAGIC "Intel Raid ISM Cfg Sig. " 144 145 uint8_t version[6]; 146#define INTEL_VERSION_1000 "1.0.00" /* RAID0 */ 147#define INTEL_VERSION_1100 "1.1.00" /* RAID1 */ 148#define INTEL_VERSION_1200 "1.2.00" /* Many volumes */ 149#define INTEL_VERSION_1201 "1.2.01" /* 3 or 4 disks */ 150#define INTEL_VERSION_1202 "1.2.02" /* RAID5 */ 151#define INTEL_VERSION_1204 "1.2.04" /* 5 or 6 disks */ 152#define INTEL_VERSION_1206 "1.2.06" /* CNG */ 153#define INTEL_VERSION_1300 "1.3.00" /* Attributes */ 154 155 uint8_t dummy_0[2]; 156 uint32_t checksum; 157 uint32_t config_size; 158 uint32_t config_id; 159 uint32_t generation; 160 uint32_t error_log_size; 161 uint32_t attributes; 162#define INTEL_ATTR_RAID0 0x00000001 163#define INTEL_ATTR_RAID1 0x00000002 164#define INTEL_ATTR_RAID10 0x00000004 165#define INTEL_ATTR_RAID1E 0x00000008 166#define INTEL_ATTR_RAID5 0x00000010 167#define INTEL_ATTR_RAIDCNG 0x00000020 168#define INTEL_ATTR_2TB 0x20000000 169#define INTEL_ATTR_PM 0x40000000 170#define INTEL_ATTR_CHECKSUM 0x80000000 171 172 uint8_t total_disks; 173 uint8_t total_volumes; 174 uint8_t dummy_2[2]; 175 uint32_t filler_0[39]; 176 struct intel_raid_disk disk[1]; /* total_disks entries. */ 177 /* Here goes total_volumes of struct intel_raid_vol. 
*/ 178} __packed; 179 180#define INTEL_MAX_MD_SIZE(ndisks) \ 181 (sizeof(struct intel_raid_conf) + \ 182 sizeof(struct intel_raid_disk) * (ndisks - 1) + \ 183 sizeof(struct intel_raid_vol) * 2 + \ 184 sizeof(struct intel_raid_map) * 2 + \ 185 sizeof(uint32_t) * (ndisks - 1) * 4) 186 187struct g_raid_md_intel_perdisk { 188 struct intel_raid_conf *pd_meta; 189 int pd_disk_pos; 190 struct intel_raid_disk pd_disk_meta; 191}; 192 193struct g_raid_md_intel_pervolume { 194 int pv_volume_pos; 195 int pv_cng; 196 int pv_cng_man_sync; 197 int pv_cng_master_disk; 198}; 199 200struct g_raid_md_intel_object { 201 struct g_raid_md_object mdio_base; 202 uint32_t mdio_config_id; 203 uint32_t mdio_generation; 204 struct intel_raid_conf *mdio_meta; 205 struct callout mdio_start_co; /* STARTING state timer. */ 206 int mdio_disks_present; 207 int mdio_started; 208 int mdio_incomplete; 209 struct root_hold_token *mdio_rootmount; /* Root mount delay token. */ 210}; 211 212static g_raid_md_create_t g_raid_md_create_intel; 213static g_raid_md_taste_t g_raid_md_taste_intel; 214static g_raid_md_event_t g_raid_md_event_intel; 215static g_raid_md_ctl_t g_raid_md_ctl_intel; 216static g_raid_md_write_t g_raid_md_write_intel; 217static g_raid_md_fail_disk_t g_raid_md_fail_disk_intel; 218static g_raid_md_free_disk_t g_raid_md_free_disk_intel; 219static g_raid_md_free_volume_t g_raid_md_free_volume_intel; 220static g_raid_md_free_t g_raid_md_free_intel; 221 222static kobj_method_t g_raid_md_intel_methods[] = { 223 KOBJMETHOD(g_raid_md_create, g_raid_md_create_intel), 224 KOBJMETHOD(g_raid_md_taste, g_raid_md_taste_intel), 225 KOBJMETHOD(g_raid_md_event, g_raid_md_event_intel), 226 KOBJMETHOD(g_raid_md_ctl, g_raid_md_ctl_intel), 227 KOBJMETHOD(g_raid_md_write, g_raid_md_write_intel), 228 KOBJMETHOD(g_raid_md_fail_disk, g_raid_md_fail_disk_intel), 229 KOBJMETHOD(g_raid_md_free_disk, g_raid_md_free_disk_intel), 230 KOBJMETHOD(g_raid_md_free_volume, g_raid_md_free_volume_intel), 231 
KOBJMETHOD(g_raid_md_free, g_raid_md_free_intel), 232 { 0, 0 } 233}; 234 235static struct g_raid_md_class g_raid_md_intel_class = { 236 "Intel", 237 g_raid_md_intel_methods, 238 sizeof(struct g_raid_md_intel_object), 239 .mdc_enable = 1, 240 .mdc_priority = 100 241}; 242 243 244static struct intel_raid_map * 245intel_get_map(struct intel_raid_vol *mvol, int i) 246{ 247 struct intel_raid_map *mmap; 248 249 if (i > (mvol->migr_state ? 1 : 0)) 250 return (NULL); 251 mmap = &mvol->map[0]; 252 for (; i > 0; i--) { 253 mmap = (struct intel_raid_map *) 254 &mmap->disk_idx[mmap->total_disks]; 255 } 256 return ((struct intel_raid_map *)mmap); 257} 258 259static struct intel_raid_vol * 260intel_get_volume(struct intel_raid_conf *meta, int i) 261{ 262 struct intel_raid_vol *mvol; 263 struct intel_raid_map *mmap; 264 265 if (i > 1) 266 return (NULL); 267 mvol = (struct intel_raid_vol *)&meta->disk[meta->total_disks]; 268 for (; i > 0; i--) { 269 mmap = intel_get_map(mvol, mvol->migr_state ? 1 : 0); 270 mvol = (struct intel_raid_vol *) 271 &mmap->disk_idx[mmap->total_disks]; 272 } 273 return (mvol); 274} 275 276static off_t 277intel_get_map_offset(struct intel_raid_map *mmap) 278{ 279 off_t offset = (off_t)mmap->offset_hi << 32; 280 281 offset += mmap->offset; 282 return (offset); 283} 284 285static void 286intel_set_map_offset(struct intel_raid_map *mmap, off_t offset) 287{ 288 289 mmap->offset = offset & 0xffffffff; 290 mmap->offset_hi = offset >> 32; 291} 292 293static off_t 294intel_get_map_disk_sectors(struct intel_raid_map *mmap) 295{ 296 off_t disk_sectors = (off_t)mmap->disk_sectors_hi << 32; 297 298 disk_sectors += mmap->disk_sectors; 299 return (disk_sectors); 300} 301 302static void 303intel_set_map_disk_sectors(struct intel_raid_map *mmap, off_t disk_sectors) 304{ 305 306 mmap->disk_sectors = disk_sectors & 0xffffffff; 307 mmap->disk_sectors_hi = disk_sectors >> 32; 308} 309 310static void 311intel_set_map_stripe_count(struct intel_raid_map *mmap, off_t 
stripe_count) 312{ 313 314 mmap->stripe_count = stripe_count & 0xffffffff; 315 mmap->stripe_count_hi = stripe_count >> 32; 316} 317 318static off_t 319intel_get_disk_sectors(struct intel_raid_disk *disk) 320{ 321 off_t sectors = (off_t)disk->sectors_hi << 32; 322 323 sectors += disk->sectors; 324 return (sectors); 325} 326 327static void 328intel_set_disk_sectors(struct intel_raid_disk *disk, off_t sectors) 329{ 330 331 disk->sectors = sectors & 0xffffffff; 332 disk->sectors_hi = sectors >> 32; 333} 334 335static off_t 336intel_get_vol_curr_migr_unit(struct intel_raid_vol *vol) 337{ 338 off_t curr_migr_unit = (off_t)vol->curr_migr_unit_hi << 32; 339 340 curr_migr_unit += vol->curr_migr_unit; 341 return (curr_migr_unit); 342} 343 344static void 345intel_set_vol_curr_migr_unit(struct intel_raid_vol *vol, off_t curr_migr_unit) 346{ 347 348 vol->curr_migr_unit = curr_migr_unit & 0xffffffff; 349 vol->curr_migr_unit_hi = curr_migr_unit >> 32; 350} 351 352static void 353g_raid_md_intel_print(struct intel_raid_conf *meta) 354{ 355 struct intel_raid_vol *mvol; 356 struct intel_raid_map *mmap; 357 int i, j, k; 358 359 if (g_raid_debug < 1) 360 return; 361 362 printf("********* ATA Intel MatrixRAID Metadata *********\n"); 363 printf("intel_id <%.24s>\n", meta->intel_id); 364 printf("version <%.6s>\n", meta->version); 365 printf("checksum 0x%08x\n", meta->checksum); 366 printf("config_size 0x%08x\n", meta->config_size); 367 printf("config_id 0x%08x\n", meta->config_id); 368 printf("generation 0x%08x\n", meta->generation); 369 printf("attributes 0x%08x\n", meta->attributes); 370 printf("total_disks %u\n", meta->total_disks); 371 printf("total_volumes %u\n", meta->total_volumes); 372 printf("DISK# serial disk_sectors disk_sectors_hi disk_id flags\n"); 373 for (i = 0; i < meta->total_disks; i++ ) { 374 printf(" %d <%.16s> %u %u 0x%08x 0x%08x\n", i, 375 meta->disk[i].serial, meta->disk[i].sectors, 376 meta->disk[i].sectors_hi, 377 meta->disk[i].id, meta->disk[i].flags); 378 } 379 
for (i = 0; i < meta->total_volumes; i++) { 380 mvol = intel_get_volume(meta, i); 381 printf(" ****** Volume %d ******\n", i); 382 printf(" name %.16s\n", mvol->name); 383 printf(" total_sectors %ju\n", mvol->total_sectors); 384 printf(" state 0x%08x\n", mvol->state); 385 printf(" reserved %u\n", mvol->reserved); 386 printf(" migr_priority %u\n", mvol->migr_priority); 387 printf(" num_sub_vols %u\n", mvol->num_sub_vols); 388 printf(" tid %u\n", mvol->tid); 389 printf(" cng_master_disk %u\n", mvol->cng_master_disk); 390 printf(" cache_policy %u\n", mvol->cache_policy); 391 printf(" cng_state %u\n", mvol->cng_state); 392 printf(" cng_sub_state %u\n", mvol->cng_sub_state); 393 printf(" curr_migr_unit %u\n", mvol->curr_migr_unit); 394 printf(" curr_migr_unit_hi %u\n", mvol->curr_migr_unit_hi); 395 printf(" checkpoint_id %u\n", mvol->checkpoint_id); 396 printf(" migr_state %u\n", mvol->migr_state); 397 printf(" migr_type %u\n", mvol->migr_type); 398 printf(" dirty %u\n", mvol->dirty); 399 400 for (j = 0; j < (mvol->migr_state ? 
2 : 1); j++) { 401 printf(" *** Map %d ***\n", j); 402 mmap = intel_get_map(mvol, j); 403 printf(" offset %u\n", mmap->offset); 404 printf(" offset_hi %u\n", mmap->offset_hi); 405 printf(" disk_sectors %u\n", mmap->disk_sectors); 406 printf(" disk_sectors_hi %u\n", mmap->disk_sectors_hi); 407 printf(" stripe_count %u\n", mmap->stripe_count); 408 printf(" stripe_count_hi %u\n", mmap->stripe_count_hi); 409 printf(" strip_sectors %u\n", mmap->strip_sectors); 410 printf(" status %u\n", mmap->status); 411 printf(" type %u\n", mmap->type); 412 printf(" total_disks %u\n", mmap->total_disks); 413 printf(" total_domains %u\n", mmap->total_domains); 414 printf(" failed_disk_num %u\n", mmap->failed_disk_num); 415 printf(" ddf %u\n", mmap->ddf); 416 printf(" disk_idx "); 417 for (k = 0; k < mmap->total_disks; k++) 418 printf(" 0x%08x", mmap->disk_idx[k]); 419 printf("\n"); 420 } 421 } 422 printf("=================================================\n"); 423} 424 425static struct intel_raid_conf * 426intel_meta_copy(struct intel_raid_conf *meta) 427{ 428 struct intel_raid_conf *nmeta; 429 430 nmeta = malloc(meta->config_size, M_MD_INTEL, M_WAITOK); 431 memcpy(nmeta, meta, meta->config_size); 432 return (nmeta); 433} 434 435static int 436intel_meta_find_disk(struct intel_raid_conf *meta, char *serial) 437{ 438 int pos; 439 440 for (pos = 0; pos < meta->total_disks; pos++) { 441 if (strncmp(meta->disk[pos].serial, 442 serial, INTEL_SERIAL_LEN) == 0) 443 return (pos); 444 } 445 return (-1); 446} 447 448static struct intel_raid_conf * 449intel_meta_read(struct g_consumer *cp) 450{ 451 struct g_provider *pp; 452 struct intel_raid_conf *meta; 453 struct intel_raid_vol *mvol; 454 struct intel_raid_map *mmap; 455 char *buf; 456 int error, i, j, k, left, size; 457 uint32_t checksum, *ptr; 458 459 pp = cp->provider; 460 461 /* Read the anchor sector. 
*/ 462 buf = g_read_data(cp, 463 pp->mediasize - pp->sectorsize * 2, pp->sectorsize, &error); 464 if (buf == NULL) { 465 G_RAID_DEBUG(1, "Cannot read metadata from %s (error=%d).", 466 pp->name, error); 467 return (NULL); 468 } 469 meta = (struct intel_raid_conf *)buf; 470 471 /* Check if this is an Intel RAID struct */ 472 if (strncmp(meta->intel_id, INTEL_MAGIC, strlen(INTEL_MAGIC))) { 473 G_RAID_DEBUG(1, "Intel signature check failed on %s", pp->name); 474 g_free(buf); 475 return (NULL); 476 } 477 if (meta->config_size > 65536 || 478 meta->config_size < sizeof(struct intel_raid_conf)) { 479 G_RAID_DEBUG(1, "Intel metadata size looks wrong: %d", 480 meta->config_size); 481 g_free(buf); 482 return (NULL); 483 } 484 size = meta->config_size; 485 meta = malloc(size, M_MD_INTEL, M_WAITOK); 486 memcpy(meta, buf, min(size, pp->sectorsize)); 487 g_free(buf); 488 489 /* Read all the rest, if needed. */ 490 if (meta->config_size > pp->sectorsize) { 491 left = (meta->config_size - 1) / pp->sectorsize; 492 buf = g_read_data(cp, 493 pp->mediasize - pp->sectorsize * (2 + left), 494 pp->sectorsize * left, &error); 495 if (buf == NULL) { 496 G_RAID_DEBUG(1, "Cannot read remaining metadata" 497 " part from %s (error=%d).", 498 pp->name, error); 499 free(meta, M_MD_INTEL); 500 return (NULL); 501 } 502 memcpy(((char *)meta) + pp->sectorsize, buf, 503 pp->sectorsize * left); 504 g_free(buf); 505 } 506 507 /* Check metadata checksum. */ 508 for (checksum = 0, ptr = (uint32_t *)meta, i = 0; 509 i < (meta->config_size / sizeof(uint32_t)); i++) { 510 checksum += *ptr++; 511 } 512 checksum -= meta->checksum; 513 if (checksum != meta->checksum) { 514 G_RAID_DEBUG(1, "Intel checksum check failed on %s", pp->name); 515 free(meta, M_MD_INTEL); 516 return (NULL); 517 } 518 519 /* Validate metadata size. 
*/ 520 size = sizeof(struct intel_raid_conf) + 521 sizeof(struct intel_raid_disk) * (meta->total_disks - 1) + 522 sizeof(struct intel_raid_vol) * meta->total_volumes; 523 if (size > meta->config_size) { 524badsize: 525 G_RAID_DEBUG(1, "Intel metadata size incorrect %d < %d", 526 meta->config_size, size); 527 free(meta, M_MD_INTEL); 528 return (NULL); 529 } 530 for (i = 0; i < meta->total_volumes; i++) { 531 mvol = intel_get_volume(meta, i); 532 mmap = intel_get_map(mvol, 0); 533 size += 4 * (mmap->total_disks - 1); 534 if (size > meta->config_size) 535 goto badsize; 536 if (mvol->migr_state) { 537 size += sizeof(struct intel_raid_map); 538 if (size > meta->config_size) 539 goto badsize; 540 mmap = intel_get_map(mvol, 1); 541 size += 4 * (mmap->total_disks - 1); 542 if (size > meta->config_size) 543 goto badsize; 544 } 545 } 546 547 /* Validate disk indexes. */ 548 for (i = 0; i < meta->total_volumes; i++) { 549 mvol = intel_get_volume(meta, i); 550 for (j = 0; j < (mvol->migr_state ? 2 : 1); j++) { 551 mmap = intel_get_map(mvol, j); 552 for (k = 0; k < mmap->total_disks; k++) { 553 if ((mmap->disk_idx[k] & INTEL_DI_IDX) > 554 meta->total_disks) { 555 G_RAID_DEBUG(1, "Intel metadata disk" 556 " index %d too big (>%d)", 557 mmap->disk_idx[k] & INTEL_DI_IDX, 558 meta->total_disks); 559 free(meta, M_MD_INTEL); 560 return (NULL); 561 } 562 } 563 } 564 } 565 566 /* Validate migration types. 
*/ 567 for (i = 0; i < meta->total_volumes; i++) { 568 mvol = intel_get_volume(meta, i); 569 if (mvol->migr_state && 570 mvol->migr_type != INTEL_MT_INIT && 571 mvol->migr_type != INTEL_MT_REBUILD && 572 mvol->migr_type != INTEL_MT_VERIFY && 573 mvol->migr_type != INTEL_MT_REPAIR) { 574 G_RAID_DEBUG(1, "Intel metadata has unsupported" 575 " migration type %d", mvol->migr_type); 576 free(meta, M_MD_INTEL); 577 return (NULL); 578 } 579 } 580 581 return (meta); 582} 583 584static int 585intel_meta_write(struct g_consumer *cp, struct intel_raid_conf *meta) 586{ 587 struct g_provider *pp; 588 char *buf; 589 int error, i, sectors; 590 uint32_t checksum, *ptr; 591 592 pp = cp->provider; 593 594 /* Recalculate checksum for case if metadata were changed. */ 595 meta->checksum = 0; 596 for (checksum = 0, ptr = (uint32_t *)meta, i = 0; 597 i < (meta->config_size / sizeof(uint32_t)); i++) { 598 checksum += *ptr++; 599 } 600 meta->checksum = checksum; 601 602 /* Create and fill buffer. */ 603 sectors = (meta->config_size + pp->sectorsize - 1) / pp->sectorsize; 604 buf = malloc(sectors * pp->sectorsize, M_MD_INTEL, M_WAITOK | M_ZERO); 605 if (sectors > 1) { 606 memcpy(buf, ((char *)meta) + pp->sectorsize, 607 (sectors - 1) * pp->sectorsize); 608 } 609 memcpy(buf + (sectors - 1) * pp->sectorsize, meta, pp->sectorsize); 610 611 error = g_write_data(cp, 612 pp->mediasize - pp->sectorsize * (1 + sectors), 613 buf, pp->sectorsize * sectors); 614 if (error != 0) { 615 G_RAID_DEBUG(1, "Cannot write metadata to %s (error=%d).", 616 pp->name, error); 617 } 618 619 free(buf, M_MD_INTEL); 620 return (error); 621} 622 623static int 624intel_meta_erase(struct g_consumer *cp) 625{ 626 struct g_provider *pp; 627 char *buf; 628 int error; 629 630 pp = cp->provider; 631 buf = malloc(pp->sectorsize, M_MD_INTEL, M_WAITOK | M_ZERO); 632 error = g_write_data(cp, 633 pp->mediasize - 2 * pp->sectorsize, 634 buf, pp->sectorsize); 635 if (error != 0) { 636 G_RAID_DEBUG(1, "Cannot erase metadata on %s 
(error=%d).", 637 pp->name, error); 638 } 639 free(buf, M_MD_INTEL); 640 return (error); 641} 642 643static int 644intel_meta_write_spare(struct g_consumer *cp, struct intel_raid_disk *d) 645{ 646 struct intel_raid_conf *meta; 647 int error; 648 649 /* Fill anchor and single disk. */ 650 meta = malloc(INTEL_MAX_MD_SIZE(1), M_MD_INTEL, M_WAITOK | M_ZERO); 651 memcpy(&meta->intel_id[0], INTEL_MAGIC, sizeof(INTEL_MAGIC) - 1); 652 memcpy(&meta->version[0], INTEL_VERSION_1000, 653 sizeof(INTEL_VERSION_1000) - 1); 654 meta->config_size = INTEL_MAX_MD_SIZE(1); 655 meta->config_id = arc4random(); 656 meta->generation = 1; 657 meta->total_disks = 1; 658 meta->disk[0] = *d; 659 error = intel_meta_write(cp, meta); 660 free(meta, M_MD_INTEL); 661 return (error); 662} 663 664static struct g_raid_disk * 665g_raid_md_intel_get_disk(struct g_raid_softc *sc, int id) 666{ 667 struct g_raid_disk *disk; 668 struct g_raid_md_intel_perdisk *pd; 669 670 TAILQ_FOREACH(disk, &sc->sc_disks, d_next) { 671 pd = (struct g_raid_md_intel_perdisk *)disk->d_md_data; 672 if (pd->pd_disk_pos == id) 673 break; 674 } 675 return (disk); 676} 677 678static int 679g_raid_md_intel_supported(int level, int qual, int disks, int force) 680{ 681 682 switch (level) { 683 case G_RAID_VOLUME_RL_RAID0: 684 if (disks < 1) 685 return (0); 686 if (!force && (disks < 2 || disks > 6)) 687 return (0); 688 break; 689 case G_RAID_VOLUME_RL_RAID1: 690 if (disks < 1) 691 return (0); 692 if (!force && (disks != 2)) 693 return (0); 694 break; 695 case G_RAID_VOLUME_RL_RAID1E: 696 if (disks < 2) 697 return (0); 698 if (!force && (disks != 4)) 699 return (0); 700 break; 701 case G_RAID_VOLUME_RL_RAID5: 702 if (disks < 3) 703 return (0); 704 if (!force && disks > 6) 705 return (0); 706 if (qual != G_RAID_VOLUME_RLQ_R5LA) 707 return (0); 708 break; 709 default: 710 return (0); 711 } 712 if (level != G_RAID_VOLUME_RL_RAID5 && qual != G_RAID_VOLUME_RLQ_NONE) 713 return (0); 714 return (1); 715} 716 717static struct g_raid_volume * 
718g_raid_md_intel_get_volume(struct g_raid_softc *sc, int id) 719{ 720 struct g_raid_volume *mvol; 721 struct g_raid_md_intel_pervolume *pv; 722 723 TAILQ_FOREACH(mvol, &sc->sc_volumes, v_next) { 724 pv = mvol->v_md_data; 725 if (pv->pv_volume_pos == id) 726 break; 727 } 728 return (mvol); 729} 730 731static int 732g_raid_md_intel_start_disk(struct g_raid_disk *disk) 733{ 734 struct g_raid_softc *sc; 735 struct g_raid_subdisk *sd, *tmpsd; 736 struct g_raid_disk *olddisk, *tmpdisk; 737 struct g_raid_md_object *md; 738 struct g_raid_md_intel_object *mdi; 739 struct g_raid_md_intel_pervolume *pv; 740 struct g_raid_md_intel_perdisk *pd, *oldpd; 741 struct intel_raid_conf *meta; 742 struct intel_raid_vol *mvol; 743 struct intel_raid_map *mmap0, *mmap1; 744 int disk_pos, resurrection = 0; 745 746 sc = disk->d_softc; 747 md = sc->sc_md; 748 mdi = (struct g_raid_md_intel_object *)md; 749 meta = mdi->mdio_meta; 750 pd = (struct g_raid_md_intel_perdisk *)disk->d_md_data; 751 olddisk = NULL; 752 753 /* Find disk position in metadata by it's serial. */ 754 disk_pos = intel_meta_find_disk(meta, pd->pd_disk_meta.serial); 755 if (disk_pos < 0) { 756 G_RAID_DEBUG1(1, sc, "Unknown, probably new or stale disk"); 757 /* Disabled disk is useless for us. */ 758 if (pd->pd_disk_meta.flags & INTEL_F_DISABLED) { 759 g_raid_change_disk_state(disk, G_RAID_DISK_S_DISABLED); 760 return (0); 761 } 762 /* Failed stale disk is useless for us. */ 763 if (pd->pd_disk_meta.flags & INTEL_F_FAILED) { 764 g_raid_change_disk_state(disk, G_RAID_DISK_S_STALE_FAILED); 765 return (0); 766 } 767 /* If we are in the start process, that's all for now. */ 768 if (!mdi->mdio_started) 769 goto nofit; 770 /* 771 * If we have already started - try to get use of the disk. 772 * Try to replace OFFLINE disks first, then FAILED. 
773 */ 774 TAILQ_FOREACH(tmpdisk, &sc->sc_disks, d_next) { 775 if (tmpdisk->d_state != G_RAID_DISK_S_OFFLINE && 776 tmpdisk->d_state != G_RAID_DISK_S_FAILED) 777 continue; 778 /* Make sure this disk is big enough. */ 779 TAILQ_FOREACH(sd, &tmpdisk->d_subdisks, sd_next) { 780 off_t disk_sectors = 781 intel_get_disk_sectors(&pd->pd_disk_meta); 782 783 if (sd->sd_offset + sd->sd_size + 4096 > 784 disk_sectors * 512) { 785 G_RAID_DEBUG1(1, sc, 786 "Disk too small (%llu < %llu)", 787 (unsigned long long) 788 disk_sectors * 512, 789 (unsigned long long) 790 sd->sd_offset + sd->sd_size + 4096); 791 break; 792 } 793 } 794 if (sd != NULL) 795 continue; 796 if (tmpdisk->d_state == G_RAID_DISK_S_OFFLINE) { 797 olddisk = tmpdisk; 798 break; 799 } else if (olddisk == NULL) 800 olddisk = tmpdisk; 801 } 802 if (olddisk == NULL) { 803nofit: 804 if (pd->pd_disk_meta.flags & INTEL_F_SPARE) { 805 g_raid_change_disk_state(disk, 806 G_RAID_DISK_S_SPARE); 807 return (1); 808 } else { 809 g_raid_change_disk_state(disk, 810 G_RAID_DISK_S_STALE); 811 return (0); 812 } 813 } 814 oldpd = (struct g_raid_md_intel_perdisk *)olddisk->d_md_data; 815 disk_pos = oldpd->pd_disk_pos; 816 resurrection = 1; 817 } 818 819 if (olddisk == NULL) { 820 /* Find placeholder by position. */ 821 olddisk = g_raid_md_intel_get_disk(sc, disk_pos); 822 if (olddisk == NULL) 823 panic("No disk at position %d!", disk_pos); 824 if (olddisk->d_state != G_RAID_DISK_S_OFFLINE) { 825 G_RAID_DEBUG1(1, sc, "More then one disk for pos %d", 826 disk_pos); 827 g_raid_change_disk_state(disk, G_RAID_DISK_S_STALE); 828 return (0); 829 } 830 oldpd = (struct g_raid_md_intel_perdisk *)olddisk->d_md_data; 831 } 832 833 /* Replace failed disk or placeholder with new disk. 
*/ 834 TAILQ_FOREACH_SAFE(sd, &olddisk->d_subdisks, sd_next, tmpsd) { 835 TAILQ_REMOVE(&olddisk->d_subdisks, sd, sd_next); 836 TAILQ_INSERT_TAIL(&disk->d_subdisks, sd, sd_next); 837 sd->sd_disk = disk; 838 } 839 oldpd->pd_disk_pos = -2; 840 pd->pd_disk_pos = disk_pos; 841 842 /* If it was placeholder -- destroy it. */ 843 if (olddisk->d_state == G_RAID_DISK_S_OFFLINE) { 844 g_raid_destroy_disk(olddisk); 845 } else { 846 /* Otherwise, make it STALE_FAILED. */ 847 g_raid_change_disk_state(olddisk, G_RAID_DISK_S_STALE_FAILED); 848 /* Update global metadata just in case. */ 849 memcpy(&meta->disk[disk_pos], &pd->pd_disk_meta, 850 sizeof(struct intel_raid_disk)); 851 } 852 853 /* Welcome the new disk. */ 854 if (resurrection) 855 g_raid_change_disk_state(disk, G_RAID_DISK_S_ACTIVE); 856 else if (meta->disk[disk_pos].flags & INTEL_F_DISABLED) 857 g_raid_change_disk_state(disk, G_RAID_DISK_S_DISABLED); 858 else if (meta->disk[disk_pos].flags & INTEL_F_FAILED) 859 g_raid_change_disk_state(disk, G_RAID_DISK_S_FAILED); 860 else if (meta->disk[disk_pos].flags & INTEL_F_SPARE) 861 g_raid_change_disk_state(disk, G_RAID_DISK_S_SPARE); 862 else 863 g_raid_change_disk_state(disk, G_RAID_DISK_S_ACTIVE); 864 TAILQ_FOREACH(sd, &disk->d_subdisks, sd_next) { 865 pv = sd->sd_volume->v_md_data; 866 mvol = intel_get_volume(meta, pv->pv_volume_pos); 867 mmap0 = intel_get_map(mvol, 0); 868 if (mvol->migr_state) 869 mmap1 = intel_get_map(mvol, 1); 870 else 871 mmap1 = mmap0; 872 873 if (resurrection) { 874 /* Stale disk, almost same as new. */ 875 g_raid_change_subdisk_state(sd, 876 G_RAID_SUBDISK_S_NEW); 877 } else if (meta->disk[disk_pos].flags & INTEL_F_DISABLED) { 878 /* Disabled disk, useless. */ 879 g_raid_change_subdisk_state(sd, 880 G_RAID_SUBDISK_S_NONE); 881 } else if (meta->disk[disk_pos].flags & INTEL_F_FAILED) { 882 /* Failed disk, almost useless. 
*/ 883 g_raid_change_subdisk_state(sd, 884 G_RAID_SUBDISK_S_FAILED); 885 } else if (mvol->migr_state == 0) { 886 if (mmap0->status == INTEL_S_UNINITIALIZED && 887 (!pv->pv_cng || pv->pv_cng_master_disk != disk_pos)) { 888 /* Freshly created uninitialized volume. */ 889 g_raid_change_subdisk_state(sd, 890 G_RAID_SUBDISK_S_UNINITIALIZED); 891 } else if (mmap0->disk_idx[sd->sd_pos] & INTEL_DI_RBLD) { 892 /* Freshly inserted disk. */ 893 g_raid_change_subdisk_state(sd, 894 G_RAID_SUBDISK_S_NEW); 895 } else if (mvol->dirty && (!pv->pv_cng || 896 pv->pv_cng_master_disk != disk_pos)) { 897 /* Dirty volume (unclean shutdown). */ 898 g_raid_change_subdisk_state(sd, 899 G_RAID_SUBDISK_S_STALE); 900 } else { 901 /* Up to date disk. */ 902 g_raid_change_subdisk_state(sd, 903 G_RAID_SUBDISK_S_ACTIVE); 904 } 905 } else if (mvol->migr_type == INTEL_MT_INIT || 906 mvol->migr_type == INTEL_MT_REBUILD) { 907 if (mmap0->disk_idx[sd->sd_pos] & INTEL_DI_RBLD) { 908 /* Freshly inserted disk. */ 909 g_raid_change_subdisk_state(sd, 910 G_RAID_SUBDISK_S_NEW); 911 } else if (mmap1->disk_idx[sd->sd_pos] & INTEL_DI_RBLD) { 912 /* Rebuilding disk. */ 913 g_raid_change_subdisk_state(sd, 914 G_RAID_SUBDISK_S_REBUILD); 915 if (mvol->dirty) { 916 sd->sd_rebuild_pos = 0; 917 } else { 918 sd->sd_rebuild_pos = 919 intel_get_vol_curr_migr_unit(mvol) * 920 sd->sd_volume->v_strip_size * 921 mmap0->total_domains; 922 } 923 } else if (mvol->dirty && (!pv->pv_cng || 924 pv->pv_cng_master_disk != disk_pos)) { 925 /* Dirty volume (unclean shutdown). */ 926 g_raid_change_subdisk_state(sd, 927 G_RAID_SUBDISK_S_STALE); 928 } else { 929 /* Up to date disk. */ 930 g_raid_change_subdisk_state(sd, 931 G_RAID_SUBDISK_S_ACTIVE); 932 } 933 } else if (mvol->migr_type == INTEL_MT_VERIFY || 934 mvol->migr_type == INTEL_MT_REPAIR) { 935 if (mmap0->disk_idx[sd->sd_pos] & INTEL_DI_RBLD) { 936 /* Freshly inserted disk. 
*/ 937 g_raid_change_subdisk_state(sd, 938 G_RAID_SUBDISK_S_NEW); 939 } else if (mmap1->disk_idx[sd->sd_pos] & INTEL_DI_RBLD) { 940 /* Resyncing disk. */ 941 g_raid_change_subdisk_state(sd, 942 G_RAID_SUBDISK_S_RESYNC); 943 if (mvol->dirty) { 944 sd->sd_rebuild_pos = 0; 945 } else { 946 sd->sd_rebuild_pos = 947 intel_get_vol_curr_migr_unit(mvol) * 948 sd->sd_volume->v_strip_size * 949 mmap0->total_domains; 950 } 951 } else if (mvol->dirty) { 952 /* Dirty volume (unclean shutdown). */ 953 g_raid_change_subdisk_state(sd, 954 G_RAID_SUBDISK_S_STALE); 955 } else { 956 /* Up to date disk. */ 957 g_raid_change_subdisk_state(sd, 958 G_RAID_SUBDISK_S_ACTIVE); 959 } 960 } 961 g_raid_event_send(sd, G_RAID_SUBDISK_E_NEW, 962 G_RAID_EVENT_SUBDISK); 963 } 964 965 /* Update status of our need for spare. */ 966 if (mdi->mdio_started) { 967 mdi->mdio_incomplete = 968 (g_raid_ndisks(sc, G_RAID_DISK_S_ACTIVE) < 969 meta->total_disks); 970 } 971 972 return (resurrection); 973} 974 975static void 976g_disk_md_intel_retaste(void *arg, int pending) 977{ 978 979 G_RAID_DEBUG(1, "Array is not complete, trying to retaste."); 980 g_retaste(&g_raid_class); 981 free(arg, M_MD_INTEL); 982} 983 984static void 985g_raid_md_intel_refill(struct g_raid_softc *sc) 986{ 987 struct g_raid_md_object *md; 988 struct g_raid_md_intel_object *mdi; 989 struct intel_raid_conf *meta; 990 struct g_raid_disk *disk; 991 struct task *task; 992 int update, na; 993 994 md = sc->sc_md; 995 mdi = (struct g_raid_md_intel_object *)md; 996 meta = mdi->mdio_meta; 997 update = 0; 998 do { 999 /* Make sure we miss anything. */ 1000 na = g_raid_ndisks(sc, G_RAID_DISK_S_ACTIVE); 1001 if (na == meta->total_disks) 1002 break; 1003 1004 G_RAID_DEBUG1(1, md->mdo_softc, 1005 "Array is not complete (%d of %d), " 1006 "trying to refill.", na, meta->total_disks); 1007 1008 /* Try to get use some of STALE disks. 
*/ 1009 TAILQ_FOREACH(disk, &sc->sc_disks, d_next) { 1010 if (disk->d_state == G_RAID_DISK_S_STALE) { 1011 update += g_raid_md_intel_start_disk(disk); 1012 if (disk->d_state == G_RAID_DISK_S_ACTIVE) 1013 break; 1014 } 1015 } 1016 if (disk != NULL) 1017 continue; 1018 1019 /* Try to get use some of SPARE disks. */ 1020 TAILQ_FOREACH(disk, &sc->sc_disks, d_next) { 1021 if (disk->d_state == G_RAID_DISK_S_SPARE) { 1022 update += g_raid_md_intel_start_disk(disk); 1023 if (disk->d_state == G_RAID_DISK_S_ACTIVE) 1024 break; 1025 } 1026 } 1027 } while (disk != NULL); 1028 1029 /* Write new metadata if we changed something. */ 1030 if (update) { 1031 g_raid_md_write_intel(md, NULL, NULL, NULL); 1032 meta = mdi->mdio_meta; 1033 } 1034 1035 /* Update status of our need for spare. */ 1036 mdi->mdio_incomplete = (g_raid_ndisks(sc, G_RAID_DISK_S_ACTIVE) < 1037 meta->total_disks); 1038 1039 /* Request retaste hoping to find spare. */ 1040 if (mdi->mdio_incomplete) { 1041 task = malloc(sizeof(struct task), 1042 M_MD_INTEL, M_WAITOK | M_ZERO); 1043 TASK_INIT(task, 0, g_disk_md_intel_retaste, task); 1044 taskqueue_enqueue(taskqueue_swi, task); 1045 } 1046} 1047 1048static void 1049g_raid_md_intel_start(struct g_raid_softc *sc) 1050{ 1051 struct g_raid_md_object *md; 1052 struct g_raid_md_intel_object *mdi; 1053 struct g_raid_md_intel_pervolume *pv; 1054 struct g_raid_md_intel_perdisk *pd; 1055 struct intel_raid_conf *meta; 1056 struct intel_raid_vol *mvol; 1057 struct intel_raid_map *mmap; 1058 struct g_raid_volume *vol; 1059 struct g_raid_subdisk *sd; 1060 struct g_raid_disk *disk; 1061 int i, j, disk_pos; 1062 1063 md = sc->sc_md; 1064 mdi = (struct g_raid_md_intel_object *)md; 1065 meta = mdi->mdio_meta; 1066 1067 /* Create volumes and subdisks. 
*/ 1068 for (i = 0; i < meta->total_volumes; i++) { 1069 mvol = intel_get_volume(meta, i); 1070 mmap = intel_get_map(mvol, 0); 1071 vol = g_raid_create_volume(sc, mvol->name, -1); 1072 pv = malloc(sizeof(*pv), M_MD_INTEL, M_WAITOK | M_ZERO); 1073 pv->pv_volume_pos = i; 1074 pv->pv_cng = (mvol->state & INTEL_ST_CLONE_N_GO) != 0; 1075 pv->pv_cng_man_sync = (mvol->state & INTEL_ST_CLONE_MAN_SYNC) != 0; 1076 if (mvol->cng_master_disk < mmap->total_disks) 1077 pv->pv_cng_master_disk = mvol->cng_master_disk; 1078 vol->v_md_data = pv; 1079 vol->v_raid_level_qualifier = G_RAID_VOLUME_RLQ_NONE; 1080 if (mmap->type == INTEL_T_RAID0) 1081 vol->v_raid_level = G_RAID_VOLUME_RL_RAID0; 1082 else if (mmap->type == INTEL_T_RAID1 && 1083 mmap->total_domains >= 2 && 1084 mmap->total_domains <= mmap->total_disks) { 1085 /* Assume total_domains is correct. */ 1086 if (mmap->total_domains == mmap->total_disks) 1087 vol->v_raid_level = G_RAID_VOLUME_RL_RAID1; 1088 else 1089 vol->v_raid_level = G_RAID_VOLUME_RL_RAID1E; 1090 } else if (mmap->type == INTEL_T_RAID1) { 1091 /* total_domains looks wrong. */ 1092 if (mmap->total_disks <= 2) 1093 vol->v_raid_level = G_RAID_VOLUME_RL_RAID1; 1094 else 1095 vol->v_raid_level = G_RAID_VOLUME_RL_RAID1E; 1096 } else if (mmap->type == INTEL_T_RAID5) { 1097 vol->v_raid_level = G_RAID_VOLUME_RL_RAID5; 1098 vol->v_raid_level_qualifier = G_RAID_VOLUME_RLQ_R5LA; 1099 } else 1100 vol->v_raid_level = G_RAID_VOLUME_RL_UNKNOWN; 1101 vol->v_strip_size = (u_int)mmap->strip_sectors * 512; //ZZZ 1102 vol->v_disks_count = mmap->total_disks; 1103 vol->v_mediasize = (off_t)mvol->total_sectors * 512; //ZZZ 1104 vol->v_sectorsize = 512; //ZZZ 1105 for (j = 0; j < vol->v_disks_count; j++) { 1106 sd = &vol->v_subdisks[j]; 1107 sd->sd_offset = intel_get_map_offset(mmap) * 512; //ZZZ 1108 sd->sd_size = intel_get_map_disk_sectors(mmap) * 512; //ZZZ 1109 } 1110 g_raid_start_volume(vol); 1111 } 1112 1113 /* Create disk placeholders to store data for later writing. 
*/ 1114 for (disk_pos = 0; disk_pos < meta->total_disks; disk_pos++) { 1115 pd = malloc(sizeof(*pd), M_MD_INTEL, M_WAITOK | M_ZERO); 1116 pd->pd_disk_pos = disk_pos; 1117 pd->pd_disk_meta = meta->disk[disk_pos]; 1118 disk = g_raid_create_disk(sc); 1119 disk->d_md_data = (void *)pd; 1120 disk->d_state = G_RAID_DISK_S_OFFLINE; 1121 for (i = 0; i < meta->total_volumes; i++) { 1122 mvol = intel_get_volume(meta, i); 1123 mmap = intel_get_map(mvol, 0); 1124 for (j = 0; j < mmap->total_disks; j++) { 1125 if ((mmap->disk_idx[j] & INTEL_DI_IDX) == disk_pos) 1126 break; 1127 } 1128 if (j == mmap->total_disks) 1129 continue; 1130 vol = g_raid_md_intel_get_volume(sc, i); 1131 sd = &vol->v_subdisks[j]; 1132 sd->sd_disk = disk; 1133 TAILQ_INSERT_TAIL(&disk->d_subdisks, sd, sd_next); 1134 } 1135 } 1136 1137 /* Make all disks found till the moment take their places. */ 1138 do { 1139 TAILQ_FOREACH(disk, &sc->sc_disks, d_next) { 1140 if (disk->d_state == G_RAID_DISK_S_NONE) { 1141 g_raid_md_intel_start_disk(disk); 1142 break; 1143 } 1144 } 1145 } while (disk != NULL); 1146 1147 mdi->mdio_started = 1; 1148 G_RAID_DEBUG1(0, sc, "Array started."); 1149 g_raid_md_write_intel(md, NULL, NULL, NULL); 1150 1151 /* Pickup any STALE/SPARE disks to refill array if needed. 
*/ 1152 g_raid_md_intel_refill(sc); 1153 1154 TAILQ_FOREACH(vol, &sc->sc_volumes, v_next) { 1155 g_raid_event_send(vol, G_RAID_VOLUME_E_START, 1156 G_RAID_EVENT_VOLUME); 1157 } 1158 1159 callout_stop(&mdi->mdio_start_co); 1160 G_RAID_DEBUG1(1, sc, "root_mount_rel %p", mdi->mdio_rootmount); 1161 root_mount_rel(mdi->mdio_rootmount); 1162 mdi->mdio_rootmount = NULL; 1163} 1164 1165static void 1166g_raid_md_intel_new_disk(struct g_raid_disk *disk) 1167{ 1168 struct g_raid_softc *sc; 1169 struct g_raid_md_object *md; 1170 struct g_raid_md_intel_object *mdi; 1171 struct intel_raid_conf *pdmeta; 1172 struct g_raid_md_intel_perdisk *pd; 1173 1174 sc = disk->d_softc; 1175 md = sc->sc_md; 1176 mdi = (struct g_raid_md_intel_object *)md; 1177 pd = (struct g_raid_md_intel_perdisk *)disk->d_md_data; 1178 pdmeta = pd->pd_meta; 1179 1180 if (mdi->mdio_started) { 1181 if (g_raid_md_intel_start_disk(disk)) 1182 g_raid_md_write_intel(md, NULL, NULL, NULL); 1183 } else { 1184 /* If we haven't started yet - check metadata freshness. */ 1185 if (mdi->mdio_meta == NULL || 1186 ((int32_t)(pdmeta->generation - mdi->mdio_generation)) > 0) { 1187 G_RAID_DEBUG1(1, sc, "Newer disk"); 1188 if (mdi->mdio_meta != NULL) 1189 free(mdi->mdio_meta, M_MD_INTEL); 1190 mdi->mdio_meta = intel_meta_copy(pdmeta); 1191 mdi->mdio_generation = mdi->mdio_meta->generation; 1192 mdi->mdio_disks_present = 1; 1193 } else if (pdmeta->generation == mdi->mdio_generation) { 1194 mdi->mdio_disks_present++; 1195 G_RAID_DEBUG1(1, sc, "Matching disk (%d of %d up)", 1196 mdi->mdio_disks_present, 1197 mdi->mdio_meta->total_disks); 1198 } else { 1199 G_RAID_DEBUG1(1, sc, "Older disk"); 1200 } 1201 /* If we collected all needed disks - start array. 
*/ 1202 if (mdi->mdio_disks_present == mdi->mdio_meta->total_disks) 1203 g_raid_md_intel_start(sc); 1204 } 1205} 1206 1207static void 1208g_raid_intel_go(void *arg) 1209{ 1210 struct g_raid_softc *sc; 1211 struct g_raid_md_object *md; 1212 struct g_raid_md_intel_object *mdi; 1213 1214 sc = arg; 1215 md = sc->sc_md; 1216 mdi = (struct g_raid_md_intel_object *)md; 1217 if (!mdi->mdio_started) { 1218 G_RAID_DEBUG1(0, sc, "Force array start due to timeout."); 1219 g_raid_event_send(sc, G_RAID_NODE_E_START, 0); 1220 } 1221} 1222 1223static int 1224g_raid_md_create_intel(struct g_raid_md_object *md, struct g_class *mp, 1225 struct g_geom **gp) 1226{ 1227 struct g_raid_softc *sc; 1228 struct g_raid_md_intel_object *mdi; 1229 char name[16]; 1230 1231 mdi = (struct g_raid_md_intel_object *)md; 1232 mdi->mdio_config_id = arc4random(); 1233 mdi->mdio_generation = 0; 1234 snprintf(name, sizeof(name), "Intel-%08x", mdi->mdio_config_id); 1235 sc = g_raid_create_node(mp, name, md); 1236 if (sc == NULL) 1237 return (G_RAID_MD_TASTE_FAIL); 1238 md->mdo_softc = sc; 1239 *gp = sc->sc_geom; 1240 return (G_RAID_MD_TASTE_NEW); 1241} 1242 1243/* 1244 * Return the last N characters of the serial label. The Linux and 1245 * ataraid(7) code always uses the last 16 characters of the label to 1246 * store into the Intel meta format. Generalize this to N characters 1247 * since that's easy. Labels can be up to 20 characters for SATA drives 1248 * and up 251 characters for SAS drives. Since intel controllers don't 1249 * support SAS drives, just stick with the SATA limits for stack friendliness. 
/*
 * Fetch the "GEOM::ident" attribute of the consumer's provider and copy
 * its last 'serlen' characters into 'serial'.
 *
 * The copy is done with strncpy(), so 'serial' is zero-padded when the
 * ident is shorter than 'serlen' and is NOT NUL-terminated when the ident
 * is 'serlen' characters or longer.  Returns 0 on success or the error
 * from g_io_getattr().
 */
static int
g_raid_md_get_label(struct g_consumer *cp, char *serial, int serlen)
{
	char ident[24];
	int error, identlen, skip;

	identlen = sizeof(ident);
	error = g_io_getattr("GEOM::ident", cp, &identlen, ident);
	if (error != 0)
		return (error);

	/* Skip the leading part of an ident that is too long to fit. */
	identlen = strlen(ident);
	skip = (identlen > serlen) ? identlen - serlen : 0;
	strncpy(serial, ident + skip, serlen);
	return (0);
}
*/ 1292 meta = NULL; 1293 vendor = 0xffff; 1294 disk_pos = 0; 1295 if (g_access(cp, 1, 0, 0) != 0) 1296 return (G_RAID_MD_TASTE_FAIL); 1297 g_topology_unlock(); 1298 error = g_raid_md_get_label(cp, serial, sizeof(serial)); 1299 if (error != 0) { 1300 G_RAID_DEBUG(1, "Cannot get serial number from %s (error=%d).", 1301 pp->name, error); 1302 goto fail2; 1303 } 1304 len = 2; 1305 if (pp->geom->rank == 1) 1306 g_io_getattr("GEOM::hba_vendor", cp, &len, &vendor); 1307 meta = intel_meta_read(cp); 1308 g_topology_lock(); 1309 g_access(cp, -1, 0, 0); 1310 if (meta == NULL) { 1311 if (g_raid_aggressive_spare) { 1312 if (vendor != 0x8086) { 1313 G_RAID_DEBUG(1, 1314 "Intel vendor mismatch 0x%04x != 0x8086", 1315 vendor); 1316 } else { 1317 G_RAID_DEBUG(1, 1318 "No Intel metadata, forcing spare."); 1319 spare = 2; 1320 goto search; 1321 } 1322 } 1323 return (G_RAID_MD_TASTE_FAIL); 1324 } 1325 1326 /* Check this disk position in obtained metadata. */ 1327 disk_pos = intel_meta_find_disk(meta, serial); 1328 if (disk_pos < 0) { 1329 G_RAID_DEBUG(1, "Intel serial '%s' not found", serial); 1330 goto fail1; 1331 } 1332 if (intel_get_disk_sectors(&meta->disk[disk_pos]) != 1333 (pp->mediasize / pp->sectorsize)) { 1334 G_RAID_DEBUG(1, "Intel size mismatch %ju != %ju", 1335 intel_get_disk_sectors(&meta->disk[disk_pos]), 1336 (off_t)(pp->mediasize / pp->sectorsize)); 1337 goto fail1; 1338 } 1339 1340 /* Metadata valid. Print it. */ 1341 g_raid_md_intel_print(meta); 1342 G_RAID_DEBUG(1, "Intel disk position %d", disk_pos); 1343 spare = meta->disk[disk_pos].flags & INTEL_F_SPARE; 1344 1345search: 1346 /* Search for matching node. 
*/ 1347 sc = NULL; 1348 mdi1 = NULL; 1349 LIST_FOREACH(geom, &mp->geom, geom) { 1350 sc = geom->softc; 1351 if (sc == NULL) 1352 continue; 1353 if (sc->sc_stopping != 0) 1354 continue; 1355 if (sc->sc_md->mdo_class != md->mdo_class) 1356 continue; 1357 mdi1 = (struct g_raid_md_intel_object *)sc->sc_md; 1358 if (spare) { 1359 if (mdi1->mdio_incomplete) 1360 break; 1361 } else { 1362 if (mdi1->mdio_config_id == meta->config_id) 1363 break; 1364 } 1365 } 1366 1367 /* Found matching node. */ 1368 if (geom != NULL) { 1369 G_RAID_DEBUG(1, "Found matching array %s", sc->sc_name); 1370 result = G_RAID_MD_TASTE_EXISTING; 1371 1372 } else if (spare) { /* Not found needy node -- left for later. */ 1373 G_RAID_DEBUG(1, "Spare is not needed at this time"); 1374 goto fail1; 1375 1376 } else { /* Not found matching node -- create one. */ 1377 result = G_RAID_MD_TASTE_NEW; 1378 mdi->mdio_config_id = meta->config_id; 1379 snprintf(name, sizeof(name), "Intel-%08x", meta->config_id); 1380 sc = g_raid_create_node(mp, name, md); 1381 md->mdo_softc = sc; 1382 geom = sc->sc_geom; 1383 callout_init(&mdi->mdio_start_co, 1); 1384 callout_reset(&mdi->mdio_start_co, g_raid_start_timeout * hz, 1385 g_raid_intel_go, sc); 1386 mdi->mdio_rootmount = root_mount_hold("GRAID-Intel"); 1387 G_RAID_DEBUG1(1, sc, "root_mount_hold %p", mdi->mdio_rootmount); 1388 } 1389 1390 rcp = g_new_consumer(geom); 1391 g_attach(rcp, pp); 1392 if (g_access(rcp, 1, 1, 1) != 0) 1393 ; //goto fail1; 1394 1395 g_topology_unlock(); 1396 sx_xlock(&sc->sc_lock); 1397 1398 pd = malloc(sizeof(*pd), M_MD_INTEL, M_WAITOK | M_ZERO); 1399 pd->pd_meta = meta; 1400 pd->pd_disk_pos = -1; 1401 if (spare == 2) { 1402 memcpy(&pd->pd_disk_meta.serial[0], serial, INTEL_SERIAL_LEN); 1403 intel_set_disk_sectors(&pd->pd_disk_meta, 1404 pp->mediasize / pp->sectorsize); 1405 pd->pd_disk_meta.id = 0; 1406 pd->pd_disk_meta.flags = INTEL_F_SPARE; 1407 } else { 1408 pd->pd_disk_meta = meta->disk[disk_pos]; 1409 } 1410 disk = 
g_raid_create_disk(sc); 1411 disk->d_md_data = (void *)pd; 1412 disk->d_consumer = rcp; 1413 rcp->private = disk; 1414 1415 g_raid_get_disk_info(disk); 1416 1417 g_raid_md_intel_new_disk(disk); 1418 1419 sx_xunlock(&sc->sc_lock); 1420 g_topology_lock(); 1421 *gp = geom; 1422 return (result); 1423fail2: 1424 g_topology_lock(); 1425 g_access(cp, -1, 0, 0); 1426fail1: 1427 free(meta, M_MD_INTEL); 1428 return (G_RAID_MD_TASTE_FAIL); 1429} 1430 1431static int 1432g_raid_md_event_intel(struct g_raid_md_object *md, 1433 struct g_raid_disk *disk, u_int event) 1434{ 1435 struct g_raid_softc *sc; 1436 struct g_raid_subdisk *sd; 1437 struct g_raid_md_intel_object *mdi; 1438 struct g_raid_md_intel_perdisk *pd; 1439 1440 sc = md->mdo_softc; 1441 mdi = (struct g_raid_md_intel_object *)md; 1442 if (disk == NULL) { 1443 switch (event) { 1444 case G_RAID_NODE_E_START: 1445 if (!mdi->mdio_started) 1446 g_raid_md_intel_start(sc); 1447 return (0); 1448 } 1449 return (-1); 1450 } 1451 pd = (struct g_raid_md_intel_perdisk *)disk->d_md_data; 1452 switch (event) { 1453 case G_RAID_DISK_E_DISCONNECTED: 1454 /* If disk was assigned, just update statuses. */ 1455 if (pd->pd_disk_pos >= 0) { 1456 g_raid_change_disk_state(disk, G_RAID_DISK_S_OFFLINE); 1457 if (disk->d_consumer) { 1458 g_raid_kill_consumer(sc, disk->d_consumer); 1459 disk->d_consumer = NULL; 1460 } 1461 TAILQ_FOREACH(sd, &disk->d_subdisks, sd_next) { 1462 g_raid_change_subdisk_state(sd, 1463 G_RAID_SUBDISK_S_NONE); 1464 g_raid_event_send(sd, G_RAID_SUBDISK_E_DISCONNECTED, 1465 G_RAID_EVENT_SUBDISK); 1466 } 1467 } else { 1468 /* Otherwise -- delete. */ 1469 g_raid_change_disk_state(disk, G_RAID_DISK_S_NONE); 1470 g_raid_destroy_disk(disk); 1471 } 1472 1473 /* Write updated metadata to all disks. */ 1474 g_raid_md_write_intel(md, NULL, NULL, NULL); 1475 1476 /* Check if anything left except placeholders. 
*/ 1477 if (g_raid_ndisks(sc, -1) == 1478 g_raid_ndisks(sc, G_RAID_DISK_S_OFFLINE)) 1479 g_raid_destroy_node(sc, 0); 1480 else 1481 g_raid_md_intel_refill(sc); 1482 return (0); 1483 } 1484 return (-2); 1485} 1486 1487static int 1488g_raid_md_ctl_intel(struct g_raid_md_object *md, 1489 struct gctl_req *req) 1490{ 1491 struct g_raid_softc *sc; 1492 struct g_raid_volume *vol, *vol1; 1493 struct g_raid_subdisk *sd; 1494 struct g_raid_disk *disk; 1495 struct g_raid_md_intel_object *mdi; 1496 struct g_raid_md_intel_pervolume *pv; 1497 struct g_raid_md_intel_perdisk *pd; 1498 struct g_consumer *cp; 1499 struct g_provider *pp; 1500 char arg[16], serial[INTEL_SERIAL_LEN]; 1501 const char *nodename, *verb, *volname, *levelname, *diskname; 1502 char *tmp; 1503 int *nargs, *force; 1504 off_t off, size, sectorsize, strip, disk_sectors; 1505 intmax_t *sizearg, *striparg; 1506 int numdisks, i, len, level, qual, update; 1507 int error; 1508 1509 sc = md->mdo_softc; 1510 mdi = (struct g_raid_md_intel_object *)md; 1511 verb = gctl_get_param(req, "verb", NULL); 1512 nargs = gctl_get_paraml(req, "nargs", sizeof(*nargs)); 1513 error = 0; 1514 if (strcmp(verb, "label") == 0) { 1515 1516 if (*nargs < 4) { 1517 gctl_error(req, "Invalid number of arguments."); 1518 return (-1); 1519 } 1520 volname = gctl_get_asciiparam(req, "arg1"); 1521 if (volname == NULL) { 1522 gctl_error(req, "No volume name."); 1523 return (-2); 1524 } 1525 levelname = gctl_get_asciiparam(req, "arg2"); 1526 if (levelname == NULL) { 1527 gctl_error(req, "No RAID level."); 1528 return (-3); 1529 } 1530 if (strcasecmp(levelname, "RAID5") == 0) 1531 levelname = "RAID5-LA"; 1532 if (g_raid_volume_str2level(levelname, &level, &qual)) { 1533 gctl_error(req, "Unknown RAID level '%s'.", levelname); 1534 return (-4); 1535 } 1536 numdisks = *nargs - 3; 1537 force = gctl_get_paraml(req, "force", sizeof(*force)); 1538 if (!g_raid_md_intel_supported(level, qual, numdisks, 1539 force ? 
*force : 0)) { 1540 gctl_error(req, "Unsupported RAID level " 1541 "(0x%02x/0x%02x), or number of disks (%d).", 1542 level, qual, numdisks); 1543 return (-5); 1544 } 1545 1546 /* Search for disks, connect them and probe. */ 1547 size = 0x7fffffffffffffffllu; 1548 sectorsize = 0; 1549 for (i = 0; i < numdisks; i++) { 1550 snprintf(arg, sizeof(arg), "arg%d", i + 3); 1551 diskname = gctl_get_asciiparam(req, arg); 1552 if (diskname == NULL) { 1553 gctl_error(req, "No disk name (%s).", arg); 1554 error = -6; 1555 break; 1556 } 1557 if (strcmp(diskname, "NONE") == 0) { 1558 cp = NULL; 1559 pp = NULL; 1560 } else { 1561 g_topology_lock(); 1562 cp = g_raid_open_consumer(sc, diskname); 1563 if (cp == NULL) { 1564 gctl_error(req, "Can't open disk '%s'.", 1565 diskname); 1566 g_topology_unlock(); 1567 error = -7; 1568 break; 1569 } 1570 pp = cp->provider; 1571 } 1572 pd = malloc(sizeof(*pd), M_MD_INTEL, M_WAITOK | M_ZERO); 1573 pd->pd_disk_pos = i; 1574 disk = g_raid_create_disk(sc); 1575 disk->d_md_data = (void *)pd; 1576 disk->d_consumer = cp; 1577 if (cp == NULL) { 1578 strcpy(&pd->pd_disk_meta.serial[0], "NONE"); 1579 pd->pd_disk_meta.id = 0xffffffff; 1580 pd->pd_disk_meta.flags = INTEL_F_ASSIGNED; 1581 continue; 1582 } 1583 cp->private = disk; 1584 g_topology_unlock(); 1585 1586 error = g_raid_md_get_label(cp, 1587 &pd->pd_disk_meta.serial[0], INTEL_SERIAL_LEN); 1588 if (error != 0) { 1589 gctl_error(req, 1590 "Can't get serial for provider '%s'.", 1591 diskname); 1592 error = -8; 1593 break; 1594 } 1595 1596 g_raid_get_disk_info(disk); 1597 1598 intel_set_disk_sectors(&pd->pd_disk_meta, 1599 pp->mediasize / pp->sectorsize); 1600 if (size > pp->mediasize) 1601 size = pp->mediasize; 1602 if (sectorsize < pp->sectorsize) 1603 sectorsize = pp->sectorsize; 1604 pd->pd_disk_meta.id = 0; 1605 pd->pd_disk_meta.flags = INTEL_F_ASSIGNED | INTEL_F_ONLINE; 1606 } 1607 if (error != 0) 1608 return (error); 1609 1610 if (sectorsize <= 0) { 1611 gctl_error(req, "Can't get sector 
size."); 1612 return (-8); 1613 } 1614 1615 /* Reserve some space for metadata. */ 1616 size -= ((4096 + sectorsize - 1) / sectorsize) * sectorsize; 1617 1618 /* Handle size argument. */ 1619 len = sizeof(*sizearg); 1620 sizearg = gctl_get_param(req, "size", &len); 1621 if (sizearg != NULL && len == sizeof(*sizearg) && 1622 *sizearg > 0) { 1623 if (*sizearg > size) { 1624 gctl_error(req, "Size too big %lld > %lld.", 1625 (long long)*sizearg, (long long)size); 1626 return (-9); 1627 } 1628 size = *sizearg; 1629 } 1630 1631 /* Handle strip argument. */ 1632 strip = 131072; 1633 len = sizeof(*striparg); 1634 striparg = gctl_get_param(req, "strip", &len); 1635 if (striparg != NULL && len == sizeof(*striparg) && 1636 *striparg > 0) { 1637 if (*striparg < sectorsize) { 1638 gctl_error(req, "Strip size too small."); 1639 return (-10); 1640 } 1641 if (*striparg % sectorsize != 0) { 1642 gctl_error(req, "Incorrect strip size."); 1643 return (-11); 1644 } 1645 if (strip > 65535 * sectorsize) { 1646 gctl_error(req, "Strip size too big."); 1647 return (-12); 1648 } 1649 strip = *striparg; 1650 } 1651 1652 /* Round size down to strip or sector. */ 1653 if (level == G_RAID_VOLUME_RL_RAID1) 1654 size -= (size % sectorsize); 1655 else if (level == G_RAID_VOLUME_RL_RAID1E && 1656 (numdisks & 1) != 0) 1657 size -= (size % (2 * strip)); 1658 else 1659 size -= (size % strip); 1660 if (size <= 0) { 1661 gctl_error(req, "Size too small."); 1662 return (-13); 1663 } 1664 1665 /* We have all we need, create things: volume, ... 
*/ 1666 mdi->mdio_started = 1; 1667 vol = g_raid_create_volume(sc, volname, -1); 1668 pv = malloc(sizeof(*pv), M_MD_INTEL, M_WAITOK | M_ZERO); 1669 pv->pv_volume_pos = 0; 1670 vol->v_md_data = pv; 1671 vol->v_raid_level = level; 1672 vol->v_raid_level_qualifier = qual; 1673 vol->v_strip_size = strip; 1674 vol->v_disks_count = numdisks; 1675 if (level == G_RAID_VOLUME_RL_RAID0) 1676 vol->v_mediasize = size * numdisks; 1677 else if (level == G_RAID_VOLUME_RL_RAID1) 1678 vol->v_mediasize = size; 1679 else if (level == G_RAID_VOLUME_RL_RAID5) 1680 vol->v_mediasize = size * (numdisks - 1); 1681 else { /* RAID1E */ 1682 vol->v_mediasize = ((size * numdisks) / strip / 2) * 1683 strip; 1684 } 1685 vol->v_sectorsize = sectorsize; 1686 g_raid_start_volume(vol); 1687 1688 /* , and subdisks. */ 1689 TAILQ_FOREACH(disk, &sc->sc_disks, d_next) { 1690 pd = (struct g_raid_md_intel_perdisk *)disk->d_md_data; 1691 sd = &vol->v_subdisks[pd->pd_disk_pos]; 1692 sd->sd_disk = disk; 1693 sd->sd_offset = 0; 1694 sd->sd_size = size; 1695 TAILQ_INSERT_TAIL(&disk->d_subdisks, sd, sd_next); 1696 if (sd->sd_disk->d_consumer != NULL) { 1697 g_raid_change_disk_state(disk, 1698 G_RAID_DISK_S_ACTIVE); 1699 if (level == G_RAID_VOLUME_RL_RAID5) 1700 g_raid_change_subdisk_state(sd, 1701 G_RAID_SUBDISK_S_UNINITIALIZED); 1702 else 1703 g_raid_change_subdisk_state(sd, 1704 G_RAID_SUBDISK_S_ACTIVE); 1705 g_raid_event_send(sd, G_RAID_SUBDISK_E_NEW, 1706 G_RAID_EVENT_SUBDISK); 1707 } else { 1708 g_raid_change_disk_state(disk, G_RAID_DISK_S_OFFLINE); 1709 } 1710 } 1711 1712 /* Write metadata based on created entities. */ 1713 G_RAID_DEBUG1(0, sc, "Array started."); 1714 g_raid_md_write_intel(md, NULL, NULL, NULL); 1715 1716 /* Pickup any STALE/SPARE disks to refill array if needed. 
*/ 1717 g_raid_md_intel_refill(sc); 1718 1719 g_raid_event_send(vol, G_RAID_VOLUME_E_START, 1720 G_RAID_EVENT_VOLUME); 1721 return (0); 1722 } 1723 if (strcmp(verb, "add") == 0) { 1724 1725 if (*nargs != 3) { 1726 gctl_error(req, "Invalid number of arguments."); 1727 return (-1); 1728 } 1729 volname = gctl_get_asciiparam(req, "arg1"); 1730 if (volname == NULL) { 1731 gctl_error(req, "No volume name."); 1732 return (-2); 1733 } 1734 levelname = gctl_get_asciiparam(req, "arg2"); 1735 if (levelname == NULL) { 1736 gctl_error(req, "No RAID level."); 1737 return (-3); 1738 } 1739 if (strcasecmp(levelname, "RAID5") == 0) 1740 levelname = "RAID5-LA"; 1741 if (g_raid_volume_str2level(levelname, &level, &qual)) { 1742 gctl_error(req, "Unknown RAID level '%s'.", levelname); 1743 return (-4); 1744 } 1745 1746 /* Look for existing volumes. */ 1747 i = 0; 1748 vol1 = NULL; 1749 TAILQ_FOREACH(vol, &sc->sc_volumes, v_next) { 1750 vol1 = vol; 1751 i++; 1752 } 1753 if (i > 1) { 1754 gctl_error(req, "Maximum two volumes supported."); 1755 return (-6); 1756 } 1757 if (vol1 == NULL) { 1758 gctl_error(req, "At least one volume must exist."); 1759 return (-7); 1760 } 1761 1762 numdisks = vol1->v_disks_count; 1763 force = gctl_get_paraml(req, "force", sizeof(*force)); 1764 if (!g_raid_md_intel_supported(level, qual, numdisks, 1765 force ? *force : 0)) { 1766 gctl_error(req, "Unsupported RAID level " 1767 "(0x%02x/0x%02x), or number of disks (%d).", 1768 level, qual, numdisks); 1769 return (-5); 1770 } 1771 1772 /* Collect info about present disks. 
*/ 1773 size = 0x7fffffffffffffffllu; 1774 sectorsize = 512; 1775 for (i = 0; i < numdisks; i++) { 1776 disk = vol1->v_subdisks[i].sd_disk; 1777 pd = (struct g_raid_md_intel_perdisk *) 1778 disk->d_md_data; 1779 disk_sectors = 1780 intel_get_disk_sectors(&pd->pd_disk_meta); 1781 1782 if (disk_sectors * 512 < size) 1783 size = disk_sectors * 512; 1784 if (disk->d_consumer != NULL && 1785 disk->d_consumer->provider != NULL && 1786 disk->d_consumer->provider->sectorsize > 1787 sectorsize) { 1788 sectorsize = 1789 disk->d_consumer->provider->sectorsize; 1790 } 1791 } 1792 1793 /* Reserve some space for metadata. */ 1794 size -= ((4096 + sectorsize - 1) / sectorsize) * sectorsize; 1795 1796 /* Decide insert before or after. */ 1797 sd = &vol1->v_subdisks[0]; 1798 if (sd->sd_offset > 1799 size - (sd->sd_offset + sd->sd_size)) { 1800 off = 0; 1801 size = sd->sd_offset; 1802 } else { 1803 off = sd->sd_offset + sd->sd_size; 1804 size = size - (sd->sd_offset + sd->sd_size); 1805 } 1806 1807 /* Handle strip argument. */ 1808 strip = 131072; 1809 len = sizeof(*striparg); 1810 striparg = gctl_get_param(req, "strip", &len); 1811 if (striparg != NULL && len == sizeof(*striparg) && 1812 *striparg > 0) { 1813 if (*striparg < sectorsize) { 1814 gctl_error(req, "Strip size too small."); 1815 return (-10); 1816 } 1817 if (*striparg % sectorsize != 0) { 1818 gctl_error(req, "Incorrect strip size."); 1819 return (-11); 1820 } 1821 if (strip > 65535 * sectorsize) { 1822 gctl_error(req, "Strip size too big."); 1823 return (-12); 1824 } 1825 strip = *striparg; 1826 } 1827 1828 /* Round offset up to strip. */ 1829 if (off % strip != 0) { 1830 size -= strip - off % strip; 1831 off += strip - off % strip; 1832 } 1833 1834 /* Handle size argument. 
*/ 1835 len = sizeof(*sizearg); 1836 sizearg = gctl_get_param(req, "size", &len); 1837 if (sizearg != NULL && len == sizeof(*sizearg) && 1838 *sizearg > 0) { 1839 if (*sizearg > size) { 1840 gctl_error(req, "Size too big %lld > %lld.", 1841 (long long)*sizearg, (long long)size); 1842 return (-9); 1843 } 1844 size = *sizearg; 1845 } 1846 1847 /* Round size down to strip or sector. */ 1848 if (level == G_RAID_VOLUME_RL_RAID1) 1849 size -= (size % sectorsize); 1850 else 1851 size -= (size % strip); 1852 if (size <= 0) { 1853 gctl_error(req, "Size too small."); 1854 return (-13); 1855 } 1856 if (size > 0xffffffffllu * sectorsize) { 1857 gctl_error(req, "Size too big."); 1858 return (-14); 1859 } 1860 1861 /* We have all we need, create things: volume, ... */ 1862 vol = g_raid_create_volume(sc, volname, -1); 1863 pv = malloc(sizeof(*pv), M_MD_INTEL, M_WAITOK | M_ZERO); 1864 pv->pv_volume_pos = i; 1865 vol->v_md_data = pv; 1866 vol->v_raid_level = level; 1867 vol->v_raid_level_qualifier = qual; 1868 vol->v_strip_size = strip; 1869 vol->v_disks_count = numdisks; 1870 if (level == G_RAID_VOLUME_RL_RAID0) 1871 vol->v_mediasize = size * numdisks; 1872 else if (level == G_RAID_VOLUME_RL_RAID1) 1873 vol->v_mediasize = size; 1874 else if (level == G_RAID_VOLUME_RL_RAID5) 1875 vol->v_mediasize = size * (numdisks - 1); 1876 else { /* RAID1E */ 1877 vol->v_mediasize = ((size * numdisks) / strip / 2) * 1878 strip; 1879 } 1880 vol->v_sectorsize = sectorsize; 1881 g_raid_start_volume(vol); 1882 1883 /* , and subdisks. 
*/ 1884 for (i = 0; i < numdisks; i++) { 1885 disk = vol1->v_subdisks[i].sd_disk; 1886 sd = &vol->v_subdisks[i]; 1887 sd->sd_disk = disk; 1888 sd->sd_offset = off; 1889 sd->sd_size = size; 1890 TAILQ_INSERT_TAIL(&disk->d_subdisks, sd, sd_next); 1891 if (disk->d_state == G_RAID_DISK_S_ACTIVE) { 1892 if (level == G_RAID_VOLUME_RL_RAID5) 1893 g_raid_change_subdisk_state(sd, 1894 G_RAID_SUBDISK_S_UNINITIALIZED); 1895 else 1896 g_raid_change_subdisk_state(sd, 1897 G_RAID_SUBDISK_S_ACTIVE); 1898 g_raid_event_send(sd, G_RAID_SUBDISK_E_NEW, 1899 G_RAID_EVENT_SUBDISK); 1900 } 1901 } 1902 1903 /* Write metadata based on created entities. */ 1904 g_raid_md_write_intel(md, NULL, NULL, NULL); 1905 1906 g_raid_event_send(vol, G_RAID_VOLUME_E_START, 1907 G_RAID_EVENT_VOLUME); 1908 return (0); 1909 } 1910 if (strcmp(verb, "delete") == 0) { 1911 1912 nodename = gctl_get_asciiparam(req, "arg0"); 1913 if (nodename != NULL && strcasecmp(sc->sc_name, nodename) != 0) 1914 nodename = NULL; 1915 1916 /* Full node destruction. */ 1917 if (*nargs == 1 && nodename != NULL) { 1918 /* Check if some volume is still open. */ 1919 force = gctl_get_paraml(req, "force", sizeof(*force)); 1920 if (force != NULL && *force == 0 && 1921 g_raid_nopens(sc) != 0) { 1922 gctl_error(req, "Some volume is still open."); 1923 return (-4); 1924 } 1925 1926 TAILQ_FOREACH(disk, &sc->sc_disks, d_next) { 1927 if (disk->d_consumer) 1928 intel_meta_erase(disk->d_consumer); 1929 } 1930 g_raid_destroy_node(sc, 0); 1931 return (0); 1932 } 1933 1934 /* Destroy specified volume. If it was last - all node. */ 1935 if (*nargs > 2) { 1936 gctl_error(req, "Invalid number of arguments."); 1937 return (-1); 1938 } 1939 volname = gctl_get_asciiparam(req, 1940 nodename != NULL ? "arg1" : "arg0"); 1941 if (volname == NULL) { 1942 gctl_error(req, "No volume name."); 1943 return (-2); 1944 } 1945 1946 /* Search for volume. 
*/ 1947 TAILQ_FOREACH(vol, &sc->sc_volumes, v_next) { 1948 if (strcmp(vol->v_name, volname) == 0) 1949 break; 1950 pp = vol->v_provider; 1951 if (pp == NULL) 1952 continue; 1953 if (strcmp(pp->name, volname) == 0) 1954 break; 1955 if (strncmp(pp->name, "raid/", 5) == 0 && 1956 strcmp(pp->name + 5, volname) == 0) 1957 break; 1958 } 1959 if (vol == NULL) { 1960 i = strtol(volname, &tmp, 10); 1961 if (verb != volname && tmp[0] == 0) { 1962 TAILQ_FOREACH(vol, &sc->sc_volumes, v_next) { 1963 if (vol->v_global_id == i) 1964 break; 1965 } 1966 } 1967 } 1968 if (vol == NULL) { 1969 gctl_error(req, "Volume '%s' not found.", volname); 1970 return (-3); 1971 } 1972 1973 /* Check if volume is still open. */ 1974 force = gctl_get_paraml(req, "force", sizeof(*force)); 1975 if (force != NULL && *force == 0 && 1976 vol->v_provider_open != 0) { 1977 gctl_error(req, "Volume is still open."); 1978 return (-4); 1979 } 1980 1981 /* Destroy volume and potentially node. */ 1982 i = 0; 1983 TAILQ_FOREACH(vol1, &sc->sc_volumes, v_next) 1984 i++; 1985 if (i >= 2) { 1986 g_raid_destroy_volume(vol); 1987 g_raid_md_write_intel(md, NULL, NULL, NULL); 1988 } else { 1989 TAILQ_FOREACH(disk, &sc->sc_disks, d_next) { 1990 if (disk->d_consumer) 1991 intel_meta_erase(disk->d_consumer); 1992 } 1993 g_raid_destroy_node(sc, 0); 1994 } 1995 return (0); 1996 } 1997 if (strcmp(verb, "remove") == 0 || 1998 strcmp(verb, "fail") == 0) { 1999 if (*nargs < 2) { 2000 gctl_error(req, "Invalid number of arguments."); 2001 return (-1); 2002 } 2003 for (i = 1; i < *nargs; i++) { 2004 snprintf(arg, sizeof(arg), "arg%d", i); 2005 diskname = gctl_get_asciiparam(req, arg); 2006 if (diskname == NULL) { 2007 gctl_error(req, "No disk name (%s).", arg); 2008 error = -2; 2009 break; 2010 } 2011 if (strncmp(diskname, "/dev/", 5) == 0) 2012 diskname += 5; 2013 2014 TAILQ_FOREACH(disk, &sc->sc_disks, d_next) { 2015 if (disk->d_consumer != NULL && 2016 disk->d_consumer->provider != NULL && 2017 
strcmp(disk->d_consumer->provider->name, 2018 diskname) == 0) 2019 break; 2020 } 2021 if (disk == NULL) { 2022 gctl_error(req, "Disk '%s' not found.", 2023 diskname); 2024 error = -3; 2025 break; 2026 } 2027 2028 if (strcmp(verb, "fail") == 0) { 2029 g_raid_md_fail_disk_intel(md, NULL, disk); 2030 continue; 2031 } 2032 2033 pd = (struct g_raid_md_intel_perdisk *)disk->d_md_data; 2034 2035 /* Erase metadata on deleting disk. */ 2036 intel_meta_erase(disk->d_consumer); 2037 2038 /* If disk was assigned, just update statuses. */ 2039 if (pd->pd_disk_pos >= 0) { 2040 g_raid_change_disk_state(disk, G_RAID_DISK_S_OFFLINE); 2041 g_raid_kill_consumer(sc, disk->d_consumer); 2042 disk->d_consumer = NULL; 2043 TAILQ_FOREACH(sd, &disk->d_subdisks, sd_next) { 2044 g_raid_change_subdisk_state(sd, 2045 G_RAID_SUBDISK_S_NONE); 2046 g_raid_event_send(sd, G_RAID_SUBDISK_E_DISCONNECTED, 2047 G_RAID_EVENT_SUBDISK); 2048 } 2049 } else { 2050 /* Otherwise -- delete. */ 2051 g_raid_change_disk_state(disk, G_RAID_DISK_S_NONE); 2052 g_raid_destroy_disk(disk); 2053 } 2054 } 2055 2056 /* Write updated metadata to remaining disks. */ 2057 g_raid_md_write_intel(md, NULL, NULL, NULL); 2058 2059 /* Check if anything left except placeholders. */ 2060 if (g_raid_ndisks(sc, -1) == 2061 g_raid_ndisks(sc, G_RAID_DISK_S_OFFLINE)) 2062 g_raid_destroy_node(sc, 0); 2063 else 2064 g_raid_md_intel_refill(sc); 2065 return (error); 2066 } 2067 if (strcmp(verb, "insert") == 0) { 2068 if (*nargs < 2) { 2069 gctl_error(req, "Invalid number of arguments."); 2070 return (-1); 2071 } 2072 update = 0; 2073 for (i = 1; i < *nargs; i++) { 2074 /* Get disk name. */ 2075 snprintf(arg, sizeof(arg), "arg%d", i); 2076 diskname = gctl_get_asciiparam(req, arg); 2077 if (diskname == NULL) { 2078 gctl_error(req, "No disk name (%s).", arg); 2079 error = -3; 2080 break; 2081 } 2082 2083 /* Try to find provider with specified name. 
*/ 2084 g_topology_lock(); 2085 cp = g_raid_open_consumer(sc, diskname); 2086 if (cp == NULL) { 2087 gctl_error(req, "Can't open disk '%s'.", 2088 diskname); 2089 g_topology_unlock(); 2090 error = -4; 2091 break; 2092 } 2093 pp = cp->provider; 2094 g_topology_unlock(); 2095 2096 /* Read disk serial. */ 2097 error = g_raid_md_get_label(cp, 2098 &serial[0], INTEL_SERIAL_LEN); 2099 if (error != 0) { 2100 gctl_error(req, 2101 "Can't get serial for provider '%s'.", 2102 diskname); 2103 g_raid_kill_consumer(sc, cp); 2104 error = -7; 2105 break; 2106 } 2107 2108 pd = malloc(sizeof(*pd), M_MD_INTEL, M_WAITOK | M_ZERO); 2109 pd->pd_disk_pos = -1; 2110 2111 disk = g_raid_create_disk(sc); 2112 disk->d_consumer = cp; 2113 disk->d_md_data = (void *)pd; 2114 cp->private = disk; 2115 2116 g_raid_get_disk_info(disk); 2117 2118 memcpy(&pd->pd_disk_meta.serial[0], &serial[0], 2119 INTEL_SERIAL_LEN); 2120 intel_set_disk_sectors(&pd->pd_disk_meta, 2121 pp->mediasize / pp->sectorsize); 2122 pd->pd_disk_meta.id = 0; 2123 pd->pd_disk_meta.flags = INTEL_F_SPARE; 2124 2125 /* Welcome the "new" disk. */ 2126 update += g_raid_md_intel_start_disk(disk); 2127 if (disk->d_state == G_RAID_DISK_S_SPARE) { 2128 intel_meta_write_spare(cp, &pd->pd_disk_meta); 2129 g_raid_destroy_disk(disk); 2130 } else if (disk->d_state != G_RAID_DISK_S_ACTIVE) { 2131 gctl_error(req, "Disk '%s' doesn't fit.", 2132 diskname); 2133 g_raid_destroy_disk(disk); 2134 error = -8; 2135 break; 2136 } 2137 } 2138 2139 /* Write new metadata if we changed something. 
*/ 2140 if (update) 2141 g_raid_md_write_intel(md, NULL, NULL, NULL); 2142 return (error); 2143 } 2144 return (-100); 2145} 2146 2147static int 2148g_raid_md_write_intel(struct g_raid_md_object *md, struct g_raid_volume *tvol, 2149 struct g_raid_subdisk *tsd, struct g_raid_disk *tdisk) 2150{ 2151 struct g_raid_softc *sc; 2152 struct g_raid_volume *vol; 2153 struct g_raid_subdisk *sd; 2154 struct g_raid_disk *disk; 2155 struct g_raid_md_intel_object *mdi; 2156 struct g_raid_md_intel_pervolume *pv; 2157 struct g_raid_md_intel_perdisk *pd; 2158 struct intel_raid_conf *meta; 2159 struct intel_raid_vol *mvol; 2160 struct intel_raid_map *mmap0, *mmap1; 2161 off_t sectorsize = 512, pos; 2162 const char *version, *cv; 2163 int vi, sdi, numdisks, len, state, stale; 2164 2165 sc = md->mdo_softc; 2166 mdi = (struct g_raid_md_intel_object *)md; 2167 2168 if (sc->sc_stopping == G_RAID_DESTROY_HARD) 2169 return (0); 2170 2171 /* Bump generation. Newly written metadata may differ from previous. */ 2172 mdi->mdio_generation++; 2173 2174 /* Count number of disks. */ 2175 numdisks = 0; 2176 TAILQ_FOREACH(disk, &sc->sc_disks, d_next) { 2177 pd = (struct g_raid_md_intel_perdisk *)disk->d_md_data; 2178 if (pd->pd_disk_pos < 0) 2179 continue; 2180 numdisks++; 2181 if (disk->d_state == G_RAID_DISK_S_ACTIVE) { 2182 pd->pd_disk_meta.flags = 2183 INTEL_F_ONLINE | INTEL_F_ASSIGNED; 2184 } else if (disk->d_state == G_RAID_DISK_S_FAILED) { 2185 pd->pd_disk_meta.flags = INTEL_F_FAILED | 2186 INTEL_F_ASSIGNED; 2187 } else if (disk->d_state == G_RAID_DISK_S_DISABLED) { 2188 pd->pd_disk_meta.flags = INTEL_F_FAILED | 2189 INTEL_F_ASSIGNED | INTEL_F_DISABLED; 2190 } else { 2191 pd->pd_disk_meta.flags = INTEL_F_ASSIGNED; 2192 if (pd->pd_disk_meta.id != 0xffffffff) { 2193 pd->pd_disk_meta.id = 0xffffffff; 2194 len = strlen(pd->pd_disk_meta.serial); 2195 len = min(len, INTEL_SERIAL_LEN - 3); 2196 strcpy(pd->pd_disk_meta.serial + len, ":0"); 2197 } 2198 } 2199 } 2200 2201 /* Fill anchor and disks. 
*/ 2202 meta = malloc(INTEL_MAX_MD_SIZE(numdisks), 2203 M_MD_INTEL, M_WAITOK | M_ZERO); 2204 memcpy(&meta->intel_id[0], INTEL_MAGIC, sizeof(INTEL_MAGIC) - 1); 2205 meta->config_size = INTEL_MAX_MD_SIZE(numdisks); 2206 meta->config_id = mdi->mdio_config_id; 2207 meta->generation = mdi->mdio_generation; 2208 meta->attributes = INTEL_ATTR_CHECKSUM; 2209 meta->total_disks = numdisks; 2210 TAILQ_FOREACH(disk, &sc->sc_disks, d_next) { 2211 pd = (struct g_raid_md_intel_perdisk *)disk->d_md_data; 2212 if (pd->pd_disk_pos < 0) 2213 continue; 2214 meta->disk[pd->pd_disk_pos] = pd->pd_disk_meta; 2215 } 2216 2217 /* Fill volumes and maps. */ 2218 vi = 0; 2219 version = INTEL_VERSION_1000; 2220 TAILQ_FOREACH(vol, &sc->sc_volumes, v_next) { 2221 pv = vol->v_md_data; 2222 if (vol->v_stopping) 2223 continue; 2224 mvol = intel_get_volume(meta, vi); 2225 2226 /* New metadata may have different volumes order. */ 2227 pv->pv_volume_pos = vi; 2228 2229 for (sdi = 0; sdi < vol->v_disks_count; sdi++) { 2230 sd = &vol->v_subdisks[sdi]; 2231 if (sd->sd_disk != NULL) 2232 break; 2233 } 2234 if (sdi >= vol->v_disks_count) 2235 panic("No any filled subdisk in volume"); 2236 if (vol->v_mediasize >= 0x20000000000llu) 2237 meta->attributes |= INTEL_ATTR_2TB; 2238 if (vol->v_raid_level == G_RAID_VOLUME_RL_RAID0) 2239 meta->attributes |= INTEL_ATTR_RAID0; 2240 else if (vol->v_raid_level == G_RAID_VOLUME_RL_RAID1) 2241 meta->attributes |= INTEL_ATTR_RAID1; 2242 else if (vol->v_raid_level == G_RAID_VOLUME_RL_RAID5) 2243 meta->attributes |= INTEL_ATTR_RAID5; 2244 else 2245 meta->attributes |= INTEL_ATTR_RAID10; 2246 2247 if (meta->attributes & INTEL_ATTR_2TB) 2248 cv = INTEL_VERSION_1300; 2249 else if (pv->pv_cng) 2250 cv = INTEL_VERSION_1206; 2251 else if (vol->v_disks_count > 4) 2252 cv = INTEL_VERSION_1204; 2253 else if (vol->v_raid_level == G_RAID_VOLUME_RL_RAID5) 2254 cv = INTEL_VERSION_1202; 2255 else if (vol->v_disks_count > 2) 2256 cv = INTEL_VERSION_1201; 2257 else if (vi > 0) 2258 cv = 
INTEL_VERSION_1200; 2259 else if (vol->v_raid_level == G_RAID_VOLUME_RL_RAID1) 2260 cv = INTEL_VERSION_1100; 2261 else 2262 cv = INTEL_VERSION_1000; 2263 if (strcmp(cv, version) > 0) 2264 version = cv; 2265 2266 strlcpy(&mvol->name[0], vol->v_name, sizeof(mvol->name)); 2267 mvol->total_sectors = vol->v_mediasize / sectorsize; 2268 if (pv->pv_cng) { 2269 mvol->state |= INTEL_ST_CLONE_N_GO; 2270 if (pv->pv_cng_man_sync) 2271 mvol->state |= INTEL_ST_CLONE_MAN_SYNC; 2272 mvol->cng_master_disk = pv->pv_cng_master_disk; 2273 if (vol->v_subdisks[pv->pv_cng_master_disk].sd_state == 2274 G_RAID_SUBDISK_S_NONE) 2275 mvol->cng_state = INTEL_SNGST_MASTER_MISSING; 2276 else if (vol->v_state != G_RAID_VOLUME_S_OPTIMAL) 2277 mvol->cng_state = INTEL_SNGST_NEEDS_UPDATE; 2278 } 2279 2280 /* Check for any recovery in progress. */ 2281 state = G_RAID_SUBDISK_S_ACTIVE; 2282 pos = 0x7fffffffffffffffllu; 2283 stale = 0; 2284 for (sdi = 0; sdi < vol->v_disks_count; sdi++) { 2285 sd = &vol->v_subdisks[sdi]; 2286 if (sd->sd_state == G_RAID_SUBDISK_S_REBUILD) 2287 state = G_RAID_SUBDISK_S_REBUILD; 2288 else if (sd->sd_state == G_RAID_SUBDISK_S_RESYNC && 2289 state != G_RAID_SUBDISK_S_REBUILD) 2290 state = G_RAID_SUBDISK_S_RESYNC; 2291 else if (sd->sd_state == G_RAID_SUBDISK_S_STALE) 2292 stale = 1; 2293 if ((sd->sd_state == G_RAID_SUBDISK_S_REBUILD || 2294 sd->sd_state == G_RAID_SUBDISK_S_RESYNC) && 2295 sd->sd_rebuild_pos < pos) 2296 pos = sd->sd_rebuild_pos; 2297 } 2298 if (state == G_RAID_SUBDISK_S_REBUILD) { 2299 mvol->migr_state = 1; 2300 mvol->migr_type = INTEL_MT_REBUILD; 2301 } else if (state == G_RAID_SUBDISK_S_RESYNC) { 2302 mvol->migr_state = 1; 2303 /* mvol->migr_type = INTEL_MT_REPAIR; */ 2304 mvol->migr_type = INTEL_MT_VERIFY; 2305 mvol->state |= INTEL_ST_VERIFY_AND_FIX; 2306 } else 2307 mvol->migr_state = 0; 2308 mvol->dirty = (vol->v_dirty || stale); 2309 2310 mmap0 = intel_get_map(mvol, 0); 2311 2312 /* Write map / common part of two maps. 
*/ 2313 intel_set_map_offset(mmap0, sd->sd_offset / sectorsize); 2314 intel_set_map_disk_sectors(mmap0, sd->sd_size / sectorsize); 2315 mmap0->strip_sectors = vol->v_strip_size / sectorsize; 2316 if (vol->v_state == G_RAID_VOLUME_S_BROKEN) 2317 mmap0->status = INTEL_S_FAILURE; 2318 else if (vol->v_state == G_RAID_VOLUME_S_DEGRADED) 2319 mmap0->status = INTEL_S_DEGRADED; 2320 else if (g_raid_nsubdisks(vol, G_RAID_SUBDISK_S_UNINITIALIZED) 2321 == g_raid_nsubdisks(vol, -1)) 2322 mmap0->status = INTEL_S_UNINITIALIZED; 2323 else 2324 mmap0->status = INTEL_S_READY; 2325 if (vol->v_raid_level == G_RAID_VOLUME_RL_RAID0) 2326 mmap0->type = INTEL_T_RAID0; 2327 else if (vol->v_raid_level == G_RAID_VOLUME_RL_RAID1 || 2328 vol->v_raid_level == G_RAID_VOLUME_RL_RAID1E) 2329 mmap0->type = INTEL_T_RAID1; 2330 else 2331 mmap0->type = INTEL_T_RAID5; 2332 mmap0->total_disks = vol->v_disks_count; 2333 if (vol->v_raid_level == G_RAID_VOLUME_RL_RAID1) 2334 mmap0->total_domains = vol->v_disks_count; 2335 else if (vol->v_raid_level == G_RAID_VOLUME_RL_RAID1E) 2336 mmap0->total_domains = 2; 2337 else 2338 mmap0->total_domains = 1; 2339 intel_set_map_stripe_count(mmap0, 2340 sd->sd_size / vol->v_strip_size / mmap0->total_domains); 2341 mmap0->failed_disk_num = 0xff; 2342 mmap0->ddf = 1; 2343 2344 /* If there are two maps - copy common and update. */ 2345 if (mvol->migr_state) { 2346 intel_set_vol_curr_migr_unit(mvol, 2347 pos / vol->v_strip_size / mmap0->total_domains); 2348 mmap1 = intel_get_map(mvol, 1); 2349 memcpy(mmap1, mmap0, sizeof(struct intel_raid_map)); 2350 mmap0->status = INTEL_S_READY; 2351 } else 2352 mmap1 = NULL; 2353 2354 /* Write disk indexes and put rebuild flags. 
*/ 2355 for (sdi = 0; sdi < vol->v_disks_count; sdi++) { 2356 sd = &vol->v_subdisks[sdi]; 2357 pd = (struct g_raid_md_intel_perdisk *) 2358 sd->sd_disk->d_md_data; 2359 mmap0->disk_idx[sdi] = pd->pd_disk_pos; 2360 if (mvol->migr_state) 2361 mmap1->disk_idx[sdi] = pd->pd_disk_pos; 2362 if (sd->sd_state == G_RAID_SUBDISK_S_REBUILD || 2363 sd->sd_state == G_RAID_SUBDISK_S_RESYNC) { 2364 mmap1->disk_idx[sdi] |= INTEL_DI_RBLD; 2365 } else if (sd->sd_state != G_RAID_SUBDISK_S_ACTIVE && 2366 sd->sd_state != G_RAID_SUBDISK_S_STALE && 2367 sd->sd_state != G_RAID_SUBDISK_S_UNINITIALIZED) { 2368 mmap0->disk_idx[sdi] |= INTEL_DI_RBLD; 2369 if (mvol->migr_state) 2370 mmap1->disk_idx[sdi] |= INTEL_DI_RBLD; 2371 } 2372 if ((sd->sd_state == G_RAID_SUBDISK_S_NONE || 2373 sd->sd_state == G_RAID_SUBDISK_S_FAILED) && 2374 mmap0->failed_disk_num == 0xff) { 2375 mmap0->failed_disk_num = sdi; 2376 if (mvol->migr_state) 2377 mmap1->failed_disk_num = sdi; 2378 } 2379 } 2380 vi++; 2381 } 2382 meta->total_volumes = vi; 2383 if (strcmp(version, INTEL_VERSION_1300) != 0) 2384 meta->attributes &= INTEL_ATTR_CHECKSUM; 2385 memcpy(&meta->version[0], version, sizeof(INTEL_VERSION_1000) - 1); 2386 2387 /* We are done. Print meta data and store them to disks. 
*/ 2388 g_raid_md_intel_print(meta); 2389 if (mdi->mdio_meta != NULL) 2390 free(mdi->mdio_meta, M_MD_INTEL); 2391 mdi->mdio_meta = meta; 2392 TAILQ_FOREACH(disk, &sc->sc_disks, d_next) { 2393 pd = (struct g_raid_md_intel_perdisk *)disk->d_md_data; 2394 if (disk->d_state != G_RAID_DISK_S_ACTIVE) 2395 continue; 2396 if (pd->pd_meta != NULL) { 2397 free(pd->pd_meta, M_MD_INTEL); 2398 pd->pd_meta = NULL; 2399 } 2400 pd->pd_meta = intel_meta_copy(meta); 2401 intel_meta_write(disk->d_consumer, meta); 2402 } 2403 return (0); 2404} 2405 2406static int 2407g_raid_md_fail_disk_intel(struct g_raid_md_object *md, 2408 struct g_raid_subdisk *tsd, struct g_raid_disk *tdisk) 2409{ 2410 struct g_raid_softc *sc; 2411 struct g_raid_md_intel_object *mdi; 2412 struct g_raid_md_intel_perdisk *pd; 2413 struct g_raid_subdisk *sd; 2414 2415 sc = md->mdo_softc; 2416 mdi = (struct g_raid_md_intel_object *)md; 2417 pd = (struct g_raid_md_intel_perdisk *)tdisk->d_md_data; 2418 2419 /* We can't fail disk that is not a part of array now. */ 2420 if (pd->pd_disk_pos < 0) 2421 return (-1); 2422 2423 /* 2424 * Mark disk as failed in metadata and try to write that metadata 2425 * to the disk itself to prevent it's later resurrection as STALE. 2426 */ 2427 mdi->mdio_meta->disk[pd->pd_disk_pos].flags = INTEL_F_FAILED; 2428 pd->pd_disk_meta.flags = INTEL_F_FAILED; 2429 g_raid_md_intel_print(mdi->mdio_meta); 2430 if (tdisk->d_consumer) 2431 intel_meta_write(tdisk->d_consumer, mdi->mdio_meta); 2432 2433 /* Change states. */ 2434 g_raid_change_disk_state(tdisk, G_RAID_DISK_S_FAILED); 2435 TAILQ_FOREACH(sd, &tdisk->d_subdisks, sd_next) { 2436 g_raid_change_subdisk_state(sd, 2437 G_RAID_SUBDISK_S_FAILED); 2438 g_raid_event_send(sd, G_RAID_SUBDISK_E_FAILED, 2439 G_RAID_EVENT_SUBDISK); 2440 } 2441 2442 /* Write updated metadata to remaining disks. */ 2443 g_raid_md_write_intel(md, NULL, NULL, tdisk); 2444 2445 /* Check if anything left except placeholders. 
*/ 2446 if (g_raid_ndisks(sc, -1) == 2447 g_raid_ndisks(sc, G_RAID_DISK_S_OFFLINE)) 2448 g_raid_destroy_node(sc, 0); 2449 else 2450 g_raid_md_intel_refill(sc); 2451 return (0); 2452} 2453 2454static int 2455g_raid_md_free_disk_intel(struct g_raid_md_object *md, 2456 struct g_raid_disk *disk) 2457{ 2458 struct g_raid_md_intel_perdisk *pd; 2459 2460 pd = (struct g_raid_md_intel_perdisk *)disk->d_md_data; 2461 if (pd->pd_meta != NULL) { 2462 free(pd->pd_meta, M_MD_INTEL); 2463 pd->pd_meta = NULL; 2464 } 2465 free(pd, M_MD_INTEL); 2466 disk->d_md_data = NULL; 2467 return (0); 2468} 2469 2470static int 2471g_raid_md_free_volume_intel(struct g_raid_md_object *md, 2472 struct g_raid_volume *vol) 2473{ 2474 struct g_raid_md_intel_pervolume *pv; 2475 2476 pv = (struct g_raid_md_intel_pervolume *)vol->v_md_data; 2477 free(pv, M_MD_INTEL); 2478 vol->v_md_data = NULL; 2479 return (0); 2480} 2481 2482static int 2483g_raid_md_free_intel(struct g_raid_md_object *md) 2484{ 2485 struct g_raid_md_intel_object *mdi; 2486 2487 mdi = (struct g_raid_md_intel_object *)md; 2488 if (!mdi->mdio_started) { 2489 mdi->mdio_started = 0; 2490 callout_stop(&mdi->mdio_start_co); 2491 G_RAID_DEBUG1(1, md->mdo_softc, 2492 "root_mount_rel %p", mdi->mdio_rootmount); 2493 root_mount_rel(mdi->mdio_rootmount); 2494 mdi->mdio_rootmount = NULL; 2495 } 2496 if (mdi->mdio_meta != NULL) { 2497 free(mdi->mdio_meta, M_MD_INTEL); 2498 mdi->mdio_meta = NULL; 2499 } 2500 return (0); 2501} 2502 2503G_RAID_MD_DECLARE(intel, "Intel"); 2504