/* md_ddf.c — FreeBSD GEOM RAID DDF metadata module, revision 234899 */
/*-
 * Copyright (c) 2012 Alexander Motin <mav@FreeBSD.org>
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHORS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */

#include <sys/cdefs.h>
__FBSDID("$FreeBSD: head/sys/geom/raid/md_ddf.c 234899 2012-05-01 18:00:31Z mav $");

#include <sys/param.h>
#include <sys/bio.h>
#include <sys/endian.h>
#include <sys/kernel.h>
#include <sys/kobj.h>
#include <sys/limits.h>
#include <sys/lock.h>
#include <sys/malloc.h>
#include <sys/mutex.h>
#include <sys/systm.h>
#include <sys/time.h>
#include <sys/clock.h>
#include <geom/geom.h>
#include "geom/raid/g_raid.h"
#include "geom/raid/md_ddf.h"
#include "g_raid_md_if.h"

static MALLOC_DEFINE(M_MD_DDF, "md_ddf_data", "GEOM_RAID DDF metadata");

/* Absolute limit on disks tracked per volume (sizes bvdc[] below). */
#define	DDF_MAX_DISKS_HARD	128

/* Defaults used when we create fresh metadata ourselves. */
#define	DDF_MAX_DISKS		16
#define	DDF_MAX_VDISKS		7
#define	DDF_MAX_PARTITIONS	1

#define	DECADE	(3600*24*(365*10+2))	/* 10 years in seconds. */

/*
 * In-memory copy of the per-disk DDF metadata sections.  Each pointer is
 * a separately malloc'ed copy of one on-disk section; bbm may be NULL.
 */
struct ddf_meta {
	u_int	sectorsize;		/* Provider sector size. */
	u_int	bigendian;		/* Non-zero if on-disk data is BE. */
	struct ddf_header	*hdr;	/* DDF header. */
	struct ddf_cd_record	*cdr;	/* Controller data. */
	struct ddf_pd_record	*pdr;	/* Physical disk records. */
	struct ddf_vd_record	*vdr;	/* Virtual disk records. */
	void			*cr;	/* Configuration records. */
	struct ddf_pdd_record	*pdd;	/* Physical disk data. */
	struct ddf_bbm_log	*bbm;	/* Bad block management log. */
};

/*
 * Per-volume metadata: the VD entry plus the primary VDC record and one
 * BVD configuration record per secondary element.
 */
struct ddf_vol_meta {
	u_int	sectorsize;
	u_int	bigendian;
	struct ddf_header	*hdr;
	struct ddf_cd_record	*cdr;
	struct ddf_vd_entry	*vde;
	struct ddf_vdc_record	*vdc;
	struct ddf_vdc_record	*bvdc[DDF_MAX_DISKS_HARD];
};

struct g_raid_md_ddf_perdisk {
	struct ddf_meta	 pd_meta;
};

struct g_raid_md_ddf_pervolume {
	struct ddf_vol_meta		 pv_meta;
	int				 pv_started;
	struct callout			 pv_start_co;	/* STARTING state timer. */
};

struct g_raid_md_ddf_object {
	struct g_raid_md_object	 mdio_base;
	struct ddf_meta		 mdio_meta;
	int			 mdio_starting;
	struct callout		 mdio_start_co;	/* STARTING state timer. */
	int			 mdio_started;
	struct root_hold_token	*mdio_rootmount; /* Root mount delay token. */
};

static g_raid_md_create_t g_raid_md_create_ddf;
static g_raid_md_taste_t g_raid_md_taste_ddf;
static g_raid_md_event_t g_raid_md_event_ddf;
static g_raid_md_volume_event_t g_raid_md_volume_event_ddf;
static g_raid_md_ctl_t g_raid_md_ctl_ddf;
static g_raid_md_write_t g_raid_md_write_ddf;
static g_raid_md_fail_disk_t g_raid_md_fail_disk_ddf;
static g_raid_md_free_disk_t g_raid_md_free_disk_ddf;
static g_raid_md_free_volume_t g_raid_md_free_volume_ddf;
static g_raid_md_free_t g_raid_md_free_ddf;

static kobj_method_t g_raid_md_ddf_methods[] = {
	KOBJMETHOD(g_raid_md_create,	g_raid_md_create_ddf),
	KOBJMETHOD(g_raid_md_taste,	g_raid_md_taste_ddf),
	KOBJMETHOD(g_raid_md_event,	g_raid_md_event_ddf),
	KOBJMETHOD(g_raid_md_volume_event,	g_raid_md_volume_event_ddf),
	KOBJMETHOD(g_raid_md_ctl,	g_raid_md_ctl_ddf),
	KOBJMETHOD(g_raid_md_write,	g_raid_md_write_ddf),
	KOBJMETHOD(g_raid_md_fail_disk,	g_raid_md_fail_disk_ddf),
	KOBJMETHOD(g_raid_md_free_disk,	g_raid_md_free_disk_ddf),
	KOBJMETHOD(g_raid_md_free_volume,	g_raid_md_free_volume_ddf),
	KOBJMETHOD(g_raid_md_free,	g_raid_md_free_ddf),
	{ 0, 0 }
};

static struct g_raid_md_class g_raid_md_ddf_class = {
	"DDF",
	g_raid_md_ddf_methods,
	sizeof(struct g_raid_md_ddf_object),
	.mdc_priority = 100
};

/*
 * Endian-aware accessors for on-disk DDF fields.  DDF metadata may be
 * written big- or little-endian; (m)->bigendian selects the decoder.
 * Suffixes: no suffix - field path relative to (m); D - direct lvalue;
 * P - pointer to the field.
 */
#define GET8(m, f)	((m)->f)
#define GET16(m, f)	((m)->bigendian ? be16dec(&(m)->f) : le16dec(&(m)->f))
#define GET32(m, f)	((m)->bigendian ? be32dec(&(m)->f) : le32dec(&(m)->f))
#define GET64(m, f)	((m)->bigendian ? be64dec(&(m)->f) : le64dec(&(m)->f))
#define GET8D(m, f)	(f)
#define GET16D(m, f)	((m)->bigendian ? be16dec(&f) : le16dec(&f))
#define GET32D(m, f)	((m)->bigendian ? be32dec(&f) : le32dec(&f))
#define GET64D(m, f)	((m)->bigendian ? be64dec(&f) : le64dec(&f))
#define GET8P(m, f)	(*(f))
#define GET16P(m, f)	((m)->bigendian ? be16dec(f) : le16dec(f))
#define GET32P(m, f)	((m)->bigendian ? be32dec(f) : le32dec(f))
#define GET64P(m, f)	((m)->bigendian ? be64dec(f) : le64dec(f))

#define SET8P(m, f, v)							\
	(*(f) = (v))
#define SET16P(m, f, v)							\
	do {								\
		if ((m)->bigendian)					\
			be16enc((f), (v));				\
		else							\
			le16enc((f), (v));				\
	} while (0)
#define SET32P(m, f, v)							\
	do {								\
		if ((m)->bigendian)					\
			be32enc((f), (v));				\
		else							\
			le32enc((f), (v));				\
	} while (0)
#define SET64P(m, f, v)							\
	do {								\
		if ((m)->bigendian)					\
			be64enc((f), (v));				\
		else							\
			le64enc((f), (v));				\
	} while (0)
#define SET8(m, f, v)	SET8P((m), &((m)->f), (v))
#define SET16(m, f, v)	SET16P((m), &((m)->f), (v))
#define SET32(m, f, v)	SET32P((m), &((m)->f), (v))
#define SET64(m, f, v)	SET64P((m), &((m)->f), (v))
#define SET8D(m, f, v)	SET8P((m), &(f), (v))
#define SET16D(m, f, v)	SET16P((m), &(f), (v))
#define SET32D(m, f, v)	SET32P((m), &(f), (v))
#define SET64D(m, f, v)	SET64P((m), &(f), (v))

/* Number of configuration records in the CR section. */
#define GETCRNUM(m)	(GET32((m), hdr->cr_length) /			\
	GET16((m), hdr->Configuration_Record_Length))

/* Pointer to configuration record #n, viewed as a VDC record. */
#define GETVDCPTR(m, n)	((struct ddf_vdc_record *)((uint8_t *)(m)->cr +	\
	(n) * GET16((m), hdr->Configuration_Record_Length) *		\
	(m)->sectorsize))

/* Pointer to configuration record #n, viewed as a spare assignment. */
#define GETSAPTR(m, n)	((struct ddf_sa_record *)((uint8_t *)(m)->cr +	\
	(n) * GET16((m), hdr->Configuration_Record_Length) *		\
	(m)->sectorsize))

/*
 * Return 1 if the first size bytes of buf are all 0xff (DDF uses all-ones
 * to mark unused entries), 0 otherwise.
 */
static int
isff(uint8_t *buf, int size)
{
	int i;

	for (i = 0; i < size; i++)
		if (buf[i] != 0xff)
			return (0);
	return (1);
}

/*
 * Print a 24-byte DDF GUID: as a quoted string if it is printable ASCII
 * (NULs allowed), otherwise as hex bytes.  Debug helper, no trailing \n.
 */
static void
print_guid(uint8_t *buf)
{
	int i, ascii;

	ascii = 1;
	for (i = 0; i < 24; i++) {
		if (buf[i] != 0 && (buf[i] < ' ' || buf[i] > 127)) {
			ascii = 0;
			break;
		}
	}
	if (ascii) {
		printf("'%.24s'", buf);
	} else {
		for (i = 0; i < 24; i++)
			printf("%02x", buf[i]);
	}
}

static
void
g_raid_md_ddf_print(struct ddf_meta *meta)
{
	struct ddf_vdc_record *vdc;
	struct ddf_vuc_record *vuc;
	struct ddf_sa_record *sa;
	uint64_t *val2;
	uint32_t val;
	int i, j, k, num, num2;

	/* Dump every section of the metadata to the console (debug only). */
	if (g_raid_debug < 1)
		return;

	printf("********* DDF Metadata *********\n");
	printf("**** Header ****\n");
	printf("DDF_Header_GUID ");
	print_guid(meta->hdr->DDF_Header_GUID);
	printf("\n");
	printf("DDF_rev %8.8s\n", (char *)&meta->hdr->DDF_rev[0]);
	printf("Sequence_Number 0x%08x\n", GET32(meta, hdr->Sequence_Number));
	printf("TimeStamp 0x%08x\n", GET32(meta, hdr->TimeStamp));
	printf("Open_Flag 0x%02x\n", GET16(meta, hdr->Open_Flag));
	printf("Foreign_Flag 0x%02x\n", GET16(meta, hdr->Foreign_Flag));
	printf("Diskgrouping 0x%02x\n", GET16(meta, hdr->Diskgrouping));
	printf("Primary_Header_LBA %ju\n", GET64(meta, hdr->Primary_Header_LBA));
	printf("Secondary_Header_LBA %ju\n", GET64(meta, hdr->Secondary_Header_LBA));
	printf("WorkSpace_Length %u\n", GET32(meta, hdr->WorkSpace_Length));
	printf("WorkSpace_LBA %ju\n", GET64(meta, hdr->WorkSpace_LBA));
	printf("Max_PD_Entries %u\n", GET16(meta, hdr->Max_PD_Entries));
	printf("Max_VD_Entries %u\n", GET16(meta, hdr->Max_VD_Entries));
	printf("Max_Partitions %u\n", GET16(meta, hdr->Max_Partitions));
	printf("Configuration_Record_Length %u\n", GET16(meta, hdr->Configuration_Record_Length));
	printf("Max_Primary_Element_Entries %u\n", GET16(meta, hdr->Max_Primary_Element_Entries));
	/* Section locations are printed as "section:length" in sectors. */
	printf("Controller Data %u:%u\n", GET32(meta, hdr->cd_section), GET32(meta, hdr->cd_length));
	printf("Physical Disk %u:%u\n", GET32(meta, hdr->pdr_section), GET32(meta, hdr->pdr_length));
	printf("Virtual Disk %u:%u\n", GET32(meta, hdr->vdr_section), GET32(meta, hdr->vdr_length));
	printf("Configuration Recs %u:%u\n", GET32(meta, hdr->cr_section), GET32(meta, hdr->cr_length));
	printf("Physical Disk Recs %u:%u\n", GET32(meta, hdr->pdd_section), GET32(meta, hdr->pdd_length));
	printf("BBM Log %u:%u\n", GET32(meta, hdr->bbmlog_section), GET32(meta, hdr->bbmlog_length));
	printf("Diagnostic Space %u:%u\n", GET32(meta, hdr->Diagnostic_Space), GET32(meta, hdr->Diagnostic_Space_Length));
	printf("Vendor_Specific_Logs %u:%u\n", GET32(meta, hdr->Vendor_Specific_Logs), GET32(meta, hdr->Vendor_Specific_Logs_Length));
	printf("**** Controler Data ****\n");
	printf("Controller_GUID ");
	print_guid(meta->cdr->Controller_GUID);
	printf("\n");
	printf("Controller_Type 0x%04x%04x 0x%04x%04x\n",
	    GET16(meta, cdr->Controller_Type.Vendor_ID),
	    GET16(meta, cdr->Controller_Type.Device_ID),
	    GET16(meta, cdr->Controller_Type.SubVendor_ID),
	    GET16(meta, cdr->Controller_Type.SubDevice_ID));
	printf("Product_ID '%.16s'\n", (char *)&meta->cdr->Product_ID[0]);
	printf("**** Physical Disk Records ****\n");
	printf("Populated_PDEs %u\n", GET16(meta, pdr->Populated_PDEs));
	printf("Max_PDE_Supported %u\n", GET16(meta, pdr->Max_PDE_Supported));
	for (j = 0; j < GET16(meta, pdr->Populated_PDEs); j++) {
		/* Skip unused (all-0xff GUID) and deleted entries. */
		if (isff(meta->pdr->entry[j].PD_GUID, 24))
			continue;
		if (GET32(meta, pdr->entry[j].PD_Reference) == 0xffffffff)
			continue;
		printf("PD_GUID ");
		print_guid(meta->pdr->entry[j].PD_GUID);
		printf("\n");
		printf("PD_Reference 0x%08x\n",
		    GET32(meta, pdr->entry[j].PD_Reference));
		printf("PD_Type 0x%04x\n",
		    GET16(meta, pdr->entry[j].PD_Type));
		printf("PD_State 0x%04x\n",
		    GET16(meta, pdr->entry[j].PD_State));
		printf("Configured_Size %ju\n",
		    GET64(meta, pdr->entry[j].Configured_Size));
		printf("Block_Size %u\n",
		    GET16(meta, pdr->entry[j].Block_Size));
	}
	printf("**** Virtual Disk Records ****\n");
	printf("Populated_VDEs %u\n", GET16(meta, vdr->Populated_VDEs));
	printf("Max_VDE_Supported %u\n", GET16(meta, vdr->Max_VDE_Supported));
	for (j = 0; j < GET16(meta, vdr->Populated_VDEs); j++) {
		if (isff(meta->vdr->entry[j].VD_GUID, 24))
			continue;
		printf("VD_GUID ");
		print_guid(meta->vdr->entry[j].VD_GUID);
		printf("\n");
		printf("VD_Number 0x%04x\n",
		    GET16(meta, vdr->entry[j].VD_Number));
		printf("VD_Type 0x%04x\n",
		    GET16(meta, vdr->entry[j].VD_Type));
		printf("VD_State 0x%02x\n",
		    GET8(meta, vdr->entry[j].VD_State));
		printf("Init_State 0x%02x\n",
		    GET8(meta, vdr->entry[j].Init_State));
		printf("Drive_Failures_Remaining %u\n",
		    GET8(meta, vdr->entry[j].Drive_Failures_Remaining));
		printf("VD_Name '%.16s'\n",
		    (char *)&meta->vdr->entry[j].VD_Name);
	}
	printf("**** Configuration Records ****\n");
	num = GETCRNUM(meta);
	for (j = 0; j < num; j++) {
		vdc = GETVDCPTR(meta, j);
		/* Each record type is identified by its signature. */
		val = GET32D(meta, vdc->Signature);
		switch (val) {
		case DDF_VDCR_SIGNATURE:
			printf("** Virtual Disk Configuration **\n");
			printf("VD_GUID ");
			print_guid(vdc->VD_GUID);
			printf("\n");
			printf("Timestamp 0x%08x\n",
			    GET32D(meta, vdc->Timestamp));
			printf("Sequence_Number 0x%08x\n",
			    GET32D(meta, vdc->Sequence_Number));
			printf("Primary_Element_Count %u\n",
			    GET16D(meta, vdc->Primary_Element_Count));
			printf("Stripe_Size %u\n",
			    GET8D(meta, vdc->Stripe_Size));
			printf("Primary_RAID_Level 0x%02x\n",
			    GET8D(meta, vdc->Primary_RAID_Level));
			printf("RLQ 0x%02x\n",
			    GET8D(meta, vdc->RLQ));
			printf("Secondary_Element_Count %u\n",
			    GET8D(meta, vdc->Secondary_Element_Count));
			printf("Secondary_Element_Seq %u\n",
			    GET8D(meta, vdc->Secondary_Element_Seq));
			printf("Secondary_RAID_Level 0x%02x\n",
			    GET8D(meta, vdc->Secondary_RAID_Level));
			printf("Block_Count %ju\n",
			    GET64D(meta, vdc->Block_Count));
			printf("VD_Size %ju\n",
			    GET64D(meta, vdc->VD_Size));
			printf("Block_Size %u\n",
			    GET16D(meta, vdc->Block_Size));
			printf("Rotate_Parity_count %u\n",
			    GET8D(meta, vdc->Rotate_Parity_count));
			printf("Associated_Spare_Disks");
			for (i = 0; i < 8; i++) {
				if (GET32D(meta, vdc->Associated_Spares[i]) != 0xffffffff)
					printf(" 0x%08x", GET32D(meta, vdc->Associated_Spares[i]));
			}
			printf("\n");
			printf("Cache_Flags %016jx\n",
			    GET64D(meta, vdc->Cache_Flags));
			printf("BG_Rate %u\n",
			    GET8D(meta, vdc->BG_Rate));
			printf("MDF_Parity_Disks %u\n",
			    GET8D(meta, vdc->MDF_Parity_Disks));
			printf("MDF_Parity_Generator_Polynomial 0x%04x\n",
			    GET16D(meta, vdc->MDF_Parity_Generator_Polynomial));
			printf("MDF_Constant_Generation_Method 0x%02x\n",
			    GET8D(meta, vdc->MDF_Constant_Generation_Method));
			printf("Physical_Disks ");
			num2 = GET16D(meta, vdc->Primary_Element_Count);
			/*
			 * The per-disk starting-LBA array follows the
			 * fixed-size PD sequence array.
			 */
			val2 = (uint64_t *)&(vdc->Physical_Disk_Sequence[GET16(meta, hdr->Max_Primary_Element_Entries)]);
			for (i = 0; i < num2; i++)
				printf(" 0x%08x @ %ju",
				    GET32D(meta, vdc->Physical_Disk_Sequence[i]),
				    GET64P(meta, val2 + i));
			printf("\n");
			break;
		case DDF_VUCR_SIGNATURE:
			printf("** Vendor Unique Configuration **\n");
			vuc = (struct ddf_vuc_record *)vdc;
			printf("VD_GUID ");
			print_guid(vuc->VD_GUID);
			printf("\n");
			break;
		case DDF_SA_SIGNATURE:
			printf("** Spare Assignment Configuration **\n");
			sa = (struct ddf_sa_record *)vdc;
			printf("Timestamp 0x%08x\n",
			    GET32D(meta, sa->Timestamp));
			printf("Spare_Type 0x%02x\n",
			    GET8D(meta, sa->Spare_Type));
			printf("Populated_SAEs %u\n",
			    GET16D(meta, sa->Populated_SAEs));
			printf("MAX_SAE_Supported %u\n",
			    GET16D(meta, sa->MAX_SAE_Supported));
			for (i = 0; i < GET16D(meta, sa->Populated_SAEs); i++) {
				if (isff(sa->entry[i].VD_GUID, 24))
					continue;
				printf("VD_GUID ");
				for (k = 0; k < 24; k++)
					printf("%02x", sa->entry[i].VD_GUID[k]);
				printf("\n");
				printf("Secondary_Element %u\n",
				    GET16D(meta, sa->entry[i].Secondary_Element));
			}
			break;
		case 0x00000000:
		case 0xFFFFFFFF:
			/* Empty/unused record slots. */
			break;
		default:
			printf("Unknown configuration signature %08x\n", val);
			break;
		}
	}
	printf("**** Physical Disk Data ****\n");
	printf("PD_GUID ");
	print_guid(meta->pdd->PD_GUID);
	printf("\n");
	printf("PD_Reference 0x%08x\n",
	    GET32(meta, pdd->PD_Reference));
	printf("Forced_Ref_Flag 0x%02x\n",
	    GET8(meta, pdd->Forced_Ref_Flag));
	printf("Forced_PD_GUID_Flag 0x%02x\n",
	    GET8(meta, pdd->Forced_PD_GUID_Flag));
}

/*
 * Find a physical disk entry: by GUID if GUID != NULL, else by
 * PD_Reference if it is not the 0xffffffff sentinel, else the first free
 * (all-0xff GUID) slot.  In the free-slot case the populated count is
 * grown as needed.  Returns the entry index, or -1 if not found / full.
 */
static int
ddf_meta_find_pd(struct ddf_meta *meta, uint8_t *GUID, uint32_t PD_Reference)
{
	int i;

	for (i = 0; i < GET16(meta, pdr->Populated_PDEs); i++) {
		if (GUID != NULL) {
			if (memcmp(meta->pdr->entry[i].PD_GUID, GUID, 24) == 0)
				return (i);
		} else if (PD_Reference != 0xffffffff) {
			if (GET32(meta, pdr->entry[i].PD_Reference) == PD_Reference)
				return (i);
		} else
			if (isff(meta->pdr->entry[i].PD_GUID, 24))
				return (i);
	}
	if (GUID == NULL && PD_Reference == 0xffffffff) {
		/* Allocate a brand-new slot past the populated range. */
		if (i >= GET16(meta, pdr->Max_PDE_Supported))
			return (-1);
		SET16(meta, pdr->Populated_PDEs, i + 1);
		return (i);
	}
	return (-1);
}

/*
 * Find a virtual disk entry by GUID, or the first free slot when GUID is
 * NULL (growing the populated count).  Returns index or -1.
 */
static int
ddf_meta_find_vd(struct ddf_meta *meta, uint8_t *GUID)
{
	int i;

	for (i = 0; i < GET16(meta, vdr->Populated_VDEs); i++) {
		if (GUID != NULL) {
			if (memcmp(meta->vdr->entry[i].VD_GUID, GUID, 24) == 0)
				return (i);
		} else
			if (isff(meta->vdr->entry[i].VD_GUID, 24))
				return (i);
	}
	if (GUID == NULL) {
		if (i >= GET16(meta, vdr->Max_VDE_Supported))
			return (-1);
		SET16(meta, vdr->Populated_VDEs, i + 1);
		return (i);
	}
	return (-1);
}

/*
 * Find the VDC configuration record for the given VD GUID, or the first
 * free record slot when GUID is NULL.  Returns NULL if nothing matches.
 */
static struct ddf_vdc_record *
ddf_meta_find_vdc(struct ddf_meta *meta, uint8_t *GUID)
{
	struct ddf_vdc_record *vdc;
	int i, num;

	num = GETCRNUM(meta);
	for (i = 0; i < num; i++) {
		vdc = GETVDCPTR(meta, i);
		if (GUID != NULL) {
			if (GET32D(meta, vdc->Signature) == DDF_VDCR_SIGNATURE &&
			    memcmp(vdc->VD_GUID, GUID, 24) == 0)
				return (vdc);
		} else
			if (GET32D(meta, vdc->Signature) == 0xffffffff ||
			    GET32D(meta, vdc->Signature) == 0)
				return (vdc);
	}
	return (NULL);
}

/*
 * Count VDC records, optionally restricted to one VD GUID
 * (GUID == NULL counts all VDC records).
 */
static int
ddf_meta_count_vdc(struct ddf_meta *meta, uint8_t *GUID)
{
	struct ddf_vdc_record *vdc;
	int i, num, cnt;

	cnt = 0;
	num = GETCRNUM(meta);
	for (i = 0; i < num; i++) {
		vdc = GETVDCPTR(meta, i);
		if (GET32D(meta, vdc->Signature) != DDF_VDCR_SIGNATURE)
			continue;
		if (GUID == NULL || memcmp(vdc->VD_GUID, GUID, 24) == 0)
			cnt++;
	}
	return (cnt);
}

/*
 * Locate a disk by PD_Reference within a volume's BVD configuration
 * records.  Returns the flat disk index across all BVDs (or -1), and
 * optionally the BVD number and position within it via bvdp/posp.
 */
static int
ddf_meta_find_disk(struct ddf_vol_meta *vmeta, uint32_t PD_Reference,
    int *bvdp, int *posp)
{
	int i, bvd, pos;

	i = 0;
	for (bvd = 0; bvd < GET16(vmeta, vdc->Secondary_Element_Count); bvd++) {
		if (vmeta->bvdc[bvd] == NULL) {
			/*
			 * Missing BVD record: skip the slots it would
			 * occupy, assuming the primary VDC's element
			 * count.
			 */
			i += GET16(vmeta, vdc->Primary_Element_Count);	// XXX
			continue;
		}
		for (pos = 0; pos < GET16(vmeta, bvdc[bvd]->Primary_Element_Count);
		    pos++, i++) {
			if (GET32(vmeta, bvdc[bvd]->Physical_Disk_Sequence[pos]) ==
			    PD_Reference) {
				if (bvdp != NULL)
					*bvdp = bvd;
				if (posp != NULL)
					*posp = pos;
				return (i);
			}
		}
	}
	return (-1);
}

/*
 * Find the spare assignment record; when create is non-zero and none
 * exists, return the first free configuration record slot instead.
 */
static struct ddf_sa_record *
ddf_meta_find_sa(struct ddf_meta *meta, int create)
{
	struct ddf_sa_record *sa;
	int i, num;

	num = GETCRNUM(meta);
	for (i = 0; i < num; i++) {
		sa = GETSAPTR(meta, i);
		if (GET32D(meta, sa->Signature) == DDF_SA_SIGNATURE)
			return (sa);
	}
	if (create) {
		for (i = 0; i < num; i++) {
			sa = GETSAPTR(meta, i);
			if (GET32D(meta, sa->Signature) == 0xffffffff ||
			    GET32D(meta, sa->Signature) == 0)
				return (sa);
		}
	}
	return (NULL);
}

/*
 * Create fresh per-disk DDF metadata for disk, using sample (if it has a
 * header) as a template for sizes and endianness.
 */
static void
ddf_meta_create(struct g_raid_disk *disk, struct ddf_meta *sample)
{
	struct timespec ts;
	struct clocktime ct;
	struct g_raid_md_ddf_perdisk *pd;
	struct ddf_meta *meta;
struct ddf_pd_entry *pde; 567 off_t anchorlba; 568 u_int ss, pos, size; 569 int len, error; 570 char serial_buffer[24]; 571 572 if (sample->hdr == NULL) 573 sample = NULL; 574 575 pd = (struct g_raid_md_ddf_perdisk *)disk->d_md_data; 576 meta = &pd->pd_meta; 577 ss = disk->d_consumer->provider->sectorsize; 578 anchorlba = disk->d_consumer->provider->mediasize / ss - 1; 579 580 meta->sectorsize = ss; 581 meta->bigendian = sample ? sample->bigendian : 0; 582 getnanotime(&ts); 583 clock_ts_to_ct(&ts, &ct); 584 585 /* Header */ 586 meta->hdr = malloc(ss, M_MD_DDF, M_WAITOK); 587 memset(meta->hdr, 0xff, ss); 588 if (sample) { 589 memcpy(meta->hdr, sample->hdr, sizeof(struct ddf_header)); 590 if (ss != sample->sectorsize) { 591 SET32(meta, hdr->WorkSpace_Length, 592 (GET32(sample, hdr->WorkSpace_Length) * 593 sample->sectorsize + ss - 1) / ss); 594 SET16(meta, hdr->Configuration_Record_Length, 595 (GET16(sample, hdr->Configuration_Record_Length) * 596 sample->sectorsize + ss - 1) / ss); 597 SET32(meta, hdr->cd_length, 598 (GET32(sample, hdr->cd_length) * 599 sample->sectorsize + ss - 1) / ss); 600 SET32(meta, hdr->pdr_length, 601 (GET32(sample, hdr->pdr_length) * 602 sample->sectorsize + ss - 1) / ss); 603 SET32(meta, hdr->vdr_length, 604 (GET32(sample, hdr->vdr_length) * 605 sample->sectorsize + ss - 1) / ss); 606 SET32(meta, hdr->cr_length, 607 (GET32(sample, hdr->cr_length) * 608 sample->sectorsize + ss - 1) / ss); 609 SET32(meta, hdr->pdd_length, 610 (GET32(sample, hdr->pdd_length) * 611 sample->sectorsize + ss - 1) / ss); 612 SET32(meta, hdr->bbmlog_length, 613 (GET32(sample, hdr->bbmlog_length) * 614 sample->sectorsize + ss - 1) / ss); 615 SET32(meta, hdr->Diagnostic_Space, 616 (GET32(sample, hdr->bbmlog_length) * 617 sample->sectorsize + ss - 1) / ss); 618 SET32(meta, hdr->Vendor_Specific_Logs, 619 (GET32(sample, hdr->bbmlog_length) * 620 sample->sectorsize + ss - 1) / ss); 621 } 622 } else { 623 SET32(meta, hdr->Signature, DDF_HEADER_SIGNATURE); 624 
snprintf(meta->hdr->DDF_Header_GUID, 25, "FreeBSD %08x%08x", 625 (u_int)(ts.tv_sec - DECADE), arc4random()); 626 memcpy(meta->hdr->DDF_rev, "02.00.00", 8); 627 SET32(meta, hdr->TimeStamp, (ts.tv_sec - DECADE)); 628 SET32(meta, hdr->WorkSpace_Length, 16 * 1024 * 1024 / ss); 629 SET16(meta, hdr->Max_PD_Entries, DDF_MAX_DISKS - 1); 630 SET16(meta, hdr->Max_VD_Entries, DDF_MAX_VDISKS); 631 SET16(meta, hdr->Max_Partitions, DDF_MAX_PARTITIONS); 632 SET16(meta, hdr->Max_Primary_Element_Entries, DDF_MAX_DISKS); 633 SET16(meta, hdr->Configuration_Record_Length, 634 (sizeof(struct ddf_vdc_record) + 635 (4 + 8) * GET16(meta, hdr->Max_Primary_Element_Entries) + 636 ss - 1) / ss); 637 SET32(meta, hdr->cd_length, 638 (sizeof(struct ddf_cd_record) + ss - 1) / ss); 639 SET32(meta, hdr->pdr_length, 640 (sizeof(struct ddf_pd_record) + 641 sizeof(struct ddf_pd_entry) * 642 GET16(meta, hdr->Max_PD_Entries) + ss - 1) / ss); 643 SET32(meta, hdr->vdr_length, 644 (sizeof(struct ddf_vd_record) + 645 sizeof(struct ddf_vd_entry) * 646 GET16(meta, hdr->Max_VD_Entries) + ss - 1) / ss); 647 SET32(meta, hdr->cr_length, 648 GET16(meta, hdr->Configuration_Record_Length) * 649 (GET16(meta, hdr->Max_Partitions) + 1)); 650 SET32(meta, hdr->pdd_length, 651 (sizeof(struct ddf_pdd_record) + ss - 1) / ss); 652 SET32(meta, hdr->bbmlog_length, 0); 653 SET32(meta, hdr->Diagnostic_Space_Length, 0); 654 SET32(meta, hdr->Vendor_Specific_Logs_Length, 0); 655 } 656 pos = 1; 657 SET32(meta, hdr->cd_section, pos); 658 pos += GET32(meta, hdr->cd_length); 659 SET32(meta, hdr->pdr_section, pos); 660 pos += GET32(meta, hdr->pdr_length); 661 SET32(meta, hdr->vdr_section, pos); 662 pos += GET32(meta, hdr->vdr_length); 663 SET32(meta, hdr->cr_section, pos); 664 pos += GET32(meta, hdr->cr_length); 665 SET32(meta, hdr->pdd_section, pos); 666 pos += GET32(meta, hdr->pdd_length); 667 SET32(meta, hdr->bbmlog_section, 668 GET32(meta, hdr->bbmlog_length) != 0 ? 
pos : 0xffffffff); 669 pos += GET32(meta, hdr->bbmlog_length); 670 SET32(meta, hdr->Diagnostic_Space, 671 GET32(meta, hdr->Diagnostic_Space_Length) != 0 ? pos : 0xffffffff); 672 pos += GET32(meta, hdr->Diagnostic_Space_Length); 673 SET32(meta, hdr->Vendor_Specific_Logs, 674 GET32(meta, hdr->Vendor_Specific_Logs_Length) != 0 ? pos : 0xffffffff); 675 pos += min(GET32(meta, hdr->Vendor_Specific_Logs_Length), 1); 676 SET64(meta, hdr->Primary_Header_LBA, 677 anchorlba - pos); 678 SET64(meta, hdr->Secondary_Header_LBA, 679 0xffffffffffffffffULL); 680 SET64(meta, hdr->WorkSpace_LBA, 681 anchorlba + 1 - 32 * 1024 * 1024 / ss); 682 683 /* Controller Data */ 684 size = GET32(meta, hdr->cd_length) * ss; 685 meta->cdr = malloc(size, M_MD_DDF, M_WAITOK); 686 memset(meta->cdr, 0xff, size); 687 SET32(meta, cdr->Signature, DDF_CONTROLLER_DATA_SIGNATURE); 688 memcpy(meta->cdr->Controller_GUID, "FreeBSD GEOM RAID SERIAL", 24); 689 memcpy(meta->cdr->Product_ID, "FreeBSD GEOMRAID", 16); 690 691 /* Physical Drive Records. 
*/ 692 size = GET32(meta, hdr->pdr_length) * ss; 693 meta->pdr = malloc(size, M_MD_DDF, M_WAITOK); 694 memset(meta->pdr, 0xff, size); 695 SET32(meta, pdr->Signature, DDF_PDR_SIGNATURE); 696 SET16(meta, pdr->Populated_PDEs, 1); 697 SET16(meta, pdr->Max_PDE_Supported, 698 GET16(meta, hdr->Max_PD_Entries)); 699 700 pde = &meta->pdr->entry[0]; 701 len = sizeof(serial_buffer); 702 error = g_io_getattr("GEOM::ident", disk->d_consumer, &len, serial_buffer); 703 if (error == 0 && (len = strlen (serial_buffer)) >= 6 && len <= 20) 704 snprintf(pde->PD_GUID, 25, "DISK%20s", serial_buffer); 705 else 706 snprintf(pde->PD_GUID, 25, "DISK%04d%02d%02d%08x%04x", 707 ct.year, ct.mon, ct.day, 708 arc4random(), arc4random() & 0xffff); 709 SET32D(meta, pde->PD_Reference, arc4random()); 710 SET16D(meta, pde->PD_Type, DDF_PDE_GUID_FORCE); 711 SET16D(meta, pde->PD_State, 0); 712 SET64D(meta, pde->Configured_Size, 713 anchorlba + 1 - 32 * 1024 * 1024 / ss); 714 SET16D(meta, pde->Block_Size, ss); 715 716 /* Virtual Drive Records. */ 717 size = GET32(meta, hdr->vdr_length) * ss; 718 meta->vdr = malloc(size, M_MD_DDF, M_WAITOK); 719 memset(meta->vdr, 0xff, size); 720 SET32(meta, vdr->Signature, DDF_VD_RECORD_SIGNATURE); 721 SET32(meta, vdr->Populated_VDEs, 0); 722 SET16(meta, vdr->Max_VDE_Supported, 723 GET16(meta, hdr->Max_VD_Entries)); 724 725 /* Configuration Records. */ 726 size = GET32(meta, hdr->cr_length) * ss; 727 meta->cr = malloc(size, M_MD_DDF, M_WAITOK); 728 memset(meta->cr, 0xff, size); 729 730 /* Physical Disk Data. */ 731 size = GET32(meta, hdr->pdd_length) * ss; 732 meta->pdd = malloc(size, M_MD_DDF, M_WAITOK); 733 memset(meta->pdd, 0xff, size); 734 SET32(meta, pdd->Signature, DDF_PDD_SIGNATURE); 735 memcpy(meta->pdd->PD_GUID, pde->PD_GUID, 24); 736 SET32(meta, pdd->PD_Reference, GET32D(meta, pde->PD_Reference)); 737 SET8(meta, pdd->Forced_Ref_Flag, DDF_PDD_FORCED_REF); 738 SET8(meta, pdd->Forced_PD_GUID_Flag, DDF_PDD_FORCED_GUID); 739 740 /* Bad Block Management Log. 
*/ 741 if (GET32(meta, hdr->bbmlog_length) != 0) { 742 size = GET32(meta, hdr->bbmlog_length) * ss; 743 meta->bbm = malloc(size, M_MD_DDF, M_WAITOK); 744 memset(meta->bbm, 0xff, size); 745 SET32(meta, bbm->Signature, DDF_BBML_SIGNATURE); 746 SET32(meta, bbm->Entry_Count, 0); 747 SET32(meta, bbm->Spare_Block_Count, 0); 748 } 749} 750 751static void 752ddf_meta_copy(struct ddf_meta *dst, struct ddf_meta *src) 753{ 754 struct ddf_header *hdr; 755 u_int ss; 756 757 hdr = src->hdr; 758 dst->bigendian = src->bigendian; 759 ss = dst->sectorsize = src->sectorsize; 760 dst->hdr = malloc(ss, M_MD_DDF, M_WAITOK); 761 memcpy(dst->hdr, src->hdr, ss); 762 dst->cdr = malloc(GET32(src, hdr->cd_length) * ss, M_MD_DDF, M_WAITOK); 763 memcpy(dst->cdr, src->cdr, GET32(src, hdr->cd_length) * ss); 764 dst->pdr = malloc(GET32(src, hdr->pdr_length) * ss, M_MD_DDF, M_WAITOK); 765 memcpy(dst->pdr, src->pdr, GET32(src, hdr->pdr_length) * ss); 766 dst->vdr = malloc(GET32(src, hdr->vdr_length) * ss, M_MD_DDF, M_WAITOK); 767 memcpy(dst->vdr, src->vdr, GET32(src, hdr->vdr_length) * ss); 768 dst->cr = malloc(GET32(src, hdr->cr_length) * ss, M_MD_DDF, M_WAITOK); 769 memcpy(dst->cr, src->cr, GET32(src, hdr->cr_length) * ss); 770 dst->pdd = malloc(GET32(src, hdr->pdd_length) * ss, M_MD_DDF, M_WAITOK); 771 memcpy(dst->pdd, src->pdd, GET32(src, hdr->pdd_length) * ss); 772 if (src->bbm != NULL) { 773 dst->bbm = malloc(GET32(src, hdr->bbmlog_length) * ss, M_MD_DDF, M_WAITOK); 774 memcpy(dst->bbm, src->bbm, GET32(src, hdr->bbmlog_length) * ss); 775 } 776} 777 778static void 779ddf_meta_update(struct ddf_meta *meta, struct ddf_meta *src) 780{ 781 struct ddf_pd_entry *pde, *spde; 782 int i, j; 783 784 for (i = 0; i < GET16(src, pdr->Populated_PDEs); i++) { 785 spde = &src->pdr->entry[i]; 786 if (isff(spde->PD_GUID, 24)) 787 continue; 788 j = ddf_meta_find_pd(meta, NULL, 789 src->pdr->entry[i].PD_Reference); 790 if (j < 0) { 791 j = ddf_meta_find_pd(meta, NULL, 0xffffffff); 792 pde = &meta->pdr->entry[j]; 
793 memcpy(pde, spde, sizeof(*pde)); 794 } else { 795 pde = &meta->pdr->entry[j]; 796 SET16D(meta, pde->PD_State, 797 GET16D(meta, pde->PD_State) | 798 GET16D(src, pde->PD_State)); 799 } 800 } 801} 802 803static void 804ddf_meta_free(struct ddf_meta *meta) 805{ 806 807 if (meta->hdr != NULL) { 808 free(meta->hdr, M_MD_DDF); 809 meta->hdr = NULL; 810 } 811 if (meta->cdr != NULL) { 812 free(meta->cdr, M_MD_DDF); 813 meta->cdr = NULL; 814 } 815 if (meta->pdr != NULL) { 816 free(meta->pdr, M_MD_DDF); 817 meta->pdr = NULL; 818 } 819 if (meta->vdr != NULL) { 820 free(meta->vdr, M_MD_DDF); 821 meta->vdr = NULL; 822 } 823 if (meta->cr != NULL) { 824 free(meta->cr, M_MD_DDF); 825 meta->cr = NULL; 826 } 827 if (meta->pdd != NULL) { 828 free(meta->pdd, M_MD_DDF); 829 meta->pdd = NULL; 830 } 831 if (meta->bbm != NULL) { 832 free(meta->bbm, M_MD_DDF); 833 meta->bbm = NULL; 834 } 835} 836 837static void 838ddf_vol_meta_create(struct ddf_vol_meta *meta, struct ddf_meta *sample) 839{ 840 struct timespec ts; 841 struct clocktime ct; 842 struct ddf_header *hdr; 843 u_int ss, size; 844 845 hdr = sample->hdr; 846 meta->bigendian = sample->bigendian; 847 ss = meta->sectorsize = sample->sectorsize; 848 meta->hdr = malloc(ss, M_MD_DDF, M_WAITOK); 849 memcpy(meta->hdr, sample->hdr, ss); 850 meta->cdr = malloc(GET32(sample, hdr->cd_length) * ss, M_MD_DDF, M_WAITOK); 851 memcpy(meta->cdr, sample->cdr, GET32(sample, hdr->cd_length) * ss); 852 meta->vde = malloc(sizeof(struct ddf_vd_entry), M_MD_DDF, M_WAITOK); 853 memset(meta->vde, 0xff, sizeof(struct ddf_vd_entry)); 854 getnanotime(&ts); 855 clock_ts_to_ct(&ts, &ct); 856 snprintf(meta->vde->VD_GUID, 25, "FreeBSD%04d%02d%02d%08x%01x", 857 ct.year, ct.mon, ct.day, 858 arc4random(), arc4random() & 0xf); 859 size = GET16(sample, hdr->Configuration_Record_Length) * ss; 860 meta->vdc = malloc(size, M_MD_DDF, M_WAITOK); 861 memset(meta->vdc, 0xff, size); 862 SET32(meta, vdc->Signature, DDF_VDCR_SIGNATURE); 863 memcpy(meta->vdc->VD_GUID, 
meta->vde->VD_GUID, 24);
	SET32(meta, vdc->Sequence_Number, 0);
}

/*
 * Merge volume metadata read from one disk (src) into the cached
 * per-volume copy (dst).  The global records (header, controller data,
 * VD entry, primary VDC) are replaced when dst has none yet, or when the
 * source VDC is newer; the per-BVD configuration record is replaced under
 * the same rule for its own slot.  Sequence numbers are compared with
 * int32 subtraction so wraparound is handled.  Once the volume has
 * started (started != 0), freshness is ignored and only missing records
 * are filled in.
 */
static void
ddf_vol_meta_update(struct ddf_vol_meta *dst, struct ddf_meta *src,
    uint8_t *GUID, int started)
{
	struct ddf_header *hdr;
	struct ddf_vd_entry *vde;
	struct ddf_vdc_record *vdc;
	int vnew, bvnew, bvd, size;
	u_int ss;

	hdr = src->hdr;
	vde = &src->vdr->entry[ddf_meta_find_vd(src, GUID)];
	vdc = ddf_meta_find_vdc(src, GUID);
	/* Which Basic VD (secondary element) this disk's VDC describes. */
	bvd = GET8D(src, vdc->Secondary_Element_Seq);
	size = GET16(src, hdr->Configuration_Record_Length) * src->sectorsize;

	/* Is the source's primary VDC fresher than what we hold? */
	if (dst->vdc == NULL ||
	    (!started && ((int32_t)(GET32D(src, vdc->Sequence_Number) -
	    GET32(dst, vdc->Sequence_Number))) > 0))
		vnew = 1;
	else
		vnew = 0;

	/* Same test for the per-BVD configuration record. */
	if (dst->bvdc[bvd] == NULL ||
	    (!started && ((int32_t)(GET32D(src, vdc->Sequence_Number) -
	    GET32(dst, bvdc[bvd]->Sequence_Number))) > 0))
		bvnew = 1;
	else
		bvnew = 0;

	if (vnew) {
		dst->bigendian = src->bigendian;
		ss = dst->sectorsize = src->sectorsize;
		if (dst->hdr != NULL)
			free(dst->hdr, M_MD_DDF);
		dst->hdr = malloc(ss, M_MD_DDF, M_WAITOK);
		memcpy(dst->hdr, src->hdr, ss);
		if (dst->cdr != NULL)
			free(dst->cdr, M_MD_DDF);
		dst->cdr = malloc(GET32(src, hdr->cd_length) * ss, M_MD_DDF,
		    M_WAITOK);
		memcpy(dst->cdr, src->cdr, GET32(src, hdr->cd_length) * ss);
		if (dst->vde != NULL)
			free(dst->vde, M_MD_DDF);
		dst->vde = malloc(sizeof(struct ddf_vd_entry), M_MD_DDF,
		    M_WAITOK);
		memcpy(dst->vde, vde, sizeof(struct ddf_vd_entry));
		if (dst->vdc != NULL)
			free(dst->vdc, M_MD_DDF);
		dst->vdc = malloc(size, M_MD_DDF, M_WAITOK);
		memcpy(dst->vdc, vdc, size);
	}
	if (bvnew) {
		if (dst->bvdc[bvd] != NULL)
			free(dst->bvdc[bvd], M_MD_DDF);
		dst->bvdc[bvd] = malloc(size, M_MD_DDF, M_WAITOK);
		memcpy(dst->bvdc[bvd], vdc, size);
	}
}

/*
 * Release all records held by a per-volume metadata copy and NULL the
 * pointers so the structure can be safely refilled or freed again.
 */
static void
ddf_vol_meta_free(struct ddf_vol_meta *meta)
{
	int i;

	if (meta->hdr != NULL) {
		free(meta->hdr, M_MD_DDF);
		meta->hdr = NULL;
	}
	if (meta->cdr != NULL) {
		free(meta->cdr, M_MD_DDF);
		meta->cdr = NULL;
	}
	if (meta->vde != NULL) {
		free(meta->vde, M_MD_DDF);
		meta->vde = NULL;
	}
	if (meta->vdc != NULL) {
		free(meta->vdc, M_MD_DDF);
		meta->vdc = NULL;
	}
	for (i = 0; i < DDF_MAX_DISKS_HARD; i++) {
		if (meta->bvdc[i] != NULL) {
			free(meta->bvdc[i], M_MD_DDF);
			meta->bvdc[i] = NULL;
		}
	}
}

/*
 * Find the largest contiguous unused LBA extent on this physical disk.
 * Starts from the whole configured size and carves out every extent
 * referenced by a VDC record that mentions this disk; the biggest
 * remaining hole is returned in *off/*size (in sectors).
 * Returns 1 if any free extent was found, 0 otherwise.
 *
 * NOTE(review): splitting an interval grows n with no bound check
 * against the beg[32]/end[32] capacity — relies on fewer than 32
 * disjoint fragments; verify against the on-disk limits.
 */
static int
ddf_meta_unused_range(struct ddf_meta *meta, off_t *off, off_t *size)
{
	struct ddf_vdc_record *vdc;
	off_t beg[32], end[32], beg1, end1;
	uint64_t *offp;
	int i, j, n, num, pos;
	uint32_t ref;

	*off = 0;
	*size = 0;
	ref = GET32(meta, pdd->PD_Reference);
	pos = ddf_meta_find_pd(meta, NULL, ref);
	/* Start with one interval covering the whole configured size. */
	beg[0] = 0;
	end[0] = GET64(meta, pdr->entry[pos].Configured_Size);
	n = 1;
	num = GETCRNUM(meta);
	for (i = 0; i < num; i++) {
		vdc = GETVDCPTR(meta, i);
		if (GET32D(meta, vdc->Signature) != DDF_VDCR_SIGNATURE)
			continue;
		/* Does this VDC reference our disk at all? */
		for (pos = 0; pos < GET16D(meta, vdc->Primary_Element_Count); pos++)
			if (GET32D(meta, vdc->Physical_Disk_Sequence[pos]) == ref)
				break;
		if (pos == GET16D(meta, vdc->Primary_Element_Count))
			continue;
		/* Starting LBAs array follows the PD sequence array. */
		offp = (uint64_t *)&(vdc->Physical_Disk_Sequence[
		    GET16(meta, hdr->Max_Primary_Element_Entries)]);
		beg1 = GET64P(meta, offp + pos);
		end1 = beg1 + GET64D(meta, vdc->Block_Count);
		/* Subtract [beg1, end1) from every known free interval. */
		for (j = 0; j < n; j++) {
			if (beg[j] >= end1 || end[j] <= beg1 )
				continue;
			if (beg[j] < beg1 && end[j] > end1) {
				/* Used range in the middle: split. */
				beg[n] = end1;
				end[n] = end[j];
				end[j] = beg1;
				n++;
			} else if (beg[j] < beg1)
				end[j] = beg1;
			else
				beg[j] = end1;
		}
	}
	/* Report the biggest remaining hole. */
	for (j = 0; j < n; j++) {
		if (end[j] - beg[j] > *size) {
			*off = beg[j];
			*size = end[j] - beg[j];
		}
	}
	return ((*size > 0) ?
1 : 0);
}

/*
 * Copy the 16-byte, space-padded VD name of VD record 'num' into 'buf'
 * as a NUL-terminated C string with trailing 0x20 padding stripped.
 * 'buf' must hold at least 17 bytes.
 */
static void
ddf_meta_get_name(struct ddf_meta *meta, int num, char *buf)
{
	const char *b;
	int i;

	b = meta->vdr->entry[num].VD_Name;
	for (i = 15; i >= 0; i--)
		if (b[i] != 0x20)
			break;
	memcpy(buf, b, i + 1);
	buf[i + 1] = 0;
}

/*
 * Store 'buf' into the volume's VD_Name field, space-padded (0x20) to
 * the full 16 bytes per the DDF convention; longer names are truncated.
 */
static void
ddf_meta_put_name(struct ddf_vol_meta *meta, char *buf)
{
	int len;

	len = min(strlen(buf), 16);
	memset(meta->vde->VD_Name, 0x20, 16);
	memcpy(meta->vde->VD_Name, buf, len);
}

/*
 * Read and validate the full DDF metadata set from a provider.
 * Sequence: read the anchor header in the last sector, detect byte
 * order from its signature, CRC-check it, then read the primary header
 * and all sections it describes (controller data, PD/VD records,
 * configuration records, PD data, optional BBM log), falling back to
 * the secondary header on any read or validation error.
 * Returns 0 on success (meta filled in), errno otherwise (meta freed).
 */
static int
ddf_meta_read(struct g_consumer *cp, struct ddf_meta *meta)
{
	struct g_provider *pp;
	struct ddf_header *ahdr, *hdr;
	char *abuf, *buf;
	off_t plba, slba, lba;
	int error, len, i;
	u_int ss;
	uint32_t val;

	ddf_meta_free(meta);
	pp = cp->provider;
	ss = meta->sectorsize = pp->sectorsize;
	/* Read anchor block. */
	abuf = g_read_data(cp, pp->mediasize - ss, ss, &error);
	if (abuf == NULL) {
		G_RAID_DEBUG(1, "Cannot read metadata from %s (error=%d).",
		    pp->name, error);
		return (error);
	}
	ahdr = (struct ddf_header *)abuf;

	/* Check if this is an DDF RAID struct */
	if (be32dec(&ahdr->Signature) == DDF_HEADER_SIGNATURE)
		meta->bigendian = 1;
	else if (le32dec(&ahdr->Signature) == DDF_HEADER_SIGNATURE)
		meta->bigendian = 0;
	else {
		G_RAID_DEBUG(1, "DDF signature check failed on %s", pp->name);
		error = EINVAL;
		goto done;
	}
	if (ahdr->Header_Type != DDF_HEADER_ANCHOR) {
		G_RAID_DEBUG(1, "DDF header type check failed on %s", pp->name);
		error = EINVAL;
		goto done;
	}
	/*
	 * Temporarily hang the anchor off meta so the GET/SET accessors
	 * (which dereference meta->hdr) can be used on it.
	 */
	meta->hdr = ahdr;
	plba = GET64(meta, hdr->Primary_Header_LBA);
	slba = GET64(meta, hdr->Secondary_Header_LBA);
	val = GET32(meta, hdr->CRC);
	/* CRC is computed with the CRC field preset to all-ones. */
	SET32(meta, hdr->CRC, 0xffffffff);
	meta->hdr = NULL;
	if (crc32(ahdr, ss) != val) {
		G_RAID_DEBUG(1, "DDF CRC mismatch on %s", pp->name);
		error = EINVAL;
		goto done;
	}
	/* Headers occupy at least 6 sectors; sanity-check both LBAs. */
	if ((plba + 6) * ss >= pp->mediasize) {
		G_RAID_DEBUG(1, "DDF primary header LBA is wrong on %s", pp->name);
		error = EINVAL;
		goto done;
	}
	if (slba != -1 && (slba + 6) * ss >= pp->mediasize) {
		G_RAID_DEBUG(1, "DDF secondary header LBA is wrong on %s", pp->name);
		error = EINVAL;
		goto done;
	}
	lba = plba;

doread:
	error = 0;
	ddf_meta_free(meta);

	/* Read header block. */
	buf = g_read_data(cp, lba * ss, ss, &error);
	if (buf == NULL) {
readerror:
		G_RAID_DEBUG(1, "DDF %s metadata read error on %s (error=%d).",
		    (lba == plba) ? "primary" : "secondary", pp->name, error);
		/* On primary failure retry the whole read via secondary. */
		if (lba == plba && slba != -1) {
			lba = slba;
			goto doread;
		}
		G_RAID_DEBUG(1, "DDF metadata read error on %s.", pp->name);
		goto done;
	}
	meta->hdr = malloc(ss, M_MD_DDF, M_WAITOK);
	memcpy(meta->hdr, buf, ss);
	g_free(buf);
	hdr = meta->hdr;
	val = GET32(meta, hdr->CRC);
	SET32(meta, hdr->CRC, 0xffffffff);
	/* Header must match the anchor: signature, CRC, GUID and LBAs. */
	if (hdr->Signature != ahdr->Signature ||
	    crc32(meta->hdr, ss) != val ||
	    memcmp(hdr->DDF_Header_GUID, ahdr->DDF_Header_GUID, 24) ||
	    GET64(meta, hdr->Primary_Header_LBA) != plba ||
	    GET64(meta, hdr->Secondary_Header_LBA) != slba) {
hdrerror:
		G_RAID_DEBUG(1, "DDF %s metadata check failed on %s",
		    (lba == plba) ? "primary" : "secondary", pp->name);
		if (lba == plba && slba != -1) {
			lba = slba;
			goto doread;
		}
		G_RAID_DEBUG(1, "DDF metadata check failed on %s", pp->name);
		error = EINVAL;
		goto done;
	}
	if ((lba == plba && hdr->Header_Type != DDF_HEADER_PRIMARY) ||
	    (lba == slba && hdr->Header_Type != DDF_HEADER_SECONDARY))
		goto hdrerror;
	/*
	 * Compute the farthest section end (in sectors) to verify the
	 * whole metadata area fits on the media.
	 */
	len = 1;
	len = max(len, GET32(meta, hdr->cd_section) + GET32(meta, hdr->cd_length));
	len = max(len, GET32(meta, hdr->pdr_section) + GET32(meta, hdr->pdr_length));
	len = max(len, GET32(meta, hdr->vdr_section) + GET32(meta, hdr->vdr_length));
	len = max(len, GET32(meta, hdr->cr_section) + GET32(meta, hdr->cr_length));
	len = max(len, GET32(meta, hdr->pdd_section) + GET32(meta, hdr->pdd_length));
	if ((val = GET32(meta, hdr->bbmlog_section)) != 0xffffffff)
		len = max(len, val + GET32(meta, hdr->bbmlog_length));
	if ((val = GET32(meta, hdr->Diagnostic_Space)) != 0xffffffff)
		len = max(len, val + GET32(meta, hdr->Diagnostic_Space_Length));
	if ((val = GET32(meta, hdr->Vendor_Specific_Logs)) != 0xffffffff)
		len = max(len, val + GET32(meta, hdr->Vendor_Specific_Logs_Length));
	if ((plba + len) * ss >= pp->mediasize)
		goto hdrerror;
	if (slba != -1 && (slba + len) * ss >= pp->mediasize)
		goto hdrerror;
	/* Workaround for Adaptec implementation. */
	if (GET16(meta, hdr->Max_Primary_Element_Entries) == 0xffff) {
		SET16(meta, hdr->Max_Primary_Element_Entries,
		    min(GET16(meta, hdr->Max_PD_Entries),
		    (GET16(meta, hdr->Configuration_Record_Length) * ss - 512) / 12));
	}

	/* Read controller data. */
	buf = g_read_data(cp, (lba + GET32(meta, hdr->cd_section)) * ss,
	    GET32(meta, hdr->cd_length) * ss, &error);
	if (buf == NULL)
		goto readerror;
	meta->cdr = malloc(GET32(meta, hdr->cd_length) * ss, M_MD_DDF, M_WAITOK);
	memcpy(meta->cdr, buf, GET32(meta, hdr->cd_length) * ss);
	g_free(buf);
	if (GET32(meta, cdr->Signature) != DDF_CONTROLLER_DATA_SIGNATURE)
		goto hdrerror;

	/* Read physical disk records. */
	buf = g_read_data(cp, (lba + GET32(meta, hdr->pdr_section)) * ss,
	    GET32(meta, hdr->pdr_length) * ss, &error);
	if (buf == NULL)
		goto readerror;
	meta->pdr = malloc(GET32(meta, hdr->pdr_length) * ss, M_MD_DDF, M_WAITOK);
	memcpy(meta->pdr, buf, GET32(meta, hdr->pdr_length) * ss);
	g_free(buf);
	if (GET32(meta, pdr->Signature) != DDF_PDR_SIGNATURE)
		goto hdrerror;

	/* Read virtual disk records. */
	buf = g_read_data(cp, (lba + GET32(meta, hdr->vdr_section)) * ss,
	    GET32(meta, hdr->vdr_length) * ss, &error);
	if (buf == NULL)
		goto readerror;
	meta->vdr = malloc(GET32(meta, hdr->vdr_length) * ss, M_MD_DDF, M_WAITOK);
	memcpy(meta->vdr, buf, GET32(meta, hdr->vdr_length) * ss);
	g_free(buf);
	if (GET32(meta, vdr->Signature) != DDF_VD_RECORD_SIGNATURE)
		goto hdrerror;

	/* Read configuration records. */
	buf = g_read_data(cp, (lba + GET32(meta, hdr->cr_section)) * ss,
	    GET32(meta, hdr->cr_length) * ss, &error);
	if (buf == NULL)
		goto readerror;
	meta->cr = malloc(GET32(meta, hdr->cr_length) * ss, M_MD_DDF, M_WAITOK);
	memcpy(meta->cr, buf, GET32(meta, hdr->cr_length) * ss);
	g_free(buf);

	/* Read physical disk data. */
	buf = g_read_data(cp, (lba + GET32(meta, hdr->pdd_section)) * ss,
	    GET32(meta, hdr->pdd_length) * ss, &error);
	if (buf == NULL)
		goto readerror;
	meta->pdd = malloc(GET32(meta, hdr->pdd_length) * ss, M_MD_DDF, M_WAITOK);
	memcpy(meta->pdd, buf, GET32(meta, hdr->pdd_length) * ss);
	g_free(buf);
	if (GET32(meta, pdd->Signature) != DDF_PDD_SIGNATURE)
		goto hdrerror;
	/* This disk must be listed in its own PD records. */
	i = ddf_meta_find_pd(meta, NULL, GET32(meta, pdd->PD_Reference));
	if (i < 0)
		goto hdrerror;

	/* Read BBM Log. */
	if (GET32(meta, hdr->bbmlog_section) != 0xffffffff &&
	    GET32(meta, hdr->bbmlog_length) != 0) {
		buf = g_read_data(cp, (lba + GET32(meta, hdr->bbmlog_section)) * ss,
		    GET32(meta, hdr->bbmlog_length) * ss, &error);
		if (buf == NULL)
			goto readerror;
		meta->bbm = malloc(GET32(meta, hdr->bbmlog_length) * ss, M_MD_DDF, M_WAITOK);
		memcpy(meta->bbm, buf, GET32(meta, hdr->bbmlog_length) * ss);
		g_free(buf);
		if (GET32(meta, bbm->Signature) != DDF_BBML_SIGNATURE)
			goto hdrerror;
	}

done:
	free(abuf, M_MD_DDF);
	if (error != 0)
		ddf_meta_free(meta);
	return (error);
}

/*
 * Write the complete DDF metadata set to a provider: first the anchor
 * header in the last sector, then the primary copy and (if present) the
 * secondary copy of all sections.  Each header/section CRC is
 * recomputed in place (CRC field preset to all-ones) before writing.
 * Returns the last g_write_data() error, 0 on success.
 */
static int
ddf_meta_write(struct g_consumer *cp, struct ddf_meta *meta)
{
	struct g_provider *pp;
	struct ddf_vdc_record *vdc;
	off_t alba, plba, slba, lba;
	u_int ss, size;
	int error, i, num;

	pp = cp->provider;
	ss = pp->sectorsize;
	/* Anchor always lives in the very last sector. */
	lba = alba = pp->mediasize / ss - 1;
	plba = GET64(meta, hdr->Primary_Header_LBA);
	slba = GET64(meta, hdr->Secondary_Header_LBA);

next:
	SET8(meta, hdr->Header_Type, (lba == alba) ? DDF_HEADER_ANCHOR :
	    (lba == plba) ?
DDF_HEADER_PRIMARY : DDF_HEADER_SECONDARY);
	SET32(meta, hdr->CRC, 0xffffffff);
	SET32(meta, hdr->CRC, crc32(meta->hdr, ss));
	error = g_write_data(cp, lba * ss, meta->hdr, ss);
	if (error != 0) {
err:
		G_RAID_DEBUG(1, "Cannot write metadata to %s (error=%d).",
		    pp->name, error);
		/* A failed anchor write still lets us try the copies. */
		if (lba != alba)
			goto done;
	}
	if (lba == alba) {
		/* Anchor written; now write the primary copy. */
		lba = plba;
		goto next;
	}

	/* Controller data section. */
	size = GET32(meta, hdr->cd_length) * ss;
	SET32(meta, cdr->CRC, 0xffffffff);
	SET32(meta, cdr->CRC, crc32(meta->cdr, size));
	error = g_write_data(cp, (lba + GET32(meta, hdr->cd_section)) * ss,
	    meta->cdr, size);
	if (error != 0)
		goto err;

	/* Physical disk records section. */
	size = GET32(meta, hdr->pdr_length) * ss;
	SET32(meta, pdr->CRC, 0xffffffff);
	SET32(meta, pdr->CRC, crc32(meta->pdr, size));
	error = g_write_data(cp, (lba + GET32(meta, hdr->pdr_section)) * ss,
	    meta->pdr, size);
	if (error != 0)
		goto err;

	/* Virtual disk records section. */
	size = GET32(meta, hdr->vdr_length) * ss;
	SET32(meta, vdr->CRC, 0xffffffff);
	SET32(meta, vdr->CRC, crc32(meta->vdr, size));
	error = g_write_data(cp, (lba + GET32(meta, hdr->vdr_section)) * ss,
	    meta->vdr, size);
	if (error != 0)
		goto err;

	/* Configuration records: one CRC per record. */
	size = GET16(meta, hdr->Configuration_Record_Length) * ss;
	num = GETCRNUM(meta);
	for (i = 0; i < num; i++) {
		vdc = GETVDCPTR(meta, i);
		SET32D(meta, vdc->CRC, 0xffffffff);
		SET32D(meta, vdc->CRC, crc32(vdc, size));
	}
	error = g_write_data(cp, (lba + GET32(meta, hdr->cr_section)) * ss,
	    meta->cr, size * num);
	if (error != 0)
		goto err;

	/* Physical disk data section. */
	size = GET32(meta, hdr->pdd_length) * ss;
	SET32(meta, pdd->CRC, 0xffffffff);
	SET32(meta, pdd->CRC, crc32(meta->pdd, size));
	error = g_write_data(cp, (lba + GET32(meta, hdr->pdd_section)) * ss,
	    meta->pdd, size);
	if (error != 0)
		goto err;

	/* Optional BBM log section. */
	if (GET32(meta, hdr->bbmlog_length) != 0) {
		size = GET32(meta, hdr->bbmlog_length) * ss;
		SET32(meta, bbm->CRC, 0xffffffff);
		SET32(meta, bbm->CRC, crc32(meta->bbm, size));
		error = g_write_data(cp,
		    (lba + GET32(meta, hdr->bbmlog_section)) * ss,
		    meta->bbm, size);
		if (error != 0)
			goto err;
	}

done:
	/* After the primary copy, repeat everything for the secondary. */
	if (lba == plba && slba != -1) {
		lba = slba;
		goto next;
	}

	return (error);
}

/*
 * Erase DDF metadata by zeroing the anchor (last) sector of the
 * provider; without a valid anchor the rest is unreachable.
 */
static int
ddf_meta_erase(struct g_consumer *cp)
{
	struct g_provider *pp;
	char *buf;
	int error;

	pp = cp->provider;
	buf = malloc(pp->sectorsize, M_MD_DDF, M_WAITOK | M_ZERO);
	error = g_write_data(cp, pp->mediasize - pp->sectorsize,
	    buf, pp->sectorsize);
	if (error != 0) {
		G_RAID_DEBUG(1, "Cannot erase metadata on %s (error=%d).",
		    pp->name, error);
	}
	free(buf, M_MD_DDF);
	return (error);
}

/*
 * Look up a volume of this node by its 24-byte VD GUID.
 * Returns NULL if not found.
 */
static struct g_raid_volume *
g_raid_md_ddf_get_volume(struct g_raid_softc *sc, uint8_t *GUID)
{
	struct g_raid_volume *vol;
	struct g_raid_md_ddf_pervolume *pv;

	TAILQ_FOREACH(vol, &sc->sc_volumes, v_next) {
		pv = vol->v_md_data;
		if (memcmp(pv->pv_meta.vde->VD_GUID, GUID, 24) == 0)
			break;
	}
	return (vol);
}

/*
 * Look up a disk of this node either by its 24-byte PD GUID (if GUID is
 * non-NULL) or by its 32-bit PD reference 'id'.  Returns NULL if not
 * found.
 */
static struct g_raid_disk *
g_raid_md_ddf_get_disk(struct g_raid_softc *sc, uint8_t *GUID, uint32_t id)
{
	struct g_raid_disk *disk;
	struct g_raid_md_ddf_perdisk *pd;
	struct ddf_meta *meta;

	TAILQ_FOREACH(disk, &sc->sc_disks, d_next) {
		pd = (struct g_raid_md_ddf_perdisk *)disk->d_md_data;
		meta = &pd->pd_meta;
		if (GUID != NULL) {
			if (memcmp(meta->pdd->PD_GUID, GUID, 24) == 0)
				break;
		} else {
			if (GET32(meta, pdd->PD_Reference) == id)
				break;
		}
	}
	return (disk);
}

/*
 * Destroy volumes that have no present subdisks left (all in state
 * NONE).  Returns 1 if at least one volume was destroyed.
 */
static int
g_raid_md_ddf_purge_volumes(struct g_raid_softc *sc)
{
	struct g_raid_volume *vol, *tvol;
	struct g_raid_md_ddf_pervolume *pv;
	int i, res;

	res = 0;
	TAILQ_FOREACH_SAFE(vol, &sc->sc_volumes, v_next, tvol) {
		pv = vol->v_md_data;
		if (vol->v_stopping)
			continue;
		for (i = 0; i < vol->v_disks_count; i++) {
			if (vol->v_subdisks[i].sd_state != G_RAID_SUBDISK_S_NONE)
				break;
		}
		if (i >= vol->v_disks_count) {
			g_raid_destroy_volume(vol);
			res = 1;
		}
	}
	return (res);
}

/*
 * Purge disks whose metadata no longer references any live volume.
 * The implementation below is disabled (#if 0) — it refers to fields
 * (pd_subdisks, pd_meta[] array) from a different per-disk layout and
 * does not match this module's structures; currently a no-op.
 */
static int
g_raid_md_ddf_purge_disks(struct g_raid_softc *sc)
{
#if 0
	struct g_raid_disk *disk, *tdisk;
	struct g_raid_volume *vol;
	struct g_raid_md_ddf_perdisk *pd;
	int i, j, res;

	res = 0;
	TAILQ_FOREACH_SAFE(disk, &sc->sc_disks, d_next, tdisk) {
		if (disk->d_state == G_RAID_DISK_S_SPARE)
			continue;
		pd = (struct g_raid_md_ddf_perdisk *)disk->d_md_data;

		/* Scan for deleted volumes. */
		for (i = 0; i < pd->pd_subdisks; ) {
			vol = g_raid_md_ddf_get_volume(sc,
			    pd->pd_meta[i]->volume_id);
			if (vol != NULL && !vol->v_stopping) {
				i++;
				continue;
			}
			free(pd->pd_meta[i], M_MD_DDF);
			for (j = i; j < pd->pd_subdisks - 1; j++)
				pd->pd_meta[j] = pd->pd_meta[j + 1];
			pd->pd_meta[DDF_MAX_SUBDISKS - 1] = NULL;
			pd->pd_subdisks--;
			pd->pd_updated = 1;
		}

		/* If there is no metadata left - erase and delete disk. */
		if (pd->pd_subdisks == 0) {
			ddf_meta_erase(disk->d_consumer);
			g_raid_destroy_disk(disk);
			res = 1;
		}
	}
	return (res);
#endif
	return (0);
}

/*
 * Validate a (RAID level, qualifier, disk count) combination for DDF.
 * Returns 1 if supported; 'force' relaxes the minimum-disk checks for
 * degraded/unusual configurations where noted.
 */
static int
g_raid_md_ddf_supported(int level, int qual, int disks, int force)
{

	if (disks > DDF_MAX_DISKS_HARD)
		return (0);
	switch (level) {
	case G_RAID_VOLUME_RL_RAID0:
		if (qual != G_RAID_VOLUME_RLQ_NONE)
			return (0);
		if (disks < 1)
			return (0);
		if (!force && disks < 2)
			return (0);
		break;
	case G_RAID_VOLUME_RL_RAID1:
		if (disks < 1)
			return (0);
		if (qual == G_RAID_VOLUME_RLQ_R1SM) {
			/* Simple mirror: two disks. */
			if (!force && disks != 2)
				return (0);
		} else if (qual == G_RAID_VOLUME_RLQ_R1MM) {
			/* Multi mirror: three disks. */
			if (!force && disks != 3)
				return (0);
		} else
			return (0);
		break;
	case G_RAID_VOLUME_RL_RAID3:
		if (qual != G_RAID_VOLUME_RLQ_R3P0 &&
		    qual != G_RAID_VOLUME_RLQ_R3PN)
			return (0);
		if (disks < 3)
			return (0);
		break;
	case G_RAID_VOLUME_RL_RAID4:
		if (qual != G_RAID_VOLUME_RLQ_R4P0 &&
		    qual != G_RAID_VOLUME_RLQ_R4PN)
			return (0);
		if (disks < 3)
			return (0);
		break;
	case G_RAID_VOLUME_RL_RAID5:
		if (qual != G_RAID_VOLUME_RLQ_R5RA &&
		    qual != G_RAID_VOLUME_RLQ_R5RS &&
		    qual != G_RAID_VOLUME_RLQ_R5LA &&
		    qual != G_RAID_VOLUME_RLQ_R5LS)
			return (0);
		if (disks < 3)
			return (0);
		break;
	case G_RAID_VOLUME_RL_RAID6:
		if (qual != G_RAID_VOLUME_RLQ_R6RA &&
		    qual != G_RAID_VOLUME_RLQ_R6RS &&
		    qual != G_RAID_VOLUME_RLQ_R6LA &&
		    qual != G_RAID_VOLUME_RLQ_R6LS)
			return (0);
		if (disks < 4)
			return (0);
		break;
	case G_RAID_VOLUME_RL_RAIDMDF:
		if (qual != G_RAID_VOLUME_RLQ_RMDFRA &&
		    qual != G_RAID_VOLUME_RLQ_RMDFRS &&
		    qual != G_RAID_VOLUME_RLQ_RMDFLA &&
		    qual != G_RAID_VOLUME_RLQ_RMDFLS)
			return (0);
		if (disks < 5)
			return (0);
		break;
	case G_RAID_VOLUME_RL_RAID1E:
		if (qual != G_RAID_VOLUME_RLQ_R1EA &&
		    qual != G_RAID_VOLUME_RLQ_R1EO)
			return (0);
		if (disks < 3)
			return (0);
		break;
	case G_RAID_VOLUME_RL_SINGLE:
		if (qual != G_RAID_VOLUME_RLQ_NONE)
			return (0);
		if (disks != 1)
			return (0);
		break;
	case G_RAID_VOLUME_RL_CONCAT:
		if (qual != G_RAID_VOLUME_RLQ_NONE)
			return (0);
		if (disks < 2)
			return (0);
		break;
	case G_RAID_VOLUME_RL_RAID5E:
		if (qual != G_RAID_VOLUME_RLQ_R5ERA &&
		    qual != G_RAID_VOLUME_RLQ_R5ERS &&
		    qual != G_RAID_VOLUME_RLQ_R5ELA &&
		    qual != G_RAID_VOLUME_RLQ_R5ELS)
			return (0);
		if (disks < 4)
			return (0);
		break;
	case G_RAID_VOLUME_RL_RAID5EE:
		if (qual != G_RAID_VOLUME_RLQ_R5EERA &&
		    qual != G_RAID_VOLUME_RLQ_R5EERS &&
		    qual != G_RAID_VOLUME_RLQ_R5EELA &&
		    qual != G_RAID_VOLUME_RLQ_R5EELS)
			return (0);
		if (disks < 4)
			return (0);
		break;
	case G_RAID_VOLUME_RL_RAID5R:
		if (qual != G_RAID_VOLUME_RLQ_R5RRA &&
		    qual != G_RAID_VOLUME_RLQ_R5RRS &&
		    qual != G_RAID_VOLUME_RLQ_R5RLA &&
		    qual != G_RAID_VOLUME_RLQ_R5RLS)
			return (0);
		if (disks < 3)
			return (0);
		break;
	default:
		return (0);
	}
	return (1);
}

/*
 * Place a disk into a volume: either in its position recorded in the
 * volume metadata, or — if the volume is already running and the disk
 * is not listed — as a replacement ("resurrection") for a missing or
 * failed subdisk, provided the disk has a free partition slot and a
 * large enough unused extent.  Sets the disk and subdisk states
 * accordingly and emits a SUBDISK_E_NEW event.
 * Returns 1 if the disk was used as a replacement (metadata must be
 * rewritten by the caller), 0 otherwise.
 */
static int
g_raid_md_ddf_start_disk(struct g_raid_disk *disk, struct g_raid_volume *vol)
{
	struct g_raid_softc *sc;
	struct g_raid_subdisk *sd;
	struct g_raid_md_ddf_perdisk *pd;
	struct g_raid_md_ddf_pervolume *pv;
	struct g_raid_md_ddf_object *mdi;
	struct ddf_vol_meta *vmeta;
	struct ddf_meta *pdmeta, *gmeta;
	struct ddf_vdc_record *vdc1;
	struct ddf_sa_record *sa;
	off_t size, eoff = 0, esize = 0;
	uint64_t *val2;
	int disk_pos, md_disk_bvd = -1, md_disk_pos = -1, md_pde_pos;
	int i, resurrection = 0;
	uint32_t reference;

	sc = disk->d_softc;
	mdi = (struct g_raid_md_ddf_object *)sc->sc_md;
	pd = (struct g_raid_md_ddf_perdisk *)disk->d_md_data;
	pdmeta = &pd->pd_meta;
	reference = GET32(&pd->pd_meta, pdd->PD_Reference);

	pv = vol->v_md_data;
	vmeta = &pv->pv_meta;
	gmeta = &mdi->mdio_meta;

	/* Find disk position in metadata by it's reference. */
	disk_pos = ddf_meta_find_disk(vmeta, reference,
	    &md_disk_bvd, &md_disk_pos);
	md_pde_pos = ddf_meta_find_pd(gmeta, NULL, reference);

	if (disk_pos < 0) {
		G_RAID_DEBUG1(1, sc,
		    "Disk %s is not a present part of the volume %s",
		    g_raid_get_diskname(disk), vol->v_name);

		/* Failed stale disk is useless for us. */
		if ((GET16(gmeta, pdr->entry[md_pde_pos].PD_State) & DDF_PDE_PFA) != 0) {
			g_raid_change_disk_state(disk, G_RAID_DISK_S_STALE_FAILED);
			return (0);
		}

		/* If disk has some metadata for this volume - erase. */
		if ((vdc1 = ddf_meta_find_vdc(pdmeta, vmeta->vdc->VD_GUID)) != NULL)
			SET32D(pdmeta, vdc1->Signature, 0xffffffff);

		/* If we are in the start process, that's all for now. */
		if (!pv->pv_started)
			goto nofit;
		/*
		 * If we have already started - try to get use of the disk.
		 * Try to replace OFFLINE disks first, then FAILED.
		 */
		if (ddf_meta_count_vdc(&pd->pd_meta, NULL) >=
		    GET16(&pd->pd_meta, hdr->Max_Partitions)) {
			G_RAID_DEBUG1(1, sc, "No free partitions on disk %s",
			    g_raid_get_diskname(disk));
			goto nofit;
		}
		ddf_meta_unused_range(&pd->pd_meta, &eoff, &esize);
		if (esize == 0) {
			G_RAID_DEBUG1(1, sc, "No free space on disk %s",
			    g_raid_get_diskname(disk));
			goto nofit;
		}
		/*
		 * Pick a subdisk slot to replace; 'size' ends up as the
		 * size of the last present subdisk scanned.
		 *
		 * NOTE(review): the comparison
		 * vol->v_subdisks[i].sd_state < sd->sd_state compares an
		 * element with itself (sd == &vol->v_subdisks[i]) and is
		 * always false, so only the first qualifying slot is ever
		 * chosen — likely intended to compare against the current
		 * candidate at disk_pos; verify against later revisions.
		 */
		size = INT64_MAX;
		for (i = 0; i < vol->v_disks_count; i++) {
			sd = &vol->v_subdisks[i];
			if (sd->sd_state != G_RAID_SUBDISK_S_NONE)
				size = sd->sd_size;
			if (sd->sd_state <= G_RAID_SUBDISK_S_FAILED &&
			    (disk_pos < 0 ||
			     vol->v_subdisks[i].sd_state < sd->sd_state))
				disk_pos = i;
		}
		if (disk_pos >= 0 &&
		    vol->v_raid_level != G_RAID_VOLUME_RL_CONCAT &&
		    (off_t)esize * 512 < size) {
			G_RAID_DEBUG1(1, sc, "Disk %s free space "
			    "is too small (%ju < %ju)",
			    g_raid_get_diskname(disk),
			    (off_t)esize * 512, size);
			disk_pos = -1;
		}
		if (disk_pos >= 0) {
			if (vol->v_raid_level != G_RAID_VOLUME_RL_CONCAT)
				esize = size / 512;
			md_disk_bvd = disk_pos / GET16(vmeta, vdc->Primary_Element_Count); // XXX
			md_disk_pos = disk_pos % GET16(vmeta, vdc->Primary_Element_Count); // XXX
		} else {
nofit:
			if (disk->d_state == G_RAID_DISK_S_NONE)
				g_raid_change_disk_state(disk,
				    G_RAID_DISK_S_STALE);
			return (0);
		}

		/*
		 * If spare is committable, delete spare record.
		 * Othersize, mark it active and leave there.
		 */
		sa = ddf_meta_find_sa(&pd->pd_meta, 0);
		if (sa != NULL) {
			if ((GET8D(&pd->pd_meta, sa->Spare_Type) &
			    DDF_SAR_TYPE_REVERTIBLE) == 0) {
				SET32D(&pd->pd_meta, sa->Signature, 0xffffffff);
			} else {
				SET8D(&pd->pd_meta, sa->Spare_Type,
				    GET8D(&pd->pd_meta, sa->Spare_Type) |
				    DDF_SAR_TYPE_ACTIVE);
			}
		}

		G_RAID_DEBUG1(1, sc, "Disk %s takes pos %d in the volume %s",
		    g_raid_get_diskname(disk), disk_pos, vol->v_name);
		resurrection = 1;
	}

	sd = &vol->v_subdisks[disk_pos];

	/* Evict whatever currently occupies the slot we are taking. */
	if (resurrection && sd->sd_disk != NULL) {
		g_raid_change_disk_state(sd->sd_disk,
		    G_RAID_DISK_S_STALE_FAILED);
		TAILQ_REMOVE(&sd->sd_disk->d_subdisks,
		    sd, sd_next);
	}
	vol->v_subdisks[disk_pos].sd_disk = disk;
	TAILQ_INSERT_TAIL(&disk->d_subdisks, sd, sd_next);

	/* Welcome the new disk. */
	if (resurrection)
		g_raid_change_disk_state(disk, G_RAID_DISK_S_ACTIVE);
	else if (GET8(gmeta, pdr->entry[md_pde_pos].PD_State) & DDF_PDE_PFA)
		g_raid_change_disk_state(disk, G_RAID_DISK_S_FAILED);
	else
		g_raid_change_disk_state(disk, G_RAID_DISK_S_ACTIVE);

	/* Subdisk extent: fresh allocation, or as recorded in the VDC. */
	if (resurrection) {
		sd->sd_offset = (off_t)eoff * 512;
		sd->sd_size = (off_t)esize * 512;
	} else if (pdmeta->cr != NULL &&
	    (vdc1 = ddf_meta_find_vdc(pdmeta, vmeta->vdc->VD_GUID)) != NULL) {
		/* Starting-LBA array follows the PD sequence array. */
		val2 = (uint64_t *)&(vdc1->Physical_Disk_Sequence[GET16(vmeta, hdr->Max_Primary_Element_Entries)]);
		sd->sd_offset = (off_t)GET64P(pdmeta, val2 + md_disk_pos) * 512;
		sd->sd_size = (off_t)GET64D(pdmeta, vdc1->Block_Count) * 512;
	}

	if (resurrection) {
		/* Stale disk, almost same as new. */
		g_raid_change_subdisk_state(sd,
		    G_RAID_SUBDISK_S_NEW);
	} else if (GET8(gmeta, pdr->entry[md_pde_pos].PD_State) & DDF_PDE_PFA) {
		/* Failed disk. */
		g_raid_change_subdisk_state(sd,
		    G_RAID_SUBDISK_S_FAILED);
	} else if ((GET8(gmeta, pdr->entry[md_pde_pos].PD_State) &
	    (DDF_PDE_FAILED | DDF_PDE_REBUILD)) != 0) {
		/* Rebuilding disk. */
		g_raid_change_subdisk_state(sd,
		    G_RAID_SUBDISK_S_REBUILD);
		sd->sd_rebuild_pos = 0;
	} else if ((GET8(vmeta, vde->VD_State) & DDF_VDE_DIRTY) != 0 ||
	    (GET8(vmeta, vde->Init_State) & DDF_VDE_INIT_MASK) !=
	     DDF_VDE_INIT_FULL) {
		/* Stale disk or dirty volume (unclean shutdown). */
		g_raid_change_subdisk_state(sd,
		    G_RAID_SUBDISK_S_STALE);
	} else {
		/* Up to date disk. */
		g_raid_change_subdisk_state(sd,
		    G_RAID_SUBDISK_S_ACTIVE);
	}
	g_raid_event_send(sd, G_RAID_SUBDISK_E_NEW,
	    G_RAID_EVENT_SUBDISK);

	return (resurrection);
}

/*
 * Try to fill holes (NONE/FAILED subdisks) of started volumes with any
 * usable disks this node has, rewriting metadata after each successful
 * placement.  Restarts the scan whenever anything changed.
 */
static void
g_raid_md_ddf_refill(struct g_raid_softc *sc)
{
	struct g_raid_volume *vol;
	struct g_raid_subdisk *sd;
	struct g_raid_disk *disk;
	struct g_raid_md_object *md;
	struct g_raid_md_ddf_perdisk *pd;
	struct g_raid_md_ddf_pervolume *pv;
	int update, updated, i, bad;

	md = sc->sc_md;
restart:
	updated = 0;
	TAILQ_FOREACH(vol, &sc->sc_volumes, v_next) {
		pv = vol->v_md_data;
		if (!pv->pv_started || vol->v_stopping)
			continue;

		/* Search for subdisk that needs replacement. */
		bad = 0;
		for (i = 0; i < vol->v_disks_count; i++) {
			sd = &vol->v_subdisks[i];
			if (sd->sd_state == G_RAID_SUBDISK_S_NONE ||
			    sd->sd_state == G_RAID_SUBDISK_S_FAILED)
				bad = 1;
		}
		if (!bad)
			continue;

		G_RAID_DEBUG1(1, sc, "Volume %s is not complete, "
		    "trying to refill.", vol->v_name);

		TAILQ_FOREACH(disk, &sc->sc_disks, d_next) {
			/* Skip failed. */
			if (disk->d_state < G_RAID_DISK_S_SPARE)
				continue;
			/* Skip already used by this volume.
			 */
			for (i = 0; i < vol->v_disks_count; i++) {
				sd = &vol->v_subdisks[i];
				if (sd->sd_disk == disk)
					break;
			}
			if (i < vol->v_disks_count)
				continue;

			/* Try to use disk if it has empty extents. */
			pd = disk->d_md_data;
			if (ddf_meta_count_vdc(&pd->pd_meta, NULL) <
			    GET16(&pd->pd_meta, hdr->Max_Partitions)) {
				update = g_raid_md_ddf_start_disk(disk, vol);
			} else
				update = 0;
			if (update) {
				/* Disk placed: persist and rescan. */
				updated = 1;
				g_raid_md_write_ddf(md, vol, NULL, disk);
				break;
			}
		}
	}
	if (updated)
		goto restart;
}

/*
 * Start a volume: derive its RAID level/qualifier, sector and strip
 * size, disk count and media size from the cached volume metadata,
 * compute each subdisk's offset/size from the per-BVD configuration
 * records, attach all already-known disks, and announce the volume.
 */
static void
g_raid_md_ddf_start(struct g_raid_volume *vol)
{
	struct g_raid_softc *sc;
	struct g_raid_subdisk *sd;
	struct g_raid_disk *disk;
	struct g_raid_md_object *md;
	struct g_raid_md_ddf_perdisk *pd;
	struct g_raid_md_ddf_pervolume *pv;
	struct g_raid_md_ddf_object *mdi;
	struct ddf_vol_meta *vmeta;
	struct ddf_vdc_record *vdc;
	uint64_t *val2;
	int i, j, bvd;

	sc = vol->v_softc;
	md = sc->sc_md;
	mdi = (struct g_raid_md_ddf_object *)md;
	pv = vol->v_md_data;
	vmeta = &pv->pv_meta;
	vdc = vmeta->vdc;

	vol->v_raid_level = GET8(vmeta, vdc->Primary_RAID_Level);
	vol->v_raid_level_qualifier = GET8(vmeta, vdc->RLQ);
	/* Striped secondary level over RAID1 maps to our RAID1E. */
	if (GET8(vmeta, vdc->Secondary_Element_Count) > 1 &&
	    vol->v_raid_level == G_RAID_VOLUME_RL_RAID1 &&
	    GET8(vmeta, vdc->Secondary_RAID_Level) == 0)
		vol->v_raid_level = G_RAID_VOLUME_RL_RAID1E;
	vol->v_sectorsize = GET16(vmeta, vdc->Block_Size);
	/* 0xffff means "same as the physical sector size". */
	if (vol->v_sectorsize == 0xffff)
		vol->v_sectorsize = vmeta->sectorsize;
	/* Stripe_Size is stored as a power-of-two shift of sectors. */
	vol->v_strip_size = vol->v_sectorsize << GET8(vmeta, vdc->Stripe_Size);
	vol->v_disks_count = GET16(vmeta, vdc->Primary_Element_Count) *
	    GET8(vmeta, vdc->Secondary_Element_Count);
	vol->v_mediasize = GET64(vmeta, vdc->VD_Size) * vol->v_sectorsize;
	/* j walks positions inside the current BVD, bvd counts BVDs. */
	for (i = 0, j = 0, bvd = 0; i < vol->v_disks_count; i++, j++) {
		if (j == GET16(vmeta, vdc->Primary_Element_Count)) {
			j = 0;
			bvd++;
		}
		sd = &vol->v_subdisks[i];
		if (vmeta->bvdc[bvd] == NULL) {
			/* BVD config missing: fall back to primary VDC. */
			sd->sd_offset = 0;
			sd->sd_size = GET64(vmeta, vdc->Block_Count) *
			    vol->v_sectorsize;
			continue;
		}
		/* Starting-LBA array follows the PD sequence array. */
		val2 = (uint64_t *)&(vmeta->bvdc[bvd]->Physical_Disk_Sequence[
		    GET16(vmeta, hdr->Max_Primary_Element_Entries)]);
		sd->sd_offset = GET64P(vmeta, val2 + j) * vol->v_sectorsize;
		sd->sd_size = GET64(vmeta, bvdc[bvd]->Block_Count) *
		    vol->v_sectorsize;
	}
	g_raid_start_volume(vol);

	/* Make all disks found till the moment take their places. */
	TAILQ_FOREACH(disk, &sc->sc_disks, d_next) {
		pd = (struct g_raid_md_ddf_perdisk *)disk->d_md_data;
		if (ddf_meta_find_vdc(&pd->pd_meta, vmeta->vdc->VD_GUID) != NULL)
			g_raid_md_ddf_start_disk(disk, vol);
	}

	pv->pv_started = 1;
	mdi->mdio_starting--;
	/* Volume is up: the STARTING-state timeout is no longer needed. */
	callout_stop(&pv->pv_start_co);
	G_RAID_DEBUG1(0, sc, "Volume started.");
	g_raid_md_write_ddf(md, vol, NULL, NULL);

	/* Pickup any STALE/SPARE disks to refill array if needed.
	 */
	g_raid_md_ddf_refill(sc);

	g_raid_event_send(vol, G_RAID_VOLUME_E_START, G_RAID_EVENT_VOLUME);
}

/*
 * STARTING-state callout handler: if the volume has not collected all
 * of its disks within the timeout, force it to start with whatever it
 * has by sending a STARTMD event.
 */
static void
g_raid_ddf_go(void *arg)
{
	struct g_raid_volume *vol;
	struct g_raid_softc *sc;
	struct g_raid_md_ddf_pervolume *pv;

	vol = arg;
	pv = vol->v_md_data;
	sc = vol->v_softc;
	if (!pv->pv_started) {
		G_RAID_DEBUG1(0, sc, "Force volume start due to timeout.");
		g_raid_event_send(vol, G_RAID_VOLUME_E_STARTMD,
		    G_RAID_EVENT_VOLUME);
	}
}

/*
 * Handle a newly arrived disk: merge its metadata into the node-global
 * copy, create (or update) volumes for every VDC record it carries,
 * mark pure-spare disks as SPARE, attach the disk to already-running
 * volumes, and start any volume that now has all of its disks.
 */
static void
g_raid_md_ddf_new_disk(struct g_raid_disk *disk)
{
	struct g_raid_softc *sc;
	struct g_raid_md_object *md;
	struct g_raid_md_ddf_perdisk *pd;
	struct g_raid_md_ddf_pervolume *pv;
	struct g_raid_md_ddf_object *mdi;
	struct g_raid_volume *vol;
	struct ddf_meta *pdmeta;
	struct ddf_vol_meta *vmeta;
	struct ddf_vdc_record *vdc;
	struct ddf_vd_entry *vde;
	int i, j, k, num, have, need, cnt, spare;
	uint32_t val;
	char buf[17];

	sc = disk->d_softc;
	md = sc->sc_md;
	mdi = (struct g_raid_md_ddf_object *)md;
	pd = (struct g_raid_md_ddf_perdisk *)disk->d_md_data;
	pdmeta = &pd->pd_meta;
	/* spare: -1 = unknown, 1 = spare record only, 0 = has VDC. */
	spare = -1;

	if (mdi->mdio_meta.hdr == NULL)
		ddf_meta_copy(&mdi->mdio_meta, pdmeta);
	else
		ddf_meta_update(&mdi->mdio_meta, pdmeta);

	num = GETCRNUM(pdmeta);
	for (j = 0; j < num; j++) {
		vdc = GETVDCPTR(pdmeta, j);
		val = GET32D(pdmeta, vdc->Signature);

		if (val == DDF_SA_SIGNATURE && spare == -1)
			spare = 1;

		if (val != DDF_VDCR_SIGNATURE)
			continue;
		spare = 0;
		k = ddf_meta_find_vd(pdmeta, vdc->VD_GUID);
		if (k < 0)
			continue;
		vde = &pdmeta->vdr->entry[k];

		/* Look for volume with matching ID. */
		vol = g_raid_md_ddf_get_volume(sc, vdc->VD_GUID);
		if (vol == NULL) {
			ddf_meta_get_name(pdmeta, k, buf);
			vol = g_raid_create_volume(sc, buf,
			    GET16D(pdmeta, vde->VD_Number));
			pv = malloc(sizeof(*pv), M_MD_DDF, M_WAITOK | M_ZERO);
			vol->v_md_data = pv;
			/* Arm the forced-start timeout for the volume. */
			callout_init(&pv->pv_start_co, 1);
			callout_reset(&pv->pv_start_co,
			    g_raid_start_timeout * hz,
			    g_raid_ddf_go, vol);
			mdi->mdio_starting++;
		} else
			pv = vol->v_md_data;

		/* If we haven't started yet - check metadata freshness. */
		vmeta = &pv->pv_meta;
		ddf_vol_meta_update(vmeta, pdmeta, vdc->VD_GUID, pv->pv_started);
	}

	if (spare == 1) {
		g_raid_change_disk_state(disk, G_RAID_DISK_S_SPARE);
		g_raid_md_ddf_refill(sc);
	}

	TAILQ_FOREACH(vol, &sc->sc_volumes, v_next) {
		pv = vol->v_md_data;
		vmeta = &pv->pv_meta;

		if (ddf_meta_find_vdc(pdmeta, vmeta->vdc->VD_GUID) == NULL)
			continue;

		if (pv->pv_started) {
			if (g_raid_md_ddf_start_disk(disk, vol))
				g_raid_md_write_ddf(md, vol, NULL, NULL);
			continue;
		}

		/* If we collected all needed disks - start array. */
		need = 0;
		have = 0;
		for (k = 0; k < GET8(vmeta, vdc->Secondary_Element_Count); k++) {
			if (vmeta->bvdc[k] == NULL) {
				/* No config for this BVD yet: all missing. */
				need += GET16(vmeta, vdc->Primary_Element_Count);
				continue;
			}
			cnt = GET16(vmeta, bvdc[k]->Primary_Element_Count);
			need += cnt;
			for (i = 0; i < cnt; i++) {
				val = GET32(vmeta, bvdc[k]->Physical_Disk_Sequence[i]);
				if (g_raid_md_ddf_get_disk(sc, NULL, val) != NULL)
					have++;
			}
		}
		G_RAID_DEBUG1(1, sc, "Volume %s now has %d of %d disks",
		    vol->v_name, have, need);
		if (have == need)
			g_raid_md_ddf_start(vol);
	}
}

/*
 * "create" method: return an existing DDF node of this class if one is
 * alive, otherwise create a fresh GEOM RAID node named "DDF".
 */
static int
g_raid_md_create_ddf(struct g_raid_md_object *md, struct g_class *mp,
    struct g_geom **gp)
{
	struct g_geom *geom;
	struct g_raid_softc *sc;

	/* Search for existing node. */
	LIST_FOREACH(geom, &mp->geom, geom) {
		sc = geom->softc;
		if (sc == NULL)
			continue;
		if (sc->sc_stopping != 0)
			continue;
		if (sc->sc_md->mdo_class != md->mdo_class)
			continue;
		break;
	}
	if (geom != NULL) {
		*gp = geom;
		return (G_RAID_MD_TASTE_EXISTING);
	}

	/* Create new one if not found. */
	sc = g_raid_create_node(mp, "DDF", md);
	if (sc == NULL)
		return (G_RAID_MD_TASTE_FAIL);
	md->mdo_softc = sc;
	*gp = sc->sc_geom;
	return (G_RAID_MD_TASTE_NEW);
}

/*
 * "taste" method: probe a provider for DDF metadata and, on success,
 * attach it as a disk to the (new or existing) DDF node.
 */
static int
g_raid_md_taste_ddf(struct g_raid_md_object *md, struct g_class *mp,
    struct g_consumer *cp, struct g_geom **gp)
{
	struct g_consumer *rcp;
	struct g_provider *pp;
	struct g_raid_softc *sc;
	struct g_raid_disk *disk;
	struct ddf_meta meta;
	struct g_raid_md_ddf_perdisk *pd;
	struct g_geom *geom;
	int error, result, len;
	char name[16];

	G_RAID_DEBUG(1, "Tasting DDF on %s", cp->provider->name);
	pp = cp->provider;

	/* Read metadata from device.
 */
	if (g_access(cp, 1, 0, 0) != 0)
		return (G_RAID_MD_TASTE_FAIL);
	g_topology_unlock();
	bzero(&meta, sizeof(meta));
	error = ddf_meta_read(cp, &meta);
	g_topology_lock();
	g_access(cp, -1, 0, 0);
	if (error != 0)
		return (G_RAID_MD_TASTE_FAIL);

	/* Metadata valid. Print it. */
	g_raid_md_ddf_print(&meta);

	/* Search for matching node. */
	sc = NULL;
	LIST_FOREACH(geom, &mp->geom, geom) {
		sc = geom->softc;
		if (sc == NULL)
			continue;
		if (sc->sc_stopping != 0)
			continue;
		if (sc->sc_md->mdo_class != md->mdo_class)
			continue;
		break;
	}

	/* Found matching node. */
	if (geom != NULL) {
		G_RAID_DEBUG(1, "Found matching array %s", sc->sc_name);
		result = G_RAID_MD_TASTE_EXISTING;

	} else { /* Not found matching node -- create one. */
		result = G_RAID_MD_TASTE_NEW;
		snprintf(name, sizeof(name), "DDF");
		sc = g_raid_create_node(mp, name, md);
		md->mdo_softc = sc;
		geom = sc->sc_geom;
	}

	rcp = g_new_consumer(geom);
	g_attach(rcp, pp);
	/* NOTE(review): a g_access() failure here is silently ignored (the
	 * "fail1" error path was never implemented); the code proceeds with
	 * an unopened consumer.  Worth confirming/fixing upstream. */
	if (g_access(rcp, 1, 1, 1) != 0)
		; //goto fail1;

	g_topology_unlock();
	sx_xlock(&sc->sc_lock);

	/* Hand the already-read metadata over to the per-disk softc. */
	pd = malloc(sizeof(*pd), M_MD_DDF, M_WAITOK | M_ZERO);
	pd->pd_meta = meta;
	disk = g_raid_create_disk(sc);
	disk->d_md_data = (void *)pd;
	disk->d_consumer = rcp;
	rcp->private = disk;

	/* Read kernel dumping information. */
	disk->d_kd.offset = 0;
	disk->d_kd.length = OFF_MAX;
	len = sizeof(disk->d_kd);
	error = g_io_getattr("GEOM::kerneldump", rcp, &len, &disk->d_kd);
	if (disk->d_kd.di.dumper == NULL)
		G_RAID_DEBUG1(2, sc, "Dumping not supported by %s: %d.",
		    rcp->provider->name, error);

	g_raid_md_ddf_new_disk(disk);

	sx_xunlock(&sc->sc_lock);
	g_topology_lock();
	*gp = geom;
	return (result);
}

/*
 * Disk event method.  Only disconnect is handled: drop the disk,
 * purge volumes that lost all members, rewrite metadata and either
 * destroy the node (no disks left) or try to refill from spares.
 * Returns 0 when handled, negative when not applicable.
 */
static int
g_raid_md_event_ddf(struct g_raid_md_object *md,
    struct g_raid_disk *disk, u_int event)
{
	struct g_raid_softc *sc;

	sc = md->mdo_softc;
	if (disk == NULL)
		return (-1);
	switch (event) {
	case G_RAID_DISK_E_DISCONNECTED:
		/* Delete disk. */
		g_raid_change_disk_state(disk, G_RAID_DISK_S_NONE);
		g_raid_destroy_disk(disk);
		g_raid_md_ddf_purge_volumes(sc);

		/* Write updated metadata to all disks. */
		g_raid_md_write_ddf(md, NULL, NULL, NULL);

		/* Check if anything left. */
		if (g_raid_ndisks(sc, -1) == 0)
			g_raid_destroy_node(sc, 0);
		else
			g_raid_md_ddf_refill(sc);
		return (0);
	}
	return (-2);
}

/*
 * Volume event method: E_STARTMD (sent by the g_raid_ddf_go timeout)
 * forces a not-yet-started volume to start.
 */
static int
g_raid_md_volume_event_ddf(struct g_raid_md_object *md,
    struct g_raid_volume *vol, u_int event)
{
	struct g_raid_md_ddf_pervolume *pv;

	pv = (struct g_raid_md_ddf_pervolume *)vol->v_md_data;
	switch (event) {
	case G_RAID_VOLUME_E_STARTMD:
		if (!pv->pv_started)
			g_raid_md_ddf_start(vol);
		return (0);
	}
	return (-2);
}

/*
 * gctl control request handler: implements the "label", "add" (rejected),
 * "delete", "remove"/"fail" and "insert" administrative verbs.
 */
static int
g_raid_md_ctl_ddf(struct g_raid_md_object *md,
    struct gctl_req *req)
{
	struct g_raid_softc *sc;
	struct g_raid_volume *vol, *vol1;
	struct g_raid_subdisk *sd;
	struct g_raid_disk *disk, *disks[DDF_MAX_DISKS_HARD];
	struct g_raid_md_ddf_perdisk *pd;
	struct g_raid_md_ddf_pervolume *pv;
	struct g_raid_md_ddf_object *mdi;
	struct ddf_sa_record *sa;
	struct g_consumer *cp;
	struct g_provider *pp;
	char arg[16];
	const char *verb, *volname, *levelname, *diskname;
	char *tmp;
	int *nargs, *force;
	off_t size, sectorsize, strip, offs[DDF_MAX_DISKS_HARD], esize;
	intmax_t *sizearg, *striparg;
	int i, numdisks, len, level, qual;
	int error;

	sc = md->mdo_softc;
	mdi = (struct g_raid_md_ddf_object *)md;
	verb = gctl_get_param(req, "verb", NULL);
	nargs = gctl_get_paraml(req, "nargs", sizeof(*nargs));
	error = 0;

	if (strcmp(verb, "label") == 0) {

		if (*nargs < 4) {
			gctl_error(req, "Invalid number of arguments.");
			return (-1);
		}
		volname = gctl_get_asciiparam(req, "arg1");
		if (volname == NULL) {
			gctl_error(req, "No volume name.");
			return (-2);
		}
		levelname = gctl_get_asciiparam(req, "arg2");
		if (levelname == NULL) {
			gctl_error(req, "No RAID level.");
			return (-3);
		}
		if (g_raid_volume_str2level(levelname, &level,
		    &qual)) {
			gctl_error(req, "Unknown RAID level '%s'.", levelname);
			return (-4);
		}
		numdisks = *nargs - 3;
		force = gctl_get_paraml(req, "force", sizeof(*force));
		if (!g_raid_md_ddf_supported(level, qual, numdisks,
		    force ? *force : 0)) {
			gctl_error(req, "Unsupported RAID level "
			    "(0x%02x/0x%02x), or number of disks (%d).",
			    level, qual, numdisks);
			return (-5);
		}

		/* Search for disks, connect them and probe. */
		size = INT64_MAX;
		sectorsize = 0;
		bzero(disks, sizeof(disks));
		bzero(offs, sizeof(offs));
		for (i = 0; i < numdisks; i++) {
			snprintf(arg, sizeof(arg), "arg%d", i + 3);
			diskname = gctl_get_asciiparam(req, arg);
			if (diskname == NULL) {
				gctl_error(req, "No disk name (%s).", arg);
				error = -6;
				break;
			}
			/* "NONE" leaves the slot empty (absent subdisk). */
			if (strcmp(diskname, "NONE") == 0)
				continue;

			/* Is the named provider already a member disk? */
			TAILQ_FOREACH(disk, &sc->sc_disks, d_next) {
				if (disk->d_consumer != NULL &&
				    disk->d_consumer->provider != NULL &&
				    strcmp(disk->d_consumer->provider->name,
				     diskname) == 0)
					break;
			}
			if (disk != NULL) {
				/* Reuse an already-attached disk: it must be
				 * ACTIVE and have a free partition slot. */
				if (disk->d_state != G_RAID_DISK_S_ACTIVE) {
					gctl_error(req, "Disk '%s' is in a "
					    "wrong state (%s).", diskname,
					    g_raid_disk_state2str(disk->d_state));
					error = -7;
					break;
				}
				pd = disk->d_md_data;
				if (ddf_meta_count_vdc(&pd->pd_meta, NULL) >=
				    GET16(&pd->pd_meta, hdr->Max_Partitions)) {
					gctl_error(req, "No free partitions "
					    "on disk '%s'.",
					    diskname);
					error = -7;
					break;
				}
				pp = disk->d_consumer->provider;
				disks[i] = disk;
				ddf_meta_unused_range(&pd->pd_meta,
				    &offs[i], &esize);
				size = MIN(size, (off_t)esize * pp->sectorsize);
				sectorsize = MAX(sectorsize, pp->sectorsize);
				continue;
			}

			/* Fresh provider: open it and create a new disk. */
			g_topology_lock();
			cp = g_raid_open_consumer(sc, diskname);
			if (cp == NULL) {
				gctl_error(req, "Can't open disk '%s'.",
				    diskname);
				g_topology_unlock();
				error = -8;
				break;
			}
			pp = cp->provider;
			pd = malloc(sizeof(*pd), M_MD_DDF, M_WAITOK | M_ZERO);
			disk = g_raid_create_disk(sc);
			disk->d_md_data = (void *)pd;
			disk->d_consumer = cp;
			disks[i] = disk;
			cp->private = disk;
			ddf_meta_create(disk, &mdi->mdio_meta);
			if (mdi->mdio_meta.hdr == NULL)
				ddf_meta_copy(&mdi->mdio_meta, &pd->pd_meta);
			else
				ddf_meta_update(&mdi->mdio_meta, &pd->pd_meta);
			g_topology_unlock();

			/* Read kernel dumping information. */
			disk->d_kd.offset = 0;
			disk->d_kd.length = OFF_MAX;
			len = sizeof(disk->d_kd);
			g_io_getattr("GEOM::kerneldump", cp, &len, &disk->d_kd);
			if (disk->d_kd.di.dumper == NULL)
				G_RAID_DEBUG1(2, sc,
				    "Dumping not supported by %s.",
				    cp->provider->name);

			/* Reserve some space for metadata. */
			size = MIN(size, pp->mediasize - 131072llu * pp->sectorsize);
			sectorsize = MAX(sectorsize, pp->sectorsize);
		}
		if (error != 0) {
			/* Roll back: destroy disks created in this request. */
			for (i = 0; i < numdisks; i++) {
				if (disks[i] != NULL &&
				    disks[i]->d_state == G_RAID_DISK_S_NONE)
					g_raid_destroy_disk(disks[i]);
			}
			return (error);
		}

		if (sectorsize <= 0) {
			gctl_error(req, "Can't get sector size.");
			return (-8);
		}

		/* Handle size argument. */
		len = sizeof(*sizearg);
		sizearg = gctl_get_param(req, "size", &len);
		if (sizearg != NULL && len == sizeof(*sizearg) &&
		    *sizearg > 0) {
			if (*sizearg > size) {
				gctl_error(req, "Size too big %lld > %lld.",
				    (long long)*sizearg, (long long)size);
				return (-9);
			}
			size = *sizearg;
		}

		/* Handle strip argument. */
		strip = 131072;
		len = sizeof(*striparg);
		striparg = gctl_get_param(req, "strip", &len);
		if (striparg != NULL && len == sizeof(*striparg) &&
		    *striparg > 0) {
			if (*striparg < sectorsize) {
				gctl_error(req, "Strip size too small.");
				return (-10);
			}
			if (*striparg % sectorsize != 0) {
				gctl_error(req, "Incorrect strip size.");
				return (-11);
			}
			strip = *striparg;
		}

		/* Round size down to strip or sector. */
		if (level == G_RAID_VOLUME_RL_RAID1 ||
		    level == G_RAID_VOLUME_RL_RAID3 ||
		    level == G_RAID_VOLUME_RL_SINGLE ||
		    level == G_RAID_VOLUME_RL_CONCAT)
			size -= (size % sectorsize);
		else if (level == G_RAID_VOLUME_RL_RAID1E &&
		    (numdisks & 1) != 0)
			size -= (size % (2 * strip));
		else
			size -= (size % strip);
		if (size <= 0) {
			gctl_error(req, "Size too small.");
			return (-13);
		}

		/* We have all we need, create things: volume, ... */
		pv = malloc(sizeof(*pv), M_MD_DDF, M_WAITOK | M_ZERO);
		ddf_vol_meta_create(&pv->pv_meta, &mdi->mdio_meta);
		pv->pv_started = 1;
		vol = g_raid_create_volume(sc, volname, -1);
		vol->v_md_data = pv;
		vol->v_raid_level = level;
		vol->v_raid_level_qualifier = qual;
		vol->v_strip_size = strip;
		vol->v_disks_count = numdisks;
		/* Usable media size depends on the RAID level's redundancy. */
		if (level == G_RAID_VOLUME_RL_RAID0 ||
		    level == G_RAID_VOLUME_RL_CONCAT ||
		    level == G_RAID_VOLUME_RL_SINGLE)
			vol->v_mediasize = size * numdisks;
		else if (level == G_RAID_VOLUME_RL_RAID1)
			vol->v_mediasize = size;
		else if (level == G_RAID_VOLUME_RL_RAID3 ||
		    level == G_RAID_VOLUME_RL_RAID4 ||
		    level == G_RAID_VOLUME_RL_RAID5 ||
		    level == G_RAID_VOLUME_RL_RAID5R)
			vol->v_mediasize = size * (numdisks - 1);
		else if (level == G_RAID_VOLUME_RL_RAID6 ||
		    level == G_RAID_VOLUME_RL_RAID5E ||
		    level == G_RAID_VOLUME_RL_RAID5EE)
			vol->v_mediasize = size * (numdisks - 2);
		else if (level == G_RAID_VOLUME_RL_RAIDMDF)
			vol->v_mediasize = size * (numdisks - 3);
		else { /* RAID1E */
			vol->v_mediasize = ((size * numdisks) / strip / 2) *
			    strip;
		}
		vol->v_sectorsize = sectorsize;
		g_raid_start_volume(vol);

		/* , and subdisks. */
		for (i = 0; i < numdisks; i++) {
			disk = disks[i];
			sd = &vol->v_subdisks[i];
			sd->sd_disk = disk;
			/* NOTE(review): offs[] is scaled by a hard-coded 512
			 * here, while it was derived from pp->sectorsize
			 * above — presumably DDF offsets are in 512-byte
			 * blocks; confirm for 4K-sector providers. */
			sd->sd_offset = (off_t)offs[i] * 512;
			sd->sd_size = size;
			if (disk == NULL)
				continue;
			TAILQ_INSERT_TAIL(&disk->d_subdisks, sd, sd_next);
			g_raid_change_disk_state(disk,
			    G_RAID_DISK_S_ACTIVE);
			g_raid_change_subdisk_state(sd,
			    G_RAID_SUBDISK_S_ACTIVE);
			g_raid_event_send(sd, G_RAID_SUBDISK_E_NEW,
			    G_RAID_EVENT_SUBDISK);
		}

		/* Write metadata based on created entities. */
		G_RAID_DEBUG1(0, sc, "Array started.");
		g_raid_md_write_ddf(md, vol, NULL, NULL);

		/* Pickup any STALE/SPARE disks to refill array if needed. */
		g_raid_md_ddf_refill(sc);

		g_raid_event_send(vol, G_RAID_VOLUME_E_START,
		    G_RAID_EVENT_VOLUME);
		return (0);
	}
	if (strcmp(verb, "add") == 0) {

		gctl_error(req, "`add` command is not applicable, "
		    "use `label` instead.");
		return (-99);
	}
	if (strcmp(verb, "delete") == 0) {

		/* Full node destruction. */
		if (*nargs == 1) {
			/* Check if some volume is still open. */
			force = gctl_get_paraml(req, "force", sizeof(*force));
			if (force != NULL && *force == 0 &&
			    g_raid_nopens(sc) != 0) {
				gctl_error(req, "Some volume is still open.");
				return (-4);
			}

			TAILQ_FOREACH(disk, &sc->sc_disks, d_next) {
				if (disk->d_consumer)
					ddf_meta_erase(disk->d_consumer);
			}
			g_raid_destroy_node(sc, 0);
			return (0);
		}

		/* Destroy specified volume. If it was last - all node. */
		if (*nargs != 2) {
			gctl_error(req, "Invalid number of arguments.");
			return (-1);
		}
		volname = gctl_get_asciiparam(req, "arg1");
		if (volname == NULL) {
			gctl_error(req, "No volume name.");
			return (-2);
		}

		/* Search for volume. */
		TAILQ_FOREACH(vol, &sc->sc_volumes, v_next) {
			if (strcmp(vol->v_name, volname) == 0)
				break;
		}
		if (vol == NULL) {
			/* Fall back to lookup by numeric global ID. */
			i = strtol(volname, &tmp, 10);
			if (verb != volname && tmp[0] == 0) {
				TAILQ_FOREACH(vol, &sc->sc_volumes, v_next) {
					if (vol->v_global_id == i)
						break;
				}
			}
		}
		if (vol == NULL) {
			gctl_error(req, "Volume '%s' not found.", volname);
			return (-3);
		}

		/* Check if volume is still open. */
		force = gctl_get_paraml(req, "force", sizeof(*force));
		if (force != NULL && *force == 0 &&
		    vol->v_provider_open != 0) {
			gctl_error(req, "Volume is still open.");
			return (-4);
		}

		/* Destroy volume and potentially node. */
		i = 0;
		TAILQ_FOREACH(vol1, &sc->sc_volumes, v_next)
			i++;
		if (i >= 2) {
			/* Other volumes remain: drop just this one. */
			g_raid_destroy_volume(vol);
			g_raid_md_ddf_purge_disks(sc);
			g_raid_md_write_ddf(md, NULL, NULL, NULL);
		} else {
			/* Last volume: erase metadata and tear down node. */
			TAILQ_FOREACH(disk, &sc->sc_disks, d_next) {
				if (disk->d_consumer)
					ddf_meta_erase(disk->d_consumer);
			}
			g_raid_destroy_node(sc, 0);
		}
		return (0);
	}
	if (strcmp(verb, "remove") == 0 ||
	    strcmp(verb, "fail") == 0) {
		if (*nargs < 2) {
			gctl_error(req, "Invalid number of arguments.");
			return (-1);
		}
		for (i = 1; i < *nargs; i++) {
			snprintf(arg, sizeof(arg), "arg%d", i);
			diskname = gctl_get_asciiparam(req, arg);
			if (diskname == NULL) {
				gctl_error(req, "No disk name (%s).", arg);
				error = -2;
				break;
			}
			if (strncmp(diskname, "/dev/", 5) == 0)
				diskname += 5;

			TAILQ_FOREACH(disk, &sc->sc_disks, d_next) {
				if (disk->d_consumer != NULL &&
				    disk->d_consumer->provider != NULL &&
				    strcmp(disk->d_consumer->provider->name,
				     diskname) == 0)
					break;
			}
			if (disk == NULL) {
				gctl_error(req, "Disk '%s' not found.",
				    diskname);
				error = -3;
				break;
			}

			if (strcmp(verb, "fail") == 0) {
				g_raid_md_fail_disk_ddf(md, NULL, disk);
				continue;
			}

			/* Erase metadata on deleting disk and destroy it. */
			ddf_meta_erase(disk->d_consumer);
			g_raid_destroy_disk(disk);
		}
		g_raid_md_ddf_purge_volumes(sc);

		/* Write updated metadata to remaining disks. */
		g_raid_md_write_ddf(md, NULL, NULL, NULL);

		/* Check if anything left. */
		if (g_raid_ndisks(sc, -1) == 0)
			g_raid_destroy_node(sc, 0);
		else
			g_raid_md_ddf_refill(sc);
		return (error);
	}
	if (strcmp(verb, "insert") == 0) {
		if (*nargs < 2) {
			gctl_error(req, "Invalid number of arguments.");
			return (-1);
		}
		for (i = 1; i < *nargs; i++) {
			/* Get disk name. */
			snprintf(arg, sizeof(arg), "arg%d", i);
			diskname = gctl_get_asciiparam(req, arg);
			if (diskname == NULL) {
				gctl_error(req, "No disk name (%s).", arg);
				error = -3;
				break;
			}

			/* Try to find provider with specified name. */
			g_topology_lock();
			cp = g_raid_open_consumer(sc, diskname);
			if (cp == NULL) {
				gctl_error(req, "Can't open disk '%s'.",
				    diskname);
				g_topology_unlock();
				error = -4;
				break;
			}
			pp = cp->provider;
			g_topology_unlock();

			pd = malloc(sizeof(*pd), M_MD_DDF, M_WAITOK | M_ZERO);

			disk = g_raid_create_disk(sc);
			disk->d_consumer = cp;
			disk->d_md_data = (void *)pd;
			cp->private = disk;

			/* Read kernel dumping information. */
			disk->d_kd.offset = 0;
			disk->d_kd.length = OFF_MAX;
			len = sizeof(disk->d_kd);
			g_io_getattr("GEOM::kerneldump", cp, &len, &disk->d_kd);
			if (disk->d_kd.di.dumper == NULL)
				G_RAID_DEBUG1(2, sc,
				    "Dumping not supported by %s.",
				    cp->provider->name);

			/* Welcome the "new" disk.
 */
			g_raid_change_disk_state(disk, G_RAID_DISK_S_SPARE);
			ddf_meta_create(disk, &mdi->mdio_meta);
			/* Write a global spare-assignment record so the disk
			 * is recognized as a spare on the next taste. */
			sa = ddf_meta_find_sa(&pd->pd_meta, 1);
			if (sa != NULL) {
				SET32D(&pd->pd_meta, sa->Signature,
				    DDF_SA_SIGNATURE);
				SET8D(&pd->pd_meta, sa->Spare_Type, 0);
				SET16D(&pd->pd_meta, sa->Populated_SAEs, 0);
				SET16D(&pd->pd_meta, sa->MAX_SAE_Supported,
				    (GET16(&pd->pd_meta, hdr->Configuration_Record_Length) *
				     pd->pd_meta.sectorsize -
				     sizeof(struct ddf_sa_record)) /
				    sizeof(struct ddf_sa_entry));
			}
			if (mdi->mdio_meta.hdr == NULL)
				ddf_meta_copy(&mdi->mdio_meta, &pd->pd_meta);
			else
				ddf_meta_update(&mdi->mdio_meta, &pd->pd_meta);
			g_raid_md_write_ddf(md, NULL, NULL, NULL);
			g_raid_md_ddf_refill(sc);
		}
		return (error);
	}
	return (-100);
}

/*
 * Regenerate DDF metadata from the current in-kernel state (volumes,
 * subdisks, disk states) and write it to every ACTIVE/SPARE disk.
 * The t* arguments name the entity that triggered the write; only
 * sc_stopping is consulted here.  Always returns 0.
 */
static int
g_raid_md_write_ddf(struct g_raid_md_object *md, struct g_raid_volume *tvol,
    struct g_raid_subdisk *tsd, struct g_raid_disk *tdisk)
{
	struct g_raid_softc *sc;
	struct g_raid_volume *vol;
	struct g_raid_subdisk *sd;
	struct g_raid_disk *disk;
	struct g_raid_md_ddf_perdisk *pd;
	struct g_raid_md_ddf_pervolume *pv;
	struct g_raid_md_ddf_object *mdi;
	struct ddf_meta *gmeta;
	struct ddf_vol_meta *vmeta;
	struct ddf_vdc_record *vdc;
	struct ddf_sa_record *sa;
	uint64_t *val2;
	int i, j, pos, bvd, size;

	sc = md->mdo_softc;
	mdi = (struct g_raid_md_ddf_object *)md;
	gmeta = &mdi->mdio_meta;

	if (sc->sc_stopping == G_RAID_DESTROY_HARD)
		return (0);

	/*
	 * Clear disk flags to let only really needed ones to be reset.
	 * Do it only if there are no volumes in starting state now,
	 * as they can update disk statuses yet and we may kill innocent.
	 */
	if (mdi->mdio_starting == 0) {
		for (i = 0; i < GET16(gmeta, pdr->Populated_PDEs); i++) {
			if (isff(gmeta->pdr->entry[i].PD_GUID, 24))
				continue;
			SET16(gmeta, pdr->entry[i].PD_Type,
			    GET16(gmeta, pdr->entry[i].PD_Type) &
			    ~(DDF_PDE_PARTICIPATING |
			      DDF_PDE_GLOBAL_SPARE | DDF_PDE_CONFIG_SPARE));
			/* Keep the "predicted failure" bit sticky. */
			if ((GET16(gmeta, pdr->entry[i].PD_State) &
			    DDF_PDE_PFA) == 0)
				SET16(gmeta, pdr->entry[i].PD_State, 0);
		}
	}

	/* Generate/update new per-volume metadata. */
	TAILQ_FOREACH(vol, &sc->sc_volumes, v_next) {
		pv = (struct g_raid_md_ddf_pervolume *)vol->v_md_data;
		if (vol->v_stopping || !pv->pv_started)
			continue;
		vmeta = &pv->pv_meta;

		SET32(vmeta, vdc->Sequence_Number,
		    GET32(vmeta, vdc->Sequence_Number) + 1);
		/* Even-disk RAID1E is expressed as RAID1 pairs (secondary
		 * level striping over 2-disk primary elements). */
		if (vol->v_raid_level == G_RAID_VOLUME_RL_RAID1E &&
		    vol->v_disks_count % 2 == 0)
			SET16(vmeta, vdc->Primary_Element_Count, 2);
		else
			SET16(vmeta, vdc->Primary_Element_Count,
			    vol->v_disks_count);
		/* Stripe_Size is stored as log2 of strip in sectors. */
		SET8(vmeta, vdc->Stripe_Size,
		    ffs(vol->v_strip_size / vol->v_sectorsize) - 1);
		if (vol->v_raid_level == G_RAID_VOLUME_RL_RAID1E &&
		    vol->v_disks_count % 2 == 0) {
			SET8(vmeta, vdc->Primary_RAID_Level,
			    DDF_VDCR_RAID1);
			SET8(vmeta, vdc->RLQ, 0);
			SET8(vmeta, vdc->Secondary_Element_Count,
			    vol->v_disks_count / 2);
			SET8(vmeta, vdc->Secondary_RAID_Level, 0);
		} else {
			SET8(vmeta, vdc->Primary_RAID_Level,
			    vol->v_raid_level);
			SET8(vmeta, vdc->RLQ,
			    vol->v_raid_level_qualifier);
			SET8(vmeta, vdc->Secondary_Element_Count, 1);
			SET8(vmeta, vdc->Secondary_RAID_Level, 0);
		}
		SET8(vmeta, vdc->Secondary_Element_Seq, 0);
		SET64(vmeta, vdc->Block_Count, 0);
		SET64(vmeta, vdc->VD_Size, vol->v_mediasize / vol->v_sectorsize);
		SET16(vmeta, vdc->Block_Size, vol->v_sectorsize);

		/* Map g_raid volume state onto the DDF VD state byte. */
		SET16(vmeta, vde->VD_Number, vol->v_global_id);
		if (vol->v_state <= G_RAID_VOLUME_S_BROKEN)
			SET8(vmeta, vde->VD_State, DDF_VDE_FAILED);
		else if (vol->v_state <= G_RAID_VOLUME_S_DEGRADED)
			SET8(vmeta, vde->VD_State, DDF_VDE_DEGRADED);
		else if (vol->v_state <= G_RAID_VOLUME_S_SUBOPTIMAL)
			SET8(vmeta, vde->VD_State, DDF_VDE_PARTIAL);
		else
			SET8(vmeta, vde->VD_State, DDF_VDE_OPTIMAL);
		if (vol->v_dirty ||
		    g_raid_nsubdisks(vol, G_RAID_SUBDISK_S_STALE) > 0 ||
		    g_raid_nsubdisks(vol, G_RAID_SUBDISK_S_RESYNC) > 0)
			SET8(vmeta, vde->VD_State,
			    GET8(vmeta, vde->VD_State) | DDF_VDE_DIRTY);
		SET8(vmeta, vde->Init_State, DDF_VDE_INIT_FULL); // XXX
		ddf_meta_put_name(vmeta, vol->v_name);

		/* Fill per-BVD configuration records from the subdisks. */
		for (i = 0; i < vol->v_disks_count; i++) {
			sd = &vol->v_subdisks[i];
			bvd = i / GET16(vmeta, vdc->Primary_Element_Count);
			pos = i % GET16(vmeta, vdc->Primary_Element_Count);
			disk = sd->sd_disk;
			if (disk != NULL) {
				pd = (struct g_raid_md_ddf_perdisk *)disk->d_md_data;
				if (vmeta->bvdc[bvd] == NULL) {
					/* Lazily allocate the BVD record,
					 * pre-filled with 0xff (empty). */
					size = GET16(vmeta,
					    hdr->Configuration_Record_Length) *
					    vmeta->sectorsize;
					vmeta->bvdc[bvd] = malloc(size,
					    M_MD_DDF, M_WAITOK);
					memset(vmeta->bvdc[bvd], 0xff, size);
				}
				memcpy(vmeta->bvdc[bvd], vmeta->vdc,
				    sizeof(struct ddf_vdc_record));
				SET8(vmeta, bvdc[bvd]->Secondary_Element_Seq, bvd);
				SET64(vmeta, bvdc[bvd]->Block_Count,
				    sd->sd_size / vol->v_sectorsize);
				SET32(vmeta, bvdc[bvd]->Physical_Disk_Sequence[pos],
				    GET32(&pd->pd_meta, pdd->PD_Reference));
				/* Starting-LBA array follows the PD sequence
				 * array (Max_Primary_Element_Entries long). */
				val2 = (uint64_t *)&(vmeta->bvdc[bvd]->Physical_Disk_Sequence[
				    GET16(vmeta, hdr->Max_Primary_Element_Entries)]);
				SET64P(vmeta, val2 + pos,
				    sd->sd_offset / vol->v_sectorsize);
			}
			if (vmeta->bvdc[bvd] == NULL)
				continue;

			/* Reflect subdisk state in the global PD record. */
			j = ddf_meta_find_pd(gmeta, NULL,
			    GET32(vmeta, bvdc[bvd]->Physical_Disk_Sequence[pos]));
			if (j < 0)
				continue;
			/* NOTE(review): PD_Type/PD_State are accessed with
			 * SET32/GET32 here but SET16/GET16 elsewhere in this
			 * function — field width looks like 16 bits; confirm
			 * against the DDF header definition. */
			SET32(gmeta, pdr->entry[j].PD_Type,
			    GET32(gmeta, pdr->entry[j].PD_Type) |
			    DDF_PDE_PARTICIPATING);
			if (sd->sd_state == G_RAID_SUBDISK_S_NONE)
				SET32(gmeta, pdr->entry[j].PD_State,
				    GET32(gmeta, pdr->entry[j].PD_State) |
				    (DDF_PDE_FAILED | DDF_PDE_MISSING));
			else if (sd->sd_state == G_RAID_SUBDISK_S_FAILED)
				SET32(gmeta, pdr->entry[j].PD_State,
				    GET32(gmeta, pdr->entry[j].PD_State) |
				    (DDF_PDE_FAILED | DDF_PDE_PFA));
			else if (sd->sd_state <= G_RAID_SUBDISK_S_REBUILD)
				SET32(gmeta, pdr->entry[j].PD_State,
				    GET32(gmeta, pdr->entry[j].PD_State) |
				    DDF_PDE_REBUILD);
			else
				SET32(gmeta, pdr->entry[j].PD_State,
				    GET32(gmeta, pdr->entry[j].PD_State) |
				    DDF_PDE_ONLINE);
		}
	}

	/* Mark spare and failed disks as such. */
	TAILQ_FOREACH(disk, &sc->sc_disks, d_next) {
		pd = (struct g_raid_md_ddf_perdisk *)disk->d_md_data;
		i = ddf_meta_find_pd(gmeta, NULL,
		    GET32(&pd->pd_meta, pdd->PD_Reference));
		if (i < 0)
			continue;
		if (disk->d_state == G_RAID_DISK_S_FAILED) {
			SET32(gmeta, pdr->entry[i].PD_State,
			    GET32(gmeta, pdr->entry[i].PD_State) |
			    (DDF_PDE_FAILED | DDF_PDE_PFA));
		}
		if (disk->d_state != G_RAID_DISK_S_SPARE)
			continue;
		/* Dedicated vs. global spare per its spare-assignment
		 * record, if any. */
		sa = ddf_meta_find_sa(&pd->pd_meta, 0);
		if (sa == NULL ||
		    (GET8D(&pd->pd_meta, sa->Spare_Type) &
		     DDF_SAR_TYPE_DEDICATED) == 0) {
			SET16(gmeta, pdr->entry[i].PD_Type,
			    GET16(gmeta, pdr->entry[i].PD_Type) |
			    DDF_PDE_GLOBAL_SPARE);
		} else {
			SET16(gmeta, pdr->entry[i].PD_Type,
			    GET16(gmeta, pdr->entry[i].PD_Type) |
			    DDF_PDE_CONFIG_SPARE);
		}
		SET32(gmeta, pdr->entry[i].PD_State,
		    GET32(gmeta, pdr->entry[i].PD_State) |
		    DDF_PDE_ONLINE);
	}

	/* Remove disks without "participating" flag (unused). */
	for (i = 0, j = -1; i < GET16(gmeta, pdr->Populated_PDEs); i++) {
		if (isff(gmeta->pdr->entry[i].PD_GUID, 24))
			continue;
		if ((GET16(gmeta, pdr->entry[i].PD_Type) &
		    (DDF_PDE_PARTICIPATING |
		     DDF_PDE_GLOBAL_SPARE | DDF_PDE_CONFIG_SPARE)) != 0 ||
		    g_raid_md_ddf_get_disk(sc,
		     NULL, GET32(gmeta, pdr->entry[i].PD_Reference)) != NULL)
			j = i;	/* Remember the last live entry. */
		else
			memset(&gmeta->pdr->entry[i], 0xff,
			    sizeof(struct ddf_pd_entry));
	}
	SET16(gmeta, pdr->Populated_PDEs, j + 1);

	/* Update per-disk metadata and write them. */
	TAILQ_FOREACH(disk, &sc->sc_disks, d_next) {
		pd = (struct g_raid_md_ddf_perdisk *)disk->d_md_data;
		if (disk->d_state != G_RAID_DISK_S_ACTIVE &&
		    disk->d_state != G_RAID_DISK_S_SPARE)
			continue;
		/* Update PDR. */
		memcpy(pd->pd_meta.pdr, gmeta->pdr,
		    GET32(&pd->pd_meta, hdr->pdr_length) *
		    pd->pd_meta.sectorsize);
		/* Update VDR. */
		SET16(&pd->pd_meta, vdr->Populated_VDEs, 0);
		TAILQ_FOREACH(vol, &sc->sc_volumes, v_next) {
			if (vol->v_stopping)
				continue;
			pv = (struct g_raid_md_ddf_pervolume *)vol->v_md_data;
			i = ddf_meta_find_vd(&pd->pd_meta,
			    pv->pv_meta.vde->VD_GUID);
			if (i < 0)
				i = ddf_meta_find_vd(&pd->pd_meta, NULL);
			if (i >= 0)
				memcpy(&pd->pd_meta.vdr->entry[i],
				    pv->pv_meta.vde,
				    sizeof(struct ddf_vd_entry));
		}
		/* Update VDC. */
		if (mdi->mdio_starting == 0) {
			/* Remove all VDCs to restore needed later. */
			j = GETCRNUM(&pd->pd_meta);
			for (i = 0; i < j; i++) {
				vdc = GETVDCPTR(&pd->pd_meta, i);
				if (GET32D(&pd->pd_meta, vdc->Signature) !=
				    DDF_VDCR_SIGNATURE)
					continue;
				SET32D(&pd->pd_meta, vdc->Signature, 0xffffffff);
			}
		}
		/* Restore a VDC for every volume this disk belongs to. */
		TAILQ_FOREACH(sd, &disk->d_subdisks, sd_next) {
			vol = sd->sd_volume;
			if (vol->v_stopping)
				continue;
			pv = (struct g_raid_md_ddf_pervolume *)vol->v_md_data;
			vmeta = &pv->pv_meta;
			vdc = ddf_meta_find_vdc(&pd->pd_meta,
			    vmeta->vde->VD_GUID);
			if (vdc == NULL)
				vdc = ddf_meta_find_vdc(&pd->pd_meta, NULL);
			if (vdc != NULL) {
				bvd = sd->sd_pos / GET16(vmeta,
				    vdc->Primary_Element_Count);
				memcpy(vdc, vmeta->bvdc[bvd],
				    GET16(&pd->pd_meta,
					hdr->Configuration_Record_Length) *
				    pd->pd_meta.sectorsize);
			}
		}
		G_RAID_DEBUG(1, "Writing DDF metadata to %s",
		    g_raid_get_diskname(disk));
		g_raid_md_ddf_print(&pd->pd_meta);
		ddf_meta_write(disk->d_consumer, &pd->pd_meta);
	}
	return (0);
}

/*
 * "fail disk" method: mark an ACTIVE member disk failed, persist that
 * on the disk itself, fail all its subdisks and rewrite metadata on
 * the remaining disks.  Returns 0 on success, -1 if the disk is not
 * currently an array member.
 */
static int
g_raid_md_fail_disk_ddf(struct g_raid_md_object *md,
    struct g_raid_subdisk *tsd, struct g_raid_disk *tdisk)
{
	struct g_raid_softc *sc;
	struct g_raid_md_ddf_perdisk *pd;
	struct g_raid_subdisk *sd;
	int i;

	sc = md->mdo_softc;
	pd = (struct g_raid_md_ddf_perdisk *)tdisk->d_md_data;

	/* We can't fail disk that is not a part of array now. */
	if (tdisk->d_state != G_RAID_DISK_S_ACTIVE)
		return (-1);

	/*
	 * Mark disk as failed in metadata and try to write that metadata
	 * to the disk itself to prevent its later resurrection as STALE.
 */
	G_RAID_DEBUG(1, "Writing DDF metadata to %s",
	    g_raid_get_diskname(tdisk));
	/* NOTE(review): ddf_meta_find_pd() returns a negative value
	 * elsewhere in this file when no entry matches; the result is
	 * used as an index here without a check — confirm a member disk
	 * always has a PD entry. */
	i = ddf_meta_find_pd(&pd->pd_meta, NULL, GET32(&pd->pd_meta, pdd->PD_Reference));
	SET16(&pd->pd_meta, pdr->entry[i].PD_State, DDF_PDE_FAILED | DDF_PDE_PFA);
	if (tdisk->d_consumer != NULL)
		ddf_meta_write(tdisk->d_consumer, &pd->pd_meta);

	/* Change states. */
	g_raid_change_disk_state(tdisk, G_RAID_DISK_S_FAILED);
	TAILQ_FOREACH(sd, &tdisk->d_subdisks, sd_next) {
		g_raid_change_subdisk_state(sd,
		    G_RAID_SUBDISK_S_FAILED);
		g_raid_event_send(sd, G_RAID_SUBDISK_E_FAILED,
		    G_RAID_EVENT_SUBDISK);
	}

	/* Write updated metadata to remaining disks. */
	g_raid_md_write_ddf(md, NULL, NULL, tdisk);

	g_raid_md_ddf_refill(sc);
	return (0);
}

/*
 * Release per-disk metadata state when a disk object is destroyed.
 */
static int
g_raid_md_free_disk_ddf(struct g_raid_md_object *md,
    struct g_raid_disk *disk)
{
	struct g_raid_md_ddf_perdisk *pd;

	pd = (struct g_raid_md_ddf_perdisk *)disk->d_md_data;
	ddf_meta_free(&pd->pd_meta);
	free(pd, M_MD_DDF);
	disk->d_md_data = NULL;
	return (0);
}

/*
 * Release per-volume metadata state when a volume is destroyed.
 * If the volume never started, balance mdio_starting and stop the
 * pending forced-start callout.
 */
static int
g_raid_md_free_volume_ddf(struct g_raid_md_object *md,
    struct g_raid_volume *vol)
{
	struct g_raid_md_ddf_object *mdi;
	struct g_raid_md_ddf_pervolume *pv;

	mdi = (struct g_raid_md_ddf_object *)md;
	pv = (struct g_raid_md_ddf_pervolume *)vol->v_md_data;
	ddf_vol_meta_free(&pv->pv_meta);
	if (!pv->pv_started) {
		pv->pv_started = 1;	/* Guard against double teardown. */
		mdi->mdio_starting--;
		callout_stop(&pv->pv_start_co);
	}
	return (0);
}

/*
 * Release node-global metadata state when the md object is destroyed.
 */
static int
g_raid_md_free_ddf(struct g_raid_md_object *md)
{
	struct g_raid_md_ddf_object *mdi;

	mdi = (struct g_raid_md_ddf_object *)md;
	if (!mdi->mdio_started) {
		/* Node torn down before start completed: cancel the start
		 * timer and release the root-mount hold.  (The assignment
		 * below is redundant — mdio_started is already 0 here.) */
		mdi->mdio_started = 0;
		callout_stop(&mdi->mdio_start_co);
		G_RAID_DEBUG1(1, md->mdo_softc,
		    "root_mount_rel %p", mdi->mdio_rootmount);
		root_mount_rel(mdi->mdio_rootmount);
		mdi->mdio_rootmount = NULL;
	}
	ddf_meta_free(&mdi->mdio_meta);
	return (0);
}

G_RAID_MD_DECLARE(g_raid_md_ddf);