/* $NetBSD: rf_paritymap.c,v 1.11 2023/09/25 21:59:38 oster Exp $ */

/*-
 * Copyright (c) 2009 Jed Davis.
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
27319221Sasomers */ 28319221Sasomers 2965843Sbrian#include <sys/cdefs.h> 3065843Sbrian__KERNEL_RCSID(0, "$NetBSD: rf_paritymap.c,v 1.11 2023/09/25 21:59:38 oster Exp $"); 3165843Sbrian 3265843Sbrian#include <sys/param.h> 3361981Sbrian#include <sys/callout.h> 3465843Sbrian#include <sys/kmem.h> 3565843Sbrian#include <sys/mutex.h> 3665843Sbrian#include <sys/rwlock.h> 3761981Sbrian#include <sys/systm.h> 3861981Sbrian#include <sys/types.h> 3965843Sbrian 4065843Sbrian#include <dev/raidframe/rf_paritymap.h> 41#include <dev/raidframe/rf_stripelocks.h> 42#include <dev/raidframe/rf_layout.h> 43#include <dev/raidframe/rf_raid.h> 44#include <dev/raidframe/rf_parityscan.h> 45#include <dev/raidframe/rf_kintf.h> 46 47/* Important parameters: */ 48#define REGION_MINSIZE (25ULL << 20) 49#define DFL_TICKMS 40000 50#define DFL_COOLDOWN 8 /* 7-8 intervals of 40s = 5min +/- 20s */ 51 52/* Internal-use flag bits. */ 53#define TICKING 1 54#define TICKED 2 55 56/* Prototypes! */ 57static void rf_paritymap_write_locked(struct rf_paritymap *); 58static void rf_paritymap_tick(void *); 59static u_int rf_paritymap_nreg(RF_Raid_t *); 60 61/* Extract the current status of the parity map. */ 62void 63rf_paritymap_status(struct rf_paritymap *pm, struct rf_pmstat *ps) 64{ 65 memset(ps, 0, sizeof(*ps)); 66 if (pm == NULL) 67 ps->enabled = 0; 68 else { 69 ps->enabled = 1; 70 ps->region_size = pm->region_size; 71 mutex_enter(&pm->lock); 72 memcpy(&ps->params, &pm->params, sizeof(ps->params)); 73 memcpy(ps->dirty, pm->disk_now, sizeof(ps->dirty)); 74 memcpy(&ps->ctrs, &pm->ctrs, sizeof(ps->ctrs)); 75 mutex_exit(&pm->lock); 76 } 77} 78 79/* 80 * Test whether parity in a given sector is suspected of being inconsistent 81 * on disk (assuming that any pending I/O to it is allowed to complete). 82 * This may be of interest to future work on parity scrubbing. 
83 */ 84int 85rf_paritymap_test(struct rf_paritymap *pm, daddr_t sector) 86{ 87 unsigned region = sector / pm->region_size; 88 int retval; 89 90 mutex_enter(&pm->lock); 91 retval = isset(pm->disk_boot->bits, region) ? 1 : 0; 92 mutex_exit(&pm->lock); 93 return retval; 94} 95 96/* To be called before a write to the RAID is submitted. */ 97void 98rf_paritymap_begin(struct rf_paritymap *pm, daddr_t offset, daddr_t size) 99{ 100 unsigned i, b, e; 101 102 b = offset / pm->region_size; 103 e = (offset + size - 1) / pm->region_size; 104 105 for (i = b; i <= e; i++) 106 rf_paritymap_begin_region(pm, i); 107} 108 109/* To be called after a write to the RAID completes. */ 110void 111rf_paritymap_end(struct rf_paritymap *pm, daddr_t offset, daddr_t size) 112{ 113 unsigned i, b, e; 114 115 b = offset / pm->region_size; 116 e = (offset + size - 1) / pm->region_size; 117 118 for (i = b; i <= e; i++) 119 rf_paritymap_end_region(pm, i); 120} 121 122void 123rf_paritymap_begin_region(struct rf_paritymap *pm, unsigned region) 124{ 125 int needs_write; 126 127 KASSERT(region < RF_PARITYMAP_NREG); 128 pm->ctrs.nwrite++; 129 130 /* If it was being kept warm, deal with that. */ 131 mutex_enter(&pm->lock); 132 if (pm->current->state[region] < 0) 133 pm->current->state[region] = 0; 134 135 /* This shouldn't happen unless RAIDOUTSTANDING is set too high. 
*/ 136 KASSERT(pm->current->state[region] < 127); 137 pm->current->state[region]++; 138 139 needs_write = isclr(pm->disk_now->bits, region); 140 141 if (needs_write) { 142 KASSERT(pm->current->state[region] == 1); 143 rf_paritymap_write_locked(pm); 144 } 145 146 mutex_exit(&pm->lock); 147} 148 149void 150rf_paritymap_end_region(struct rf_paritymap *pm, unsigned region) 151{ 152 KASSERT(region < RF_PARITYMAP_NREG); 153 154 mutex_enter(&pm->lock); 155 KASSERT(pm->current->state[region] > 0); 156 --pm->current->state[region]; 157 158 if (pm->current->state[region] <= 0) { 159 pm->current->state[region] = -pm->params.cooldown; 160 KASSERT(pm->current->state[region] <= 0); 161 mutex_enter(&pm->lk_flags); 162 if (!(pm->flags & TICKING)) { 163 pm->flags |= TICKING; 164 mutex_exit(&pm->lk_flags); 165 callout_schedule(&pm->ticker, 166 mstohz(pm->params.tickms)); 167 } else 168 mutex_exit(&pm->lk_flags); 169 } 170 mutex_exit(&pm->lock); 171} 172 173/* 174 * Updates the parity map to account for any changes in current activity 175 * and/or an ongoing parity scan, then writes it to disk with appropriate 176 * synchronization. 177 */ 178void 179rf_paritymap_write(struct rf_paritymap *pm) 180{ 181 mutex_enter(&pm->lock); 182 rf_paritymap_write_locked(pm); 183 mutex_exit(&pm->lock); 184} 185 186/* As above, but to be used when pm->lock is already held. 
*/ 187static void 188rf_paritymap_write_locked(struct rf_paritymap *pm) 189{ 190 char w, w0; 191 int i, j, setting, clearing; 192 193 setting = clearing = 0; 194 for (i = 0; i < RF_PARITYMAP_NBYTE; i++) { 195 w0 = pm->disk_now->bits[i]; 196 w = pm->disk_boot->bits[i]; 197 198 for (j = 0; j < NBBY; j++) 199 if (pm->current->state[i * NBBY + j] != 0) 200 w |= 1 << j; 201 202 if (w & ~w0) 203 setting = 1; 204 if (w0 & ~w) 205 clearing = 1; 206 207 pm->disk_now->bits[i] = w; 208 } 209 pm->ctrs.ncachesync += setting + clearing; 210 pm->ctrs.nclearing += clearing; 211 212 /* 213 * If bits are being set in the parity map, then a sync is 214 * required afterwards, so that the regions are marked dirty 215 * on disk before any writes to them take place. If bits are 216 * being cleared, then a sync is required before the write, so 217 * that any writes to those regions are processed before the 218 * region is marked clean. (Synchronization is somewhat 219 * overkill; a write ordering barrier would suffice, but we 220 * currently have no way to express that directly.) 221 */ 222 if (clearing) 223 rf_sync_component_caches(pm->raid, 1); 224 rf_paritymap_kern_write(pm->raid, pm->disk_now); 225 if (setting) 226 rf_sync_component_caches(pm->raid, 1); 227} 228 229/* Mark all parity as being in need of rewrite. */ 230void 231rf_paritymap_invalidate(struct rf_paritymap *pm) 232{ 233 mutex_enter(&pm->lock); 234 memset(pm->disk_boot, (unsigned char)~0, sizeof(*pm->disk_boot)); 235 mutex_exit(&pm->lock); 236} 237 238/* Mark all parity as being correct. */ 239void 240rf_paritymap_forceclean(struct rf_paritymap *pm) 241{ 242 mutex_enter(&pm->lock); 243 memset(pm->disk_boot, 0, sizeof(*pm->disk_boot)); 244 mutex_exit(&pm->lock); 245} 246 247/* 248 * The cooldown callout routine just defers its work to a thread; it can't do 249 * the parity map write itself as it would block, and although mutex-induced 250 * blocking is permitted it seems wise to avoid tying up the softint. 
251 */ 252static void 253rf_paritymap_tick(void *arg) 254{ 255 struct rf_paritymap *pm = arg; 256 257 mutex_enter(&pm->lk_flags); 258 pm->flags |= TICKED; 259 mutex_exit(&pm->lk_flags); 260 261 rf_lock_mutex2(pm->raid->iodone_lock); 262 rf_signal_cond2(pm->raid->iodone_cv); /* XXX */ 263 rf_unlock_mutex2(pm->raid->iodone_lock); 264} 265 266/* 267 * This is where the parity cooling work (and rearming the callout if needed) 268 * is done; the raidio thread calls it when woken up, as by the above. 269 */ 270void 271rf_paritymap_checkwork(struct rf_paritymap *pm) 272{ 273 int i, zerop, progressp; 274 275 mutex_enter(&pm->lk_flags); 276 if (pm->flags & TICKED) { 277 zerop = progressp = 0; 278 279 pm->flags &= ~TICKED; 280 mutex_exit(&pm->lk_flags); 281 282 mutex_enter(&pm->lock); 283 for (i = 0; i < RF_PARITYMAP_NREG; i++) { 284 if (pm->current->state[i] < 0) { 285 progressp = 1; 286 pm->current->state[i]++; 287 if (pm->current->state[i] == 0) 288 zerop = 1; 289 } 290 } 291 292 if (progressp) 293 callout_schedule(&pm->ticker, 294 mstohz(pm->params.tickms)); 295 else { 296 mutex_enter(&pm->lk_flags); 297 pm->flags &= ~TICKING; 298 mutex_exit(&pm->lk_flags); 299 } 300 301 if (zerop) 302 rf_paritymap_write_locked(pm); 303 mutex_exit(&pm->lock); 304 } else 305 mutex_exit(&pm->lk_flags); 306} 307 308/* 309 * Set parity map parameters; used both to alter parameters on the fly and to 310 * establish their initial values. Note that setting a parameter to 0 means 311 * to leave the previous setting unchanged, and that if this is done for the 312 * initial setting of "regions", then a default value will be computed based 313 * on the RAID component size. 314 */ 315int 316rf_paritymap_set_params(struct rf_paritymap *pm, 317 const struct rf_pmparams *params, int todisk) 318{ 319 int cooldown, tickms; 320 u_int regions; 321 RF_RowCol_t col; 322 RF_ComponentLabel_t *clabel; 323 RF_Raid_t *raidPtr; 324 325 cooldown = params->cooldown != 0 326 ? 
params->cooldown : pm->params.cooldown; 327 tickms = params->tickms != 0 328 ? params->tickms : pm->params.tickms; 329 regions = params->regions != 0 330 ? params->regions : pm->params.regions; 331 332 if (cooldown < 1 || cooldown > 128) { 333 printf("raid%d: cooldown %d out of range\n", pm->raid->raidid, 334 cooldown); 335 return (-1); 336 } 337 if (tickms < 10) { 338 printf("raid%d: tick time %dms out of range\n", 339 pm->raid->raidid, tickms); 340 return (-1); 341 } 342 if (regions == 0) { 343 regions = rf_paritymap_nreg(pm->raid); 344 } else if (regions > RF_PARITYMAP_NREG) { 345 printf("raid%d: region count %u too large (more than %u)\n", 346 pm->raid->raidid, regions, RF_PARITYMAP_NREG); 347 return (-1); 348 } 349 350 /* XXX any currently warm parity will be used with the new tickms! */ 351 pm->params.cooldown = cooldown; 352 pm->params.tickms = tickms; 353 /* Apply the initial region count, but do not change it after that. */ 354 if (pm->params.regions == 0) 355 pm->params.regions = regions; 356 357 /* So that the newly set parameters can be tested: */ 358 pm->ctrs.nwrite = pm->ctrs.ncachesync = pm->ctrs.nclearing = 0; 359 360 if (todisk) { 361 raidPtr = pm->raid; 362 for (col = 0; col < raidPtr->numCol; col++) { 363 if (RF_DEAD_DISK(raidPtr->Disks[col].status)) 364 continue; 365 366 clabel = raidget_component_label(raidPtr, col); 367 clabel->parity_map_ntick = cooldown; 368 clabel->parity_map_tickms = tickms; 369 clabel->parity_map_regions = regions; 370 371 /* Don't touch the disk if it's been spared */ 372 if (clabel->status == rf_ds_spared) 373 continue; 374 375 raidflush_component_label(raidPtr, col); 376 } 377 378 /* handle the spares too... 
*/ 379 for (col = 0; col < raidPtr->numSpare; col++) { 380 if (raidPtr->Disks[raidPtr->numCol+col].status == rf_ds_used_spare) { 381 clabel = raidget_component_label(raidPtr, raidPtr->numCol+col); 382 clabel->parity_map_ntick = cooldown; 383 clabel->parity_map_tickms = tickms; 384 clabel->parity_map_regions = regions; 385 raidflush_component_label(raidPtr, raidPtr->numCol+col); 386 } 387 } 388 } 389 return 0; 390} 391 392/* 393 * The number of regions may not be as many as can fit into the map, because 394 * when regions are too small, the overhead of setting parity map bits 395 * becomes significant in comparison to the actual I/O, while the 396 * corresponding gains in parity verification time become negligible. Thus, 397 * a minimum region size (defined above) is imposed. 398 * 399 * Note that, if the number of regions is less than the maximum, then some of 400 * the regions will be "fictional", corresponding to no actual disk; some 401 * parts of the code may process them as normal, but they can not ever be 402 * written to. 403 */ 404static u_int 405rf_paritymap_nreg(RF_Raid_t *raid) 406{ 407 daddr_t bytes_per_disk, nreg; 408 409 bytes_per_disk = raid->sectorsPerDisk << raid->logBytesPerSector; 410 nreg = bytes_per_disk / REGION_MINSIZE; 411 if (nreg > RF_PARITYMAP_NREG) 412 nreg = RF_PARITYMAP_NREG; 413 if (nreg < 1) 414 nreg = 1; 415 416 return (u_int)nreg; 417} 418 419/* 420 * Initialize a parity map given specific parameters. This neither reads nor 421 * writes the parity map config in the component labels; for that, see below. 
422 */ 423int 424rf_paritymap_init(struct rf_paritymap *pm, RF_Raid_t *raid, 425 const struct rf_pmparams *params) 426{ 427 daddr_t rstripes; 428 struct rf_pmparams safe; 429 430 pm->raid = raid; 431 pm->params.regions = 0; 432 if (0 != rf_paritymap_set_params(pm, params, 0)) { 433 /* 434 * If the parameters are out-of-range, then bring the 435 * parity map up with something reasonable, so that 436 * the admin can at least go and fix it (or ignore it 437 * entirely). 438 */ 439 safe.cooldown = DFL_COOLDOWN; 440 safe.tickms = DFL_TICKMS; 441 safe.regions = 0; 442 443 if (0 != rf_paritymap_set_params(pm, &safe, 0)) 444 return (-1); 445 } 446 447 rstripes = howmany(raid->Layout.numStripe, pm->params.regions); 448 pm->region_size = rstripes * raid->Layout.dataSectorsPerStripe; 449 450 callout_init(&pm->ticker, CALLOUT_MPSAFE); 451 callout_setfunc(&pm->ticker, rf_paritymap_tick, pm); 452 pm->flags = 0; 453 454 pm->disk_boot = kmem_alloc(sizeof(struct rf_paritymap_ondisk), 455 KM_SLEEP); 456 pm->disk_now = kmem_alloc(sizeof(struct rf_paritymap_ondisk), 457 KM_SLEEP); 458 pm->current = kmem_zalloc(sizeof(struct rf_paritymap_current), 459 KM_SLEEP); 460 461 rf_paritymap_kern_read(pm->raid, pm->disk_boot); 462 memcpy(pm->disk_now, pm->disk_boot, sizeof(*pm->disk_now)); 463 464 mutex_init(&pm->lock, MUTEX_DEFAULT, IPL_NONE); 465 mutex_init(&pm->lk_flags, MUTEX_DEFAULT, IPL_SOFTCLOCK); 466 467 return 0; 468} 469 470/* 471 * Destroys a parity map; unless "force" is set, also cleans parity for any 472 * regions which were still in cooldown (but are not dirty on disk). 473 */ 474void 475rf_paritymap_destroy(struct rf_paritymap *pm, int force) 476{ 477 int i; 478 479 callout_halt(&pm->ticker, NULL); /* XXX stop? halt? */ 480 callout_destroy(&pm->ticker); 481 482 if (!force) { 483 for (i = 0; i < RF_PARITYMAP_NREG; i++) { 484 /* XXX check for > 0 ? 
*/ 485 if (pm->current->state[i] < 0) 486 pm->current->state[i] = 0; 487 } 488 489 rf_paritymap_write_locked(pm); 490 } 491 492 mutex_destroy(&pm->lock); 493 mutex_destroy(&pm->lk_flags); 494 495 kmem_free(pm->disk_boot, sizeof(struct rf_paritymap_ondisk)); 496 kmem_free(pm->disk_now, sizeof(struct rf_paritymap_ondisk)); 497 kmem_free(pm->current, sizeof(struct rf_paritymap_current)); 498} 499 500/* 501 * Rewrite parity, taking parity map into account; this is the equivalent of 502 * the old rf_RewriteParity, and is likewise to be called from a suitable 503 * thread and shouldn't have multiple copies running in parallel and so on. 504 * 505 * Note that the fictional regions are "cleaned" in one shot, so that very 506 * small RAIDs (useful for testing) will not experience potentially severe 507 * regressions in rewrite time. 508 */ 509int 510rf_paritymap_rewrite(struct rf_paritymap *pm) 511{ 512 int i, ret_val = 0; 513 daddr_t reg_b, reg_e; 514 515 /* Process only the actual regions. */ 516 for (i = 0; i < pm->params.regions; i++) { 517 mutex_enter(&pm->lock); 518 if (isset(pm->disk_boot->bits, i)) { 519 mutex_exit(&pm->lock); 520 521 reg_b = i * pm->region_size; 522 reg_e = reg_b + pm->region_size; 523 if (reg_e > pm->raid->totalSectors) 524 reg_e = pm->raid->totalSectors; 525 526 if (rf_RewriteParityRange(pm->raid, reg_b, 527 reg_e - reg_b)) { 528 ret_val = 1; 529 if (pm->raid->waitShutdown) 530 return ret_val; 531 } else { 532 mutex_enter(&pm->lock); 533 clrbit(pm->disk_boot->bits, i); 534 rf_paritymap_write_locked(pm); 535 mutex_exit(&pm->lock); 536 } 537 } else { 538 mutex_exit(&pm->lock); 539 } 540 } 541 542 /* Now, clear the fictional regions, if any. */ 543 rf_paritymap_forceclean(pm); 544 rf_paritymap_write(pm); 545 546 return ret_val; 547} 548 549/* 550 * How to merge the on-disk parity maps when reading them in from the 551 * various components; returns whether they differ. In the case that 552 * they do differ, sets *dst to the union of *dst and *src. 
553 * 554 * In theory, it should be safe to take the intersection (or just pick 555 * a single component arbitrarily), but the paranoid approach costs 556 * little. 557 * 558 * Appropriate locking, if any, is the responsibility of the caller. 559 */ 560int 561rf_paritymap_merge(struct rf_paritymap_ondisk *dst, 562 struct rf_paritymap_ondisk *src) 563{ 564 int i, discrep = 0; 565 566 for (i = 0; i < RF_PARITYMAP_NBYTE; i++) { 567 if (dst->bits[i] != src->bits[i]) 568 discrep = 1; 569 dst->bits[i] |= src->bits[i]; 570 } 571 572 return discrep; 573} 574 575/* 576 * Detach a parity map from its RAID. This is not meant to be applied except 577 * when unconfiguring the RAID after all I/O has been resolved, as otherwise 578 * an out-of-date parity map could be treated as current. 579 */ 580void 581rf_paritymap_detach(RF_Raid_t *raidPtr) 582{ 583 if (raidPtr->parity_map == NULL) 584 return; 585 586 rf_lock_mutex2(raidPtr->iodone_lock); 587 struct rf_paritymap *pm = raidPtr->parity_map; 588 raidPtr->parity_map = NULL; 589 rf_unlock_mutex2(raidPtr->iodone_lock); 590 /* XXXjld is that enough locking? Or too much? */ 591 rf_paritymap_destroy(pm, 0); 592 kmem_free(pm, sizeof(*pm)); 593} 594 595/* 596 * Is this RAID set ineligible for parity-map use due to not actually 597 * having any parity? (If so, rf_paritymap_attach is a no-op, but 598 * rf_paritymap_{get,set}_disable will still pointlessly act on the 599 * component labels.) 600 */ 601int 602rf_paritymap_ineligible(RF_Raid_t *raidPtr) 603{ 604 return raidPtr->Layout.map->faultsTolerated == 0; 605} 606 607/* 608 * Attach a parity map to a RAID set if appropriate. Includes 609 * configure-time processing of parity-map fields of component label. 
610 */ 611void 612rf_paritymap_attach(RF_Raid_t *raidPtr, int force) 613{ 614 RF_RowCol_t col; 615 int pm_use, pm_zap; 616 int g_tickms, g_ntick, g_regions; 617 int good; 618 RF_ComponentLabel_t *clabel; 619 u_int flags, regions; 620 struct rf_pmparams params; 621 622 if (rf_paritymap_ineligible(raidPtr)) { 623 /* There isn't any parity. */ 624 return; 625 } 626 627 pm_use = 1; 628 pm_zap = 0; 629 g_tickms = DFL_TICKMS; 630 g_ntick = DFL_COOLDOWN; 631 g_regions = 0; 632 633 /* 634 * Collect opinions on the set config. If this is the initial 635 * config (raidctl -C), treat all labels as invalid, since 636 * there may be random data present. 637 */ 638 if (!force) { 639 for (col = 0; col < raidPtr->numCol; col++) { 640 if (RF_DEAD_DISK(raidPtr->Disks[col].status)) 641 continue; 642 clabel = raidget_component_label(raidPtr, col); 643 flags = clabel->parity_map_flags; 644 /* Check for use by non-parity-map kernel. */ 645 if (clabel->parity_map_modcount 646 != clabel->mod_counter) { 647 flags &= ~RF_PMLABEL_WASUSED; 648 } 649 650 if (flags & RF_PMLABEL_VALID) { 651 g_tickms = clabel->parity_map_tickms; 652 g_ntick = clabel->parity_map_ntick; 653 regions = clabel->parity_map_regions; 654 if (g_regions == 0) 655 g_regions = regions; 656 else if (g_regions != regions) { 657 pm_zap = 1; /* important! */ 658 } 659 660 if (flags & RF_PMLABEL_DISABLE) { 661 pm_use = 0; 662 } 663 if (!(flags & RF_PMLABEL_WASUSED)) { 664 pm_zap = 1; 665 } 666 } else { 667 pm_zap = 1; 668 } 669 } 670 } else { 671 pm_zap = 1; 672 } 673 674 /* Finally, create and attach the parity map. */ 675 if (pm_use) { 676 params.cooldown = g_ntick; 677 params.tickms = g_tickms; 678 params.regions = g_regions; 679 680 raidPtr->parity_map = kmem_alloc(sizeof(struct rf_paritymap), 681 KM_SLEEP); 682 if (0 != rf_paritymap_init(raidPtr->parity_map, raidPtr, 683 ¶ms)) { 684 /* It failed; do without. 
*/ 685 kmem_free(raidPtr->parity_map, 686 sizeof(struct rf_paritymap)); 687 raidPtr->parity_map = NULL; 688 return; 689 } 690 691 if (g_regions == 0) 692 /* Pick up the autoconfigured region count. */ 693 g_regions = raidPtr->parity_map->params.regions; 694 695 if (pm_zap) { 696 good = raidPtr->parity_good && !force; 697 698 if (good) 699 rf_paritymap_forceclean(raidPtr->parity_map); 700 else 701 rf_paritymap_invalidate(raidPtr->parity_map); 702 /* This needs to be on disk before WASUSED is set. */ 703 rf_paritymap_write(raidPtr->parity_map); 704 } 705 } 706 707 /* Alter labels in-core to reflect the current view of things. */ 708 for (col = 0; col < raidPtr->numCol; col++) { 709 if (RF_DEAD_DISK(raidPtr->Disks[col].status)) 710 continue; 711 clabel = raidget_component_label(raidPtr, col); 712 713 if (pm_use) 714 flags = RF_PMLABEL_VALID | RF_PMLABEL_WASUSED; 715 else 716 flags = RF_PMLABEL_VALID | RF_PMLABEL_DISABLE; 717 718 clabel->parity_map_flags = flags; 719 clabel->parity_map_tickms = g_tickms; 720 clabel->parity_map_ntick = g_ntick; 721 clabel->parity_map_regions = g_regions; 722 raidflush_component_label(raidPtr, col); 723 } 724 /* Note that we're just in 'attach' here, and there won't 725 be any spare disks at this point. */ 726} 727 728/* 729 * For initializing the parity-map fields of a component label, both on 730 * initial creation and on reconstruct. */ 731void 732rf_paritymap_init_label(struct rf_paritymap *pm, RF_ComponentLabel_t *clabel) 733{ 734 if (pm != NULL) { 735 clabel->parity_map_flags = 736 RF_PMLABEL_VALID | RF_PMLABEL_WASUSED; 737 clabel->parity_map_tickms = pm->params.tickms; 738 clabel->parity_map_ntick = pm->params.cooldown; 739 /* 740 * XXXjld: If the number of regions is changed on disk, and 741 * then a new component is labeled before the next configure, 742 * then it will get the old value and they will conflict on 743 * the next boot (and the default will be used instead). 
744 */ 745 clabel->parity_map_regions = pm->params.regions; 746 } else { 747 /* 748 * XXXjld: if the map is disabled, and all the components are 749 * replaced without an intervening unconfigure/reconfigure, 750 * then it will become enabled on the next unconfig/reconfig. 751 */ 752 } 753} 754 755 756/* Will the parity map be disabled next time? */ 757int 758rf_paritymap_get_disable(RF_Raid_t *raidPtr) 759{ 760 RF_ComponentLabel_t *clabel; 761 RF_RowCol_t col; 762 int dis; 763 764 dis = 0; 765 for (col = 0; col < raidPtr->numCol; col++) { 766 if (RF_DEAD_DISK(raidPtr->Disks[col].status)) 767 continue; 768 clabel = raidget_component_label(raidPtr, col); 769 if (clabel->parity_map_flags & RF_PMLABEL_DISABLE) 770 dis = 1; 771 } 772 for (col = 0; col < raidPtr->numSpare; col++) { 773 if (raidPtr->Disks[raidPtr->numCol+col].status != rf_ds_used_spare) 774 continue; 775 clabel = raidget_component_label(raidPtr, raidPtr->numCol+col); 776 if (clabel->parity_map_flags & RF_PMLABEL_DISABLE) 777 dis = 1; 778 } 779 780 return dis; 781} 782 783/* Set whether the parity map will be disabled next time. 
*/ 784void 785rf_paritymap_set_disable(RF_Raid_t *raidPtr, int dis) 786{ 787 RF_ComponentLabel_t *clabel; 788 RF_RowCol_t col; 789 790 for (col = 0; col < raidPtr->numCol; col++) { 791 if (RF_DEAD_DISK(raidPtr->Disks[col].status)) 792 continue; 793 clabel = raidget_component_label(raidPtr, col); 794 if (dis) 795 clabel->parity_map_flags |= RF_PMLABEL_DISABLE; 796 else 797 clabel->parity_map_flags &= ~RF_PMLABEL_DISABLE; 798 raidflush_component_label(raidPtr, col); 799 } 800 801 /* update any used spares as well */ 802 for (col = 0; col < raidPtr->numSpare; col++) { 803 if (raidPtr->Disks[raidPtr->numCol+col].status != rf_ds_used_spare) 804 continue; 805 806 clabel = raidget_component_label(raidPtr, raidPtr->numCol+col); 807 if (dis) 808 clabel->parity_map_flags |= RF_PMLABEL_DISABLE; 809 else 810 clabel->parity_map_flags &= ~RF_PMLABEL_DISABLE; 811 raidflush_component_label(raidPtr, raidPtr->numCol+col); 812 } 813} 814