1/* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License, Version 1.0 only 6 * (the "License"). You may not use this file except in compliance 7 * with the License. 8 * 9 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 10 * or http://www.opensolaris.org/os/licensing. 11 * See the License for the specific language governing permissions 12 * and limitations under the License. 13 * 14 * When distributing Covered Code, include this CDDL HEADER in each 15 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 16 * If applicable, add the following below this CDDL HEADER, with the 17 * fields enclosed by brackets "[]" replaced with your own identifying 18 * information: Portions Copyright [yyyy] [name of copyright owner] 19 * 20 * CDDL HEADER END 21 */ 22/* 23 * Copyright 2005 Sun Microsystems, Inc. All rights reserved. 24 * Use is subject to license terms. 25 */ 26 27#pragma ident "%Z%%M% %I% %E% SMI" 28 29/* 30 * NAME: raid_resync.c 31 * DESCRIPTION: RAID driver source file containing routines related to resync 32 * operation. 
 * ROUTINES PROVIDED FOR EXTERNAL USE:
 *	resync_request() - get resync lock if available
 *	release_resync_request() - relinquish resync lock
 *	erred_check_line() - provide write instruction for erred column
 *	init_pw_area() - initialize pre-write area
 *	copy_pw_area() - copy pre-write area from one device to another
 */

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/conf.h>
#include <sys/file.h>
#include <sys/user.h>
#include <sys/uio.h>
#include <sys/t_lock.h>
#include <sys/buf.h>
#include <sys/dkio.h>
#include <sys/vtoc.h>
#include <sys/kmem.h>
#include <vm/page.h>
#include <sys/sysmacros.h>
#include <sys/types.h>
#include <sys/mkdev.h>
#include <sys/stat.h>
#include <sys/open.h>
#include <sys/disp.h>
#include <sys/modctl.h>
#include <sys/ddi.h>
#include <sys/sunddi.h>
#include <sys/lvm/md_raid.h>

#include <sys/sysevent/eventdefs.h>
#include <sys/sysevent/svm.h>

/* sentinel: no column currently selected for resync (un_resync_index) */
#define	NOCOLUMN	(-1)

extern md_set_t md_set[];
extern kmem_cache_t *raid_child_cache;
extern kmem_cache_t *raid_parent_cache;
extern md_resync_t md_cpr_resync;
extern major_t md_major;
extern void raid_parent_init(md_raidps_t *ps);
extern void raid_child_init(md_raidcs_t *ps);

/*
 * NAME:	xor
 * DESCRIPTION: Xor two chunks of data together.  The data referenced by
 *		addr1 and addr2 are xor'd together for size bytes and the
 *		result is written into addr1 (addr1 ^= addr2, byte-wise).
 * PARAMETERS:	caddr_t addr1 - address of first chunk of data and destination
 *		caddr_t addr2 - address of second chunk of data
 *		size_t size - number of bytes to xor
 */
static void
xor(caddr_t addr1, caddr_t addr2, size_t size)
{
	while (size--) {
		*addr1++ ^= *addr2++;
	}
}

/*
 * NAME: release_resync_request
 *
 * DESCRIPTION: Release resync active flag and reset unit values accordingly.
 *
 * PARAMETERS: minor_t mnum - minor number identity of metadevice
 *
 * LOCKS: Expects Unit Writer Lock to be held across call.
 */
void
release_resync_request(
	minor_t		mnum
)
{
	mr_unit_t	*un;

	un = MD_UNIT(mnum);
	ASSERT(un != NULL);

	/* unit no longer has an active resync */
	un->c.un_status &= ~MD_UN_RESYNC_ACTIVE;

	/* clear every resync-related flag on the column that was resyncing */
	un->un_column[un->un_resync_index].un_devflags &= ~MD_RAID_RESYNC;
	un->un_column[un->un_resync_index].un_devflags &= ~MD_RAID_RESYNC_ERRED;
	un->un_column[un->un_resync_index].un_devflags &=
	    ~(MD_RAID_COPY_RESYNC | MD_RAID_REGEN_RESYNC);

	/* reset resync progress and deselect the column */
	un->un_resync_line_index = 0;
	un->un_resync_index = NOCOLUMN;
}

/*
 * NAME: resync_request
 *
 * DESCRIPTION: Request resync. If resync is available (no current active
 *		resync), mark unit as resync active and initialize.
 *
 * PARAMETERS: minor_t mnum - minor number identity of metadevice
 *		int column_index - index of column to resync
 *		size_t copysize - copysize of ioctl request (unused here --
 *			see ARGSUSED; the actual copy size is computed in
 *			resync_comp())
 *		md_error_t *mde - error output parameter (may be NULL)
 *
 * RETURN: 0 if resync is available, 1 otherwise (MDE_GROW_DELAYED set in
 *		*mde when mde is non-NULL).
 *
 * LOCKS: Expects Unit Writer Lock to be held across call.
 */
/* ARGSUSED */
int
resync_request(
	minor_t		mnum,
	int		column_index,
	size_t		copysize,
	md_error_t	*mde
)
{
	mr_unit_t	*un;

	un = MD_UNIT(mnum);
	ASSERT(un != NULL);

	/* if resync or grow not already active, set resync active for unit */
	if (!
	    (un->un_column[column_index].un_devflags & MD_RAID_RESYNC) &&
	    ((un->c.un_status & MD_UN_RESYNC_ACTIVE) ||
	    (un->c.un_status & MD_UN_GROW_PENDING) ||
	    (un->un_column[column_index].un_devstate & RCS_RESYNC))) {
		if (mde)
			return (mdmderror(mde, MDE_GROW_DELAYED, mnum));
		return (1);
	}

	/* record whether the column was in error before resync started */
	if (un->un_column[column_index].un_devstate &
	    (RCS_ERRED | RCS_LAST_ERRED))
		un->un_column[column_index].un_devflags |= MD_RAID_DEV_ERRED;
	else
		un->un_column[column_index].un_devflags &= ~MD_RAID_DEV_ERRED;
	un->c.un_status |= MD_UN_RESYNC_ACTIVE;
	un->un_resync_index = column_index;
	un->un_resync_line_index = 0;
	raid_set_state(un, column_index, RCS_RESYNC, 0);

	return (0);
}

/*
 * NAME: alloc_bufs
 *
 * DESCRIPTION: Allocate the resync data and parity buffers on the child
 *		save structure (zeroed, sleeping allocation).
 *
 * PARAMETERS: md_raidcs_t *cs - child save struct to hang the buffers off
 *		size_t bsize - size of each buffer in bytes
 */
static void
alloc_bufs(md_raidcs_t *cs, size_t bsize)
{
	/* allocate buffers, write uses the read_buf1 buffer */
	cs->cs_dbuffer = kmem_zalloc(bsize, KM_SLEEP);
	cs->cs_pbuffer = kmem_zalloc(bsize, KM_SLEEP);
}

/*
 * NAME: init_buf
 *
 * DESCRIPTION: Zero a buf structure and initialize its flags, size and
 *		semaphores so it can be used for raw strategy I/O.
 *
 * PARAMETERS: buf_t *bp - buf to initialize
 *		int flags - initial b_flags (e.g. B_BUSY | B_WRITE)
 *		size_t size - value for b_bufsize
 */
void
init_buf(buf_t *bp, int flags, size_t size)
{
	/* zero buf */
	bzero((caddr_t)bp, sizeof (buf_t));

	/* set b_back and b_forw to point back to buf */
	bp->b_back = bp;
	bp->b_forw = bp;

	/* set flags size */
	bp->b_flags = flags;
	bp->b_bufsize = size;
	bp->b_offset = -1;

	/* setup semaphores */
	sema_init(&bp->b_io, 0, NULL, SEMA_DEFAULT, NULL);
	sema_init(&bp->b_sem, 0, NULL, SEMA_DEFAULT, NULL);
}

/* destroy_buf: tear down the semaphores created by init_buf() */
void
destroy_buf(buf_t *bp)
{
	sema_destroy(&bp->b_io);
	sema_destroy(&bp->b_sem);
}

/* reset_buf: re-initialize a buf for reuse (full destroy then init) */
void
reset_buf(buf_t *bp, int flags, size_t size)
{
	destroy_buf(bp);
	init_buf(bp, flags, size);
}

/*
 * NAME: free_bufs
 *
 * DESCRIPTION: Free the resync buffers allocated by alloc_bufs().
 *
 * PARAMETERS: size_t bsize - size of each buffer in bytes
 *		md_raidcs_t *cs - child save struct holding the buffers
 */
static void
free_bufs(size_t bsize, md_raidcs_t *cs)
{
	kmem_free(cs->cs_dbuffer, bsize);
	kmem_free(cs->cs_pbuffer, bsize);
}

/*
 * NAME: init_pw_area
 *
 * DESCRIPTION: Initialize pre-write area to all zeros.
 *
 * PARAMETERS: mr_unit_t *un - raid unit structure
 *		md_dev64_t dev_to_write - device holding the pre-write area
 *		diskaddr_t pwstart - start block of the pre-write area
 *		uint_t col - index of column being resynced
 *
 * RETURN: 1 if write error on resync device, otherwise 0
 *
 * LOCKS: Expects Unit Reader Lock to be held across call.
 */
int
init_pw_area(
	mr_unit_t	*un,
	md_dev64_t	dev_to_write,
	diskaddr_t	pwstart,
	uint_t		col
)
{
	buf_t		buf;
	caddr_t		databuffer;
	size_t		copysize;
	size_t		bsize;
	int		error = 0;
	int		i;

	ASSERT(un != NULL);
	ASSERT(un->un_column[col].un_devflags & MD_RAID_DEV_ISOPEN);

	bsize = un->un_iosize;
	copysize = dbtob(bsize);
	databuffer = kmem_zalloc(copysize, KM_SLEEP);
	init_buf(&buf, (B_BUSY | B_WRITE), copysize);

	/* write one zeroed pre-write header per pre-write slot */
	for (i = 0; i < un->un_pwcnt; i++) {
		/* magic field is 0 for 4.0 compatibility */
		RAID_FILLIN_RPW(databuffer, un, 0, 0,
		    0, 0, 0,
		    0, col, 0);
		buf.b_un.b_addr = (caddr_t)databuffer;
		buf.b_edev = md_dev64_to_dev(dev_to_write);
		buf.b_bcount = dbtob(bsize);
		buf.b_lblkno = pwstart + (i * un->un_iosize);

		/* write buf */
		(void) md_call_strategy(&buf, MD_STR_NOTTOP, NULL);

		if (biowait(&buf)) {
			error = 1;
			break;
		}
		reset_buf(&buf, (B_BUSY | B_WRITE), copysize);
	} /* for */

	destroy_buf(&buf);
	kmem_free(databuffer, copysize);

	return (error);
}

/*
 * NAME: raid_open_alt
 *
 * DESCRIPTION: opens the alt device used during resync.
 *
 * PARAMETERS:	un - raid unit structure
 *		index - column whose alternate device is to be opened
 *
 * RETURN:	0 - successful
 *		1 - failed (open failure, or no alt device configured)
 *
 * LOCKS:	requires unit writer lock
 */

static int
raid_open_alt(mr_unit_t *un, int index)
{
	mr_column_t	*column = &un->un_column[index];
	set_t		setno = MD_MIN2SET(MD_SID(un));
	side_t		side = mddb_getsidenum(setno);
	md_dev64_t	tmpdev = column->un_alt_dev;

	/* correct locks */
	ASSERT(UNIT_WRITER_HELD(un));
	/* not already writing to */
	ASSERT(!(column->un_devflags & MD_RAID_WRITE_ALT));
	/* not already open */
	ASSERT(!(column->un_devflags & MD_RAID_ALT_ISOPEN));

	if (tmpdev != NODEV64) {
		/*
		 * Open by device id. We use orig_key since alt_dev
		 * has been set by the caller to be the same as orig_dev.
		 */
		if ((md_getmajor(tmpdev) != md_major) &&
		    md_devid_found(setno, side, column->un_orig_key) == 1) {
			tmpdev = md_resolve_bydevid(MD_SID(un), tmpdev,
			    column->un_orig_key);
		}
		if (md_layered_open(MD_SID(un), &tmpdev, MD_OFLG_NULL)) {
			/* failed open */
			column->un_alt_dev = tmpdev;
			return (1);
		} else {
			/* open succeeded */
			column->un_alt_dev = tmpdev;
			column->un_devflags |= MD_RAID_ALT_ISOPEN;
			return (0);
		}
	} else
		/* no alt device to open */
		return (1);
}


/*
 * NAME: raid_close_alt
 *
 * DESCRIPTION: closes the alt device used during resync.
 *
 * PARAMETERS:	un - raid unit structure
 *		index - raid column
 *
 * RETURN:	none
 *
 * LOCKS:	requires unit writer lock
 */

static void
raid_close_alt(mr_unit_t *un, int index)
{
	mr_column_t	*column = &un->un_column[index];
	md_dev64_t	tmpdev = column->un_alt_dev;

	ASSERT(UNIT_WRITER_HELD(un));	/* correct locks */
	ASSERT(!(column->un_devflags & MD_RAID_WRITE_ALT)); /* not writing */
	ASSERT(column->un_devflags & MD_RAID_ALT_ISOPEN); /* already open */
	ASSERT(tmpdev != NODEV64); /* is a device */

	md_layered_close(column->un_alt_dev, MD_OFLG_NULL);
	column->un_devflags &= ~MD_RAID_ALT_ISOPEN;
	column->un_alt_dev = NODEV64;
}

/*
 * NAME:	raid_resync_fillin_cs
 *
 * DESCRIPTION: Fill in the block address range of the child save structure
 *		for the given run of lines and take the line reader lock on
 *		that range.
 *
 * PARAMETERS:	diskaddr_t line - first line of the range
 *		uint_t line_count - number of lines in the range
 *		md_raidcs_t *cs - child save structure to fill in
 *
 * RETURN:	the line index just past the locked range
 */
static diskaddr_t
raid_resync_fillin_cs(diskaddr_t line, uint_t line_count, md_raidcs_t *cs)
{
	mr_unit_t *un = cs->cs_un;

	ASSERT(line < un->un_segsincolumn);

	cs->cs_line = line;
	cs->cs_blkno = line * un->un_segsize;
	cs->cs_blkcnt = un->un_segsize * line_count;
	cs->cs_lastblk = cs->cs_blkno + cs->cs_blkcnt - 1;
	raid_line_reader_lock(cs, 1);

	return (line + line_count);
}

/* states returned by raid_resync_line */

#define	RAID_RESYNC_OKAY	0
#define	RAID_RESYNC_RDERROR	2
#define	RAID_RESYNC_WRERROR	3
#define	RAID_RESYNC_STATE	4

/*
 * NAME:	raid_resync_region
 *
 * DESCRIPTION: Resync a run of lines: read the source data (either directly
 *		from the resync column when *single_read, or from all other
 *		columns xor'd together), then write the result to the resync
 *		target device.
 *
 * PARAMETERS:	md_raidcs_t *cs - child save struct (buffers and unit)
 *		diskaddr_t line - first line of the range
 *		uint_t line_count - number of lines in the range
 *		int *single_read - nonzero when copying straight from the
 *			resync column; cleared here on a read error so the
 *			data is regenerated from the other columns instead
 *		hs_cmds_t *hs_state - set to HS_BAD when the hotspare/target
 *			fails
 *		int *err_col - set to the failing column on a read error
 *		md_dev64_t dev_to_write - resync target device
 *		diskaddr_t write_dev_start - start block on the target
 *
 * RETURN:	RAID_RESYNC_OKAY, RAID_RESYNC_RDERROR, or RAID_RESYNC_WRERROR
 */
int
raid_resync_region(
	md_raidcs_t	*cs,
	diskaddr_t	line,
	uint_t		line_count,
	int		*single_read,
	hs_cmds_t	*hs_state,
	int		*err_col,
	md_dev64_t	dev_to_write,
	diskaddr_t	write_dev_start)
{
	mr_unit_t	*un = cs->cs_un;
	buf_t		*readb1 = &cs->cs_pbuf;
	buf_t		*readb2 = &cs->cs_dbuf;
	buf_t		*writeb = &cs->cs_hbuf;
	diskaddr_t	off;
	size_t		tcopysize;
	size_t		copysize;
	int		resync;
	int		quit = 0;
	size_t		leftinseg;
	int		i;

	resync = un->un_resync_index;
	off = line * un->un_segsize;
	copysize = un->un_resync_copysize;

	/* find first column to read, skip resync column */

	leftinseg = un->un_segsize * line_count;
	while (leftinseg) {

		/* truncate last chunk to end if needed */
		if (copysize > leftinseg)
			tcopysize = leftinseg;
		else
			tcopysize = copysize;
		leftinseg -= tcopysize;

		/*
		 * One of two scenarios:
		 * 1) resync device with hotspare ok.  This implies that
		 * we are copying from a good hotspare to a new good original
		 * device.  In this case readb1 is used as the buf for
		 * the read from the hotspare device.
		 * 2) For all other cases, including when in case 1) and an
		 * error is detected on the (formerly good) hotspare device,
		 * readb1 is used for the initial read.  readb2 is used for
		 * all other reads.  Each readb2 buffer is xor'd into the
		 * readb1 buffer.
		 *
		 * NOTE(review): the code below actually reuses readb1 for
		 * every column read (pointing it at cs_dbuffer) and xors
		 * into cs_pbuffer; readb2 is never issued I/O here, only
		 * its semaphores are destroyed at the end -- confirm the
		 * comment above against the intended design.
		 *
		 * In both cases, writeb is used for the write, using readb1's
		 * buffer.
		 *
		 * For case 2, we could alternatively perform the read for all
		 * devices concurrently to improve performance.  However,
		 * this could diminish performance for concurrent reads and
		 * writes if low on memory.
		 */

		/* read first buffer */

		/* switch to read from good columns if single_read */
		if (*single_read) {
			if (un->un_column[resync].un_dev == NODEV64)
				return (RAID_RESYNC_RDERROR);

			reset_buf(readb1, B_READ | B_BUSY,
			    dbtob(copysize));
			readb1->b_bcount = dbtob(tcopysize);
			readb1->b_un.b_addr = cs->cs_pbuffer;
			readb1->b_edev = md_dev64_to_dev(
			    un->un_column[resync].un_dev);
			readb1->b_lblkno =
			    un->un_column[resync].un_devstart + off;
			(void) md_call_strategy(readb1, MD_STR_NOTTOP, NULL);
			if (biowait(readb1)) {
				/*
				 * at this point just start rebuilding the
				 * data and go on since the other columns
				 * are ok.
				 */
				*single_read = 0;
				*hs_state = HS_BAD;
				un->un_column[resync].un_devflags &=
				    ~MD_RAID_COPY_RESYNC;
				un->un_column[resync].un_devflags |=
				    MD_RAID_REGEN_RESYNC;
			}
		}

		/* if reading from all non-resync columns */
		if (!*single_read) {
			/* for each column, read line and xor into write buf */
			bzero(cs->cs_pbuffer, dbtob(tcopysize));
			for (i = 0; i < un->un_totalcolumncnt; i++) {

				if (un->un_column[i].un_dev == NODEV64)
					return (RAID_RESYNC_RDERROR);

				/* skip column getting resync'ed */
				if (i == resync) {
					continue;
				}
				reset_buf(readb1, B_READ | B_BUSY,
				    dbtob(copysize));
				readb1->b_bcount = dbtob(tcopysize);
				readb1->b_un.b_addr = cs->cs_dbuffer;
				readb1->b_edev = md_dev64_to_dev(
				    un->un_column[i].un_dev);
				readb1->b_lblkno =
				    un->un_column[i].un_devstart + off;

				(void) md_call_strategy(readb1, MD_STR_NOTTOP,
				    NULL);
				if (biowait(readb1)) {
					*err_col = i;
					quit = RAID_RESYNC_RDERROR;
				}

				if (quit)
					return (quit);

				/* xor this column's data into cs_pbuffer */
				xor(cs->cs_pbuffer, readb1->b_un.b_addr,
				    dbtob(tcopysize));
			} /* for */
		}

		reset_buf(writeb, B_WRITE | B_BUSY,
		    dbtob(copysize));
		writeb->b_bcount = dbtob(tcopysize);
		writeb->b_un.b_addr = cs->cs_pbuffer;
		writeb->b_lblkno = off + write_dev_start;
		writeb->b_edev = md_dev64_to_dev(dev_to_write);

		/* set write block number and perform the write */
		(void) md_call_strategy(writeb, MD_STR_NOTTOP, NULL);
		if (biowait(writeb)) {
			if (*single_read == 0) {
				*hs_state = HS_BAD;
			}
			return (RAID_RESYNC_WRERROR);
		}
		writeb->b_blkno += tcopysize;
		off += tcopysize;
	} /* while */
	/* tear down the buf semaphores before returning success */
	sema_destroy(&readb1->b_io);
	sema_destroy(&readb1->b_sem);
	sema_destroy(&readb2->b_io);
	sema_destroy(&readb2->b_sem);
	sema_destroy(&writeb->b_io);
	sema_destroy(&writeb->b_sem);
	return (RAID_RESYNC_OKAY);
}

/*
 * NAME: resync_comp
 *
 * DESCRIPTION: Resync the component. Iterate through the raid unit a line at
 *		a time, read from the good device(s) and write the resync
 *		device.
 *
 * PARAMETERS: minor_t mnum - minor number identity of metadevice
 *		md_raidcs_t *cs - child save struct
 *
 * RETURN: none.  (NOTE(review): the original header documented returns of
 *		0/1/-1, but this function is void -- the outcome is recorded
 *		in unit/column state and sysevents instead.)
 *
 * LOCKS: Expects Unit Reader Lock to be held across call. Acquires and
 *		releases Line Reader Lock for per-line I/O.
 */
static void
resync_comp(
	minor_t		mnum,
	md_raidcs_t	*cs
)
{
	mdi_unit_t	*ui;
	mr_unit_t	*un;
	mddb_recid_t	recids[2];
	rcs_state_t	state;
	md_dev64_t	dev_to_write;
	diskaddr_t	write_pwstart;
	diskaddr_t	write_devstart;
	md_dev64_t	dev;
	int		resync;
	int		i;
	int		single_read = 0;
	int		err;
	int		err_cnt;
	int		last_err;
	diskaddr_t	line;
	diskaddr_t	segsincolumn;
	size_t		bsize;
	uint_t		line_count;

	/*
	 * hs_state is the state of the hotspare on the column being resynced
	 * dev_state is the state of the resync target
	 */
	hs_cmds_t	hs_state;
	int		err_col = -1;
	diskaddr_t	resync_end_pos;

	ui = MDI_UNIT(mnum);
	ASSERT(ui != NULL);

	un = cs->cs_un;

	/* trade the caller's reader lock for io + unit writer locks */
	md_unit_readerexit(ui);
	un = (mr_unit_t *)md_io_writerlock(ui);
	un = (mr_unit_t *)md_unit_writerlock(ui);
	resync = un->un_resync_index;
	state = un->un_column[resync].un_devstate;
	line_count = un->un_maxio / un->un_segsize;
	if (line_count == 0) { /* handle the case of segsize > maxio */
		line_count = 1;
		bsize = un->un_maxio;
	} else
		bsize = line_count * un->un_segsize;

	un->un_resync_copysize = (uint_t)bsize;

	ASSERT(un->c.un_status & MD_UN_RESYNC_ACTIVE);
	ASSERT(un->un_column[resync].un_devflags &
	    (MD_RAID_COPY_RESYNC | MD_RAID_REGEN_RESYNC));

	/*
	 * if the column is not in resync then just bail out.
	 */
	if (!(un->un_column[resync].un_devstate & RCS_RESYNC)) {
		md_unit_writerexit(ui);
		md_io_writerexit(ui);
		un = (mr_unit_t *)md_unit_readerlock(ui);
		return;
	}
	SE_NOTIFY(EC_SVM_STATE, ESC_SVM_RESYNC_START, SVM_TAG_METADEVICE,
	    MD_UN2SET(un), MD_SID(un));

	/* identify device to write and its start block */

	if (un->un_column[resync].un_alt_dev != NODEV64) {
		/* writing to the alternate device (replace/copy resync) */
		if (raid_open_alt(un, resync)) {
			raid_set_state(un, resync, state, 0);
			md_unit_writerexit(ui);
			md_io_writerexit(ui);
			un = (mr_unit_t *)md_unit_readerlock(ui);
			cmn_err(CE_WARN, "md: %s: %s open failed replace "
			    "terminated", md_shortname(MD_SID(un)),
			    md_devname(MD_UN2SET(un),
			    un->un_column[resync].un_alt_dev,
			    NULL, 0));
			SE_NOTIFY(EC_SVM_STATE, ESC_SVM_RESYNC_FAILED,
			    SVM_TAG_METADEVICE, MD_UN2SET(un), MD_SID(un));
			return;
		}
		ASSERT(un->un_column[resync].un_devflags & MD_RAID_COPY_RESYNC);
		dev_to_write = un->un_column[resync].un_alt_dev;
		write_devstart = un->un_column[resync].un_alt_devstart;
		write_pwstart = un->un_column[resync].un_alt_pwstart;
		if (un->un_column[resync].un_devflags & MD_RAID_DEV_ERRED) {
			single_read = 0;
			hs_state = HS_BAD;
		} else {
			hs_state = HS_FREE;
			single_read = 1;
		}
		un->un_column[resync].un_devflags |= MD_RAID_WRITE_ALT;
	} else {
		/* regenerating in place onto the column's own device */
		dev_to_write = un->un_column[resync].un_dev;
		write_devstart = un->un_column[resync].un_devstart;
		write_pwstart = un->un_column[resync].un_pwstart;
		single_read = 0;
		hs_state = HS_FREE;
		ASSERT(un->un_column[resync].un_devflags &
		    MD_RAID_REGEN_RESYNC);
	}

	alloc_bufs(cs, dbtob(bsize));
	/* initialize pre-write area */
	if (init_pw_area(un, dev_to_write, write_pwstart, resync)) {
		un->un_column[resync].un_devflags &= ~MD_RAID_WRITE_ALT;
		if (un->un_column[resync].un_alt_dev != NODEV64) {
			raid_close_alt(un, resync);
		}
		md_unit_writerexit(ui);
		md_io_writerexit(ui);
		if (dev_to_write == un->un_column[resync].un_dev)
			hs_state = HS_BAD;
		err = RAID_RESYNC_WRERROR;
		goto resync_comp_error;
	}

	un->c.un_status &= ~MD_UN_RESYNC_CANCEL;
	segsincolumn = un->un_segsincolumn;
	err_cnt = raid_state_cnt(un, RCS_ERRED | RCS_LAST_ERRED);

	/* commit the record */

	md_unit_writerexit(ui);
	md_io_writerexit(ui);


	/* resync each line of the unit */
	for (line = 0; line < segsincolumn; line += line_count) {
		/*
		 * Update address range in child struct and lock the line.
		 *
		 * The reader version of the line lock is used since only
		 * resync will use data beyond un_resync_line_index on the
		 * resync device.
		 */
		un = (mr_unit_t *)md_io_readerlock(ui);
		if (line + line_count > segsincolumn)
			line_count = segsincolumn - line;
		resync_end_pos = raid_resync_fillin_cs(line, line_count, cs);
		(void) md_unit_readerlock(ui);
		ASSERT(un->un_resync_line_index == resync_end_pos);
		err = raid_resync_region(cs, line, (int)line_count,
		    &single_read, &hs_state, &err_col, dev_to_write,
		    write_devstart);

		/*
		 * if the column failed to resync then stop writing directly
		 * to the column.
		 */
		if (err)
			un->un_resync_line_index = 0;

		md_unit_readerexit(ui);
		raid_line_exit(cs);
		md_io_readerexit(ui);

		if (err)
			break;

		un = (mr_unit_t *)md_unit_writerlock(ui);

		/* abort if some other I/O erred a column mid-resync */
		if (raid_state_cnt(un, RCS_ERRED | RCS_LAST_ERRED) != err_cnt) {
			err = RAID_RESYNC_STATE;
			md_unit_writerexit(ui);
			break;
		}
		md_unit_writerexit(ui);
	} /* for */

resync_comp_error:
	un = (mr_unit_t *)md_io_writerlock(ui);
	(void) md_unit_writerlock(ui);
	un->un_column[resync].un_devflags &= ~MD_RAID_WRITE_ALT;

	recids[0] = 0;
	recids[1] = 0;
	switch (err) {
	/*
	 * successful resync
	 */
	case RAID_RESYNC_OKAY:
		/* initialize pre-write area */
		if ((un->un_column[resync].un_orig_dev != NODEV64) &&
		    (un->un_column[resync].un_orig_dev ==
		    un->un_column[resync].un_alt_dev)) {
			/*
			 * replacing a hot spare
			 * release the hot spare, which will close the hotspare
			 * and mark it closed.
			 */
			raid_hs_release(hs_state, un, &recids[0], resync);
			/*
			 * make the resync target the main device and
			 * mark open
			 */
			un->un_column[resync].un_hs_id = 0;
			un->un_column[resync].un_dev =
			    un->un_column[resync].un_orig_dev;
			un->un_column[resync].un_devstart =
			    un->un_column[resync].un_orig_devstart;
			un->un_column[resync].un_pwstart =
			    un->un_column[resync].un_orig_pwstart;
			un->un_column[resync].un_devflags |= MD_RAID_DEV_ISOPEN;
			/* alt becomes the device so don't close it */
			un->un_column[resync].un_devflags &= ~MD_RAID_WRITE_ALT;
			un->un_column[resync].un_devflags &=
			    ~MD_RAID_ALT_ISOPEN;
			un->un_column[resync].un_alt_dev = NODEV64;
		}
		raid_set_state(un, resync, RCS_OKAY, 0);
		break;

	case RAID_RESYNC_WRERROR:
		if (HOTSPARED(un, resync) && single_read &&
		    (un->un_column[resync].un_devflags & MD_RAID_COPY_RESYNC)) {
			/*
			 * this is the case where the resync target is
			 * bad but there is a good hotspare.  In this
			 * case keep the hotspare, and go back to okay.
			 */
			raid_set_state(un, resync, RCS_OKAY, 0);
			cmn_err(CE_WARN, "md: %s: %s write error, replace "
			    "terminated", md_shortname(MD_SID(un)),
			    md_devname(MD_UN2SET(un),
			    un->un_column[resync].un_orig_dev,
			    NULL, 0));
			break;
		}
		if (HOTSPARED(un, resync)) {
			/* give back the hotspare and restore original dev */
			raid_hs_release(hs_state, un, &recids[0], resync);
			un->un_column[resync].un_dev =
			    un->un_column[resync].un_orig_dev;
			un->un_column[resync].un_devstart =
			    un->un_column[resync].un_orig_devstart;
			un->un_column[resync].un_pwstart =
			    un->un_column[resync].un_orig_pwstart;
		}
		raid_set_state(un, resync, RCS_ERRED, 0);
		if (un->un_column[resync].un_devflags & MD_RAID_REGEN_RESYNC)
			dev = un->un_column[resync].un_dev;
		else
			dev = un->un_column[resync].un_alt_dev;
		cmn_err(CE_WARN, "md: %s: %s write error replace terminated",
		    md_shortname(MD_SID(un)), md_devname(MD_UN2SET(un), dev,
		    NULL, 0));
		break;

	case RAID_RESYNC_STATE:
		if (HOTSPARED(un, resync) && single_read &&
		    (un->un_column[resync].un_devflags & MD_RAID_COPY_RESYNC)) {
			/*
			 * this is the case where the resync target is
			 * bad but there is a good hotspare.  In this
			 * case keep the hotspare, and go back to okay.
			 */
			raid_set_state(un, resync, RCS_OKAY, 0);
			cmn_err(CE_WARN, "md: %s: needs maintenance, replace "
			    "terminated", md_shortname(MD_SID(un)));
			break;
		}
		if (HOTSPARED(un, resync)) {
			/* give back the hotspare and restore original dev */
			raid_hs_release(hs_state, un, &recids[0], resync);
			un->un_column[resync].un_dev =
			    un->un_column[resync].un_orig_dev;
			un->un_column[resync].un_devstart =
			    un->un_column[resync].un_orig_devstart;
			un->un_column[resync].un_pwstart =
			    un->un_column[resync].un_orig_pwstart;
		}
		break;
	case RAID_RESYNC_RDERROR:
		if (HOTSPARED(un, resync)) {
			/* give back the hotspare and restore original dev */
			raid_hs_release(hs_state, un, &recids[0], resync);
			un->un_column[resync].un_dev =
			    un->un_column[resync].un_orig_dev;
			un->un_column[resync].un_devstart =
			    un->un_column[resync].un_orig_devstart;
			un->un_column[resync].un_pwstart =
			    un->un_column[resync].un_orig_pwstart;
		}

		/* mark the column whose read failed, if it wasn't ours */
		if ((resync != err_col) && (err_col != NOCOLUMN))
			raid_set_state(un, err_col, RCS_ERRED, 0);
		break;

	default:
		ASSERT(0);
	}
	if (un->un_column[resync].un_alt_dev != NODEV64) {
		raid_close_alt(un, resync);
	}

	/*
	 * an io operation may have gotten an error and placed a
	 * column in erred state.  This will abort the resync, which
	 * will end up in last erred.  This is ugly so go through
	 * the columns and do cleanup
	 */
	err_cnt = 0;
	last_err = 0;
	for (i = 0; i < un->un_totalcolumncnt; i++) {
		if (un->un_column[i].un_devstate & RCS_OKAY)
			continue;
		if (i == resync) {
			raid_set_state(un, i, RCS_ERRED, 1);
			err_cnt++;
		} else if (err == RAID_RESYNC_OKAY) {
			err_cnt++;
		} else {
			raid_set_state(un, i, RCS_LAST_ERRED, 1);
			last_err++;
		}
	}
	/* derive the overall unit state from the per-column tallies */
	if ((err_cnt == 0) && (last_err == 0))
		un->un_state = RUS_OKAY;
	else if (last_err == 0) {
		un->un_state = RUS_ERRED;
		ASSERT(err_cnt == 1);
	} else if (last_err > 0) {
		un->un_state = RUS_LAST_ERRED;
	}

	uniqtime32(&un->un_column[resync].un_devtimestamp);
	un->un_resync_copysize = 0;
	un->un_column[resync].un_devflags &=
	    ~(MD_RAID_REGEN_RESYNC | MD_RAID_COPY_RESYNC);
	raid_commit(un, recids);
	/* release unit writer lock and acquire unit reader lock */
	md_unit_writerexit(ui);
	md_io_writerexit(ui);
	(void) md_unit_readerlock(ui);
	if (err == RAID_RESYNC_OKAY) {
		SE_NOTIFY(EC_SVM_STATE, ESC_SVM_RESYNC_DONE,
		    SVM_TAG_METADEVICE, MD_UN2SET(un), MD_SID(un));
	} else {
		SE_NOTIFY(EC_SVM_STATE, ESC_SVM_RESYNC_FAILED,
		    SVM_TAG_METADEVICE, MD_UN2SET(un), MD_SID(un));
		if (raid_state_cnt(un, RCS_ERRED |
		    RCS_LAST_ERRED) > 1) {
			SE_NOTIFY(EC_SVM_STATE, ESC_SVM_LASTERRED,
			    SVM_TAG_METADEVICE, MD_UN2SET(un), MD_SID(un));
		} else {
			SE_NOTIFY(EC_SVM_STATE, ESC_SVM_ERRED,
			    SVM_TAG_METADEVICE, MD_UN2SET(un), MD_SID(un));
		}
	}

	free_bufs(dbtob(bsize), cs);
}

/*
 * NAME: resync_unit
 *
 * DESCRIPTION: Start of RAID resync thread. Perform up front allocations,
 *		initializations and consistency checking, then call
 *		resync_comp to resync the component.
 *
 * PARAMETERS: minor_t mnum - minor number identity of metadevice
 *
 * LOCKS: Acquires and releases Unit Reader Lock to maintain unit
 *		existence during resync.
 *		Acquires and releases the resync count lock for cpr.
 */
static void
resync_unit(
	minor_t mnum
)
{
	mdi_unit_t	*ui;
	mr_unit_t	*un;
	md_raidps_t	*ps = NULL;
	md_raidcs_t	*cs = NULL;
	int		resync;

	/*
	 * Increment the raid resync count for cpr
	 */
	mutex_enter(&md_cpr_resync.md_resync_mutex);
	md_cpr_resync.md_raid_resync++;
	mutex_exit(&md_cpr_resync.md_resync_mutex);

	ui = MDI_UNIT(mnum);
	ASSERT(ui != NULL);

	un = (mr_unit_t *)md_unit_readerlock(ui);

	/*
	 * Allocate parent and child memory pool structures. These are
	 * only needed to lock raid lines, so only the minimal
	 * required fields for this purpose are initialized.
	 *
	 * Do not use the reserve pool for resync.
	 */
	ps = kmem_cache_alloc(raid_parent_cache, MD_ALLOCFLAGS);
	raid_parent_init(ps);
	cs = kmem_cache_alloc(raid_child_cache, MD_ALLOCFLAGS);
	raid_child_init(cs);
	resync = un->un_resync_index;
	ps->ps_un = un;
	ps->ps_ui = ui;
	ps->ps_flags = MD_RPS_INUSE;
	cs->cs_ps = ps;
	cs->cs_un = un;

	ASSERT(!(un->un_column[resync].un_devflags & MD_RAID_WRITE_ALT));

	resync_comp(mnum, cs);
	release_resync_request(mnum);

	kmem_cache_free(raid_child_cache, cs);
	kmem_cache_free(raid_parent_cache, ps);

	md_unit_readerexit(ui);

	/* close raid unit */
	(void) raid_internal_close(mnum, OTYP_LYR, 0, 0);

	/* poke hot spare daemon */
	(void) raid_hotspares();

	/*
	 * Decrement the raid resync count for cpr
	 */
	mutex_enter(&md_cpr_resync.md_resync_mutex);
	md_cpr_resync.md_raid_resync--;
	mutex_exit(&md_cpr_resync.md_resync_mutex);

	thread_exit();
}

/*
 * NAME: raid_resync_unit
 *
 * DESCRIPTION: RAID metadevice specific resync routine.
 *		Open the unit and start resync_unit as a separate thread.
 *
 * PARAMETERS: minor_t mnum - minor number identity of metadevice
 *		md_error_t *ep - output error parameter
 *
 * RETURN: On error return 1 or set ep to nonzero, otherwise return 0.
 *
 * LOCKS: Acquires and releases Unit Writer Lock.
 */
int
raid_resync_unit(
	minor_t mnum,
	md_error_t *ep
)
{
	mdi_unit_t	*ui;
	set_t		setno = MD_MIN2SET(mnum);
	mr_unit_t	*un;

	ui = MDI_UNIT(mnum);
	un = MD_UNIT(mnum);

	if (md_get_setstatus(setno) & MD_SET_STALE)
		return (mdmddberror(ep, MDE_DB_STALE, mnum, setno));

	/*
	 * NOTE(review): un is dereferenced here before the ui == NULL
	 * availability check below -- confirm MD_UNIT(mnum) cannot be
	 * NULL when this ioctl path is reached.
	 */
	ASSERT(un->un_column[un->un_resync_index].un_devflags &
	    (MD_RAID_COPY_RESYNC | MD_RAID_REGEN_RESYNC));

	/* Don't start a resync if the device is not available */
	if ((ui == NULL) || (ui->ui_tstate & MD_DEV_ERRORED)) {
		return (mdmderror(ep, MDE_RAID_OPEN_FAILURE, mnum));
	}

	if (raid_internal_open(mnum, FREAD | FWRITE, OTYP_LYR, 0)) {
		/* open failed: undo the resync request before erroring out */
		(void) md_unit_writerlock(ui);
		release_resync_request(mnum);
		md_unit_writerexit(ui);
		SE_NOTIFY(EC_SVM_STATE, ESC_SVM_OPEN_FAIL, SVM_TAG_METADEVICE,
		    setno, MD_SID(un));
		return (mdmderror(ep, MDE_RAID_OPEN_FAILURE, mnum));
	}

	/*
	 * start resync_unit thread
	 * (resync_unit takes minor_t; thread_create passes mnum through
	 * as the thread argument)
	 */
	(void) thread_create(NULL, 0, resync_unit, (void *)(uintptr_t)mnum,
	    0, &p0, TS_RUN, minclsyspri);

	return (0);
}