1/* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21/* 22 * Copyright 2006 Sun Microsystems, Inc. All rights reserved. 23 * Use is subject to license terms. 24 */ 25 26#pragma ident "%Z%%M% %I% %E% SMI" 27 28/* 29 * NAME: raid_replay.c 30 * 31 * DESCRIPTION: RAID driver source file containing routines related to replay 32 * operation. 33 * 34 * ROUTINES PROVIDED FOR EXTERNAL USE: 35 * raid_replay() - replay all the pre write entries in the unit. 36 */ 37 38#include <sys/param.h> 39#include <sys/systm.h> 40#include <sys/conf.h> 41#include <sys/file.h> 42#include <sys/user.h> 43#include <sys/uio.h> 44#include <sys/t_lock.h> 45#include <sys/buf.h> 46#include <sys/dkio.h> 47#include <sys/vtoc.h> 48#include <sys/kmem.h> 49#include <vm/page.h> 50#include <sys/sysmacros.h> 51#include <sys/types.h> 52#include <sys/mkdev.h> 53#include <sys/stat.h> 54#include <sys/open.h> 55#include <sys/modctl.h> 56#include <sys/ddi.h> 57#include <sys/sunddi.h> 58 59#include <sys/lvm/md_raid.h> 60 61#include <sys/sysevent/eventdefs.h> 62#include <sys/sysevent/svm.h> 63 64/* functions forward declarations */ 65static int raid_replay_error(mr_unit_t *un, int column); 66 67int raid_total_rply_entries = 0; 68 69/* 70 * NAMES: raid_rply_dealloc, raid_rply_alloc 71 * DESCRIPTION: RAID metadevice replay buffer allocation/deallocation routines 72 * PARAMETERS: mr_unit_t *un - pointer to the unit structure 73 * mr_unit_t *un - pointer to the unit structure 74 * RETURNS: 75 */ 76static void 77raid_rply_dealloc(mr_unit_t *un, 78 raid_rplybuf_t **bufs, 79 raid_rplybuf_t *rwbuf1, 80 raid_rplybuf_t *rwbuf2) 81{ 82 int i; 83 raid_rplybuf_t *tmp; 84 85 for (i = 0, tmp = *bufs; i < un->un_totalcolumncnt; i++, tmp++) { 86 if (tmp->rpl_data) { 87 kmem_free(tmp->rpl_data, DEV_BSIZE); 88 tmp->rpl_data = NULL; 89 } 90 if (tmp->rpl_buf) { 91 kmem_free(tmp->rpl_buf, sizeof (buf_t)); 92 tmp->rpl_buf = NULL; 93 } 94 } 95 kmem_free(*bufs, sizeof (raid_rplybuf_t) * un->un_totalcolumncnt); 96 *bufs = NULL; 97 if (rwbuf1->rpl_data) { 98 kmem_free(rwbuf1->rpl_data, dbtob(un->un_iosize)); 99 rwbuf1->rpl_data = NULL; 100 } 101 if (rwbuf1->rpl_buf) { 102 kmem_free((caddr_t)rwbuf1->rpl_buf, sizeof (buf_t)); 103 rwbuf1->rpl_buf = NULL; 104 } 105 if (rwbuf2->rpl_data) { 106 kmem_free(rwbuf2->rpl_data, dbtob(un->un_iosize)); 107 rwbuf2->rpl_data = NULL; 108 } 109 if (rwbuf2->rpl_buf) { 110 kmem_free((caddr_t)rwbuf2->rpl_buf, sizeof (buf_t)); 111 rwbuf2->rpl_buf = NULL; 112 } 113} 114 115static void 116raid_rply_alloc(mr_unit_t *un, 117 raid_rplybuf_t **bufs, 118 raid_rplybuf_t *rwbuf1, 119 raid_rplybuf_t *rwbuf2) 120{ 121 int i; 122 raid_rplybuf_t *tmp; 123 buf_t *bp; 124 125 /* intialization */ 126 *bufs = kmem_zalloc(sizeof (raid_rplybuf_t) * un->un_totalcolumncnt, 127 KM_SLEEP); 128 ASSERT(*bufs != NULL); 129 bzero((caddr_t)rwbuf1, sizeof (raid_rplybuf_t)); 130 bzero((caddr_t)rwbuf2, sizeof (raid_rplybuf_t)); 131 132 /* allocate all the buffers required for the replay processing */ 133 for (i = 0, tmp = *bufs; i < un->un_totalcolumncnt; i++, tmp++) { 134 tmp->rpl_data = kmem_zalloc(DEV_BSIZE, KM_SLEEP); 135 ASSERT(tmp->rpl_data != NULL); 136 tmp->rpl_buf = kmem_zalloc(sizeof (buf_t), KM_SLEEP); 137 ASSERT(tmp->rpl_buf != NULL); 138 bp = (buf_t *)tmp->rpl_buf; 139 bp->b_back = bp; 140 bp->b_forw = bp; 141 bp->b_flags = B_BUSY; 142 bp->b_offset = -1; 143 /* Initialize semaphores */ 144 sema_init(&bp->b_io, 0, NULL, 145 SEMA_DEFAULT, NULL); 146 sema_init(&bp->b_sem, 0, NULL, 147 SEMA_DEFAULT, NULL); 148 } 149 150 rwbuf1->rpl_data = kmem_zalloc(dbtob(un->un_iosize), KM_SLEEP); 151 ASSERT(rwbuf1->rpl_data != NULL); 152 rwbuf1->rpl_buf = kmem_zalloc(sizeof (buf_t), KM_SLEEP); 153 ASSERT(rwbuf1->rpl_buf != NULL); 154 rwbuf2->rpl_data = kmem_zalloc(dbtob(un->un_iosize), KM_SLEEP); 155 ASSERT(rwbuf2->rpl_data != NULL); 156 rwbuf2->rpl_buf = kmem_zalloc(sizeof (buf_t), KM_SLEEP); 157 ASSERT(rwbuf2->rpl_buf != NULL); 158 159 bp = (buf_t *)rwbuf1->rpl_buf; 160 bp->b_back = bp; 161 bp->b_forw = bp; 162 bp->b_flags = B_BUSY; 163 bp->b_offset = -1; 164 /* Initialize semaphores */ 165 sema_init(&bp->b_io, 0, NULL, 166 SEMA_DEFAULT, NULL); 167 sema_init(&bp->b_sem, 0, NULL, 168 SEMA_DEFAULT, NULL); 169 bp = (buf_t *)rwbuf2->rpl_buf; 170 bp->b_back = bp; 171 bp->b_forw = bp; 172 bp->b_flags = B_BUSY; 173 bp->b_offset = -1; 174 /* Initialize semaphores */ 175 sema_init(&bp->b_io, 0, NULL, 176 SEMA_DEFAULT, NULL); 177 sema_init(&bp->b_sem, 0, NULL, 178 SEMA_DEFAULT, NULL); 179} 180 181/* 182 * NAMES: rpl_insert, rpl_delete, rpl_find 183 * DESCRIPTION: RAID metadevice replay list processing APIs 184 * PARAMETERS: raid_rplylst_t *list - pointer to the replay list. 185 * raid_pwhdr_t *pwptr - pointer to a pre-write header. 186 * RETURNS: 187 */ 188static void 189rpl_insert(raid_rplylst_t **listp, raid_rplylst_t *newp) 190{ 191 raid_rplylst_t *tmp, **prevp; 192 193 for (prevp = listp; ((tmp = *prevp) != NULL); prevp = &tmp->rpl_next) { 194 if (tmp->rpl_id > newp->rpl_id) { 195 break; 196 } 197 } 198 newp->rpl_next = tmp; 199 *prevp = newp; 200} 201 202static void 203rpl_delete(raid_rplylst_t **prevp, raid_rplylst_t *oldp) 204{ 205 206 ASSERT((caddr_t)oldp); 207 raid_total_rply_entries --; 208 *prevp = oldp->rpl_next; 209 kmem_free((caddr_t)oldp, sizeof (raid_rplylst_t)); 210} 211 212static raid_rplylst_t * 213rpl_find(raid_rplylst_t *list, long long pw_id) 214{ 215 raid_rplylst_t *tmp; 216 217 for (tmp = list; tmp; tmp = tmp->rpl_next) { 218 if (pw_id == tmp->rpl_id) { 219 return (tmp); 220 } 221 } 222 return ((raid_rplylst_t *)NULL); 223} 224 225/* 226 * NAMES: enq_rplylst 227 * DESCRIPTION: Enqueue a pre-write header into the replay list. 228 * PARAMETERS: raid_rplylst_t *list - pointer to the replay list. 229 * raid_pwhdr_t *pwptr - pointer to a pre-write header. 230 * RETURNS: 231 */ 232static void 233enq_rplylst(raid_rplylst_t **listp, raid_pwhdr_t *pwhp, 234 uint_t slot, int column) 235{ 236 raid_rplylst_t *newp, *oldp; 237 238 /* check if the pre-write existed in the list */ 239 if ((pwhp->rpw_colcount <= 2) && 240 (oldp = rpl_find(*listp, pwhp->rpw_id))) { 241 bcopy((caddr_t)pwhp, (caddr_t)&oldp->rpl_pwhdr2, 242 sizeof (raid_pwhdr_t)); 243 oldp->rpl_slot2 = slot; 244 oldp->rpl_column2 = column; 245 } else { 246 raid_total_rply_entries ++; 247 newp = (raid_rplylst_t *)kmem_zalloc(sizeof (raid_rplylst_t), 248 KM_SLEEP); 249 ASSERT(newp != NULL); 250 bcopy((caddr_t)pwhp, (caddr_t)&newp->rpl_pwhdr1, 251 sizeof (raid_pwhdr_t)); 252 bzero((caddr_t)&newp->rpl_pwhdr2, sizeof (raid_pwhdr_t)); 253 254 newp->rpl_id = pwhp->rpw_id; 255 newp->rpl_column1 = column; 256 newp->rpl_slot1 = slot; 257 newp->rpl_next = (raid_rplylst_t *)NULL; 258 newp->rpl_colcnt = pwhp->rpw_colcount; 259 rpl_insert(listp, newp); 260 } 261} 262 263/* 264 * NAMES: pw_read_done and pw_write_done 265 * DESCRIPTION: don't know the usage yet ??? (TBD) 266 * PARAMETERS: 267 * RETURNS: 268 */ 269static int 270pw_read_done(buf_t *bp) 271{ 272 ASSERT(SEMA_HELD(&bp->b_sem)); 273 ASSERT((bp->b_flags & B_DONE) == 0); 274 275 bp->b_flags |= B_DONE; 276 277 if (bp->b_flags & B_ASYNC) 278 sema_v(&bp->b_sem); 279 else 280 /* wakeup the thread waiting on this buf */ 281 sema_v(&bp->b_io); 282 return (0); 283} 284 285static int 286pw_write_done(buf_t *bp) 287{ 288 ASSERT(SEMA_HELD(&bp->b_sem)); 289 ASSERT((bp->b_flags & B_DONE) == 0); 290 291 bp->b_flags |= B_DONE; 292 293 if (bp->b_flags & B_ASYNC) 294 sema_v(&bp->b_sem); 295 else 296 /* wakeup the thread waiting on this buf */ 297 sema_v(&bp->b_io); 298 299 return (0); 300} 301 302/* 303 * NAMES: raid_pwhdr_read 304 * DESCRIPTION: issue a syncronous read to read a pre-write header 305 * PARAMETERS: mr_unit_t *un - pointer to the unit structure 306 * int pw_slot - pre-write entry slot number 307 * int column - column number for the pre-write entry 308 * raid_rplybuf_t *bufp - pointer to the replay buffer structure 309 * RETURNS: 310 */ 311static void 312raid_pwhdr_read(mr_unit_t *un, int pw_slot, int column, raid_rplybuf_t *bufp) 313{ 314 buf_t *bp; 315 316 /* set up pointers from raid_rplybuf_t *bufp */ 317 bp = (buf_t *)bufp->rpl_buf; 318 319 /* calculate the data address or block number */ 320 bp->b_un.b_addr = bufp->rpl_data; 321 bp->b_lblkno = un->un_column[column].un_pwstart + 322 pw_slot * un->un_iosize; 323 bp->b_edev = md_dev64_to_dev(un->un_column[column].un_dev); 324 bp->b_bufsize = DEV_BSIZE; 325 bp->b_bcount = DEV_BSIZE; 326 bp->b_flags = (B_READ | B_BUSY); 327 bp->b_iodone = pw_read_done; 328 (void) md_call_strategy(bp, 0, NULL); 329} 330 331/* 332 * NAMES: raid_pw_read 333 * DESCRIPTION: issue a syncronous read to read a pre-write entry 334 * PARAMETERS: mr_unit_t *un - pointer to the unit structure 335 * int column - column number for the pre-write entry 336 * u_int slot - pre-write entry slot number 337 * raid_rplybuf_t *bufp - pointer to the replay buffer structure 338 * RETURNS: 339 */ 340static int 341raid_pw_read(mr_unit_t *un, int column, uint_t slot, raid_rplybuf_t *bufp) 342{ 343 buf_t *bp; 344 int error; 345 uint_t blkcnt = un->un_iosize; 346 uint_t bytecnt = blkcnt * DEV_BSIZE; 347 348 /* if this column is no longer accessible, return */ 349 if (!COLUMN_ISUP(un, column)) 350 return (RAID_RPLY_COMPREPLAY); 351 352 /* set up pointers from raid_rplybuf_t *bufp */ 353 bp = (buf_t *)bufp->rpl_buf; 354 355 /* calculate the data address or block number */ 356 bp->b_un.b_addr = bufp->rpl_data; 357 bp->b_bufsize = bytecnt; 358 bp->b_bcount = bytecnt; 359 bp->b_flags = (B_READ | B_BUSY); 360 bp->b_edev = md_dev64_to_dev(un->un_column[column].un_dev); 361 bp->b_lblkno = un->un_column[column].un_pwstart + (slot * blkcnt); 362 bp->b_iodone = pw_read_done; 363 (void) md_call_strategy(bp, 0, NULL); 364 if (biowait(bp)) { 365 error = raid_replay_error(un, column); 366 return (error); 367 } 368 return (0); 369} 370 371/* 372 * NAMES: raid_pw_write 373 * DESCRIPTION: issue a syncronous write to write a pre-write entry 374 * PARAMETERS: mr_unit_t *un - pointer to the unit structure 375 * int column - column number for the pre-write entry 376 * raid_pwhdr_t *pwhp - needed for some infos about the pw header 377 * raid_rplybuf_t *bufp - pointer to the replay buffer structure 378 * RETURNS: 379 */ 380static int 381raid_pw_write(mr_unit_t *un, int column, raid_pwhdr_t *pwhp, 382 raid_rplybuf_t *bufp) 383{ 384 buf_t *bp; 385 int error; 386 387 /* if this column is no longer accessible, return */ 388 if (!COLUMN_ISUP(un, column)) 389 return (RAID_RPLY_COMPREPLAY); 390 391 /* set up pointers from raid_rplybuf_t *bufp */ 392 bp = (buf_t *)bufp->rpl_buf; 393 394 /* calculate the data address or block number */ 395 bp->b_un.b_addr = bufp->rpl_data + DEV_BSIZE; 396 bp->b_bufsize = dbtob(pwhp->rpw_blkcnt); 397 bp->b_bcount = dbtob(pwhp->rpw_blkcnt); 398 bp->b_flags = (B_WRITE | B_BUSY); 399 bp->b_edev = md_dev64_to_dev(un->un_column[column].un_dev); 400 bp->b_lblkno = un->un_column[column].un_devstart + pwhp->rpw_blkno; 401 bp->b_iodone = pw_write_done; 402 (void) md_call_strategy(bp, 0, NULL); 403 if (biowait(bp)) { 404 error = raid_replay_error(un, column); 405 return (error); 406 } 407 return (0); 408} 409 410/* 411 * NAMES: genchecksum 412 * DESCRIPTION: generate check sum for a pre-write entry 413 * PARAMETERS: caddr_t addr - where the data bytes are 414 * int bcount - number of bytes in the pre-write entry 415 * RETURNS: 416 */ 417static uint_t 418genchecksum(caddr_t addr, size_t bcount) 419{ 420 uint_t *dbuf; 421 size_t wordcnt; 422 uint_t dsum = 0; 423 424 wordcnt = bcount / sizeof (uint_t); 425 dbuf = (uint_t *)(void *)(addr); 426 427 while (wordcnt--) { 428 dsum ^= *dbuf; 429 dbuf++; 430 } 431 return (dsum); 432} 433 434/* 435 * NAMES: raid_rply_verify 436 * DESCRIPTION: verify the pre-write entry for replay 437 * PARAMETERS: mr_unit_t *un - pointer to unit structure 438 * int col1 - column number 1 439 * int goodsum1 - flag to indicate good checksum 440 * int *do_1 - flag to indicate whether we should replay 441 * the first pre-write 442 * int col2 - column number 2 443 * int goodsum2 - flag to indicate good checksum 444 * int *do_2 - flag to indicate whether we should replay 445 * the first pre-write 446 * RETURNS: 447 */ 448static void 449raid_rply_verify(mr_unit_t *un, int col1, int goodsum1, int *do_1, 450 int col2, int goodsum2, int *do_2) 451{ 452 int good_state1 = 0; 453 int good_state2 = 0; 454 455 *do_1 = 0; *do_2 = 0; /* prepare for the worst */ 456 if (COLUMN_ISUP(un, col1)) { 457 good_state1 = 1; 458 } 459 if (COLUMN_ISUP(un, col2)) { 460 good_state2 = 1; 461 } 462 if ((good_state1 & good_state2) && (goodsum1 & goodsum2)) { 463 /* if both columns check out, do it */ 464 *do_1 = 1; *do_2 = 1; 465 } else if ((good_state1 & goodsum1) && !good_state2) { 466 /* if one column is okay and the other is errored, do it */ 467 *do_1 = 1; *do_2 = 0; 468 } else if ((good_state2 & goodsum2) && !good_state1) { 469 /* if one column is okay and the other is errored, do it */ 470 *do_2 = 1; *do_1 = 0; 471 } 472} 473 474/* 475 * NAMES: raid_rplyeach 476 * DESCRIPTION: issue a syncronous read to read a pre-write header 477 * PARAMETERS: mr_unit_t *un - pointer to the unit structure 478 * raid_rplylst_t *eachp - pointer to the replay list entry 479 * raid_rplybuf_t *rwbuf1 - pointer to the replay buffer structure 480 * raid_rplybuf_t *rwbuf2 - pointer to the replay buffer structure 481 * RETURNS: 482 */ 483static int 484raid_rplyeach( 485 mr_unit_t *un, 486 raid_rplylst_t *eachp, 487 raid_rplybuf_t *rwbuf1, 488 raid_rplybuf_t *rwbuf2 489) 490{ 491 raid_pwhdr_t *pwhp1; 492 raid_pwhdr_t *pwhp2; 493 uint_t dsum1 = 0; 494 uint_t dsum2 = 0; 495 int good_pw1 = 0; 496 int good_pw2 = 0; 497 int do_1 = 0; 498 int do_2 = 0; 499 int error = 0; 500 501 /* First verify the normal case - two pre-write entries are all good */ 502 if ((eachp->rpl_pwhdr1.rpw_magic == RAID_PWMAGIC && 503 eachp->rpl_pwhdr2.rpw_magic == RAID_PWMAGIC) && 504 (eachp->rpl_pwhdr1.rpw_blkcnt == eachp->rpl_pwhdr2.rpw_blkcnt)) { 505 506 ASSERT(eachp->rpl_pwhdr1.rpw_id == eachp->rpl_pwhdr2.rpw_id); 507 508 /* read the pre-write entries */ 509 error = raid_pw_read(un, eachp->rpl_column1, 510 eachp->rpl_slot1, rwbuf1); 511 pwhp1 = &eachp->rpl_pwhdr1; 512 if (error) { 513 if (error != RAID_RPLY_COMPREPLAY) 514 return (error); 515 good_pw1 = FALSE; 516 } else { 517 /* generate checksum for each pre-write entry */ 518 dsum1 = genchecksum(rwbuf1->rpl_data + DEV_BSIZE, 519 dbtob(pwhp1->rpw_blkcnt)); 520 good_pw1 = (dsum1 == pwhp1->rpw_sum); 521 } 522 523 error = raid_pw_read(un, eachp->rpl_column2, eachp->rpl_slot2, 524 rwbuf2); 525 pwhp2 = &eachp->rpl_pwhdr2; 526 if (error) { 527 if (error != RAID_RPLY_COMPREPLAY) 528 return (error); 529 good_pw2 = FALSE; 530 } else { 531 /* generate checksum for pre-write entry */ 532 dsum2 = genchecksum(rwbuf2->rpl_data + DEV_BSIZE, 533 dbtob(pwhp2->rpw_blkcnt)); 534 good_pw2 = (dsum2 == pwhp2->rpw_sum); 535 } 536 537 /* verify the checksums and states */ 538 raid_rply_verify(un, eachp->rpl_column1, good_pw1, &do_1, 539 eachp->rpl_column2, good_pw2, &do_2); 540 541 /* write (replay) the pre-write entries */ 542 if (do_1) { 543 error = raid_pw_write(un, eachp->rpl_column1, 544 &eachp->rpl_pwhdr1, rwbuf1); 545 if (error && (error != RAID_RPLY_COMPREPLAY)) { 546 return (error); 547 } 548 } 549 if (do_2) { 550 error = raid_pw_write(un, eachp->rpl_column2, 551 &eachp->rpl_pwhdr2, rwbuf2); 552 if (error && (error != RAID_RPLY_COMPREPLAY)) { 553 return (error); 554 } 555 } 556 return (0); 557 } 558 if (eachp->rpl_pwhdr1.rpw_magic == RAID_PWMAGIC) { 559 /* 560 * if partner was errored at time of write 561 * or due to open or replay, replay this entry 562 */ 563 if ((eachp->rpl_pwhdr1.rpw_columnnum == -1) || 564 (! COLUMN_ISUP(un, eachp->rpl_pwhdr1.rpw_columnnum))) { 565 /* read the pre-write entry */ 566 error = raid_pw_read(un, eachp->rpl_column1, 567 eachp->rpl_slot1, rwbuf1); 568 if (error) 569 return (error); 570 /* generate checksum for the pre-write entry */ 571 pwhp1 = &eachp->rpl_pwhdr1; 572 dsum1 = genchecksum(rwbuf1->rpl_data + DEV_BSIZE, 573 dbtob(pwhp1->rpw_blkcnt)); 574 if (dsum1 == pwhp1->rpw_sum) { 575 error = raid_pw_write(un, eachp->rpl_column1, 576 &eachp->rpl_pwhdr1, rwbuf1); 577 if (error && (error != RAID_RPLY_COMPREPLAY)) { 578 return (error); 579 } 580 } 581 } 582 return (0); 583 } 584 585 return (0); 586} 587 588static int 589replay_line(mr_unit_t *un, raid_rplylst_t *eachp, raid_rplybuf_t *rplybuf) 590{ 591 raid_pwhdr_t *pwhdr1, *pwhdr2; 592 raid_rplylst_t *eachpn; 593 int i; 594 int cnt; 595 diskaddr_t blkno; 596 uint_t blkcnt; 597 long long id; 598 int dsum; 599 int error; 600 int colcnt, col, col2; 601 int down; 602 603 if (eachp->rpl_id == 0) 604 return (0); 605 /* 606 * check: 1 - enough equal ids 607 * 2 - all have same columncnt 608 * 3 - all have same blkno 609 * 4 - all have same blkcnt 610 * 611 * read each and check the checksum 612 * write each 613 */ 614 615 cnt = eachp->rpl_colcnt; 616 id = eachp->rpl_id; 617 pwhdr1 = &eachp->rpl_pwhdr1; 618 blkno = pwhdr1->rpw_blkno; 619 blkcnt = pwhdr1->rpw_blkcnt; 620 621 error = raid_pw_read(un, eachp->rpl_column1, eachp->rpl_slot1, rplybuf); 622 dsum = genchecksum(rplybuf->rpl_data + DEV_BSIZE, 623 dbtob(pwhdr1->rpw_blkcnt)); 624 625 if (dsum != pwhdr1->rpw_sum) 626 return (0); 627 628 if (error) { 629 if (error == RAID_RPLY_COMPREPLAY) 630 return (0); 631 else 632 return (1); 633 } 634 635 eachpn = eachp->rpl_next; 636 for (i = 1; i < cnt; i++) { 637 if (eachpn == NULL) 638 break; 639 col2 = eachpn->rpl_column1; 640 ASSERT(col2 < un->un_totalcolumncnt); 641 pwhdr2 = &eachpn->rpl_pwhdr1; 642 if ((pwhdr2->rpw_blkno != blkno) || 643 (pwhdr2->rpw_blkcnt != blkcnt) || 644 (eachpn->rpl_id != id) || 645 (pwhdr2->rpw_colcount != cnt)) { 646 return (0); 647 } 648 649 error = raid_pw_read(un, col2, eachpn->rpl_slot1, rplybuf); 650 dsum = genchecksum(rplybuf->rpl_data + DEV_BSIZE, 651 dbtob(pwhdr2->rpw_blkcnt)); 652 if (dsum != pwhdr2->rpw_sum) 653 return (0); 654 eachpn = eachpn->rpl_next; 655 } 656 colcnt = i; 657 658 if (error) 659 return (0); 660 661 down = raid_state_cnt(un, RCS_ERRED); 662 if ((i != un->un_totalcolumncnt) && 663 (i != (un->un_totalcolumncnt - down))) 664 return (0); 665 666 /* there ara enough columns to write correctly */ 667 eachpn = eachp; 668 for (i = 0; i < colcnt; i++) { 669 col = eachpn->rpl_column1; 670 error = raid_pw_read(un, col, eachpn->rpl_slot1, rplybuf); 671 error = raid_pw_write(un, col, &eachpn->rpl_pwhdr1, rplybuf); 672 eachpn->rpl_id = 0; 673 if (error && (error != RAID_RPLY_COMPREPLAY)) 674 return (1); 675 eachpn = eachpn->rpl_next; 676 } 677 return (0); 678} 679 680/* 681 * NAMES: raid_replay_error 682 * DESCRIPTION: RAID metadevice replay error handling routine (TBD) 683 * PARAMETERS: 684 * RETURNS: 685 */ 686static int 687raid_replay_error(mr_unit_t *un, int column) 688{ 689 int error = RAID_RPLY_COMPREPLAY; 690 691 raid_set_state(un, column, RCS_ERRED, 0); 692 raid_commit(un, NULL); 693 694 if (UNIT_STATE(un) == RUS_LAST_ERRED) { 695 error = RAID_RPLY_READONLY; 696 SE_NOTIFY(EC_SVM_STATE, ESC_SVM_LASTERRED, SVM_TAG_METADEVICE, 697 MD_UN2SET(un), MD_SID(un)); 698 } else if (UNIT_STATE(un) == RUS_ERRED) { 699 SE_NOTIFY(EC_SVM_STATE, ESC_SVM_ERRED, SVM_TAG_METADEVICE, 700 MD_UN2SET(un), MD_SID(un)); 701 } 702 703 return (error); 704} 705 706/* 707 * NAMES: raid_replay 708 * DESCRIPTION: RAID metadevice main replay processing routine 709 * PARAMETERS: mr_unit_t *un - pointer to an unit structure 710 * RETURNS: 711 */ 712 713int 714raid_replay(mr_unit_t *un) 715{ 716 raid_rplylst_t *rplylst = NULL; 717 raid_rplylst_t **prevp, *eachp; 718 raid_rplybuf_t *rplybuf; 719 raid_rplybuf_t rwbuf1; 720 raid_rplybuf_t rwbuf2; 721 mr_column_t *colptr; 722 raid_pwhdr_t pwhdr; 723 raid_pwhdr_t *pwhdrp = &pwhdr; 724 int error = 0; 725 int i, j; 726 diskaddr_t max_blkno = un->un_segsize * un->un_segsincolumn; 727 int totalcolumns = un->un_totalcolumncnt; 728 729 raid_rply_alloc(un, &rplybuf, &rwbuf1, &rwbuf2); 730 731 /* build a replay list based on the order of pre-write id */ 732 for (i = 0; i < un->un_pwcnt; i++) { 733 /* issue a synchronous read for each column */ 734 for (j = 0; j < un->un_totalcolumncnt; j++) { 735 if (COLUMN_ISUP(un, j)) { 736 raid_pwhdr_read(un, i, j, &rplybuf[j]); 737 /* wait for I/O completion for each column */ 738 if (biowait((buf_t *)rplybuf[j].rpl_buf)) { 739 /* potential state transition */ 740 error = raid_replay_error(un, j); 741 if (error == RAID_RPLY_COMPREPLAY) 742 continue; 743 else 744 goto replay_failed; 745 } 746 if (un->c.un_revision & MD_64BIT_META_DEV) { 747 pwhdrp = (raid_pwhdr_t *) 748 rplybuf[j].rpl_data; 749 } else { 750 RAID_CONVERT_RPW((raid_pwhdr32_od_t *) 751 rplybuf[j].rpl_data, 752 pwhdrp); 753 } 754 755 /* first check pre-write magic number */ 756 if (pwhdrp->rpw_magic != RAID_PWMAGIC) { 757 continue; 758 } 759 if (pwhdrp->rpw_column != j) { 760 continue; 761 } 762 if (pwhdrp->rpw_id == (long long) 0) { 763 continue; 764 } 765 if (pwhdrp->rpw_blkcnt > (un->un_iosize - 1)) { 766 continue; 767 } 768 if (pwhdrp->rpw_blkcnt == 0) { 769 continue; 770 } 771 if (pwhdrp->rpw_blkno > max_blkno) { 772 continue; 773 } 774 if ((pwhdrp->rpw_columnnum < 0) || 775 (pwhdrp->rpw_columnnum > totalcolumns)) { 776 continue; 777 } 778 if (((pwhdrp->rpw_colcount != 1) && 779 (pwhdrp->rpw_colcount != 2) && 780 (pwhdrp->rpw_colcount != totalcolumns))) { 781 continue; 782 } 783 784 enq_rplylst(&rplylst, pwhdrp, i, j); 785 } 786 } 787 } 788 789 /* replay each entry in the replay list */ 790 prevp = &rplylst; 791 while ((eachp = *prevp) != NULL) { 792 /* zero out the pre-write headers in the buffer */ 793 bzero((caddr_t)rwbuf1.rpl_data, sizeof (raid_pwhdr_t)); 794 bzero((caddr_t)rwbuf2.rpl_data, sizeof (raid_pwhdr_t)); 795 796 if (eachp->rpl_colcnt <= 2) 797 error = raid_rplyeach(un, eachp, &rwbuf1, &rwbuf2); 798 else 799 error = replay_line(un, eachp, &rwbuf1); 800 801 if (error && (error != RAID_RPLY_COMPREPLAY)) { 802 goto replay_failed; 803 } 804 805 /* free the processed replay list entry */ 806 rpl_delete(prevp, eachp); 807 prevp = &rplylst; 808 } 809 810 /* zero out all pre-write entries in this unit */ 811 for (j = 0; j < un->un_totalcolumncnt; j++) { 812 if (COLUMN_ISUP(un, j)) { 813 colptr = &un->un_column[j]; 814 if (init_pw_area(un, colptr->un_dev, 815 colptr->un_pwstart, j)) 816 break; 817 } 818 } 819 820 /* deallocate all the buffer resource allocated in this routine */ 821 raid_rply_dealloc(un, &rplybuf, &rwbuf1, &rwbuf2); 822 823 return (RAID_RPLY_SUCCESS); 824 825replay_failed: 826 827 /* first release the list */ 828 prevp = &rplylst; 829 while ((eachp = *prevp) != NULL) { 830 rpl_delete(prevp, eachp); 831 prevp = &rplylst; 832 } 833 834 /* then release buffers */ 835 raid_rply_dealloc(un, &rplybuf, &rwbuf1, &rwbuf2); 836 837 /* also reset the pre-write id variable to one */ 838 un->un_pwid = 1; 839 raid_total_rply_entries = 0; 840 841 return (error); 842} 843