1/* 2 * Copyright (c) 2001-2007 Apple Inc. All rights reserved. 3 * 4 * @APPLE_LICENSE_HEADER_START@ 5 * 6 * The contents of this file constitute Original Code as defined in and 7 * are subject to the Apple Public Source License Version 1.1 (the 8 * "License"). You may not use this file except in compliance with the 9 * License. Please obtain a copy of the License at 10 * http://www.apple.com/publicsource and read it before using this file. 11 * 12 * This Original Code and all software distributed under the License are 13 * distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY KIND, EITHER 14 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, 15 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, 16 * FITNESS FOR A PARTICULAR PURPOSE OR NON-INFRINGEMENT. Please see the 17 * License for the specific language governing rights and limitations 18 * under the License. 19 * 20 * @APPLE_LICENSE_HEADER_END@ 21 */ 22 23#include "AppleRAID.h" 24 25#define super AppleRAIDSet 26OSDefineMetaClassAndStructors(AppleRAIDMirrorSet, AppleRAIDSet); 27 28AppleRAIDSet * AppleRAIDMirrorSet::createRAIDSet(AppleRAIDMember * firstMember) 29{ 30 AppleRAIDMirrorSet *raidSet = new AppleRAIDMirrorSet; 31 32 IOLog1("AppleRAIDMirrorSet::createRAIDSet(%p) called, new set = %p *********\n", firstMember, raidSet); 33 34 while (raidSet){ 35 36 if (!raidSet->init()) break; 37 if (!raidSet->initWithHeader(firstMember->getHeader(), true)) break; 38 if (raidSet->resizeSet(raidSet->getMemberCount())) return raidSet; 39 40 break; 41 } 42 43 if (raidSet) raidSet->release(); 44 45 return 0; 46} 47 48bool AppleRAIDMirrorSet::init() 49{ 50 IOLog1("AppleRAIDMirrorSet::init() called\n"); 51 52 if (super::init() == false) return false; 53 54 arRebuildThreadCall = 0; 55 arSetCompleteThreadCall = 0; 56 arExpectingLiveAdd = 0; 57 arMaxReadRequestFactor = 32; // with the default 32KB blocksize -> 1 MB 58 59 queue_init(&arFailedRequestQueue); 60 61 setProperty(kAppleRAIDLevelNameKey, kAppleRAIDLevelNameMirror); 62 63 arAllocateRequestMethod = OSMemberFunctionCast(IOCommandGate::Action, this, &AppleRAIDSet::allocateRAIDRequest); 64 65 return true; 66} 67 68bool AppleRAIDMirrorSet::initWithHeader(OSDictionary * header, bool firstTime) 69{ 70 if (super::initWithHeader(header, firstTime) == false) return false; 71 72 setProperty(kAppleRAIDSetAutoRebuildKey, header->getObject(kAppleRAIDSetAutoRebuildKey)); 73 setProperty(kAppleRAIDSetTimeoutKey, header->getObject(kAppleRAIDSetTimeoutKey)); 74 75 // arQuickRebuildBitSize = 0; //XXX 76 77 // schedule a timeout to start up degraded sets 78 if (firstTime) startSetCompleteTimer(); 79 80 return true; 81} 82 83void AppleRAIDMirrorSet::free(void) 84{ 85 if (arRebuildThreadCall) thread_call_free(arRebuildThreadCall); 86 arRebuildThreadCall = 0; 87 if (arSetCompleteThreadCall) thread_call_free(arSetCompleteThreadCall); 88 arSetCompleteThreadCall = 0; 89 90 if (arLastSeek) IODelete(arLastSeek, UInt64, arLastAllocCount); 91 if (arSkippedIOCount) IODelete(arSkippedIOCount, UInt64, arLastAllocCount); 92 93 assert(queue_empty(&arFailedRequestQueue)); 94 95 super::free(); 96} 97 98//8888888888888888888888888888888888888888888888888888888888888888888888888888888888888888888888888 99//8888888888888888888888888888888888888888888888888888888888888888888888888888888888888888888888888 100//8888888888888888888888888888888888888888888888888888888888888888888888888888888888888888888888888 101 102IOBufferMemoryDescriptor * AppleRAIDMirrorSet::readPrimaryMetaData(AppleRAIDMember * member) 103{ 104 IOBufferMemoryDescriptor * primaryBuffer = super::readPrimaryMetaData(member); 105 106 // XXX 107 108 return primaryBuffer; 109} 110 111IOReturn AppleRAIDMirrorSet::writePrimaryMetaData(IOBufferMemoryDescriptor * primaryBuffer) 112{ 113 114 // XXX 115 116 return super::writePrimaryMetaData(primaryBuffer); 117} 118 119//8888888888888888888888888888888888888888888888888888888888888888888888888888888888888888888888888 120//8888888888888888888888888888888888888888888888888888888888888888888888888888888888888888888888888 121//8888888888888888888888888888888888888888888888888888888888888888888888888888888888888888888888888 122 123bool AppleRAIDMirrorSet::addMember(AppleRAIDMember * member) 124{ 125 if (arExpectingLiveAdd) { 126 // for mirrors the set is not paused for adding while adding new 127 // members, mark it as a spare here to avoid having it marked broken 128 member->changeMemberState(kAppleRAIDMemberStateSpare, true); 129 } 130 131 if (super::addMember(member) == false) return false; 132 133 // set block count = member block count 134 OSNumber * number = OSDynamicCast(OSNumber, member->getHeaderProperty(kAppleRAIDChunkCountKey)); 135 if (!number) return false; 136 arSetBlockCount = number->unsigned64BitValue(); 137 arSetMediaSize = arSetBlockCount * arSetBlockSize; 138 139 if (arOpenLevel == kIOStorageAccessNone) startSetCompleteTimer(); 140 141 return true; 142} 143 144bool AppleRAIDMirrorSet::removeMember(AppleRAIDMember * member, IOOptionBits options) 145{ 146 if (!super::removeMember(member, options)) return false; 147 148 // if the set is not currently in use act like we are still gathering members 149 if (arOpenLevel == kIOStorageAccessNone) { 150 startSetCompleteTimer(); 151 arController->restartSet(this, false); 152 } 153 154 return true; 155} 156 157bool AppleRAIDMirrorSet::resizeSet(UInt32 newMemberCount) 158{ 159 UInt32 oldMemberCount = arMemberCount; 160 161 // if downsizing, just hold on to the extra space 162 if (arLastAllocCount < newMemberCount) { 163 if (arLastSeek) IODelete(arLastSeek, UInt64, arLastAllocCount); 164 arLastSeek = IONew(UInt64, newMemberCount); 165 if (!arLastSeek) return false; 166 167 if (arSkippedIOCount) IODelete(arSkippedIOCount, UInt64, arLastAllocCount); 168 arSkippedIOCount = IONew(UInt64, newMemberCount); 169 if (!arSkippedIOCount) return false; 170 } 171 bzero(arLastSeek, sizeof(UInt64) * newMemberCount); 172 bzero(arSkippedIOCount, sizeof(UInt64) * newMemberCount); 173 174 if (super::resizeSet(newMemberCount) == false) return false; 175 176 if (oldMemberCount && arMemberCount > oldMemberCount) arExpectingLiveAdd += arMemberCount - oldMemberCount; 177 178 return true; 179} 180 181UInt32 AppleRAIDMirrorSet::nextSetState(void) 182{ 183 UInt32 nextState = super::nextSetState(); 184 185 if (nextState == kAppleRAIDSetStateOnline) { 186 if (arActiveCount < arMemberCount) { 187 nextState = kAppleRAIDSetStateDegraded; 188 } 189 } 190 191 return nextState; 192} 193 194OSDictionary * AppleRAIDMirrorSet::getSetProperties(void) 195{ 196 OSDictionary * props = super::getSetProperties(); 197 198 if (props) { 199 props->setObject(kAppleRAIDSetAutoRebuildKey, getProperty(kAppleRAIDSetAutoRebuildKey)); 200 props->setObject(kAppleRAIDSetTimeoutKey, getProperty(kAppleRAIDSetTimeoutKey)); 201// props->setObject(kAppleRAIDSetQuickRebuildKey, kOSBooleanTrue); // XXX 202 } 203 204 return props; 205} 206 207bool AppleRAIDMirrorSet::startSet(void) 208{ 209 IOLog1("AppleRAIDMirrorSet::startSet() - parallel read request max %lld bytes.\n", getSmallestMaxByteCount()); 210 arMaxReadRequestFactor = getSmallestMaxByteCount() / arSetBlockSize; 211 212 if (super::startSet() == false) return false; 213 214 if (getSetState() == kAppleRAIDSetStateDegraded) { 215 216 if (getSpareCount()) rebuildStart(); 217 218 } else { 219 // clear the timeout once the set is complete 220 arSetCompleteTimeout = kARSetCompleteTimeoutNone; 221 } 222 223 return true; 224} 225 226bool AppleRAIDMirrorSet::publishSet(void) 227{ 228 if (arExpectingLiveAdd) { 229 IOLog1("AppleRAIDMirror::publishSet() publish ignored.\n"); 230 return false; 231 } 232 233 return super::publishSet(); 234} 235 236bool AppleRAIDMirrorSet::isSetComplete(void) 237{ 238 if (super::isSetComplete()) return true; 239 240 // if timeout is still active return false 241 if (arSetCompleteTimeout) return false; 242 243 // set specific checks 244 return arActiveCount != 0; 245} 246 247bool AppleRAIDMirrorSet::bumpOnError(void) 248{ 249 return true; 250} 251 252//8888888888888888888888888888888888888888888888888888888888888888888888888888888888888888888888888 253//8888888888888888888888888888888888888888888888888888888888888888888888888888888888888888888888888 254//8888888888888888888888888888888888888888888888888888888888888888888888888888888888888888888888888 255 256void AppleRAIDMirrorSet::activeReadMembers(AppleRAIDMember ** activeMembers, UInt64 byteStart, UInt32 byteCount) 257{ 258 // this code try's to do three things: 259 // 1) send large single sequential i/o requests to each disk (arMaxReadRequestFactor) 260 // 2) send i/o requests to the disk with the smallest seek distance (arLastSeek) 261 // 3) balance the number of i/o requests between the available drives (arSkippedIOCount) 262 // 263 // this code completely ignores the effects of writes on the head position since writes move 264 // the heads on all disks. if the disk is doing track caching then ignoring the writes can 265 // still get us to a disk that may have that data already cached. 266 // 267 // note that arLastSeek is the last previously scheduled head position, the head may not 268 // be anywhere near there yet, hence this code can schedule multiple future i/o requests 269 270#define isOnline(member) ((UInt32)(member) >= 0x1000) 271#define isOffline(member) ((UInt32)(member) < 0x1000) 272 273 UInt64 distances[arMemberCount]; 274 275 for (UInt32 index = 0; index < arMemberCount; index++) { 276 277 AppleRAIDMember * member = arMembers[index]; 278 if (member) { 279 280 UInt32 memberState = member->getMemberState(); 281 if (memberState == kAppleRAIDMemberStateOpen || memberState == kAppleRAIDMemberStateClosing) { 282 283// UInt64 distance = (arLastSeek[index] <= byteStart) ? (byteStart - arLastSeek[index]) : 0xfffffffffffffffeULL; // elevator 284 UInt64 distance = max(arLastSeek[index], byteStart) - min(arLastSeek[index], byteStart); 285// if (arSkippedIOCount[index] >= (arMaxReadRequestFactor / 2)) distance = 0; 286 if (arSkippedIOCount[index] >= 12) distance = 1; 287 288 UInt32 sort = index; 289 while (sort) { 290 291 if (isOnline((uintptr_t)activeMembers[sort-1]) && distance > distances[sort-1]) break; 292 293 activeMembers[sort] = activeMembers[sort-1]; 294 distances[sort] = distances[sort-1]; 295 296 sort--; 297 } 298 activeMembers[sort] = member; 299 distances[sort] = distance; 300 continue; 301 } 302 } 303 activeMembers[index] = (AppleRAIDMember *)index; 304 distances[index] = 0xffffffffffffffffULL; 305 } 306 307 assert((arActiveCount != arMemberCount) ? (isOffline((uintptr_t)activeMembers[arActiveCount])) : (isOnline((uintptr_t)activeMembers[arMemberCount-1]))); 308 309 // adjust last seeked to pointers and skipped counts 310 UInt64 balancedBlockCount = arSetBlockSize * arMaxReadRequestFactor; 311 UInt64 perMemberCount = byteCount / balancedBlockCount / arActiveCount * balancedBlockCount; 312 UInt64 count = 0; 313 314 for (UInt32 virtualIndex = 0; virtualIndex < arActiveCount; virtualIndex++) { 315 316 AppleRAIDMember * member = activeMembers[virtualIndex]; 317 if (isOffline((uintptr_t)member)) break; 318 UInt32 memberIndex = member->getMemberIndex(); 319 320 count = perMemberCount ? min(byteCount, perMemberCount) : min(byteCount, balancedBlockCount); 321 if (count) { 322 byteStart += count; 323 byteCount -= count; 324 arLastSeek[memberIndex] = byteStart; 325 arSkippedIOCount[memberIndex] = 0; 326 } else { 327 arSkippedIOCount[memberIndex]++; 328 } 329 } 330 assert(byteCount == 0); 331 332#ifdef DEBUG2 333 static UInt32 sumCount = 0, skippedSum0 = 0, skippedSum1 = 0, overflowCount = 0; 334 static UInt64 averageSeekSum = 0; 335 336 skippedSum0 += arSkippedIOCount[0]; 337 skippedSum1 += arSkippedIOCount[1]; 338 averageSeekSum += distances[0]; 339 if (perMemberCount) overflowCount++; 340 if (sumCount++ >= 99) { 341 printf("skip0=%ld skip1=%ld, over=%ld, lastseek0=%llx lastseek1=%llx, avseek=%llx\n", 342 skippedSum0, skippedSum1, overflowCount, 343 arLastSeek[0], arLastSeek[1], averageSeekSum/100); 344 sumCount = skippedSum0 = skippedSum1 = overflowCount = 0; 345 averageSeekSum = 0; 346 } 347#endif 348} 349 350//8888888888888888888888888888888888888888888888888888888888888888888888888888888888888888888888888 351//8888888888888888888888888888888888888888888888888888888888888888888888888888888888888888888888888 352//8888888888888888888888888888888888888888888888888888888888888888888888888888888888888888888888888 353 354void AppleRAIDMirrorSet::completeRAIDRequest(AppleRAIDStorageRequest *storageRequest) 355{ 356 UInt32 cnt; 357 UInt64 byteCount; 358 UInt64 expectedByteCount; 359 IOReturn status; 360 bool isWrite; 361 362 isWrite = (storageRequest->srMemoryDescriptorDirection == kIODirectionOut); 363 byteCount = 0; 364 expectedByteCount = isWrite ? storageRequest->srByteCount * storageRequest->srActiveCount : storageRequest->srByteCount; 365 status = kIOReturnSuccess; 366 367 // Collect the status and byte count for each member. 368 for (cnt = 0; cnt < arMemberCount; cnt++) { 369 370 // Ignore missing members. 371 if (arMembers[cnt] == 0) continue; 372 373 // rebuilding members 374 if (arMembers[cnt]->getMemberState() == kAppleRAIDMemberStateRebuilding) { 375 376 if (!isWrite) { 377 assert(storageRequest->srRequestByteCounts[cnt] == 0); 378 continue; 379 } 380 381 if (storageRequest->srRequestStatus[cnt] != kIOReturnSuccess || 382 storageRequest->srRequestByteCounts[cnt] != storageRequest->srByteCount) { 383 384 // This will terminate the rebuild thread 385 arMembers[cnt]->changeMemberState(kAppleRAIDMemberStateBroken); 386 IOLog("AppleRAID::completeRAIDRequest - write error 0x%x detected during rebuild for set \"%s\" (%s) on member %s, set byte offset = %llu.\n", 387 storageRequest->srRequestStatus[cnt], getSetNameString(), getUUIDString(), 388 arMembers[cnt]->getUUIDString(), storageRequest->srByteStart); 389 } 390 continue; 391 } 392 393 // offline members 394 if (arMembers[cnt]->getMemberState() != kAppleRAIDMemberStateOpen) { 395 IOLogRW("AppleRAIDMirrorSet::completeRAIDRequest - [%u] tbc 0x%llx, sbc 0x%llx bc 0x%llx, member %p, member state %u\n", 396 (uint32_t)cnt, storageRequest->srByteCount, storageRequest->srRequestByteCounts[cnt], 397 byteCount, arMembers[cnt], (uint32_t)arMembers[cnt]->getMemberState()); 398 399 status = kIOReturnIOError; 400 401 continue; 402 } 403 404 // failing members 405 if (storageRequest->srRequestStatus[cnt] != kIOReturnSuccess) { 406 IOLog("AppleRAID::completeRAIDRequest - error 0x%x detected for set \"%s\" (%s), member %s, set byte offset = %llu.\n", 407 storageRequest->srRequestStatus[cnt], getSetNameString(), getUUIDString(), 408 arMembers[cnt]->getUUIDString(), storageRequest->srByteStart); 409 410 status = storageRequest->srRequestStatus[cnt]; 411 412 // mark this member to be removed 413 arMembers[cnt]->changeMemberState(kAppleRAIDMemberStateClosing); 414 continue; 415 } 416 417 byteCount += storageRequest->srRequestByteCounts[cnt]; 418 419 IOLogRW("AppleRAIDMirrorSet::completeRAIDRequest - [%u] tbc 0x%llx, sbc 0x%llx bc 0x%llx, member %p\n", 420 (uint32_t)cnt, storageRequest->srByteCount, storageRequest->srRequestByteCounts[cnt], 421 byteCount, arMembers[cnt]); 422 } 423 424 // Return an underrun error if the byte count is not complete. 425 // dkreadwrite should clip any requests beyond our published size 426 // however we still see underruns with pulled disks (bug?) 427 428 if (status == kIOReturnSuccess) { 429 430 if (byteCount != expectedByteCount) { 431 IOLog("AppleRAID::completeRAIDRequest - underrun detected on set = \"%s\" (%s)\n", getSetNameString(), getUUIDString()); 432 IOLog1("AppleRAID::completeRAIDRequest - total expected = 0x%llx (0x%llx), actual = 0x%llx\n", 433 expectedByteCount, storageRequest->srByteCount, byteCount); 434 status = kIOReturnUnderrun; 435 byteCount = 0; 436 437 } else { 438 439 // fix up write byte count 440 byteCount = storageRequest->srByteCount; 441 } 442 443 } else { 444 445 IOLog1("AppleRAID::completeRAIDRequest - error detected\n"); 446 447 UInt32 stillAliveCount = 0; 448 449 for (cnt = 0; cnt < arMemberCount; cnt++) { 450 451 if (arMembers[cnt] == 0) continue; 452 453 if (arMembers[cnt]->getMemberState() == kAppleRAIDMemberStateOpen) { 454 stillAliveCount++; 455 } 456 } 457 458 // if we haven't lost the entire set, retry the failed requests 459 if (stillAliveCount) { 460 461 bool recoveryActive = queue_empty(&arFailedRequestQueue) != true; 462 463 arStorageRequestsPending--; 464 queue_enter(&arFailedRequestQueue, storageRequest, AppleRAIDStorageRequest *, fCommandChain); 465 arSetCommandGate->commandWakeup(&arStorageRequestPool, /* oneThread */ false); 466 467 // kick off the recovery thread if it isn't already active 468 if (!recoveryActive) { 469 recoverStart(); 470 } 471 472 return; 473 474 } else { 475 476 // or let the recovery thread finish off the set 477 recoverStart(); 478 } 479 480 byteCount = 0; 481 } 482 483 storageRequest->srMemoryDescriptor->release(); 484 returnRAIDRequest(storageRequest); 485 486 // Call the clients completion routine, bad status is returned here. 487 IOStorage::complete(&storageRequest->srClientsCompletion, status, byteCount); 488} 489 490void AppleRAIDMirrorSet::getRecoverQueue(queue_head_t *oldRequestQueue, queue_head_t *newRequestQueue) 491{ 492 queue_new_head(oldRequestQueue, newRequestQueue, AppleRAIDStorageRequest *, fCommandChain); 493 queue_init(oldRequestQueue); 494} 495 496bool AppleRAIDMirrorSet::recover() 497{ 498 // this is on a separate thread 499 // the set is paused. 500 501 // move failed i/o queue now in case we lose the set 502 queue_head_t safeFailedRequestQueue; 503 IOCommandGate::Action getRecoverQMethod = OSMemberFunctionCast(IOCommandGate::Action, this, &AppleRAIDMirrorSet::getRecoverQueue); 504 arSetCommandGate->runAction(getRecoverQMethod, &arFailedRequestQueue, &safeFailedRequestQueue); 505 506 // remove the bad members and rebuild the set 507 bool stillHere = super::recover(); 508 509 // the set no longer paused. 510 511 IOLog1("AppleRAIDMirrorSet::recover() entered.\n"); 512 513 // requeue any previously failed i/o's 514 while (!queue_empty(&safeFailedRequestQueue)) { 515 AppleRAIDStorageRequest * oldStorageRequest; 516 queue_remove_first(&safeFailedRequestQueue, oldStorageRequest, AppleRAIDStorageRequest *, fCommandChain); 517 518 IOLog1("AppleRAIDMirrorSet::recover() requeuing request %p\n", oldStorageRequest); 519 520 IOService *client; 521 UInt64 byteStart; 522 IOMemoryDescriptor *buffer; 523 IOStorageCompletion completion; 524 525 oldStorageRequest->extractRequest(&client, &byteStart, &buffer, &completion); 526 oldStorageRequest->release(); 527 528 if (stillHere) { 529 530 AppleRAIDStorageRequest * newStorageRequest; 531 arSetCommandGate->runAction(arAllocateRequestMethod, &newStorageRequest); 532 if (newStorageRequest) { 533 534 // retry failed request 535 if (buffer->getDirection() == kIODirectionOut) { 536 newStorageRequest->write(client, byteStart, buffer, NULL, &completion); 537 } else { 538 newStorageRequest->read(client, byteStart, buffer, NULL, &completion); 539 } 540 541 continue; 542 } 543 } 544 545 // give up, return an error 546 IOStorage::complete(&completion, kIOReturnIOError, 0); 547 } 548 549 IOLog1("AppleRAIDMirrorSet::recover exiting\n"); 550 return true; 551} 552 553 554//8888888888888888888888888888888888888888888888888888888888888888888888888888888888888888888888888 555//8888888888888888888888888888888888888888888888888888888888888888888888888888888888888888888888888 556//8888888888888888888888888888888888888888888888888888888888888888888888888888888888888888888888888 557 558void AppleRAIDMirrorSet::startSetCompleteTimer() 559{ 560 IOLog1("AppleRAIDMirrorSet::startSetCompleteTimer(%p) - timer %s running.\n", 561 this, arSetCompleteTimeout ? "is already" : "was not"); 562 563 // prevent timer from firing with no backing object 564 retain(); 565 566 // once the set is live, arSetCompleteTimeout must stay zero 567 OSNumber * number = OSDynamicCast(OSNumber, getProperty(kAppleRAIDSetTimeoutKey)); 568 if (number) arSetCompleteTimeout = number->unsigned32BitValue(); 569 if (!arSetCompleteTimeout) arSetCompleteTimeout = kARSetCompleteTimeoutDefault; 570 571 // set up the timer (first time only) 572 if (!arSetCompleteThreadCall) { 573 thread_call_func_t setCompleteMethod = OSMemberFunctionCast(thread_call_func_t, this, &AppleRAIDMirrorSet::setCompleteTimeout); 574 arSetCompleteThreadCall = thread_call_allocate(setCompleteMethod, (thread_call_param_t)this); 575 } 576 577 // start timer 578 AbsoluteTime deadline; 579 clock_interval_to_deadline(arSetCompleteTimeout, kSecondScale, &deadline); 580 // an overlapping timer request will cancel the earlier request 581 bool overlap = thread_call_enter_delayed(arSetCompleteThreadCall, deadline); 582 if (overlap) release(); 583} 584 585void AppleRAIDMirrorSet::setCompleteTimeout(void) 586{ 587 IOLog1("AppleRAIDMirrorSet::setCompleteTimeout(%p) - the timeout is %sactive.\n", this, arSetCompleteTimeout ? "":"in"); 588 589 // this code is outside the global lock and the workloop 590 // to simplify handling race conditions with cancelling the timeout 591 // we always let it fire and only release the set here. 592 593 arSetCompleteTimeout = kARSetCompleteTimeoutNone; 594 595 arController->degradeSet(this); 596 release(); 597} 598 599void AppleRAIDMirrorSet::rebuildStart(void) 600{ 601 IOLog1("AppleRAIDMirrorSet::rebuildStart(%p) - entered\n", this); 602 603 // are we already rebuilding a member 604 if (arRebuildingMember) return; 605 606 // sanity checks 607 if (getSpareCount() == 0) return; 608 if (arActiveCount == 0) return; 609 610 // find a missing member that can be replaced 611 UInt32 memberIndex; 612 for (memberIndex = 0; memberIndex < arMemberCount; memberIndex++) { 613 if (arMembers[memberIndex] == 0) { 614 break; 615 } 616 } 617 if (memberIndex >= arMemberCount) return; 618 619 // find a spare that is usable 620 AppleRAIDMember * target = 0; 621 bool autoRebuild = OSDynamicCast(OSBoolean, getProperty(kAppleRAIDSetAutoRebuildKey)) == kOSBooleanTrue; 622 OSCollectionIterator * iter = OSCollectionIterator::withCollection(arSpareMembers); 623 if (!iter) return; 624 625 while (AppleRAIDMember * candidate = (AppleRAIDMember *)iter->getNextObject()) { 626 627 if (candidate->isBroken()) { 628 IOLog1("AppleRAIDMirrorSet::rebuildStart(%p) - skipping candidate %p, it is broken.\n", this, candidate); 629 continue; 630 } 631 632 // live adds have priority over regular spares 633 if (arExpectingLiveAdd) { 634 635 OSNumber * number = OSDynamicCast(OSNumber, candidate->getHeaderProperty(kAppleRAIDMemberIndexKey)); 636 if (!number) continue; 637 UInt32 candidateIndex = number->unsigned32BitValue(); 638 if (arMembers[candidateIndex]) continue; 639 memberIndex = candidateIndex; 640 candidate->changeMemberState(kAppleRAIDMemberStateSpare); 641 arExpectingLiveAdd--; 642 643 } else { 644 645 // if autorebuild is not on, only use current spares 646 if (!autoRebuild) { 647 if (candidate->isSpare()) { 648 OSNumber * number = OSDynamicCast(OSNumber, candidate->getHeaderProperty(kAppleRAIDSequenceNumberKey)); 649 if (!number) continue; 650 UInt32 sequenceNumber = number->unsigned32BitValue(); 651 if (sequenceNumber != getSequenceNumber()) { 652 IOLog1("AppleRAIDMirrorSet::rebuildStart(%p) - skipping candidate %p, expired seq num %d.\n", 653 this, candidate, (int)sequenceNumber); 654 continue; 655 } 656 } else { 657 IOLog1("AppleRAIDMirrorSet::rebuildStart(%p) - skipping candidate %p, autorebuild is off.\n", this, candidate); 658 continue; 659 } 660 } 661 } 662 663 arSpareMembers->removeObject(candidate); // must break, this breaks iter 664 target = candidate; 665 break; 666 } 667 iter->release(); 668 if (!target) return; 669 670 // pull the spare uuid out of the spare uuid list, only for v2 headers 671 OSArray * spareUUIDs = OSDynamicCast(OSArray, getProperty(kAppleRAIDSparesKey)); 672 if (spareUUIDs) spareUUIDs = OSArray::withArray(spareUUIDs); 673 if (spareUUIDs) { 674 UInt32 spareCount = spareUUIDs ? spareUUIDs->getCount() : 0; 675 for (UInt32 i = 0; i < spareCount; i++) { 676 OSString * uuid = OSDynamicCast(OSString, spareUUIDs->getObject(i)); 677 if (uuid && uuid->isEqualTo(target->getUUID())) { 678 spareUUIDs->removeObject(i); 679 } 680 } 681 setProperty(kAppleRAIDSparesKey, spareUUIDs); 682 spareUUIDs->release(); 683 } 684 685 // if this member was part of the set, rebuild it at it's old index 686 OSArray * memberUUIDs = OSDynamicCast(OSArray, getProperty(kAppleRAIDMembersKey)); 687 if (memberUUIDs) memberUUIDs = OSArray::withArray(memberUUIDs); 688 if (memberUUIDs) { 689 UInt32 memberCount = memberUUIDs ? memberUUIDs->getCount() : 0; 690 for (UInt32 i = 0; i < memberCount; i++) { 691 OSString * uuid = OSDynamicCast(OSString, memberUUIDs->getObject(i)); 692 if (uuid && uuid->isEqualTo(target->getUUID())) { 693 if (arMembers[i] == NULL) { 694 memberIndex = i; 695 break; 696 } 697 IOLog("AppleRAIDMirrorSet::rebuildStart() - spare already active at index = %d?\n", (int)memberIndex); 698 assert(0); // this should never happen 699 return; 700 } 701 } 702 } 703 704 target->setMemberIndex(memberIndex); 705 target->setHeaderProperty(kAppleRAIDSequenceNumberKey, getSequenceNumber(), 32); 706 707 IOLog1("AppleRAIDMirrorSet::rebuildStart(%p) - found a target %p for index = %d\n", this, target, (int)memberIndex); 708 709 // let any current i/o's finish before reconfiguring the mirror as writes then are expected to go to the rebuilding member. 710 arSetCommandGate->runAction(OSMemberFunctionCast(IOCommandGate::Action, this, &AppleRAIDMirrorSet::pauseSet), (void *)false); 711 712 arRebuildingMember = target; 713 714 // add member to set at the index we are rebuilding 715 // note that arActiveCount is not bumped 716 if (memberUUIDs) { 717 memberUUIDs->replaceObject(memberIndex, target->getUUID()); 718 setProperty(kAppleRAIDMembersKey, memberUUIDs); 719 memberUUIDs->release(); 720 } 721 arMembers[memberIndex] = target; 722 arMembers[memberIndex]->changeMemberState(kAppleRAIDMemberStateRebuilding); 723 724 arSetCommandGate->runAction(OSMemberFunctionCast(IOCommandGate::Action, this, &AppleRAIDMirrorSet::unpauseSet)); 725 726 if (!arRebuildThreadCall) { 727 thread_call_func_t rebuildMethod = OSMemberFunctionCast(thread_call_func_t, this, &AppleRAIDMirrorSet::rebuild); 728 arRebuildThreadCall = thread_call_allocate(rebuildMethod, (thread_call_param_t)this); 729 } 730 731 // the rebuild runs outside the workloop and global raid lock 732 // if the whole set goes, it has no idea, this keeps the set 733 // from disappearing underneath the rebuild 734 retain(); 735 736 if (arRebuildThreadCall) (void)thread_call_enter(arRebuildThreadCall); 737} 738 739 740// *** this in not inside the workloop *** 741 742void AppleRAIDMirrorSet::rebuild() 743{ 744 IOLog1("AppleRAIDMirrorSet::rebuild(%p) - entered\n", this); 745 746 AppleRAIDMember * target = arRebuildingMember; 747 AppleRAIDMember * source = 0; 748 bool targetOpen = false; 749 bool sourceOpen = false; 750 UInt32 sourceIndex = 0; 751 IOBufferMemoryDescriptor * rebuildBuffer = 0; 752 UInt64 offset = 0; 753 IOReturn rc; 754 755 // the rebuild is officially started 756 messageClients(kAppleRAIDMessageSetChanged); 757 758 // all failures need to call rebuildComplete 759 760 while (true) { 761 762 // XXX this code should be double buffered 763 764 // there is a race between the code that kicks off this thread and this thread. 765 // the other thread is updating the raid headers and if the set is not opened 766 // it closes the members when it is done. since there is no open/close counting 767 // that causes problems in this code by closing the member underneath us. 768 // since the other thread is holding the global lock if we also try to grab the 769 // lock this code will block until the headers are updated. 770 gAppleRAIDGlobals.lock(); 771 // shake your head in disgust 772 gAppleRAIDGlobals.unlock(); 773 774 // allocate copy buffers 775 rebuildBuffer = IOBufferMemoryDescriptor::withCapacity(arSetBlockSize, kIODirectionNone); 776 if (rebuildBuffer == 0) break; 777 778 // Open the target member 779 targetOpen = target->open(this, 0, kIOStorageAccessReaderWriter); 780 if (!targetOpen) break; 781 782 // clear the on disk spare state and reset the sequence number 783 target->setHeaderProperty(kAppleRAIDMemberTypeKey, kAppleRAIDMembersKey); 784 target->setHeaderProperty(kAppleRAIDSequenceNumberKey, 0, 32); 785 target->writeRAIDHeader(); 786 787 offset = arBaseOffset; 788 clock_sec_t oldTime = 0; 789 while (offset < arSetMediaSize) { 790 791 IOLog2("AppleRAIDMirrorSet::rebuild(%p) - offset = %llu bs=%llu\n", this, offset, arSetBlockSize); 792 793 // if the set is idle pause regular i/o 794 IOCommandGate::Action pauseMethod = OSMemberFunctionCast(IOCommandGate::Action, this, &AppleRAIDMirrorSet::pauseSet); 795 while (arSetCommandGate->runAction(pauseMethod, (void *)true) == false) { 796 IOSleep(100); 797 } 798 799 // check if we failed during normal i/o 800 if (target->getMemberState() != kAppleRAIDMemberStateRebuilding) break; 801 802 // find a source drive, also check if it changed 803 // the set is paused here, this should be safe 804 if (!sourceOpen || !arMembers[sourceIndex]) { 805 if (sourceOpen) close(this, 0); 806 sourceOpen = false; 807 for (sourceIndex = 0; sourceIndex < arMemberCount; sourceIndex++) { 808 if (arMembers[sourceIndex] == target) continue; 809 if ((source = arMembers[sourceIndex])) break; 810 } 811 if (!source) break; 812 sourceOpen = open(this, 0, kIOStorageAccessReader); 813 if (!sourceOpen) break; 814 } 815 816 // Fill the read buffer 817 rebuildBuffer->setDirection(kIODirectionIn); 818 rc = source->IOStorage::read((IOService *)this, offset, rebuildBuffer); 819 if (rc) { 820 IOLog("AppleRAIDMirrorSet::rebuild() - read failed with 0x%x on member %s, member byte offset = %llu\n", 821 rc, source->getUUIDString(), offset); 822 break; 823 } 824 825 rebuildBuffer->setDirection(kIODirectionOut); 826 rc = target->IOStorage::write((IOService *)this, offset, rebuildBuffer); 827 if (rc) { 828 // give up 829 IOLog("AppleRAIDMirrorSet::rebuild() - write failed with 0x%x on member %s, member byte offset = %llu\n", 830 rc, target->getUUIDString(), offset); 831 break; 832 } 833 834 arSetCommandGate->runAction(OSMemberFunctionCast(IOCommandGate::Action, this, &AppleRAIDMirrorSet::unpauseSet)); 835 836 // update rebuild status once a second 837 clock_sec_t newTime; 838 clock_usec_t dontcare; 839 clock_get_system_microtime(&newTime, &dontcare); 840 if (newTime != oldTime) { 841 oldTime = newTime; 842 843 OSNumber * bytesCompleted = OSDynamicCast(OSNumber, target->getProperty(kAppleRAIDRebuildStatus)); 844 if (bytesCompleted) { 845 // avoids a race with getMemberProperties 846 bytesCompleted->setValue(offset); 847 } else { 848 bytesCompleted = OSNumber::withNumber(offset, 64); 849 if (bytesCompleted) { 850 target->setProperty(kAppleRAIDRebuildStatus, bytesCompleted); 851 bytesCompleted->release(); 852 } 853 } 854 } 855 856 // keep requests aligned (header != block size) 857 if ((offset % arSetBlockSize) != 0) offset = (offset / arSetBlockSize) * arSetBlockSize; 858 859 offset += arSetBlockSize; 860 } 861 862 break; 863 } 864 865 // rebuilding member state changes: spare -> rebuilding -> rebuilding (open) -> closed -> open or broken 866 867 // clean up 868 if (rebuildBuffer) { 869 rebuildBuffer->release(); 870 rebuildBuffer = 0; 871 } 872 873 if (sourceOpen) close(this, 0); 874 if (targetOpen) target->close(this, 0); 875 876 // if the target state went back to spare that means the member is being removed from the set 877 bool aborting = target->getMemberState() == kAppleRAIDMemberStateSpare; 878 if (aborting) target->changeMemberState(kAppleRAIDMemberStateBroken); 879 880 if (arSetIsPaused) arSetCommandGate->runAction(OSMemberFunctionCast(IOCommandGate::Action, this, &AppleRAIDMirrorSet::unpauseSet)); 881 882 if (aborting) { 883 // calling rebuildComplete hangs on the global lock, just bail out 884 arRebuildingMember = 0; 885 } else { 886 bool success = offset >= arSetMediaSize; 887 IOCommandGate::Action rebuildCompleteMethod = OSMemberFunctionCast(IOCommandGate::Action, this, &AppleRAIDMirrorSet::rebuildComplete); 888 arSetCommandGate->runAction(rebuildCompleteMethod, (void *)success); 889 } 890 891 if (getSpareCount()) { 892 gAppleRAIDGlobals.lock(); 893 rebuildStart(); 894 gAppleRAIDGlobals.unlock(); 895 } 896 897 // just in case the set's status does not need to change 898 messageClients(kAppleRAIDMessageSetChanged); 899 900 release(); 901} 902 903void AppleRAIDMirrorSet::rebuildComplete(bool rebuiltComplete) 904{ 905 AppleRAIDMember * target = arRebuildingMember; 906 UInt32 memberIndex = target->getMemberIndex(); 907 908 // this is running in the workloop 909 // target is closed 910 911 pauseSet(false); 912 913 // clear rebuild progress from target 914 target->removeProperty(kAppleRAIDRebuildStatus); 915 916 // remove from set 917 this->detach(arMembers[memberIndex]); 918 arMembers[memberIndex] = 0; 919 920 gAppleRAIDGlobals.lock(); 921 922 // add member back into the raid set, update raid headers 923 if (rebuiltComplete && upgradeMember(target)) { 924 925 arController->restartSet(this, true); 926 927 IOLog("AppleRAIDMirrorSet::rebuild complete for set \"%s\" (%s).\n", getSetNameString(), getUUIDString()); 928 929 } else { 930 931 IOLog("AppleRAIDMirrorSet::rebuild: copy failed for set \"%s\" (%s).\n", getSetNameString(), getUUIDString()); 932 933 // just leave this member in the set's member uuid list 934 // but mark member as broken 935 target->changeMemberState(kAppleRAIDMemberStateBroken); 936 937 // and toss it back in the spare pile 938 addSpare(target); 939 } 940 941 gAppleRAIDGlobals.unlock(); 942 943 unpauseSet(); 944 945 // kick off next rebuild (if needed) 946 arRebuildingMember = 0; 947} 948 949 950AppleRAIDMemoryDescriptor * AppleRAIDMirrorSet::allocateMemoryDescriptor(AppleRAIDStorageRequest *storageRequest, UInt32 memberIndex) 951{ 952 return AppleRAIDMirrorMemoryDescriptor::withStorageRequest(storageRequest, memberIndex); 953} 954 955 956// AppleRAIDMirrorMemoryDescriptor 957// AppleRAIDMirrorMemoryDescriptor 958// AppleRAIDMirrorMemoryDescriptor 959 960#undef super 961#define super AppleRAIDMemoryDescriptor 962OSDefineMetaClassAndStructors(AppleRAIDMirrorMemoryDescriptor, AppleRAIDMemoryDescriptor); 963 964AppleRAIDMemoryDescriptor * 965AppleRAIDMirrorMemoryDescriptor::withStorageRequest(AppleRAIDStorageRequest *storageRequest, UInt32 memberIndex) 966{ 967 AppleRAIDMemoryDescriptor *memoryDescriptor = new AppleRAIDMirrorMemoryDescriptor; 968 969 if (memoryDescriptor != 0) { 970 if (!memoryDescriptor->initWithStorageRequest(storageRequest, memberIndex)) { 971 memoryDescriptor->release(); 972 memoryDescriptor = 0; 973 } 974 } 975 976 return memoryDescriptor; 977} 978 979bool AppleRAIDMirrorMemoryDescriptor::initWithStorageRequest(AppleRAIDStorageRequest *storageRequest, UInt32 memberIndex) 980{ 981 if (!super::initWithStorageRequest(storageRequest, memberIndex)) return false; 982 983 mdSetBlockSize = storageRequest->srSetBlockSize; 984 985 return true; 986} 987 988bool AppleRAIDMirrorMemoryDescriptor::configureForMemoryDescriptor(IOMemoryDescriptor *memoryDescriptor, UInt64 byteStart, UInt32 activeIndex) 989{ 990 UInt32 byteCount = memoryDescriptor->getLength(); 991 UInt32 blockCount, memberBlockCount; 992 UInt64 setBlockStop, memberBlockStart; 993 UInt32 extraBlocks, setBlockStopOffset; 994 UInt32 startIndex, virtualIndex; 995 UInt32 activeCount = mdStorageRequest->srActiveCount; 996 997 _flags = (_flags & ~kIOMemoryDirectionMask) | memoryDescriptor->getDirection(); 998 999 if (_flags & kIODirectionOut) { 1000 mdMemberByteStart = byteStart; 1001 _length = byteCount; 1002 } else { 1003 mdSetBlockStart = byteStart / mdSetBlockSize; 1004 mdSetBlockOffset = byteStart % mdSetBlockSize; 1005 setBlockStop = (byteStart + byteCount - 1) / mdSetBlockSize; 1006 setBlockStopOffset = (byteStart + byteCount - 1) % mdSetBlockSize; 1007 blockCount = setBlockStop - mdSetBlockStart + 1; 1008 memberBlockCount = blockCount / activeCount; 1009 extraBlocks = blockCount % activeCount; 1010 startIndex = mdSetBlockStart % activeCount; 1011 1012 // per member stuff 1013 1014 // find our index relative to this starting member for this request 1015 virtualIndex = (activeCount + activeIndex - startIndex) % activeCount; 1016 memberBlockStart = mdSetBlockStart + virtualIndex * memberBlockCount + min(virtualIndex, extraBlocks); 1017 if (virtualIndex < extraBlocks) memberBlockCount++; 1018 1019 // find the transfer size for this member 1020 mdMemberByteStart = memberBlockStart * mdSetBlockSize; 1021 _length = memberBlockCount * mdSetBlockSize; 1022 1023 // adjust for the starting inter-block offset 1024 if (virtualIndex == 0) { 1025 mdMemberByteStart += mdSetBlockOffset; // XXX same as byteStart? 1026 _length -= mdSetBlockOffset; 1027 } 1028 1029 // adjust for ending inter-block offset 1030 if (virtualIndex == min(blockCount - 1, activeCount - 1)) _length -= mdSetBlockSize - setBlockStopOffset - 1; 1031 1032 IOLogRW("mirror activeIndex = %u, mdMemberByteStart = %llu _length = 0x%x\n", (uint32_t)activeIndex, mdMemberByteStart, (uint32_t)_length); 1033 } 1034 1035 mdMemoryDescriptor = memoryDescriptor; 1036 1037 return _length != 0; 1038} 1039 1040addr64_t AppleRAIDMirrorMemoryDescriptor::getPhysicalSegment(IOByteCount offset, IOByteCount *length, IOOptionBits options) 1041{ 1042 IOByteCount setOffset = offset; 1043 addr64_t physAddress; 1044 UInt32 memberBlockStart, memberBlockOffset, blockCount; 1045 1046 if (_flags & kIODirectionIn) { 1047 memberBlockStart = (mdMemberByteStart + offset) / mdSetBlockSize; 1048 memberBlockOffset = (mdMemberByteStart + offset) % mdSetBlockSize; 1049 blockCount = memberBlockStart - mdSetBlockStart; 1050 setOffset = blockCount * mdSetBlockSize + memberBlockOffset - mdSetBlockOffset; 1051 } 1052 1053 physAddress = mdMemoryDescriptor->getPhysicalSegment(setOffset, length, options); 1054 1055 return physAddress; 1056} 1057