1/*
2 * Copyright (c) 2001-2007 Apple Inc. All rights reserved.
3 *
4 * @APPLE_LICENSE_HEADER_START@
5 *
6 * The contents of this file constitute Original Code as defined in and
7 * are subject to the Apple Public Source License Version 1.1 (the
8 * "License").  You may not use this file except in compliance with the
9 * License.  Please obtain a copy of the License at
10 * http://www.apple.com/publicsource and read it before using this file.
11 *
12 * This Original Code and all software distributed under the License are
13 * distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY KIND, EITHER
14 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
15 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
16 * FITNESS FOR A PARTICULAR PURPOSE OR NON-INFRINGEMENT.  Please see the
17 * License for the specific language governing rights and limitations
18 * under the License.
19 *
20 * @APPLE_LICENSE_HEADER_END@
21 */
22
23#include "AppleRAID.h"
24
// Shorthand for the parent class; used as super::method() throughout this file.
#define super AppleRAIDSet
// Declares AppleRAIDMirrorSet (derived from AppleRAIDSet) to the libkern OSMetaClass runtime.
OSDefineMetaClassAndStructors(AppleRAIDMirrorSet, AppleRAIDSet);
27
28AppleRAIDSet * AppleRAIDMirrorSet::createRAIDSet(AppleRAIDMember * firstMember)
29{
30    AppleRAIDMirrorSet *raidSet = new AppleRAIDMirrorSet;
31
32    IOLog1("AppleRAIDMirrorSet::createRAIDSet(%p) called, new set = %p  *********\n", firstMember, raidSet);
33
34    while (raidSet){
35
36	if (!raidSet->init()) break;
37	if (!raidSet->initWithHeader(firstMember->getHeader(), true)) break;
38	if (raidSet->resizeSet(raidSet->getMemberCount())) return raidSet;
39
40	break;
41    }
42
43    if (raidSet) raidSet->release();
44
45    return 0;
46}
47
48bool AppleRAIDMirrorSet::init()
49{
50    IOLog1("AppleRAIDMirrorSet::init() called\n");
51
52    if (super::init() == false) return false;
53
54    arRebuildThreadCall = 0;
55    arSetCompleteThreadCall = 0;
56    arExpectingLiveAdd = 0;
57    arMaxReadRequestFactor = 32;	// with the default 32KB blocksize -> 1 MB
58
59    queue_init(&arFailedRequestQueue);
60
61    setProperty(kAppleRAIDLevelNameKey, kAppleRAIDLevelNameMirror);
62
63    arAllocateRequestMethod = OSMemberFunctionCast(IOCommandGate::Action, this, &AppleRAIDSet::allocateRAIDRequest);
64
65    return true;
66}
67
68bool AppleRAIDMirrorSet::initWithHeader(OSDictionary * header, bool firstTime)
69{
70    if (super::initWithHeader(header, firstTime) == false) return false;
71
72    setProperty(kAppleRAIDSetAutoRebuildKey, header->getObject(kAppleRAIDSetAutoRebuildKey));
73    setProperty(kAppleRAIDSetTimeoutKey, header->getObject(kAppleRAIDSetTimeoutKey));
74
75    //	arQuickRebuildBitSize = 0;  //XXX
76
77    // schedule a timeout to start up degraded sets
78    if (firstTime) startSetCompleteTimer();
79
80    return true;
81}
82
83void AppleRAIDMirrorSet::free(void)
84{
85    if (arRebuildThreadCall) thread_call_free(arRebuildThreadCall);
86    arRebuildThreadCall = 0;
87    if (arSetCompleteThreadCall) thread_call_free(arSetCompleteThreadCall);
88    arSetCompleteThreadCall = 0;
89
90    if (arLastSeek) IODelete(arLastSeek, UInt64, arLastAllocCount);
91    if (arSkippedIOCount) IODelete(arSkippedIOCount, UInt64, arLastAllocCount);
92
93    assert(queue_empty(&arFailedRequestQueue));
94
95    super::free();
96}
97
98//8888888888888888888888888888888888888888888888888888888888888888888888888888888888888888888888888
99//8888888888888888888888888888888888888888888888888888888888888888888888888888888888888888888888888
100//8888888888888888888888888888888888888888888888888888888888888888888888888888888888888888888888888
101
102IOBufferMemoryDescriptor * AppleRAIDMirrorSet::readPrimaryMetaData(AppleRAIDMember * member)
103{
104    IOBufferMemoryDescriptor * primaryBuffer = super::readPrimaryMetaData(member);
105
106    // XXX
107
108    return primaryBuffer;
109}
110
111IOReturn AppleRAIDMirrorSet::writePrimaryMetaData(IOBufferMemoryDescriptor * primaryBuffer)
112{
113
114    // XXX
115
116    return super::writePrimaryMetaData(primaryBuffer);
117}
118
119//8888888888888888888888888888888888888888888888888888888888888888888888888888888888888888888888888
120//8888888888888888888888888888888888888888888888888888888888888888888888888888888888888888888888888
121//8888888888888888888888888888888888888888888888888888888888888888888888888888888888888888888888888
122
123bool AppleRAIDMirrorSet::addMember(AppleRAIDMember * member)
124{
125    if (arExpectingLiveAdd) {
126	// for mirrors the set is not paused for adding while adding new
127	// members, mark it as a spare here to avoid having it marked broken
128	member->changeMemberState(kAppleRAIDMemberStateSpare, true);
129    }
130
131    if (super::addMember(member) == false) return false;
132
133    // set block count = member block count
134    OSNumber * number = OSDynamicCast(OSNumber, member->getHeaderProperty(kAppleRAIDChunkCountKey));
135    if (!number) return false;
136    arSetBlockCount = number->unsigned64BitValue();
137    arSetMediaSize = arSetBlockCount * arSetBlockSize;
138
139    if (arOpenLevel == kIOStorageAccessNone) startSetCompleteTimer();
140
141    return true;
142}
143
144bool AppleRAIDMirrorSet::removeMember(AppleRAIDMember * member, IOOptionBits options)
145{
146    if (!super::removeMember(member, options)) return false;
147
148    // if the set is not currently in use act like we are still gathering members
149    if (arOpenLevel == kIOStorageAccessNone) {
150	startSetCompleteTimer();
151	arController->restartSet(this, false);
152    }
153
154    return true;
155}
156
157bool AppleRAIDMirrorSet::resizeSet(UInt32 newMemberCount)
158{
159    UInt32 oldMemberCount = arMemberCount;
160
161    // if downsizing, just hold on to the extra space
162    if (arLastAllocCount < newMemberCount) {
163	if (arLastSeek) IODelete(arLastSeek, UInt64, arLastAllocCount);
164	arLastSeek = IONew(UInt64, newMemberCount);
165	if (!arLastSeek) return false;
166
167	if (arSkippedIOCount) IODelete(arSkippedIOCount, UInt64, arLastAllocCount);
168	arSkippedIOCount = IONew(UInt64, newMemberCount);
169	if (!arSkippedIOCount) return false;
170    }
171    bzero(arLastSeek, sizeof(UInt64) * newMemberCount);
172    bzero(arSkippedIOCount, sizeof(UInt64) * newMemberCount);
173
174    if (super::resizeSet(newMemberCount) == false) return false;
175
176    if (oldMemberCount && arMemberCount > oldMemberCount) arExpectingLiveAdd += arMemberCount - oldMemberCount;
177
178    return true;
179}
180
181UInt32 AppleRAIDMirrorSet::nextSetState(void)
182{
183    UInt32 nextState = super::nextSetState();
184
185    if (nextState == kAppleRAIDSetStateOnline) {
186	if (arActiveCount < arMemberCount) {
187	    nextState = kAppleRAIDSetStateDegraded;
188	}
189    }
190
191    return nextState;
192}
193
194OSDictionary * AppleRAIDMirrorSet::getSetProperties(void)
195{
196    OSDictionary * props = super::getSetProperties();
197
198    if (props) {
199	props->setObject(kAppleRAIDSetAutoRebuildKey, getProperty(kAppleRAIDSetAutoRebuildKey));
200	props->setObject(kAppleRAIDSetTimeoutKey, getProperty(kAppleRAIDSetTimeoutKey));
201//	props->setObject(kAppleRAIDSetQuickRebuildKey, kOSBooleanTrue);  // XXX
202    }
203
204    return props;
205}
206
207bool AppleRAIDMirrorSet::startSet(void)
208{
209    IOLog1("AppleRAIDMirrorSet::startSet() - parallel read request max %lld bytes.\n", getSmallestMaxByteCount());
210    arMaxReadRequestFactor = getSmallestMaxByteCount() / arSetBlockSize;
211
212    if (super::startSet() == false) return false;
213
214    if (getSetState() == kAppleRAIDSetStateDegraded) {
215
216	if (getSpareCount()) rebuildStart();
217
218    } else {
219	// clear the timeout once the set is complete
220	arSetCompleteTimeout = kARSetCompleteTimeoutNone;
221    }
222
223    return true;
224}
225
226bool AppleRAIDMirrorSet::publishSet(void)
227{
228    if (arExpectingLiveAdd) {
229	IOLog1("AppleRAIDMirror::publishSet() publish ignored.\n");
230	return false;
231    }
232
233    return super::publishSet();
234}
235
236bool AppleRAIDMirrorSet::isSetComplete(void)
237{
238    if (super::isSetComplete()) return true;
239
240    // if timeout is still active return false
241    if (arSetCompleteTimeout) return false;
242
243    // set specific checks
244    return arActiveCount != 0;
245}
246
247bool AppleRAIDMirrorSet::bumpOnError(void)
248{
249    return true;
250}
251
252//8888888888888888888888888888888888888888888888888888888888888888888888888888888888888888888888888
253//8888888888888888888888888888888888888888888888888888888888888888888888888888888888888888888888888
254//8888888888888888888888888888888888888888888888888888888888888888888888888888888888888888888888888
255
256void AppleRAIDMirrorSet::activeReadMembers(AppleRAIDMember ** activeMembers, UInt64 byteStart, UInt32 byteCount)
257{
258    // this code try's to do three things:
259    //   1) send large single sequential i/o requests to each disk		(arMaxReadRequestFactor)
260    //   2) send i/o requests to the disk with the smallest seek distance	(arLastSeek)
261    //   3) balance the number of i/o requests between the available drives	(arSkippedIOCount)
262    //
263    // this code completely ignores the effects of writes on the head position since writes move
264    // the heads on all disks.  if the disk is doing track caching then ignoring the writes can
265    // still get us to a disk that may have that data already cached.
266    //
267    // note that arLastSeek is the last previously scheduled head position, the head may not
268    // be anywhere near there yet, hence this code can schedule multiple future i/o requests
269
270#define isOnline(member) ((UInt32)(member) >= 0x1000)
271#define isOffline(member) ((UInt32)(member) < 0x1000)
272
273    UInt64 distances[arMemberCount];
274
275    for (UInt32 index = 0; index < arMemberCount; index++) {
276
277	AppleRAIDMember * member = arMembers[index];
278	if (member) {
279
280	    UInt32 memberState = member->getMemberState();
281	    if (memberState == kAppleRAIDMemberStateOpen || memberState == kAppleRAIDMemberStateClosing) {
282
283//		UInt64 distance = (arLastSeek[index] <= byteStart) ? (byteStart - arLastSeek[index]) : 0xfffffffffffffffeULL;  // elevator
284		UInt64 distance = max(arLastSeek[index], byteStart) - min(arLastSeek[index], byteStart);
285//		if (arSkippedIOCount[index] >= (arMaxReadRequestFactor / 2)) distance = 0;
286		if (arSkippedIOCount[index] >= 12) distance = 1;
287
288		UInt32 sort = index;
289		while (sort) {
290
291		    if (isOnline((uintptr_t)activeMembers[sort-1]) && distance > distances[sort-1]) break;
292
293		    activeMembers[sort] = activeMembers[sort-1];
294		    distances[sort] = distances[sort-1];
295
296		    sort--;
297		}
298		activeMembers[sort] = member;
299		distances[sort] = distance;
300		continue;
301	    }
302	}
303	activeMembers[index] = (AppleRAIDMember *)index;
304	distances[index] = 0xffffffffffffffffULL;
305    }
306
307    assert((arActiveCount != arMemberCount) ? (isOffline((uintptr_t)activeMembers[arActiveCount])) : (isOnline((uintptr_t)activeMembers[arMemberCount-1])));
308
309    // adjust last seeked to pointers and skipped counts
310    UInt64 balancedBlockCount = arSetBlockSize * arMaxReadRequestFactor;
311    UInt64 perMemberCount = byteCount / balancedBlockCount / arActiveCount * balancedBlockCount;
312    UInt64 count = 0;
313
314    for (UInt32 virtualIndex = 0; virtualIndex < arActiveCount; virtualIndex++) {
315
316	AppleRAIDMember * member = activeMembers[virtualIndex];
317	if (isOffline((uintptr_t)member)) break;
318	UInt32 memberIndex = member->getMemberIndex();
319
320	count = perMemberCount ? min(byteCount, perMemberCount) : min(byteCount, balancedBlockCount);
321	if (count) {
322	    byteStart += count;
323	    byteCount -= count;
324	    arLastSeek[memberIndex] = byteStart;
325	    arSkippedIOCount[memberIndex] = 0;
326	} else {
327	    arSkippedIOCount[memberIndex]++;
328	}
329    }
330    assert(byteCount == 0);
331
332#ifdef DEBUG2
333    static UInt32 sumCount = 0, skippedSum0 = 0, skippedSum1 = 0, overflowCount = 0;
334    static UInt64 averageSeekSum = 0;
335
336    skippedSum0 += arSkippedIOCount[0];
337    skippedSum1 += arSkippedIOCount[1];
338    averageSeekSum += distances[0];
339    if (perMemberCount) overflowCount++;
340    if (sumCount++ >= 99) {
341	printf("skip0=%ld skip1=%ld, over=%ld, lastseek0=%llx lastseek1=%llx, avseek=%llx\n",
342	       skippedSum0, skippedSum1, overflowCount,
343	       arLastSeek[0], arLastSeek[1], averageSeekSum/100);
344	sumCount = skippedSum0 = skippedSum1 = overflowCount = 0;
345	averageSeekSum = 0;
346    }
347#endif
348}
349
350//8888888888888888888888888888888888888888888888888888888888888888888888888888888888888888888888888
351//8888888888888888888888888888888888888888888888888888888888888888888888888888888888888888888888888
352//8888888888888888888888888888888888888888888888888888888888888888888888888888888888888888888888888
353
// Completion handler for a storage request that was issued to the mirror.
//
// Walks all members to collect the per-member status and byte counts, then
// either:
//   - completes the client's request (success, underrun, or total failure), or
//   - if an error was seen but at least one member is still open, parks the
//     request on arFailedRequestQueue for recover() to retry and returns
//     without calling the client's completion.
//
// For writes the expected byte count is srByteCount times srActiveCount, for
// reads it is srByteCount.
void AppleRAIDMirrorSet::completeRAIDRequest(AppleRAIDStorageRequest *storageRequest)
{
    UInt32		cnt;
    UInt64              byteCount;
    UInt64              expectedByteCount;
    IOReturn            status;
    bool		isWrite;

    isWrite = (storageRequest->srMemoryDescriptorDirection == kIODirectionOut);
    byteCount = 0;
    expectedByteCount = isWrite ? storageRequest->srByteCount * storageRequest->srActiveCount : storageRequest->srByteCount;
    status = kIOReturnSuccess;

    // Collect the status and byte count for each member.
    for (cnt = 0; cnt < arMemberCount; cnt++) {

	// Ignore missing members.
	if (arMembers[cnt] == 0) continue;

	// rebuilding members
	// their byte counts are never added to the total, a failed write only
	// marks the rebuilding member as broken and does not fail the request
	if (arMembers[cnt]->getMemberState() == kAppleRAIDMemberStateRebuilding) {

	    if (!isWrite) {
		assert(storageRequest->srRequestByteCounts[cnt] == 0);
		continue;
	    }

	    if (storageRequest->srRequestStatus[cnt] != kIOReturnSuccess ||
		storageRequest->srRequestByteCounts[cnt] != storageRequest->srByteCount) {

		// This will terminate the rebuild thread
		arMembers[cnt]->changeMemberState(kAppleRAIDMemberStateBroken);
		IOLog("AppleRAID::completeRAIDRequest - write error 0x%x detected during rebuild for set \"%s\" (%s) on member %s, set byte offset = %llu.\n",
		      storageRequest->srRequestStatus[cnt], getSetNameString(), getUUIDString(),
		      arMembers[cnt]->getUUIDString(), storageRequest->srByteStart);
	    }
	    continue;
	}

	// offline members
	// any present member that is not in the open state fails the request
	if (arMembers[cnt]->getMemberState() != kAppleRAIDMemberStateOpen) {
	    IOLogRW("AppleRAIDMirrorSet::completeRAIDRequest - [%u] tbc 0x%llx, sbc 0x%llx bc 0x%llx, member %p, member state %u\n",
		    (uint32_t)cnt, storageRequest->srByteCount, storageRequest->srRequestByteCounts[cnt],
		    byteCount, arMembers[cnt], (uint32_t)arMembers[cnt]->getMemberState());

	    status = kIOReturnIOError;

	    continue;
	}

        // failing members
        if (storageRequest->srRequestStatus[cnt] != kIOReturnSuccess) {
	    IOLog("AppleRAID::completeRAIDRequest - error 0x%x detected for set \"%s\" (%s), member %s, set byte offset = %llu.\n",
		  storageRequest->srRequestStatus[cnt], getSetNameString(), getUUIDString(),
		  arMembers[cnt]->getUUIDString(), storageRequest->srByteStart);

            status = storageRequest->srRequestStatus[cnt];

	    // mark this member to be removed
	    arMembers[cnt]->changeMemberState(kAppleRAIDMemberStateClosing);
	    continue;
        }

	// healthy member, its transfer counts toward the total
	byteCount += storageRequest->srRequestByteCounts[cnt];

	IOLogRW("AppleRAIDMirrorSet::completeRAIDRequest - [%u] tbc 0x%llx, sbc 0x%llx bc 0x%llx, member %p\n",
		(uint32_t)cnt, storageRequest->srByteCount, storageRequest->srRequestByteCounts[cnt],
		byteCount, arMembers[cnt]);
    }

    // Return an underrun error if the byte count is not complete.
    // dkreadwrite should clip any requests beyond our published size
    // however we still see underruns with pulled disks (bug?)

    if (status == kIOReturnSuccess) {

	if (byteCount != expectedByteCount) {
	    IOLog("AppleRAID::completeRAIDRequest - underrun detected on set = \"%s\" (%s)\n", getSetNameString(), getUUIDString());
	    IOLog1("AppleRAID::completeRAIDRequest - total expected = 0x%llx (0x%llx), actual = 0x%llx\n",
		   expectedByteCount, storageRequest->srByteCount, byteCount);
	    status = kIOReturnUnderrun;
	    byteCount = 0;

	} else {

	    // fix up write byte count
	    // (the sum over all members is reported to the client as a single transfer)
	    byteCount = storageRequest->srByteCount;
	}

    } else {

	IOLog1("AppleRAID::completeRAIDRequest - error detected\n");

	// count the members that are still in the open state after the
	// state changes made in the loop above
	UInt32 stillAliveCount = 0;

	for (cnt = 0; cnt < arMemberCount; cnt++) {

	    if (arMembers[cnt] == 0) continue;

	    if (arMembers[cnt]->getMemberState() == kAppleRAIDMemberStateOpen) {
		stillAliveCount++;
	    }
	}

	// if we haven't lost the entire set, retry the failed requests
	if (stillAliveCount) {

	    // sampled before this request is queued, a non-empty queue means
	    // recovery was already started for an earlier failure
	    bool recoveryActive = queue_empty(&arFailedRequestQueue) != true;

	    arStorageRequestsPending--;
	    queue_enter(&arFailedRequestQueue, storageRequest, AppleRAIDStorageRequest *, fCommandChain);
	    arSetCommandGate->commandWakeup(&arStorageRequestPool, /* oneThread */ false);

	    // kick off the recovery thread if it isn't already active
	    if (!recoveryActive) {
		recoverStart();
	    }

	    // the request now belongs to the recovery path, the client's
	    // completion is not called here
	    return;

	} else {

	    // or let the recovery thread finish off the set
	    recoverStart();
	}

	byteCount = 0;
    }

    storageRequest->srMemoryDescriptor->release();
    returnRAIDRequest(storageRequest);

    // Call the clients completion routine, bad status is returned here.
    IOStorage::complete(&storageRequest->srClientsCompletion, status, byteCount);
}
489
// Moves the contents of oldRequestQueue over to newRequestQueue and leaves
// oldRequestQueue empty.  recover() runs this through the set's command gate
// to take over arFailedRequestQueue.
void AppleRAIDMirrorSet::getRecoverQueue(queue_head_t *oldRequestQueue, queue_head_t *newRequestQueue)
{
    // make newRequestQueue the head of the existing chain of requests
    queue_new_head(oldRequestQueue, newRequestQueue, AppleRAIDStorageRequest *, fCommandChain);
    // then reset the old head so that it reads as empty
    queue_init(oldRequestQueue);
}
495
496bool AppleRAIDMirrorSet::recover()
497{
498    // this is on a separate thread
499    // the set is paused.
500
501    // move failed i/o queue now in case we lose the set
502    queue_head_t safeFailedRequestQueue;
503    IOCommandGate::Action getRecoverQMethod = OSMemberFunctionCast(IOCommandGate::Action, this, &AppleRAIDMirrorSet::getRecoverQueue);
504    arSetCommandGate->runAction(getRecoverQMethod, &arFailedRequestQueue, &safeFailedRequestQueue);
505
506    // remove the bad members and rebuild the set
507    bool stillHere = super::recover();
508
509    // the set no longer paused.
510
511    IOLog1("AppleRAIDMirrorSet::recover() entered.\n");
512
513    // requeue any previously failed i/o's
514    while (!queue_empty(&safeFailedRequestQueue)) {
515	AppleRAIDStorageRequest * oldStorageRequest;
516	queue_remove_first(&safeFailedRequestQueue, oldStorageRequest, AppleRAIDStorageRequest *, fCommandChain);
517
518	IOLog1("AppleRAIDMirrorSet::recover() requeuing request %p\n", oldStorageRequest);
519
520	IOService *client;
521	UInt64 byteStart;
522	IOMemoryDescriptor *buffer;
523	IOStorageCompletion completion;
524
525	oldStorageRequest->extractRequest(&client, &byteStart, &buffer, &completion);
526	oldStorageRequest->release();
527
528	if (stillHere) {
529
530	    AppleRAIDStorageRequest * newStorageRequest;
531	    arSetCommandGate->runAction(arAllocateRequestMethod, &newStorageRequest);
532	    if (newStorageRequest) {
533
534		// retry failed request
535		if (buffer->getDirection() == kIODirectionOut) {
536		    newStorageRequest->write(client, byteStart, buffer, NULL, &completion);
537		} else {
538		    newStorageRequest->read(client, byteStart, buffer, NULL, &completion);
539		}
540
541		continue;
542	    }
543	}
544
545	// give up, return an error
546	IOStorage::complete(&completion, kIOReturnIOError, 0);
547    }
548
549    IOLog1("AppleRAIDMirrorSet::recover exiting\n");
550    return true;
551}
552
553
554//8888888888888888888888888888888888888888888888888888888888888888888888888888888888888888888888888
555//8888888888888888888888888888888888888888888888888888888888888888888888888888888888888888888888888
556//8888888888888888888888888888888888888888888888888888888888888888888888888888888888888888888888888
557
558void AppleRAIDMirrorSet::startSetCompleteTimer()
559{
560    IOLog1("AppleRAIDMirrorSet::startSetCompleteTimer(%p) - timer %s running.\n",
561	   this, arSetCompleteTimeout ? "is already" : "was not");
562
563    // prevent timer from firing with no backing object
564    retain();
565
566    // once the set is live, arSetCompleteTimeout must stay zero
567    OSNumber * number = OSDynamicCast(OSNumber, getProperty(kAppleRAIDSetTimeoutKey));
568    if (number) arSetCompleteTimeout = number->unsigned32BitValue();
569    if (!arSetCompleteTimeout) arSetCompleteTimeout = kARSetCompleteTimeoutDefault;
570
571    // set up the timer (first time only)
572    if (!arSetCompleteThreadCall) {
573	thread_call_func_t setCompleteMethod = OSMemberFunctionCast(thread_call_func_t, this, &AppleRAIDMirrorSet::setCompleteTimeout);
574	arSetCompleteThreadCall = thread_call_allocate(setCompleteMethod, (thread_call_param_t)this);
575    }
576
577    // start timer
578    AbsoluteTime deadline;
579    clock_interval_to_deadline(arSetCompleteTimeout, kSecondScale, &deadline);
580    // an overlapping timer request will cancel the earlier request
581    bool overlap = thread_call_enter_delayed(arSetCompleteThreadCall, deadline);
582    if (overlap) release();
583}
584
// Thread call handler for the timer armed by startSetCompleteTimer().
// Marks the timeout as no longer active, asks the controller to degrade the
// set, and drops the reference that startSetCompleteTimer() took for this
// timer.
void AppleRAIDMirrorSet::setCompleteTimeout(void)
{
    IOLog1("AppleRAIDMirrorSet::setCompleteTimeout(%p) - the timeout is %sactive.\n", this, arSetCompleteTimeout ? "":"in");

    // this code is outside the global lock and the workloop
    // to simplify handling race conditions with cancelling the timeout
    // we always let it fire and only release the set here.

    // isSetComplete() checks this value, clear it before degrading the set
    arSetCompleteTimeout = kARSetCompleteTimeoutNone;

    arController->degradeSet(this);
    // balances the retain() in startSetCompleteTimer(), must be last
    release();
}
598
599void AppleRAIDMirrorSet::rebuildStart(void)
600{
601    IOLog1("AppleRAIDMirrorSet::rebuildStart(%p) - entered\n", this);
602
603    // are we already rebuilding a member
604    if (arRebuildingMember) return;
605
606    // sanity checks
607    if (getSpareCount() == 0) return;
608    if (arActiveCount == 0) return;
609
610    // find a missing member that can be replaced
611    UInt32 memberIndex;
612    for (memberIndex = 0; memberIndex < arMemberCount; memberIndex++) {
613	if (arMembers[memberIndex] == 0) {
614	    break;
615	}
616    }
617    if (memberIndex >= arMemberCount) return;
618
619    // find a spare that is usable
620    AppleRAIDMember * target = 0;
621    bool autoRebuild = OSDynamicCast(OSBoolean, getProperty(kAppleRAIDSetAutoRebuildKey)) == kOSBooleanTrue;
622    OSCollectionIterator * iter = OSCollectionIterator::withCollection(arSpareMembers);
623    if (!iter) return;
624
625    while (AppleRAIDMember * candidate = (AppleRAIDMember *)iter->getNextObject()) {
626
627	if (candidate->isBroken()) {
628	    IOLog1("AppleRAIDMirrorSet::rebuildStart(%p) - skipping candidate %p, it is broken.\n", this, candidate);
629	    continue;
630	}
631
632	// live adds have priority over regular spares
633	if (arExpectingLiveAdd) {
634
635	    OSNumber * number = OSDynamicCast(OSNumber, candidate->getHeaderProperty(kAppleRAIDMemberIndexKey));
636	    if (!number) continue;
637	    UInt32 candidateIndex = number->unsigned32BitValue();
638	    if (arMembers[candidateIndex]) continue;
639	    memberIndex = candidateIndex;
640	    candidate->changeMemberState(kAppleRAIDMemberStateSpare);
641	    arExpectingLiveAdd--;
642
643	} else {
644
645	    // if autorebuild is not on, only use current spares
646	    if (!autoRebuild) {
647		if (candidate->isSpare()) {
648		    OSNumber * number = OSDynamicCast(OSNumber, candidate->getHeaderProperty(kAppleRAIDSequenceNumberKey));
649		    if (!number) continue;
650		    UInt32 sequenceNumber = number->unsigned32BitValue();
651		    if (sequenceNumber != getSequenceNumber()) {
652			IOLog1("AppleRAIDMirrorSet::rebuildStart(%p) - skipping candidate %p, expired seq num %d.\n",
653			       this, candidate, (int)sequenceNumber);
654			continue;
655		    }
656		} else {
657		    IOLog1("AppleRAIDMirrorSet::rebuildStart(%p) - skipping candidate %p, autorebuild is off.\n", this, candidate);
658		    continue;
659		}
660	    }
661	}
662
663	arSpareMembers->removeObject(candidate);  // must break, this breaks iter
664	target = candidate;
665	break;
666    }
667    iter->release();
668    if (!target) return;
669
670    // pull the spare uuid out of the spare uuid list, only for v2 headers
671    OSArray * spareUUIDs = OSDynamicCast(OSArray, getProperty(kAppleRAIDSparesKey));
672    if (spareUUIDs) spareUUIDs = OSArray::withArray(spareUUIDs);
673    if (spareUUIDs) {
674	UInt32 spareCount = spareUUIDs ? spareUUIDs->getCount() : 0;
675	for (UInt32 i = 0; i < spareCount; i++) {
676	    OSString * uuid = OSDynamicCast(OSString, spareUUIDs->getObject(i));
677	    if (uuid && uuid->isEqualTo(target->getUUID())) {
678		spareUUIDs->removeObject(i);
679	    }
680	}
681	setProperty(kAppleRAIDSparesKey, spareUUIDs);
682	spareUUIDs->release();
683    }
684
685    // if this member was part of the set, rebuild it at it's old index
686    OSArray * memberUUIDs = OSDynamicCast(OSArray, getProperty(kAppleRAIDMembersKey));
687    if (memberUUIDs) memberUUIDs = OSArray::withArray(memberUUIDs);
688    if (memberUUIDs) {
689	UInt32 memberCount = memberUUIDs ? memberUUIDs->getCount() : 0;
690	for (UInt32 i = 0; i < memberCount; i++) {
691	    OSString * uuid = OSDynamicCast(OSString, memberUUIDs->getObject(i));
692	    if (uuid && uuid->isEqualTo(target->getUUID())) {
693		if (arMembers[i] == NULL) {
694		    memberIndex = i;
695		    break;
696		}
697		IOLog("AppleRAIDMirrorSet::rebuildStart() - spare already active at index = %d?\n", (int)memberIndex);
698		assert(0);  // this should never happen
699		return;
700	    }
701	}
702    }
703
704    target->setMemberIndex(memberIndex);
705    target->setHeaderProperty(kAppleRAIDSequenceNumberKey, getSequenceNumber(), 32);
706
707    IOLog1("AppleRAIDMirrorSet::rebuildStart(%p) - found a target %p for index = %d\n", this, target, (int)memberIndex);
708
709    // let any current i/o's finish before reconfiguring the mirror as writes then are expected to go to the rebuilding member.
710    arSetCommandGate->runAction(OSMemberFunctionCast(IOCommandGate::Action, this, &AppleRAIDMirrorSet::pauseSet), (void *)false);
711
712    arRebuildingMember = target;
713
714    // add member to set at the index we are rebuilding
715    // note that arActiveCount is not bumped
716    if (memberUUIDs) {
717	memberUUIDs->replaceObject(memberIndex, target->getUUID());
718	setProperty(kAppleRAIDMembersKey, memberUUIDs);
719	memberUUIDs->release();
720    }
721    arMembers[memberIndex] = target;
722    arMembers[memberIndex]->changeMemberState(kAppleRAIDMemberStateRebuilding);
723
724    arSetCommandGate->runAction(OSMemberFunctionCast(IOCommandGate::Action, this, &AppleRAIDMirrorSet::unpauseSet));
725
726    if (!arRebuildThreadCall) {
727	thread_call_func_t rebuildMethod = OSMemberFunctionCast(thread_call_func_t, this, &AppleRAIDMirrorSet::rebuild);
728	arRebuildThreadCall = thread_call_allocate(rebuildMethod, (thread_call_param_t)this);
729    }
730
731    // the rebuild runs outside the workloop and global raid lock
732    // if the whole set goes, it has no idea, this keeps the set
733    // from disappearing underneath the rebuild
734    retain();
735
736    if (arRebuildThreadCall) (void)thread_call_enter(arRebuildThreadCall);
737}
738
739
// *** this is not inside the workloop ***
741
// Thread call body that copies the set's data onto arRebuildingMember.
//
// Scheduled by rebuildStart().  The copy proceeds one set block at a time
// from arBaseOffset up to arSetMediaSize: for every block the set is paused,
// the block is read from a source member and written to the target, and the
// set is unpaused again.  The target's rebuild status property is refreshed
// about once a second.
//
// When the copy loop ends (finished or failed) the buffer and members are
// cleaned up and rebuildComplete() is run through the command gate with
// success == (offset >= arSetMediaSize), unless the target fell back to the
// spare state, in which case it is marked broken and the rebuild is simply
// abandoned.  If spares remain another rebuild is attempted.  The reference
// taken in rebuildStart() is released at the end.
void AppleRAIDMirrorSet::rebuild()
{
    IOLog1("AppleRAIDMirrorSet::rebuild(%p) - entered\n", this);

    AppleRAIDMember * target = arRebuildingMember;
    AppleRAIDMember * source = 0;
    bool targetOpen = false;
    bool sourceOpen = false;
    UInt32 sourceIndex = 0;
    IOBufferMemoryDescriptor * rebuildBuffer = 0;
    UInt64 offset = 0;
    IOReturn rc;

    // the rebuild is officially started
    messageClients(kAppleRAIDMessageSetChanged);

    // all failures need to call rebuildComplete

    // this loop runs at most once, it only exists so that "break" can be
    // used to jump to the clean up code
    while (true) {

	// XXX this code should be double buffered

	// there is a race between the code that kicks off this thread and this thread.
	// the other thread is updating the raid headers and if the set is not opened
	// it closes the members when it is done.  since there is no open/close counting
	// that causes problems in this code by closing the member underneath us.
	// since the other thread is holding the global lock if we also try to grab the
	// lock this code will block until the headers are updated.
	gAppleRAIDGlobals.lock();
	// shake your head in disgust
	gAppleRAIDGlobals.unlock();

	// allocate copy buffers
	// (a single buffer of one set block, its direction is flipped between
	// the read and the write below)
	rebuildBuffer = IOBufferMemoryDescriptor::withCapacity(arSetBlockSize, kIODirectionNone);
	if (rebuildBuffer == 0) break;

	// Open the target member
	targetOpen = target->open(this, 0, kIOStorageAccessReaderWriter);
	if (!targetOpen) break;

	// clear the on disk spare state and reset the sequence number
	target->setHeaderProperty(kAppleRAIDMemberTypeKey, kAppleRAIDMembersKey);
	target->setHeaderProperty(kAppleRAIDSequenceNumberKey, 0, 32);
	target->writeRAIDHeader();

	offset = arBaseOffset;
	clock_sec_t oldTime = 0;
	while (offset < arSetMediaSize) {

	    IOLog2("AppleRAIDMirrorSet::rebuild(%p) - offset = %llu bs=%llu\n", this, offset, arSetBlockSize);

	    // if the set is idle pause regular i/o
	    // (retried every 100 ms for as long as pauseSet reports false)
	    IOCommandGate::Action pauseMethod = OSMemberFunctionCast(IOCommandGate::Action, this, &AppleRAIDMirrorSet::pauseSet);
	    while (arSetCommandGate->runAction(pauseMethod, (void *)true) == false) {
		IOSleep(100);
	    }

	    // from here on a "break" leaves the set paused, the clean up code
	    // after the loop checks arSetIsPaused and unpauses it

	    // check if we failed during normal i/o
	    if (target->getMemberState() != kAppleRAIDMemberStateRebuilding) break;

	    // find a source drive, also check if it changed
	    // the set is paused here, this should be safe
	    // NOTE(review): open() and close() below are called on the set
	    // itself rather than on "source" -- confirm this is intended
	    if (!sourceOpen || !arMembers[sourceIndex]) {
		if (sourceOpen) close(this, 0);
		sourceOpen = false;
		for (sourceIndex = 0; sourceIndex < arMemberCount; sourceIndex++) {
		    if (arMembers[sourceIndex] == target) continue;
		    if ((source = arMembers[sourceIndex])) break;
		}
		if (!source) break;
		sourceOpen = open(this, 0, kIOStorageAccessReader);
		if (!sourceOpen) break;
	    }

	    // Fill the read buffer
	    rebuildBuffer->setDirection(kIODirectionIn);
	    rc = source->IOStorage::read((IOService *)this, offset, rebuildBuffer);
	    if (rc) {
		    IOLog("AppleRAIDMirrorSet::rebuild() - read failed with 0x%x on member %s, member byte offset = %llu\n",
			  rc, source->getUUIDString(), offset);
		break;
	    }

	    // copy the block out to the target at the same offset
	    rebuildBuffer->setDirection(kIODirectionOut);
	    rc = target->IOStorage::write((IOService *)this, offset, rebuildBuffer);
	    if (rc) {
		// give up
		IOLog("AppleRAIDMirrorSet::rebuild() - write failed with 0x%x on member %s, member byte offset = %llu\n",
		      rc, target->getUUIDString(), offset);
		break;
	    }

	    arSetCommandGate->runAction(OSMemberFunctionCast(IOCommandGate::Action, this, &AppleRAIDMirrorSet::unpauseSet));

	    // update rebuild status once a second
	    clock_sec_t newTime;
	    clock_usec_t dontcare;
	    clock_get_system_microtime(&newTime, &dontcare);
	    if (newTime != oldTime) {
		oldTime = newTime;

		OSNumber * bytesCompleted = OSDynamicCast(OSNumber, target->getProperty(kAppleRAIDRebuildStatus));
		if (bytesCompleted) {
		    // avoids a race with getMemberProperties
		    bytesCompleted->setValue(offset);
		} else {
		    bytesCompleted = OSNumber::withNumber(offset, 64);
		    if (bytesCompleted) {
			target->setProperty(kAppleRAIDRebuildStatus, bytesCompleted);
			bytesCompleted->release();
		    }
		}
	    }

	    // keep requests aligned (header != block size)
	    // (an unaligned offset is rounded down to a block boundary before stepping)
	    if ((offset % arSetBlockSize) != 0) offset = (offset / arSetBlockSize) * arSetBlockSize;

	    offset += arSetBlockSize;
	}

	break;
    }

    // rebuilding member state changes: spare -> rebuilding -> rebuilding (open) -> closed -> open or broken

    // clean up
    if (rebuildBuffer) {
	rebuildBuffer->release();
	rebuildBuffer = 0;
    }

    if (sourceOpen) close(this, 0);
    if (targetOpen) target->close(this, 0);

    // if the target state went back to spare that means the member is being removed from the set
    bool aborting = target->getMemberState() == kAppleRAIDMemberStateSpare;
    if (aborting) target->changeMemberState(kAppleRAIDMemberStateBroken);

    // the copy loop may have been left while the set was still paused
    if (arSetIsPaused) arSetCommandGate->runAction(OSMemberFunctionCast(IOCommandGate::Action, this, &AppleRAIDMirrorSet::unpauseSet));

    if (aborting) {
	// calling rebuildComplete hangs on the global lock, just bail out
	arRebuildingMember = 0;
    } else {
	// the rebuild succeeded only if the copy loop ran all the way to the end
	bool success = offset >= arSetMediaSize;
	IOCommandGate::Action rebuildCompleteMethod = OSMemberFunctionCast(IOCommandGate::Action, this, &AppleRAIDMirrorSet::rebuildComplete);
	arSetCommandGate->runAction(rebuildCompleteMethod, (void *)success);
    }

    // try the next spare, rebuildStart() is called with the global lock held here
    if (getSpareCount()) {
	gAppleRAIDGlobals.lock();
	rebuildStart();
	gAppleRAIDGlobals.unlock();
    }

    // just in case the set's status does not need to change
    messageClients(kAppleRAIDMessageSetChanged);

    // balances the retain() in rebuildStart()
    release();
}
902
903void AppleRAIDMirrorSet::rebuildComplete(bool rebuiltComplete)
904{
905    AppleRAIDMember * target = arRebuildingMember;
906    UInt32 memberIndex = target->getMemberIndex();
907
908    // this is running in the workloop
909    // target is closed
910
911    pauseSet(false);
912
913    // clear rebuild progress from target
914    target->removeProperty(kAppleRAIDRebuildStatus);
915
916    // remove from set
917    this->detach(arMembers[memberIndex]);
918    arMembers[memberIndex] = 0;
919
920    gAppleRAIDGlobals.lock();
921
922    // add member back into the raid set, update raid headers
923    if (rebuiltComplete && upgradeMember(target)) {
924
925	arController->restartSet(this, true);
926
927	IOLog("AppleRAIDMirrorSet::rebuild complete for set \"%s\" (%s).\n", getSetNameString(), getUUIDString());
928
929    } else {
930
931	IOLog("AppleRAIDMirrorSet::rebuild: copy failed for set \"%s\" (%s).\n", getSetNameString(), getUUIDString());
932
933	// just leave this member in the set's member uuid list
934	// but mark member as broken
935	target->changeMemberState(kAppleRAIDMemberStateBroken);
936
937	// and toss it back in the spare pile
938	addSpare(target);
939    }
940
941    gAppleRAIDGlobals.unlock();
942
943    unpauseSet();
944
945    // kick off next rebuild (if needed)
946    arRebuildingMember = 0;
947}
948
949
950AppleRAIDMemoryDescriptor * AppleRAIDMirrorSet::allocateMemoryDescriptor(AppleRAIDStorageRequest *storageRequest, UInt32 memberIndex)
951{
952    return AppleRAIDMirrorMemoryDescriptor::withStorageRequest(storageRequest, memberIndex);
953}
954
955
956// AppleRAIDMirrorMemoryDescriptor
957// AppleRAIDMirrorMemoryDescriptor
958// AppleRAIDMirrorMemoryDescriptor
959
// `super` was defined as AppleRAIDSet for the AppleRAIDMirrorSet methods above.
// Re-point it here so that the super:: calls in the AppleRAIDMirrorMemoryDescriptor
// methods below resolve to AppleRAIDMemoryDescriptor.
#undef super
#define super AppleRAIDMemoryDescriptor
// libkern macro that emits the OSMetaClass boilerplate for this class,
// naming AppleRAIDMemoryDescriptor as its parent class.
OSDefineMetaClassAndStructors(AppleRAIDMirrorMemoryDescriptor, AppleRAIDMemoryDescriptor);
963
964AppleRAIDMemoryDescriptor *
965AppleRAIDMirrorMemoryDescriptor::withStorageRequest(AppleRAIDStorageRequest *storageRequest, UInt32 memberIndex)
966{
967    AppleRAIDMemoryDescriptor *memoryDescriptor = new AppleRAIDMirrorMemoryDescriptor;
968
969    if (memoryDescriptor != 0) {
970        if (!memoryDescriptor->initWithStorageRequest(storageRequest, memberIndex)) {
971            memoryDescriptor->release();
972            memoryDescriptor = 0;
973        }
974    }
975
976    return memoryDescriptor;
977}
978
979bool AppleRAIDMirrorMemoryDescriptor::initWithStorageRequest(AppleRAIDStorageRequest *storageRequest, UInt32 memberIndex)
980{
981    if (!super::initWithStorageRequest(storageRequest, memberIndex)) return false;
982
983    mdSetBlockSize = storageRequest->srSetBlockSize;
984
985    return true;
986}
987
988bool AppleRAIDMirrorMemoryDescriptor::configureForMemoryDescriptor(IOMemoryDescriptor *memoryDescriptor, UInt64 byteStart, UInt32 activeIndex)
989{
990    UInt32 byteCount = memoryDescriptor->getLength();
991    UInt32 blockCount, memberBlockCount;
992    UInt64 setBlockStop, memberBlockStart;
993    UInt32 extraBlocks, setBlockStopOffset;
994    UInt32 startIndex, virtualIndex;
995    UInt32 activeCount = mdStorageRequest->srActiveCount;
996
997    _flags = (_flags & ~kIOMemoryDirectionMask) | memoryDescriptor->getDirection();
998
999    if (_flags & kIODirectionOut) {
1000	mdMemberByteStart = byteStart;
1001	_length = byteCount;
1002    } else {
1003	mdSetBlockStart		= byteStart / mdSetBlockSize;
1004	mdSetBlockOffset	= byteStart % mdSetBlockSize;
1005	setBlockStop		= (byteStart + byteCount - 1) / mdSetBlockSize;
1006	setBlockStopOffset	= (byteStart + byteCount - 1) % mdSetBlockSize;
1007	blockCount		= setBlockStop - mdSetBlockStart + 1;
1008	memberBlockCount	= blockCount / activeCount;
1009	extraBlocks		= blockCount % activeCount;
1010	startIndex		= mdSetBlockStart % activeCount;
1011
1012	// per member stuff
1013
1014	// find our index relative to this starting member for this request
1015	virtualIndex = (activeCount + activeIndex - startIndex) % activeCount;
1016	memberBlockStart = mdSetBlockStart + virtualIndex * memberBlockCount + min(virtualIndex, extraBlocks);
1017	if (virtualIndex < extraBlocks) memberBlockCount++;
1018
1019	// find the transfer size for this member
1020	mdMemberByteStart = memberBlockStart * mdSetBlockSize;
1021	_length = memberBlockCount * mdSetBlockSize;
1022
1023	// adjust for the starting inter-block offset
1024	if (virtualIndex == 0) {
1025	    mdMemberByteStart += mdSetBlockOffset;   // XXX same as byteStart?
1026	    _length	      -= mdSetBlockOffset;
1027	}
1028
1029	// adjust for ending inter-block offset
1030	if (virtualIndex == min(blockCount - 1, activeCount - 1)) _length -= mdSetBlockSize - setBlockStopOffset - 1;
1031
1032	IOLogRW("mirror activeIndex = %u, mdMemberByteStart = %llu _length = 0x%x\n", (uint32_t)activeIndex, mdMemberByteStart, (uint32_t)_length);
1033    }
1034
1035    mdMemoryDescriptor = memoryDescriptor;
1036
1037    return _length != 0;
1038}
1039
1040addr64_t AppleRAIDMirrorMemoryDescriptor::getPhysicalSegment(IOByteCount offset, IOByteCount *length, IOOptionBits options)
1041{
1042    IOByteCount		setOffset = offset;
1043    addr64_t		physAddress;
1044    UInt32		memberBlockStart, memberBlockOffset, blockCount;
1045
1046    if (_flags & kIODirectionIn) {
1047        memberBlockStart = (mdMemberByteStart + offset) / mdSetBlockSize;
1048        memberBlockOffset = (mdMemberByteStart + offset) % mdSetBlockSize;
1049        blockCount = memberBlockStart - mdSetBlockStart;
1050        setOffset = blockCount * mdSetBlockSize + memberBlockOffset - mdSetBlockOffset;
1051    }
1052
1053    physAddress = mdMemoryDescriptor->getPhysicalSegment(setOffset, length, options);
1054
1055    return physAddress;
1056}
1057