rf_chaindecluster.c revision 1.13
1193323Sed/*	$NetBSD: rf_chaindecluster.c,v 1.13 2005/12/11 12:23:37 christos Exp $	*/
2193323Sed/*
3193323Sed * Copyright (c) 1995 Carnegie-Mellon University.
4193323Sed * All rights reserved.
5193323Sed *
6193323Sed * Author: Khalil Amiri
7193323Sed *
8193323Sed * Permission to use, copy, modify and distribute this software and
9193323Sed * its documentation is hereby granted, provided that both the copyright
10193323Sed * notice and this permission notice appear in all copies of the
11193323Sed * software, derivative works or modified versions, and any portions
12193323Sed * thereof, and that both notices appear in supporting documentation.
13193323Sed *
14193323Sed * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
15193323Sed * CONDITION.  CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
16193323Sed * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
17193323Sed *
18193323Sed * Carnegie Mellon requests users of this software to return to
19193323Sed *
20193323Sed *  Software Distribution Coordinator  or  Software.Distribution@CS.CMU.EDU
21193323Sed *  School of Computer Science
22193323Sed *  Carnegie Mellon University
23193323Sed *  Pittsburgh PA 15213-3890
24193323Sed *
25193323Sed * any improvements or extensions that they make and grant Carnegie the
26193323Sed * rights to redistribute these changes.
27193323Sed */
28193323Sed
29193323Sed/******************************************************************************
30193323Sed *
31193323Sed * rf_chaindecluster.c -- implements chained declustering
32193323Sed *
33193323Sed *****************************************************************************/
34193323Sed
35193323Sed#include <sys/cdefs.h>
36198090Srdivacky__KERNEL_RCSID(0, "$NetBSD: rf_chaindecluster.c,v 1.13 2005/12/11 12:23:37 christos Exp $");
37193323Sed
38193323Sed#include "rf_archs.h"
39193323Sed
40193323Sed#if (RF_INCLUDE_CHAINDECLUSTER > 0)
41193323Sed
42193323Sed#include <dev/raidframe/raidframevar.h>
43193323Sed
44193323Sed#include "rf_raid.h"
45193323Sed#include "rf_chaindecluster.h"
46193323Sed#include "rf_dag.h"
47193323Sed#include "rf_dagutils.h"
48193323Sed#include "rf_dagffrd.h"
49193323Sed#include "rf_dagffwr.h"
50193323Sed#include "rf_dagdegrd.h"
51193323Sed#include "rf_dagfuncs.h"
52193323Sed#include "rf_general.h"
53193323Sed#include "rf_utils.h"
54193323Sed
55193323Sedtypedef struct RF_ChaindeclusterConfigInfo_s {
56193323Sed	RF_RowCol_t **stripeIdentifier;	/* filled in at config time and used
57193323Sed					 * by IdentifyStripe */
58193323Sed	RF_StripeCount_t numSparingRegions;
59193323Sed	RF_StripeCount_t stripeUnitsPerSparingRegion;
60193323Sed	RF_SectorNum_t mirrorStripeOffset;
61193323Sed}       RF_ChaindeclusterConfigInfo_t;
62193323Sed
63193323Sedint
64193323Sedrf_ConfigureChainDecluster(RF_ShutdownList_t **listp, RF_Raid_t *raidPtr,
65193323Sed			   RF_Config_t *cfgPtr)
66193323Sed{
67193323Sed	RF_RaidLayout_t *layoutPtr = &raidPtr->Layout;
68193323Sed	RF_StripeCount_t num_used_stripeUnitsPerDisk;
69193323Sed	RF_ChaindeclusterConfigInfo_t *info;
70193323Sed	RF_RowCol_t i;
71193323Sed
72193323Sed	/* create a Chained Declustering configuration structure */
73193323Sed	RF_MallocAndAdd(info, sizeof(RF_ChaindeclusterConfigInfo_t), (RF_ChaindeclusterConfigInfo_t *), raidPtr->cleanupList);
74198090Srdivacky	if (info == NULL)
75193323Sed		return (ENOMEM);
76193323Sed	layoutPtr->layoutSpecificInfo = (void *) info;
77193323Sed
78193323Sed	/* fill in the config structure.  */
79193323Sed	info->stripeIdentifier = rf_make_2d_array(raidPtr->numCol, 2, raidPtr->cleanupList);
80193323Sed	if (info->stripeIdentifier == NULL)
81193323Sed		return (ENOMEM);
82193323Sed	for (i = 0; i < raidPtr->numCol; i++) {
83193323Sed		info->stripeIdentifier[i][0] = i % raidPtr->numCol;
84193323Sed		info->stripeIdentifier[i][1] = (i + 1) % raidPtr->numCol;
85193323Sed	}
86193323Sed
87193323Sed	/* fill in the remaining layout parameters */
88193323Sed	num_used_stripeUnitsPerDisk = layoutPtr->stripeUnitsPerDisk - (layoutPtr->stripeUnitsPerDisk %
89193323Sed	    (2 * raidPtr->numCol - 2));
90193323Sed	info->numSparingRegions = num_used_stripeUnitsPerDisk / (2 * raidPtr->numCol - 2);
91193323Sed	info->stripeUnitsPerSparingRegion = raidPtr->numCol * (raidPtr->numCol - 1);
92193323Sed	info->mirrorStripeOffset = info->numSparingRegions * (raidPtr->numCol - 1);
93193323Sed	layoutPtr->numStripe = info->numSparingRegions * info->stripeUnitsPerSparingRegion;
94193323Sed	layoutPtr->numDataCol = 1;
95193323Sed	layoutPtr->dataSectorsPerStripe = layoutPtr->numDataCol * layoutPtr->sectorsPerStripeUnit;
96193323Sed	layoutPtr->numParityCol = 1;
97193323Sed
98193323Sed	layoutPtr->dataStripeUnitsPerDisk = num_used_stripeUnitsPerDisk;
99193323Sed
100193323Sed	raidPtr->sectorsPerDisk =
101193323Sed	    num_used_stripeUnitsPerDisk * layoutPtr->sectorsPerStripeUnit;
102193323Sed
103193323Sed	raidPtr->totalSectors =
104193323Sed	    (layoutPtr->numStripe) * layoutPtr->sectorsPerStripeUnit;
105193323Sed
106193323Sed	layoutPtr->stripeUnitsPerDisk = raidPtr->sectorsPerDisk / layoutPtr->sectorsPerStripeUnit;
107193323Sed
108193323Sed	return (0);
109193323Sed}
110193323Sed
111193323SedRF_ReconUnitCount_t
112193323Sedrf_GetNumSpareRUsChainDecluster(RF_Raid_t *raidPtr)
113193323Sed{
114193323Sed	RF_ChaindeclusterConfigInfo_t *info = (RF_ChaindeclusterConfigInfo_t *) raidPtr->Layout.layoutSpecificInfo;
115193323Sed
116193323Sed	/*
117193323Sed         * The layout uses two stripe units per disk as spare within each
118193323Sed         * sparing region.
119193323Sed         */
120193323Sed	return (2 * info->numSparingRegions);
121193323Sed}
122193323Sed
123193323Sed
124193323Sed/* Maps to the primary copy of the data, i.e. the first mirror pair */
125193323Sedvoid
126193323Sedrf_MapSectorChainDecluster(RF_Raid_t *raidPtr, RF_RaidAddr_t raidSector,
127193323Sed			   RF_RowCol_t *col, RF_SectorNum_t *diskSector,
128193323Sed			   int remap)
129193323Sed{
130193323Sed	RF_ChaindeclusterConfigInfo_t *info = (RF_ChaindeclusterConfigInfo_t *) raidPtr->Layout.layoutSpecificInfo;
131193323Sed	RF_StripeNum_t SUID = raidSector / raidPtr->Layout.sectorsPerStripeUnit;
132193323Sed	RF_SectorNum_t index_within_region, index_within_disk;
133193323Sed	RF_StripeNum_t sparing_region_id;
134193323Sed	int     col_before_remap;
135193323Sed
136193323Sed	sparing_region_id = SUID / info->stripeUnitsPerSparingRegion;
137193323Sed	index_within_region = SUID % info->stripeUnitsPerSparingRegion;
138193323Sed	index_within_disk = index_within_region / raidPtr->numCol;
139193323Sed	col_before_remap = SUID % raidPtr->numCol;
140193323Sed
141193323Sed	if (!remap) {
142193323Sed		*col = col_before_remap;
143193323Sed		*diskSector = (index_within_disk + ((raidPtr->numCol - 1) * sparing_region_id)) *
144193323Sed		    raidPtr->Layout.sectorsPerStripeUnit;
145193323Sed		*diskSector += (raidSector % raidPtr->Layout.sectorsPerStripeUnit);
146193323Sed	} else {
147193323Sed		/* remap sector to spare space... */
148193323Sed		*diskSector = sparing_region_id * (raidPtr->numCol + 1) * raidPtr->Layout.sectorsPerStripeUnit;
149193323Sed		*diskSector += (raidPtr->numCol - 1) * raidPtr->Layout.sectorsPerStripeUnit;
150193323Sed		*diskSector += (raidSector % raidPtr->Layout.sectorsPerStripeUnit);
151193323Sed		index_within_disk = index_within_region / raidPtr->numCol;
152193323Sed		if (index_within_disk < col_before_remap)
153193323Sed			*col = index_within_disk;
154193323Sed		else
155193323Sed			if (index_within_disk == raidPtr->numCol - 2) {
156193323Sed				*col = (col_before_remap + raidPtr->numCol - 1) % raidPtr->numCol;
157193323Sed				*diskSector += raidPtr->Layout.sectorsPerStripeUnit;
158193323Sed			} else
159193323Sed				*col = (index_within_disk + 2) % raidPtr->numCol;
160193323Sed	}
161193323Sed
162193323Sed}
163193323Sed
164193323Sed
165193323Sed
166193323Sed/* Maps to the second copy of the mirror pair, which is chain declustered. The second copy is contained
167198090Srdivacky   in the next disk (mod numCol) after the disk containing the primary copy.
168193323Sed   The offset into the disk is one-half disk down */
169193323Sedvoid
170193323Sedrf_MapParityChainDecluster(RF_Raid_t *raidPtr, RF_RaidAddr_t raidSector,
171193323Sed			   RF_RowCol_t *col, RF_SectorNum_t *diskSector,
172193323Sed			   int remap)
173193323Sed{
174193323Sed	RF_ChaindeclusterConfigInfo_t *info = (RF_ChaindeclusterConfigInfo_t *) raidPtr->Layout.layoutSpecificInfo;
175193323Sed	RF_StripeNum_t SUID = raidSector / raidPtr->Layout.sectorsPerStripeUnit;
176193323Sed	RF_SectorNum_t index_within_region, index_within_disk;
177198892Srdivacky	RF_StripeNum_t sparing_region_id;
178193323Sed	int     col_before_remap;
179193323Sed
180193323Sed	if (!remap) {
181198090Srdivacky		*col = SUID % raidPtr->numCol;
182193323Sed		*col = (*col + 1) % raidPtr->numCol;
183193323Sed		*diskSector = info->mirrorStripeOffset * raidPtr->Layout.sectorsPerStripeUnit;
184193323Sed		*diskSector += (SUID / raidPtr->numCol) * raidPtr->Layout.sectorsPerStripeUnit;
185193323Sed		*diskSector += (raidSector % raidPtr->Layout.sectorsPerStripeUnit);
186193323Sed	} else {
187193323Sed		/* remap parity to spare space ... */
188193323Sed		sparing_region_id = SUID / info->stripeUnitsPerSparingRegion;
189198090Srdivacky		index_within_region = SUID % info->stripeUnitsPerSparingRegion;
190193323Sed		index_within_disk = index_within_region / raidPtr->numCol;
191193323Sed		*diskSector = sparing_region_id * (raidPtr->numCol + 1) * raidPtr->Layout.sectorsPerStripeUnit;
192193323Sed		*diskSector += (raidPtr->numCol) * raidPtr->Layout.sectorsPerStripeUnit;
193193323Sed		*diskSector += (raidSector % raidPtr->Layout.sectorsPerStripeUnit);
194193323Sed		col_before_remap = SUID % raidPtr->numCol;
195193323Sed		if (index_within_disk < col_before_remap)
196193323Sed			*col = index_within_disk;
197193323Sed		else
198193323Sed			if (index_within_disk == raidPtr->numCol - 2) {
199193323Sed				*col = (col_before_remap + 2) % raidPtr->numCol;
200193323Sed				*diskSector -= raidPtr->Layout.sectorsPerStripeUnit;
201193323Sed			} else
202193323Sed				*col = (index_within_disk + 2) % raidPtr->numCol;
203193323Sed	}
204193323Sed
205193323Sed}
206193323Sed
207193323Sedvoid
208193323Sedrf_IdentifyStripeChainDecluster(RF_Raid_t *raidPtr, RF_RaidAddr_t addr,
209193323Sed				RF_RowCol_t **diskids)
210193323Sed{
211193323Sed	RF_ChaindeclusterConfigInfo_t *info = (RF_ChaindeclusterConfigInfo_t *) raidPtr->Layout.layoutSpecificInfo;
212193323Sed	RF_StripeNum_t SUID;
213193323Sed	RF_RowCol_t col;
214193323Sed
215193323Sed	SUID = addr / raidPtr->Layout.sectorsPerStripeUnit;
216193323Sed	col = SUID % raidPtr->numCol;
217193323Sed	*diskids = info->stripeIdentifier[col];
218193323Sed}
219193323Sed
220198090Srdivackyvoid
221193323Sedrf_MapSIDToPSIDChainDecluster(RF_RaidLayout_t *layoutPtr,
222193323Sed			      RF_StripeNum_t stripeID,
223193323Sed			      RF_StripeNum_t *psID,
224193323Sed			      RF_ReconUnitNum_t *which_ru)
225193323Sed{
226193323Sed	*which_ru = 0;
227193323Sed	*psID = stripeID;
228193323Sed}
229193323Sed/******************************************************************************
230193323Sed * select a graph to perform a single-stripe access
231193323Sed *
232193323Sed * Parameters:  raidPtr    - description of the physical array
233193323Sed *              type       - type of operation (read or write) requested
234193323Sed *              asmap      - logical & physical addresses for this access
235193323Sed *              createFunc - function to use to create the graph (return value)
236193323Sed *****************************************************************************/
237193323Sed
238193323Sedvoid
239198090Srdivackyrf_RAIDCDagSelect(RF_Raid_t *raidPtr, RF_IoType_t type,
240193323Sed		  RF_AccessStripeMap_t *asmap, RF_VoidFuncPtr *createFunc)
241193323Sed#if 0
242193323Sed	void    (**createFunc) (RF_Raid_t *, RF_AccessStripeMap_t *,
243193323Sed            RF_DagHeader_t *, void *, RF_RaidAccessFlags_t,
244193323Sed            RF_AllocListElem_t *)
245193323Sed#endif
246193323Sed{
247193323Sed	RF_ASSERT(RF_IO_IS_R_OR_W(type));
248193323Sed
249193323Sed	if (asmap->numDataFailed + asmap->numParityFailed > 1) {
250193323Sed		RF_ERRORMSG("Multiple disks failed in a single group!  Aborting I/O operation.\n");
251193323Sed		*createFunc = NULL;
252193323Sed		return;
253193323Sed	}
254193323Sed	*createFunc = (type == RF_IO_TYPE_READ) ? (RF_VoidFuncPtr) rf_CreateFaultFreeReadDAG : (RF_VoidFuncPtr) rf_CreateRaidOneWriteDAG;
255193323Sed
256193323Sed	if (type == RF_IO_TYPE_READ) {
257193323Sed		if ((raidPtr->status == rf_rs_degraded) || (raidPtr->status == rf_rs_reconstructing))
258193323Sed			*createFunc = (RF_VoidFuncPtr) rf_CreateRaidCDegradedReadDAG;	/* array status is
259193323Sed											 * degraded, implement
260193323Sed											 * workload shifting */
261193323Sed		else
262193323Sed			*createFunc = (RF_VoidFuncPtr) rf_CreateMirrorPartitionReadDAG;	/* array status not
263193323Sed											 * degraded, so use
264193323Sed											 * mirror partition dag */
265193323Sed	} else
266193323Sed		*createFunc = (RF_VoidFuncPtr) rf_CreateRaidOneWriteDAG;
267193323Sed}
268193323Sed#endif /* (RF_INCLUDE_CHAINDECLUSTER > 0) */
269193323Sed