/* rf_chaindecluster.c revision 1.13 */
1193323Sed/* $NetBSD: rf_chaindecluster.c,v 1.13 2005/12/11 12:23:37 christos Exp $ */ 2193323Sed/* 3193323Sed * Copyright (c) 1995 Carnegie-Mellon University. 4193323Sed * All rights reserved. 5193323Sed * 6193323Sed * Author: Khalil Amiri 7193323Sed * 8193323Sed * Permission to use, copy, modify and distribute this software and 9193323Sed * its documentation is hereby granted, provided that both the copyright 10193323Sed * notice and this permission notice appear in all copies of the 11193323Sed * software, derivative works or modified versions, and any portions 12193323Sed * thereof, and that both notices appear in supporting documentation. 13193323Sed * 14193323Sed * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS" 15193323Sed * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND 16193323Sed * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE. 17193323Sed * 18193323Sed * Carnegie Mellon requests users of this software to return to 19193323Sed * 20193323Sed * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU 21193323Sed * School of Computer Science 22193323Sed * Carnegie Mellon University 23193323Sed * Pittsburgh PA 15213-3890 24193323Sed * 25193323Sed * any improvements or extensions that they make and grant Carnegie the 26193323Sed * rights to redistribute these changes. 
27193323Sed */ 28193323Sed 29193323Sed/****************************************************************************** 30193323Sed * 31193323Sed * rf_chaindecluster.c -- implements chained declustering 32193323Sed * 33193323Sed *****************************************************************************/ 34193323Sed 35193323Sed#include <sys/cdefs.h> 36198090Srdivacky__KERNEL_RCSID(0, "$NetBSD: rf_chaindecluster.c,v 1.13 2005/12/11 12:23:37 christos Exp $"); 37193323Sed 38193323Sed#include "rf_archs.h" 39193323Sed 40193323Sed#if (RF_INCLUDE_CHAINDECLUSTER > 0) 41193323Sed 42193323Sed#include <dev/raidframe/raidframevar.h> 43193323Sed 44193323Sed#include "rf_raid.h" 45193323Sed#include "rf_chaindecluster.h" 46193323Sed#include "rf_dag.h" 47193323Sed#include "rf_dagutils.h" 48193323Sed#include "rf_dagffrd.h" 49193323Sed#include "rf_dagffwr.h" 50193323Sed#include "rf_dagdegrd.h" 51193323Sed#include "rf_dagfuncs.h" 52193323Sed#include "rf_general.h" 53193323Sed#include "rf_utils.h" 54193323Sed 55193323Sedtypedef struct RF_ChaindeclusterConfigInfo_s { 56193323Sed RF_RowCol_t **stripeIdentifier; /* filled in at config time and used 57193323Sed * by IdentifyStripe */ 58193323Sed RF_StripeCount_t numSparingRegions; 59193323Sed RF_StripeCount_t stripeUnitsPerSparingRegion; 60193323Sed RF_SectorNum_t mirrorStripeOffset; 61193323Sed} RF_ChaindeclusterConfigInfo_t; 62193323Sed 63193323Sedint 64193323Sedrf_ConfigureChainDecluster(RF_ShutdownList_t **listp, RF_Raid_t *raidPtr, 65193323Sed RF_Config_t *cfgPtr) 66193323Sed{ 67193323Sed RF_RaidLayout_t *layoutPtr = &raidPtr->Layout; 68193323Sed RF_StripeCount_t num_used_stripeUnitsPerDisk; 69193323Sed RF_ChaindeclusterConfigInfo_t *info; 70193323Sed RF_RowCol_t i; 71193323Sed 72193323Sed /* create a Chained Declustering configuration structure */ 73193323Sed RF_MallocAndAdd(info, sizeof(RF_ChaindeclusterConfigInfo_t), (RF_ChaindeclusterConfigInfo_t *), raidPtr->cleanupList); 74198090Srdivacky if (info == NULL) 75193323Sed return 
(ENOMEM); 76193323Sed layoutPtr->layoutSpecificInfo = (void *) info; 77193323Sed 78193323Sed /* fill in the config structure. */ 79193323Sed info->stripeIdentifier = rf_make_2d_array(raidPtr->numCol, 2, raidPtr->cleanupList); 80193323Sed if (info->stripeIdentifier == NULL) 81193323Sed return (ENOMEM); 82193323Sed for (i = 0; i < raidPtr->numCol; i++) { 83193323Sed info->stripeIdentifier[i][0] = i % raidPtr->numCol; 84193323Sed info->stripeIdentifier[i][1] = (i + 1) % raidPtr->numCol; 85193323Sed } 86193323Sed 87193323Sed /* fill in the remaining layout parameters */ 88193323Sed num_used_stripeUnitsPerDisk = layoutPtr->stripeUnitsPerDisk - (layoutPtr->stripeUnitsPerDisk % 89193323Sed (2 * raidPtr->numCol - 2)); 90193323Sed info->numSparingRegions = num_used_stripeUnitsPerDisk / (2 * raidPtr->numCol - 2); 91193323Sed info->stripeUnitsPerSparingRegion = raidPtr->numCol * (raidPtr->numCol - 1); 92193323Sed info->mirrorStripeOffset = info->numSparingRegions * (raidPtr->numCol - 1); 93193323Sed layoutPtr->numStripe = info->numSparingRegions * info->stripeUnitsPerSparingRegion; 94193323Sed layoutPtr->numDataCol = 1; 95193323Sed layoutPtr->dataSectorsPerStripe = layoutPtr->numDataCol * layoutPtr->sectorsPerStripeUnit; 96193323Sed layoutPtr->numParityCol = 1; 97193323Sed 98193323Sed layoutPtr->dataStripeUnitsPerDisk = num_used_stripeUnitsPerDisk; 99193323Sed 100193323Sed raidPtr->sectorsPerDisk = 101193323Sed num_used_stripeUnitsPerDisk * layoutPtr->sectorsPerStripeUnit; 102193323Sed 103193323Sed raidPtr->totalSectors = 104193323Sed (layoutPtr->numStripe) * layoutPtr->sectorsPerStripeUnit; 105193323Sed 106193323Sed layoutPtr->stripeUnitsPerDisk = raidPtr->sectorsPerDisk / layoutPtr->sectorsPerStripeUnit; 107193323Sed 108193323Sed return (0); 109193323Sed} 110193323Sed 111193323SedRF_ReconUnitCount_t 112193323Sedrf_GetNumSpareRUsChainDecluster(RF_Raid_t *raidPtr) 113193323Sed{ 114193323Sed RF_ChaindeclusterConfigInfo_t *info = (RF_ChaindeclusterConfigInfo_t *) 
raidPtr->Layout.layoutSpecificInfo; 115193323Sed 116193323Sed /* 117193323Sed * The layout uses two stripe units per disk as spare within each 118193323Sed * sparing region. 119193323Sed */ 120193323Sed return (2 * info->numSparingRegions); 121193323Sed} 122193323Sed 123193323Sed 124193323Sed/* Maps to the primary copy of the data, i.e. the first mirror pair */ 125193323Sedvoid 126193323Sedrf_MapSectorChainDecluster(RF_Raid_t *raidPtr, RF_RaidAddr_t raidSector, 127193323Sed RF_RowCol_t *col, RF_SectorNum_t *diskSector, 128193323Sed int remap) 129193323Sed{ 130193323Sed RF_ChaindeclusterConfigInfo_t *info = (RF_ChaindeclusterConfigInfo_t *) raidPtr->Layout.layoutSpecificInfo; 131193323Sed RF_StripeNum_t SUID = raidSector / raidPtr->Layout.sectorsPerStripeUnit; 132193323Sed RF_SectorNum_t index_within_region, index_within_disk; 133193323Sed RF_StripeNum_t sparing_region_id; 134193323Sed int col_before_remap; 135193323Sed 136193323Sed sparing_region_id = SUID / info->stripeUnitsPerSparingRegion; 137193323Sed index_within_region = SUID % info->stripeUnitsPerSparingRegion; 138193323Sed index_within_disk = index_within_region / raidPtr->numCol; 139193323Sed col_before_remap = SUID % raidPtr->numCol; 140193323Sed 141193323Sed if (!remap) { 142193323Sed *col = col_before_remap; 143193323Sed *diskSector = (index_within_disk + ((raidPtr->numCol - 1) * sparing_region_id)) * 144193323Sed raidPtr->Layout.sectorsPerStripeUnit; 145193323Sed *diskSector += (raidSector % raidPtr->Layout.sectorsPerStripeUnit); 146193323Sed } else { 147193323Sed /* remap sector to spare space... 
*/ 148193323Sed *diskSector = sparing_region_id * (raidPtr->numCol + 1) * raidPtr->Layout.sectorsPerStripeUnit; 149193323Sed *diskSector += (raidPtr->numCol - 1) * raidPtr->Layout.sectorsPerStripeUnit; 150193323Sed *diskSector += (raidSector % raidPtr->Layout.sectorsPerStripeUnit); 151193323Sed index_within_disk = index_within_region / raidPtr->numCol; 152193323Sed if (index_within_disk < col_before_remap) 153193323Sed *col = index_within_disk; 154193323Sed else 155193323Sed if (index_within_disk == raidPtr->numCol - 2) { 156193323Sed *col = (col_before_remap + raidPtr->numCol - 1) % raidPtr->numCol; 157193323Sed *diskSector += raidPtr->Layout.sectorsPerStripeUnit; 158193323Sed } else 159193323Sed *col = (index_within_disk + 2) % raidPtr->numCol; 160193323Sed } 161193323Sed 162193323Sed} 163193323Sed 164193323Sed 165193323Sed 166193323Sed/* Maps to the second copy of the mirror pair, which is chain declustered. The second copy is contained 167198090Srdivacky in the next disk (mod numCol) after the disk containing the primary copy. 
168193323Sed The offset into the disk is one-half disk down */ 169193323Sedvoid 170193323Sedrf_MapParityChainDecluster(RF_Raid_t *raidPtr, RF_RaidAddr_t raidSector, 171193323Sed RF_RowCol_t *col, RF_SectorNum_t *diskSector, 172193323Sed int remap) 173193323Sed{ 174193323Sed RF_ChaindeclusterConfigInfo_t *info = (RF_ChaindeclusterConfigInfo_t *) raidPtr->Layout.layoutSpecificInfo; 175193323Sed RF_StripeNum_t SUID = raidSector / raidPtr->Layout.sectorsPerStripeUnit; 176193323Sed RF_SectorNum_t index_within_region, index_within_disk; 177198892Srdivacky RF_StripeNum_t sparing_region_id; 178193323Sed int col_before_remap; 179193323Sed 180193323Sed if (!remap) { 181198090Srdivacky *col = SUID % raidPtr->numCol; 182193323Sed *col = (*col + 1) % raidPtr->numCol; 183193323Sed *diskSector = info->mirrorStripeOffset * raidPtr->Layout.sectorsPerStripeUnit; 184193323Sed *diskSector += (SUID / raidPtr->numCol) * raidPtr->Layout.sectorsPerStripeUnit; 185193323Sed *diskSector += (raidSector % raidPtr->Layout.sectorsPerStripeUnit); 186193323Sed } else { 187193323Sed /* remap parity to spare space ... 
*/ 188193323Sed sparing_region_id = SUID / info->stripeUnitsPerSparingRegion; 189198090Srdivacky index_within_region = SUID % info->stripeUnitsPerSparingRegion; 190193323Sed index_within_disk = index_within_region / raidPtr->numCol; 191193323Sed *diskSector = sparing_region_id * (raidPtr->numCol + 1) * raidPtr->Layout.sectorsPerStripeUnit; 192193323Sed *diskSector += (raidPtr->numCol) * raidPtr->Layout.sectorsPerStripeUnit; 193193323Sed *diskSector += (raidSector % raidPtr->Layout.sectorsPerStripeUnit); 194193323Sed col_before_remap = SUID % raidPtr->numCol; 195193323Sed if (index_within_disk < col_before_remap) 196193323Sed *col = index_within_disk; 197193323Sed else 198193323Sed if (index_within_disk == raidPtr->numCol - 2) { 199193323Sed *col = (col_before_remap + 2) % raidPtr->numCol; 200193323Sed *diskSector -= raidPtr->Layout.sectorsPerStripeUnit; 201193323Sed } else 202193323Sed *col = (index_within_disk + 2) % raidPtr->numCol; 203193323Sed } 204193323Sed 205193323Sed} 206193323Sed 207193323Sedvoid 208193323Sedrf_IdentifyStripeChainDecluster(RF_Raid_t *raidPtr, RF_RaidAddr_t addr, 209193323Sed RF_RowCol_t **diskids) 210193323Sed{ 211193323Sed RF_ChaindeclusterConfigInfo_t *info = (RF_ChaindeclusterConfigInfo_t *) raidPtr->Layout.layoutSpecificInfo; 212193323Sed RF_StripeNum_t SUID; 213193323Sed RF_RowCol_t col; 214193323Sed 215193323Sed SUID = addr / raidPtr->Layout.sectorsPerStripeUnit; 216193323Sed col = SUID % raidPtr->numCol; 217193323Sed *diskids = info->stripeIdentifier[col]; 218193323Sed} 219193323Sed 220198090Srdivackyvoid 221193323Sedrf_MapSIDToPSIDChainDecluster(RF_RaidLayout_t *layoutPtr, 222193323Sed RF_StripeNum_t stripeID, 223193323Sed RF_StripeNum_t *psID, 224193323Sed RF_ReconUnitNum_t *which_ru) 225193323Sed{ 226193323Sed *which_ru = 0; 227193323Sed *psID = stripeID; 228193323Sed} 229193323Sed/****************************************************************************** 230193323Sed * select a graph to perform a single-stripe access 
231193323Sed * 232193323Sed * Parameters: raidPtr - description of the physical array 233193323Sed * type - type of operation (read or write) requested 234193323Sed * asmap - logical & physical addresses for this access 235193323Sed * createFunc - function to use to create the graph (return value) 236193323Sed *****************************************************************************/ 237193323Sed 238193323Sedvoid 239198090Srdivackyrf_RAIDCDagSelect(RF_Raid_t *raidPtr, RF_IoType_t type, 240193323Sed RF_AccessStripeMap_t *asmap, RF_VoidFuncPtr *createFunc) 241193323Sed#if 0 242193323Sed void (**createFunc) (RF_Raid_t *, RF_AccessStripeMap_t *, 243193323Sed RF_DagHeader_t *, void *, RF_RaidAccessFlags_t, 244193323Sed RF_AllocListElem_t *) 245193323Sed#endif 246193323Sed{ 247193323Sed RF_ASSERT(RF_IO_IS_R_OR_W(type)); 248193323Sed 249193323Sed if (asmap->numDataFailed + asmap->numParityFailed > 1) { 250193323Sed RF_ERRORMSG("Multiple disks failed in a single group! Aborting I/O operation.\n"); 251193323Sed *createFunc = NULL; 252193323Sed return; 253193323Sed } 254193323Sed *createFunc = (type == RF_IO_TYPE_READ) ? (RF_VoidFuncPtr) rf_CreateFaultFreeReadDAG : (RF_VoidFuncPtr) rf_CreateRaidOneWriteDAG; 255193323Sed 256193323Sed if (type == RF_IO_TYPE_READ) { 257193323Sed if ((raidPtr->status == rf_rs_degraded) || (raidPtr->status == rf_rs_reconstructing)) 258193323Sed *createFunc = (RF_VoidFuncPtr) rf_CreateRaidCDegradedReadDAG; /* array status is 259193323Sed * degraded, implement 260193323Sed * workload shifting */ 261193323Sed else 262193323Sed *createFunc = (RF_VoidFuncPtr) rf_CreateMirrorPartitionReadDAG; /* array status not 263193323Sed * degraded, so use 264193323Sed * mirror partition dag */ 265193323Sed } else 266193323Sed *createFunc = (RF_VoidFuncPtr) rf_CreateRaidOneWriteDAG; 267193323Sed} 268193323Sed#endif /* (RF_INCLUDE_CHAINDECLUSTER > 0) */ 269193323Sed