rf_decluster.c revision 1.2
1/*	$NetBSD: rf_decluster.c,v 1.2 1999/01/26 02:33:55 oster Exp $	*/
2/*
3 * Copyright (c) 1995 Carnegie-Mellon University.
4 * All rights reserved.
5 *
6 * Author: Mark Holland
7 *
8 * Permission to use, copy, modify and distribute this software and
9 * its documentation is hereby granted, provided that both the copyright
10 * notice and this permission notice appear in all copies of the
11 * software, derivative works or modified versions, and any portions
12 * thereof, and that both notices appear in supporting documentation.
13 *
14 * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
15 * CONDITION.  CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
16 * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
17 *
18 * Carnegie Mellon requests users of this software to return to
19 *
20 *  Software Distribution Coordinator  or  Software.Distribution@CS.CMU.EDU
21 *  School of Computer Science
22 *  Carnegie Mellon University
23 *  Pittsburgh PA 15213-3890
24 *
25 * any improvements or extensions that they make and grant Carnegie the
26 * rights to redistribute these changes.
27 */
28
29/*----------------------------------------------------------------------
30 *
31 * rf_decluster.c -- code related to the declustered layout
32 *
33 * Created 10-21-92 (MCH)
34 *
35 * Nov 93:  adding support for distributed sparing.  This code is a little
36 *          complex:  the basic layout used is as follows:
37 *          let F = (v-1)/GCD(r,v-1).  The spare space for each set of
38 *          F consecutive fulltables is grouped together and placed after
39 *          that set of tables.
40 *                   +------------------------------+
41 *                   |        F fulltables          |
42 *                   |        Spare Space           |
43 *                   |        F fulltables          |
44 *                   |        Spare Space           |
45 *                   |            ...               |
46 *                   +------------------------------+
47 *
48 *--------------------------------------------------------------------*/
49
50#include "rf_types.h"
51#include "rf_raid.h"
52#include "rf_raidframe.h"
53#include "rf_configure.h"
54#include "rf_decluster.h"
55#include "rf_debugMem.h"
56#include "rf_utils.h"
57#include "rf_alloclist.h"
58#include "rf_general.h"
59#include "rf_shutdown.h"
60#include "rf_sys.h"
61
62extern int rf_copyback_in_progress;                /* debug only */
63
64/* found in rf_kintf.c */
65int rf_GetSpareTableFromDaemon(RF_SparetWait_t  *req);
66
67/* configuration code */
68
69int rf_ConfigureDeclustered(
70  RF_ShutdownList_t  **listp,
71  RF_Raid_t           *raidPtr,
72  RF_Config_t         *cfgPtr)
73{
74    RF_RaidLayout_t *layoutPtr = &(raidPtr->Layout);
75    int b, v, k, r, lambda;				/* block design params */
76    int i, j;
77    RF_RowCol_t *first_avail_slot;
78    RF_StripeCount_t complete_FT_count, numCompleteFullTablesPerDisk;
79    RF_DeclusteredConfigInfo_t *info;
80    RF_StripeCount_t PUsPerDisk, spareRegionDepthInPUs, numCompleteSpareRegionsPerDisk, extraPUsPerDisk;
81    RF_StripeCount_t totSparePUsPerDisk;
82    RF_SectorNum_t diskOffsetOfLastFullTableInSUs;
83    RF_SectorCount_t SpareSpaceInSUs;
84    char *cfgBuf = (char *) (cfgPtr->layoutSpecific);
85    RF_StripeNum_t l, SUID;
86
87    SUID = l = 0;
88    numCompleteSpareRegionsPerDisk = 0;
89
90    /* 1. create layout specific structure */
91    RF_MallocAndAdd(info, sizeof(RF_DeclusteredConfigInfo_t), (RF_DeclusteredConfigInfo_t *), raidPtr->cleanupList);
92    if (info == NULL)
93      return(ENOMEM);
94    layoutPtr->layoutSpecificInfo = (void *) info;
95    info->SpareTable = NULL;
96
97    /* 2. extract parameters from the config structure */
98    if (layoutPtr->map->flags & RF_DISTRIBUTE_SPARE) {
99      (void) bcopy(cfgBuf, info->sparemap_fname, RF_SPAREMAP_NAME_LEN);
100    }
101    cfgBuf += RF_SPAREMAP_NAME_LEN;
102
103    b        = *( (int *) cfgBuf);   cfgBuf += sizeof(int);
104    v        = *( (int *) cfgBuf);   cfgBuf += sizeof(int);
105    k        = *( (int *) cfgBuf);   cfgBuf += sizeof(int);
106    r        = *( (int *) cfgBuf);   cfgBuf += sizeof(int);
107    lambda   = *( (int *) cfgBuf);   cfgBuf += sizeof(int);
108    raidPtr->noRotate = *( (int *) cfgBuf);   cfgBuf += sizeof(int);
109
110    /* the sparemaps are generated assuming that parity is rotated, so we issue
111     * a warning if both distributed sparing and no-rotate are on at the same time
112     */
113    if ((layoutPtr->map->flags & RF_DISTRIBUTE_SPARE) && raidPtr->noRotate) {
114	RF_ERRORMSG("Warning:  distributed sparing specified without parity rotation.\n");
115    }
116
117    if (raidPtr->numCol != v) {
118        RF_ERRORMSG2("RAID: config error: table element count (%d) not equal to no. of cols (%d)\n", v, raidPtr->numCol);
119        return(EINVAL);
120    }
121
122    /* 3.  set up the values used in the mapping code */
123    info->BlocksPerTable = b;
124    info->Lambda = lambda;
125    info->NumParityReps = info->groupSize = k;
126    info->SUsPerTable = b * (k-1) * layoutPtr->SUsPerPU;/* b blks, k-1 SUs each */
127    info->SUsPerFullTable = k * info->SUsPerTable;	/* rot k times */
128    info->PUsPerBlock = k-1;
129    info->SUsPerBlock = info->PUsPerBlock * layoutPtr->SUsPerPU;
130    info->TableDepthInPUs = (b*k) / v;
131    info->FullTableDepthInPUs = info->TableDepthInPUs * k;		/* k repetitions */
132
133    /* used only in distributed sparing case */
134    info->FullTablesPerSpareRegion = (v-1) / rf_gcd(r, v-1);		/* (v-1)/gcd fulltables */
135    info->TablesPerSpareRegion = k * info->FullTablesPerSpareRegion;
136    info->SpareSpaceDepthPerRegionInSUs = (r * info->TablesPerSpareRegion / (v-1)) * layoutPtr->SUsPerPU;
137
138    /* check to make sure the block design is sufficiently small */
139    if ((raidPtr->Layout.map->flags & RF_DISTRIBUTE_SPARE)) {
140        if (info->FullTableDepthInPUs * layoutPtr->SUsPerPU + info->SpareSpaceDepthPerRegionInSUs > layoutPtr->stripeUnitsPerDisk) {
141	    RF_ERRORMSG3("RAID: config error: Full Table depth (%d) + Spare Space (%d) larger than disk size (%d) (BD too big)\n",
142			 (int)info->FullTableDepthInPUs,
143			 (int)info->SpareSpaceDepthPerRegionInSUs,
144			 (int)layoutPtr->stripeUnitsPerDisk);
145	    return(EINVAL);
146	}
147    } else {
148	if (info->TableDepthInPUs * layoutPtr->SUsPerPU > layoutPtr->stripeUnitsPerDisk) {
149	    RF_ERRORMSG2("RAID: config error: Table depth (%d) larger than disk size (%d) (BD too big)\n",
150			 (int)(info->TableDepthInPUs * layoutPtr->SUsPerPU), \
151			 (int)layoutPtr->stripeUnitsPerDisk);
152	    return(EINVAL);
153	}
154    }
155
156
157    /* compute the size of each disk, and the number of tables in the last fulltable (which
158     * need not be complete)
159     */
160    if (raidPtr->Layout.map->flags & RF_DISTRIBUTE_SPARE) {
161
162	PUsPerDisk = layoutPtr->stripeUnitsPerDisk / layoutPtr->SUsPerPU;
163	spareRegionDepthInPUs = (info->TablesPerSpareRegion * info->TableDepthInPUs +
164				 (info->TablesPerSpareRegion * info->TableDepthInPUs) / (v-1));
165	info->SpareRegionDepthInSUs = spareRegionDepthInPUs * layoutPtr->SUsPerPU;
166
167	numCompleteSpareRegionsPerDisk = PUsPerDisk / spareRegionDepthInPUs;
168	info->NumCompleteSRs = numCompleteSpareRegionsPerDisk;
169	extraPUsPerDisk = PUsPerDisk % spareRegionDepthInPUs;
170
171	/* assume conservatively that we need the full amount of spare space in one region in order
172	 * to provide spares for the partial spare region at the end of the array.  We set "i" to
173	 * the number of tables in the partial spare region.  This may actually include some fulltables.
174	 */
175	extraPUsPerDisk -= (info->SpareSpaceDepthPerRegionInSUs / layoutPtr->SUsPerPU);
176	if (extraPUsPerDisk <= 0) i = 0;
177	else i = extraPUsPerDisk/info->TableDepthInPUs;
178
179	complete_FT_count = raidPtr->numRow * (numCompleteSpareRegionsPerDisk * (info->TablesPerSpareRegion/k) + i/k);
180        info->FullTableLimitSUID = complete_FT_count * info->SUsPerFullTable;
181	info->ExtraTablesPerDisk = i % k;
182
183	/* note that in the last spare region, the spare space is complete even though data/parity space is not */
184	totSparePUsPerDisk = (numCompleteSpareRegionsPerDisk+1) * (info->SpareSpaceDepthPerRegionInSUs / layoutPtr->SUsPerPU);
185	info->TotSparePUsPerDisk = totSparePUsPerDisk;
186
187	layoutPtr->stripeUnitsPerDisk =
188	    ((complete_FT_count/raidPtr->numRow) * info->FullTableDepthInPUs +	 	/* data & parity space */
189	     info->ExtraTablesPerDisk * info->TableDepthInPUs +
190	     totSparePUsPerDisk								/* spare space */
191	    ) * layoutPtr->SUsPerPU;
192	layoutPtr->dataStripeUnitsPerDisk =
193	    (complete_FT_count * info->FullTableDepthInPUs + info->ExtraTablesPerDisk * info->TableDepthInPUs)
194	    * layoutPtr->SUsPerPU * (k-1) / k;
195
196    } else {
197        /* non-dist spare case:  force each disk to contain an integral number of tables */
198        layoutPtr->stripeUnitsPerDisk /= (info->TableDepthInPUs * layoutPtr->SUsPerPU);
199        layoutPtr->stripeUnitsPerDisk *= (info->TableDepthInPUs * layoutPtr->SUsPerPU);
200
201	/* compute the number of tables in the last fulltable, which need not be complete */
202        complete_FT_count =
203            ((layoutPtr->stripeUnitsPerDisk/layoutPtr->SUsPerPU) / info->FullTableDepthInPUs) * raidPtr->numRow;
204
205        info->FullTableLimitSUID = complete_FT_count * info->SUsPerFullTable;
206        info->ExtraTablesPerDisk =
207		((layoutPtr->stripeUnitsPerDisk/layoutPtr->SUsPerPU) / info->TableDepthInPUs) % k;
208    }
209
210    raidPtr->sectorsPerDisk = layoutPtr->stripeUnitsPerDisk * layoutPtr->sectorsPerStripeUnit;
211
212    /* find the disk offset of the stripe unit where the last fulltable starts */
213    numCompleteFullTablesPerDisk = complete_FT_count / raidPtr->numRow;
214    diskOffsetOfLastFullTableInSUs = numCompleteFullTablesPerDisk * info->FullTableDepthInPUs * layoutPtr->SUsPerPU;
215    if (raidPtr->Layout.map->flags & RF_DISTRIBUTE_SPARE) {
216        SpareSpaceInSUs  = numCompleteSpareRegionsPerDisk * info->SpareSpaceDepthPerRegionInSUs;
217        diskOffsetOfLastFullTableInSUs += SpareSpaceInSUs;
218        info->DiskOffsetOfLastSpareSpaceChunkInSUs =
219	    diskOffsetOfLastFullTableInSUs + info->ExtraTablesPerDisk * info->TableDepthInPUs * layoutPtr->SUsPerPU;
220    }
221    info->DiskOffsetOfLastFullTableInSUs = diskOffsetOfLastFullTableInSUs;
222    info->numCompleteFullTablesPerDisk = numCompleteFullTablesPerDisk;
223
224    /* 4.  create and initialize the lookup tables */
225    info->LayoutTable = rf_make_2d_array(b, k, raidPtr->cleanupList);
226    if (info->LayoutTable == NULL)
227      return(ENOMEM);
228    info->OffsetTable = rf_make_2d_array(b, k, raidPtr->cleanupList);
229    if (info->OffsetTable == NULL)
230      return(ENOMEM);
231    info->BlockTable  =	rf_make_2d_array(info->TableDepthInPUs*layoutPtr->SUsPerPU, raidPtr->numCol, raidPtr->cleanupList);
232    if (info->BlockTable == NULL)
233      return(ENOMEM);
234
235    first_avail_slot = rf_make_1d_array(v, NULL);
236    if (first_avail_slot == NULL)
237      return(ENOMEM);
238
239    for (i=0; i<b; i++)
240      for (j=0; j<k; j++)
241        info->LayoutTable[i][j] = *cfgBuf++;
242
243    /* initialize offset table */
244    for (i=0; i<b; i++) for (j=0; j<k; j++) {
245        info->OffsetTable[i][j] = first_avail_slot[ info->LayoutTable[i][j] ];
246        first_avail_slot[ info->LayoutTable[i][j] ]++;
247    }
248
249    /* initialize block table */
250    for (SUID=l=0; l<layoutPtr->SUsPerPU; l++) {
251        for (i=0; i<b; i++) {
252            for (j=0; j<k; j++) {
253                info->BlockTable[ (info->OffsetTable[i][j] * layoutPtr->SUsPerPU) + l ]
254		                [ info->LayoutTable[i][j] ] = SUID;
255            }
256            SUID++;
257        }
258    }
259
260    rf_free_1d_array(first_avail_slot, v);
261
262    /* 5.  set up the remaining redundant-but-useful parameters */
263
264    raidPtr->totalSectors = (k*complete_FT_count + raidPtr->numRow*info->ExtraTablesPerDisk) *
265    			  info->SUsPerTable * layoutPtr->sectorsPerStripeUnit;
266    layoutPtr->numStripe = (raidPtr->totalSectors / layoutPtr->sectorsPerStripeUnit) / (k-1);
267
268    /* strange evaluation order below to try and minimize overflow problems */
269
270    layoutPtr->dataSectorsPerStripe = (k-1) * layoutPtr->sectorsPerStripeUnit;
271    layoutPtr->bytesPerStripeUnit = layoutPtr->sectorsPerStripeUnit << raidPtr->logBytesPerSector;
272    layoutPtr->numDataCol = k-1;
273    layoutPtr->numParityCol = 1;
274
275    return(0);
276}
277
278/* declustering with distributed sparing */
279static void rf_ShutdownDeclusteredDS(RF_ThreadArg_t);
280static void rf_ShutdownDeclusteredDS(arg)
281  RF_ThreadArg_t  arg;
282{
283  RF_DeclusteredConfigInfo_t *info;
284  RF_Raid_t *raidPtr;
285
286  raidPtr = (RF_Raid_t *)arg;
287  info = (RF_DeclusteredConfigInfo_t *) raidPtr->Layout.layoutSpecificInfo;
288  if (info->SpareTable)
289    rf_FreeSpareTable(raidPtr);
290}
291
292int rf_ConfigureDeclusteredDS(
293  RF_ShutdownList_t  **listp,
294  RF_Raid_t           *raidPtr,
295  RF_Config_t         *cfgPtr)
296{
297  int rc;
298
299  rc = rf_ConfigureDeclustered(listp, raidPtr, cfgPtr);
300  if (rc)
301    return(rc);
302  rc = rf_ShutdownCreate(listp, rf_ShutdownDeclusteredDS, raidPtr);
303  if (rc) {
304    RF_ERRORMSG1("Got %d adding shutdown event for DeclusteredDS\n", rc);
305    rf_ShutdownDeclusteredDS(raidPtr);
306    return(rc);
307  }
308  return(0);
309}
310
311void rf_MapSectorDeclustered(raidPtr, raidSector, row, col, diskSector, remap)
312  RF_Raid_t       *raidPtr;
313  RF_RaidAddr_t    raidSector;
314  RF_RowCol_t     *row;
315  RF_RowCol_t     *col;
316  RF_SectorNum_t  *diskSector;
317  int              remap;
318{
319    RF_RaidLayout_t *layoutPtr = &(raidPtr->Layout);
320    RF_DeclusteredConfigInfo_t *info = (RF_DeclusteredConfigInfo_t *) layoutPtr->layoutSpecificInfo;
321    RF_StripeNum_t SUID = raidSector / layoutPtr->sectorsPerStripeUnit;
322    RF_StripeNum_t FullTableID, FullTableOffset, TableID, TableOffset;
323    RF_StripeNum_t BlockID, BlockOffset, RepIndex;
324    RF_StripeCount_t sus_per_fulltable = info->SUsPerFullTable;
325    RF_StripeCount_t fulltable_depth  = info->FullTableDepthInPUs * layoutPtr->SUsPerPU;
326    RF_StripeNum_t base_suid = 0, outSU, SpareRegion=0, SpareSpace=0;
327
328    rf_decluster_adjust_params(layoutPtr, &SUID, &sus_per_fulltable, &fulltable_depth, &base_suid);
329
330    FullTableID     = SUID / sus_per_fulltable;		/* fulltable ID within array (across rows) */
331    if (raidPtr->numRow == 1) *row = 0;                 /* avoid a mod and a div in the common case */
332    else {
333      *row            = FullTableID % raidPtr->numRow;
334      FullTableID    /= raidPtr->numRow;			/* convert to fulltable ID on this disk */
335    }
336    if (raidPtr->Layout.map->flags & RF_DISTRIBUTE_SPARE) {
337	SpareRegion = FullTableID / info->FullTablesPerSpareRegion;
338        SpareSpace  = SpareRegion * info->SpareSpaceDepthPerRegionInSUs;
339    }
340    FullTableOffset = SUID % sus_per_fulltable;
341    TableID         = FullTableOffset / info->SUsPerTable;
342    TableOffset     = FullTableOffset - TableID * info->SUsPerTable;
343    BlockID         = TableOffset / info->PUsPerBlock;
344    BlockOffset     = TableOffset - BlockID * info->PUsPerBlock;
345    BlockID        %= info->BlocksPerTable;
346    RepIndex        = info->PUsPerBlock - TableID;
347    if (!raidPtr->noRotate) BlockOffset    += ((BlockOffset >= RepIndex) ? 1 : 0);
348    *col            = info->LayoutTable[BlockID][BlockOffset];
349
350    /* remap to distributed spare space if indicated */
351    if (remap) {
352      RF_ASSERT( raidPtr->Disks[*row][*col].status == rf_ds_reconstructing || raidPtr->Disks[*row][*col].status == rf_ds_dist_spared ||
353	     (rf_copyback_in_progress && raidPtr->Disks[*row][*col].status == rf_ds_optimal));
354      rf_remap_to_spare_space(layoutPtr, info, *row, FullTableID, TableID, BlockID, (base_suid) ? 1 : 0, SpareRegion, col, &outSU);
355    } else {
356
357        outSU	    = base_suid;
358        outSU      += FullTableID * fulltable_depth;  				        /* offs to strt of FT */
359        outSU	   += SpareSpace;						        /* skip rsvd spare space */
360        outSU      += TableID * info->TableDepthInPUs * layoutPtr->SUsPerPU;   	        /* offs to strt of tble */
361        outSU      += info->OffsetTable[BlockID][BlockOffset] * layoutPtr->SUsPerPU;	/* offs to the PU */
362    }
363    outSU          += TableOffset / (info->BlocksPerTable * info->PUsPerBlock);	        /* offs to the SU within a PU */
364
365    /* convert SUs to sectors, and, if not aligned to SU boundary, add in offset to sector.  */
366    *diskSector     = outSU*layoutPtr->sectorsPerStripeUnit + (raidSector % layoutPtr->sectorsPerStripeUnit);
367
368    RF_ASSERT( *col != -1 );
369}
370
371
372/* prototyping this inexplicably causes the compile of the layout table (rf_layout.c) to fail */
373void rf_MapParityDeclustered(
374  RF_Raid_t       *raidPtr,
375  RF_RaidAddr_t    raidSector,
376  RF_RowCol_t     *row,
377  RF_RowCol_t     *col,
378  RF_SectorNum_t  *diskSector,
379  int              remap)
380{
381    RF_RaidLayout_t *layoutPtr = &(raidPtr->Layout);
382    RF_DeclusteredConfigInfo_t *info = (RF_DeclusteredConfigInfo_t *) layoutPtr->layoutSpecificInfo;
383    RF_StripeNum_t SUID = raidSector / layoutPtr->sectorsPerStripeUnit;
384    RF_StripeNum_t FullTableID, FullTableOffset, TableID, TableOffset;
385    RF_StripeNum_t BlockID, BlockOffset, RepIndex;
386    RF_StripeCount_t sus_per_fulltable = info->SUsPerFullTable;
387    RF_StripeCount_t fulltable_depth  = info->FullTableDepthInPUs * layoutPtr->SUsPerPU;
388    RF_StripeNum_t base_suid = 0, outSU, SpareRegion=0, SpareSpace=0;
389
390    rf_decluster_adjust_params(layoutPtr, &SUID, &sus_per_fulltable, &fulltable_depth, &base_suid);
391
392    /* compute row & (possibly) spare space exactly as before */
393    FullTableID     = SUID / sus_per_fulltable;
394    if (raidPtr->numRow == 1) *row = 0;                         /* avoid a mod and a div in the common case */
395    else {
396      *row            = FullTableID % raidPtr->numRow;
397      FullTableID    /= raidPtr->numRow;			/* convert to fulltable ID on this disk */
398    }
399    if ((raidPtr->Layout.map->flags & RF_DISTRIBUTE_SPARE)) {
400	SpareRegion = FullTableID / info->FullTablesPerSpareRegion;
401        SpareSpace  = SpareRegion * info->SpareSpaceDepthPerRegionInSUs;
402    }
403
404    /* compute BlockID and RepIndex exactly as before */
405    FullTableOffset = SUID % sus_per_fulltable;
406    TableID         = FullTableOffset / info->SUsPerTable;
407    TableOffset     = FullTableOffset - TableID * info->SUsPerTable;
408    /*TableOffset     = FullTableOffset % info->SUsPerTable;*/
409    /*BlockID         = (TableOffset / info->PUsPerBlock) % info->BlocksPerTable;*/
410    BlockID         = TableOffset / info->PUsPerBlock;
411    /*BlockOffset     = TableOffset % info->PUsPerBlock;*/
412    BlockOffset     = TableOffset - BlockID * info->PUsPerBlock;
413    BlockID        %= info->BlocksPerTable;
414
415    /* the parity block is in the position indicated by RepIndex */
416    RepIndex        = (raidPtr->noRotate) ? info->PUsPerBlock : info->PUsPerBlock - TableID;
417    *col	    = info->LayoutTable[BlockID][RepIndex];
418
419    if (remap) {
420      RF_ASSERT( raidPtr->Disks[*row][*col].status == rf_ds_reconstructing || raidPtr->Disks[*row][*col].status == rf_ds_dist_spared ||
421	     (rf_copyback_in_progress && raidPtr->Disks[*row][*col].status == rf_ds_optimal));
422      rf_remap_to_spare_space(layoutPtr, info, *row, FullTableID, TableID, BlockID, (base_suid) ? 1 : 0, SpareRegion, col, &outSU);
423    } else {
424
425        /* compute sector as before, except use RepIndex instead of BlockOffset */
426        outSU        = base_suid;
427        outSU       += FullTableID * fulltable_depth;
428        outSU	    += SpareSpace;						/* skip rsvd spare space */
429        outSU       += TableID * info->TableDepthInPUs * layoutPtr->SUsPerPU;
430        outSU       += info->OffsetTable[BlockID][RepIndex] * layoutPtr->SUsPerPU;
431    }
432
433    outSU       += TableOffset / (info->BlocksPerTable * info->PUsPerBlock);
434    *diskSector  = outSU*layoutPtr->sectorsPerStripeUnit + (raidSector % layoutPtr->sectorsPerStripeUnit);
435
436    RF_ASSERT( *col != -1 );
437}
438
439/* returns an array of ints identifying the disks that comprise the stripe containing the indicated address.
440 * the caller must _never_ attempt to modify this array.
441 */
442void rf_IdentifyStripeDeclustered(
443  RF_Raid_t        *raidPtr,
444  RF_RaidAddr_t     addr,
445  RF_RowCol_t     **diskids,
446  RF_RowCol_t      *outRow)
447{
448  RF_RaidLayout_t *layoutPtr           = &(raidPtr->Layout);
449  RF_DeclusteredConfigInfo_t *info     = (RF_DeclusteredConfigInfo_t *) layoutPtr->layoutSpecificInfo;
450  RF_StripeCount_t sus_per_fulltable   = info->SUsPerFullTable;
451  RF_StripeCount_t fulltable_depth     = info->FullTableDepthInPUs * layoutPtr->SUsPerPU;
452  RF_StripeNum_t  base_suid            = 0;
453  RF_StripeNum_t SUID                  = rf_RaidAddressToStripeUnitID(layoutPtr, addr);
454  RF_StripeNum_t stripeID, FullTableID;
455  int tableOffset;
456
457  rf_decluster_adjust_params(layoutPtr, &SUID, &sus_per_fulltable, &fulltable_depth, &base_suid);
458  FullTableID     = SUID / sus_per_fulltable;		/* fulltable ID within array (across rows) */
459  *outRow         = FullTableID % raidPtr->numRow;
460  stripeID        = rf_StripeUnitIDToStripeID(layoutPtr, SUID);                     /* find stripe offset into array */
461  tableOffset     = (stripeID % info->BlocksPerTable);                        /* find offset into block design table */
462  *diskids        = info->LayoutTable[tableOffset];
463}
464
465/* This returns the default head-separation limit, which is measured
466 * in "required units for reconstruction".  Each time a disk fetches
467 * a unit, it bumps a counter.  The head-sep code prohibits any disk
468 * from getting more than headSepLimit counter values ahead of any
469 * other.
470 *
471 * We assume here that the number of floating recon buffers is already
472 * set.  There are r stripes to be reconstructed in each table, and so
473 * if we have a total of B buffers, we can have at most B/r tables
474 * under recon at any one time.  In each table, lambda units are required
475 * from each disk, so given B buffers, the head sep limit has to be
476 * (lambda*B)/r units.  We subtract one to avoid weird boundary cases.
477 *
478 * for example, suppose were given 50 buffers, r=19, and lambda=4 as in
479 * the 20.5 design.  There are 19 stripes/table to be reconstructed, so
480 * we can have 50/19 tables concurrently under reconstruction, which means
481 * we can allow the fastest disk to get 50/19 tables ahead of the slower
482 * disk.  There are lambda "required units" for each disk, so the fastest
483 * disk can get 4*50/19 = 10 counter values ahead of the slowest.
484 *
485 * If numBufsToAccumulate is not 1, we need to limit the head sep further
486 * because multiple bufs will be required for each stripe under recon.
487 */
488RF_HeadSepLimit_t rf_GetDefaultHeadSepLimitDeclustered(
489  RF_Raid_t  *raidPtr)
490{
491  RF_DeclusteredConfigInfo_t *info = (RF_DeclusteredConfigInfo_t *) raidPtr->Layout.layoutSpecificInfo;
492
493  return(info->Lambda * raidPtr->numFloatingReconBufs / info->TableDepthInPUs / rf_numBufsToAccumulate);
494}
495
496/* returns the default number of recon buffers to use.  The value
497 * is somewhat arbitrary...it's intended to be large enough to allow
498 * for a reasonably large head-sep limit, but small enough that you
499 * don't use up all your system memory with buffers.
500 */
501int rf_GetDefaultNumFloatingReconBuffersDeclustered(RF_Raid_t *raidPtr)
502{
503  return(100 * rf_numBufsToAccumulate);
504}
505
506/* sectors in the last fulltable of the array need to be handled
507 * specially since this fulltable can be incomplete.  this function
508 * changes the values of certain params to handle this.
509 *
510 * the idea here is that MapSector et. al. figure out which disk the
511 * addressed unit lives on by computing the modulos of the unit number
512 * with the number of units per fulltable, table, etc.  In the last
513 * fulltable, there are fewer units per fulltable, so we need to adjust
514 * the number of user data units per fulltable to reflect this.
515 *
516 * so, we (1) convert the fulltable size and depth parameters to
517 * the size of the partial fulltable at the end, (2) compute the
518 * disk sector offset where this fulltable starts, and (3) convert
519 * the users stripe unit number from an offset into the array to
520 * an offset into the last fulltable.
521 */
522void rf_decluster_adjust_params(
523  RF_RaidLayout_t   *layoutPtr,
524  RF_StripeNum_t    *SUID,
525  RF_StripeCount_t  *sus_per_fulltable,
526  RF_StripeCount_t  *fulltable_depth,
527  RF_StripeNum_t    *base_suid)
528{
529    RF_DeclusteredConfigInfo_t *info = (RF_DeclusteredConfigInfo_t *) layoutPtr->layoutSpecificInfo;
530#if defined(__NetBSD__) && defined(_KERNEL)
531    /* Nothing! */
532#else
533    char pc = layoutPtr->map->parityConfig;
534#endif
535
536    if (*SUID >= info->FullTableLimitSUID) {
537	/* new full table size is size of last full table on disk */
538	*sus_per_fulltable = info->ExtraTablesPerDisk * info->SUsPerTable;
539
540	/* new full table depth is corresponding depth */
541	*fulltable_depth = info->ExtraTablesPerDisk * info->TableDepthInPUs * layoutPtr->SUsPerPU;
542
543	/* set up the new base offset */
544	*base_suid = info->DiskOffsetOfLastFullTableInSUs;
545
546	/* convert users array address to an offset into the last fulltable */
547	*SUID -= info->FullTableLimitSUID;
548    }
549}
550
551/*
552 * map a stripe ID to a parity stripe ID.
553 * See comment above RaidAddressToParityStripeID in layout.c.
554 */
555void rf_MapSIDToPSIDDeclustered(
556  RF_RaidLayout_t    *layoutPtr,
557  RF_StripeNum_t      stripeID,
558  RF_StripeNum_t     *psID,
559  RF_ReconUnitNum_t  *which_ru)
560{
561    RF_DeclusteredConfigInfo_t *info;
562
563    info = (RF_DeclusteredConfigInfo_t *) layoutPtr->layoutSpecificInfo;
564
565    *psID = (stripeID / (layoutPtr->SUsPerPU * info->BlocksPerTable))
566        * info->BlocksPerTable + (stripeID % info->BlocksPerTable);
567    *which_ru = (stripeID % (info->BlocksPerTable * layoutPtr->SUsPerPU))
568        / info->BlocksPerTable;
569    RF_ASSERT( (*which_ru) < layoutPtr->SUsPerPU/layoutPtr->SUsPerRU);
570}
571
572/*
573 * Called from MapSector and MapParity to retarget an access at the spare unit.
574 * Modifies the "col" and "outSU" parameters only.
575 */
576void rf_remap_to_spare_space(
577  RF_RaidLayout_t             *layoutPtr,
578  RF_DeclusteredConfigInfo_t  *info,
579  RF_RowCol_t                  row,
580  RF_StripeNum_t               FullTableID,
581  RF_StripeNum_t               TableID,
582  RF_SectorNum_t               BlockID,
583  RF_StripeNum_t               base_suid,
584  RF_StripeNum_t               SpareRegion,
585  RF_RowCol_t                 *outCol,
586  RF_StripeNum_t              *outSU)
587{
588    RF_StripeNum_t ftID, spareTableStartSU, TableInSpareRegion, lastSROffset, which_ft;
589
590    /*
591     * note that FullTableID and hence SpareRegion may have gotten
592     * tweaked by rf_decluster_adjust_params. We detect this by
593     * noticing that base_suid is not 0.
594     */
595    if (base_suid == 0) {
596      ftID = FullTableID;
597    }
598    else {
599      /*
600       * There may be > 1.0 full tables in the last (i.e. partial)
601       * spare region.  find out which of these we're in.
602       */
603      lastSROffset = info->NumCompleteSRs * info->SpareRegionDepthInSUs;
604      which_ft = (info->DiskOffsetOfLastFullTableInSUs - lastSROffset) / (info->FullTableDepthInPUs * layoutPtr->SUsPerPU);
605
606      /* compute the actual full table ID */
607      ftID = info->DiskOffsetOfLastFullTableInSUs / (info->FullTableDepthInPUs * layoutPtr->SUsPerPU) + which_ft;
608      SpareRegion = info->NumCompleteSRs;
609    }
610    TableInSpareRegion = (ftID * info->NumParityReps + TableID) % info->TablesPerSpareRegion;
611
612    *outCol = info->SpareTable[TableInSpareRegion][BlockID].spareDisk;
613    RF_ASSERT( *outCol != -1);
614
615    spareTableStartSU = (SpareRegion == info->NumCompleteSRs) ?
616	    info->DiskOffsetOfLastFullTableInSUs + info->ExtraTablesPerDisk * info->TableDepthInPUs * layoutPtr->SUsPerPU :
617	    (SpareRegion+1) * info->SpareRegionDepthInSUs - info->SpareSpaceDepthPerRegionInSUs;
618    *outSU = spareTableStartSU + info->SpareTable[TableInSpareRegion][BlockID].spareBlockOffsetInSUs;
619    if (*outSU >= layoutPtr->stripeUnitsPerDisk) {
620	printf("rf_remap_to_spare_space: invalid remapped disk SU offset %ld\n",(long)*outSU);
621    }
622}
623
624int rf_InstallSpareTable(
625  RF_Raid_t    *raidPtr,
626  RF_RowCol_t   frow,
627  RF_RowCol_t   fcol)
628{
629  RF_DeclusteredConfigInfo_t *info = (RF_DeclusteredConfigInfo_t *) raidPtr->Layout.layoutSpecificInfo;
630  RF_SparetWait_t *req;
631  int retcode;
632
633  RF_Malloc(req, sizeof(*req), (RF_SparetWait_t *));
634  req->C                             = raidPtr->numCol;
635  req->G                             = raidPtr->Layout.numDataCol + raidPtr->Layout.numParityCol;
636  req->fcol                          = fcol;
637  req->SUsPerPU                      = raidPtr->Layout.SUsPerPU;
638  req->TablesPerSpareRegion          = info->TablesPerSpareRegion;
639  req->BlocksPerTable                = info->BlocksPerTable;
640  req->TableDepthInPUs               = info->TableDepthInPUs;
641  req->SpareSpaceDepthPerRegionInSUs = info->SpareSpaceDepthPerRegionInSUs;
642
643  retcode = rf_GetSpareTableFromDaemon(req);
644  RF_ASSERT(!retcode);                                     /* XXX -- fix this to recover gracefully -- XXX */
645  return(retcode);
646}
647
648/*
649 * Invoked via ioctl to install a spare table in the kernel.
650 */
651int rf_SetSpareTable(raidPtr, data)
652  RF_Raid_t  *raidPtr;
653  void       *data;
654{
655  RF_DeclusteredConfigInfo_t *info = (RF_DeclusteredConfigInfo_t *) raidPtr->Layout.layoutSpecificInfo;
656  RF_SpareTableEntry_t **ptrs;
657  int i, retcode;
658
659  /* what we need to copyin is a 2-d array, so first copyin the user pointers to the rows in the table */
660  RF_Malloc(ptrs, info->TablesPerSpareRegion * sizeof(RF_SpareTableEntry_t *), (RF_SpareTableEntry_t **));
661  retcode = copyin((caddr_t) data, (caddr_t) ptrs, info->TablesPerSpareRegion * sizeof(RF_SpareTableEntry_t *));
662
663  if (retcode) return(retcode);
664
665  /* now allocate kernel space for the row pointers */
666  RF_Malloc(info->SpareTable, info->TablesPerSpareRegion * sizeof(RF_SpareTableEntry_t *), (RF_SpareTableEntry_t **));
667
668  /* now allocate kernel space for each row in the table, and copy it in from user space */
669  for (i=0; i<info->TablesPerSpareRegion; i++) {
670    RF_Malloc(info->SpareTable[i], info->BlocksPerTable * sizeof(RF_SpareTableEntry_t), (RF_SpareTableEntry_t *));
671    retcode = copyin(ptrs[i], info->SpareTable[i], info->BlocksPerTable * sizeof(RF_SpareTableEntry_t));
672    if (retcode) {
673      info->SpareTable = NULL;             /* blow off the memory we've allocated */
674      return(retcode);
675    }
676  }
677
678  /* free up the temporary array we used */
679  RF_Free(ptrs, info->TablesPerSpareRegion * sizeof(RF_SpareTableEntry_t *));
680
681  return(0);
682}
683
684RF_ReconUnitCount_t rf_GetNumSpareRUsDeclustered(raidPtr)
685  RF_Raid_t *raidPtr;
686{
687  RF_RaidLayout_t *layoutPtr = &raidPtr->Layout;
688
689  return( ((RF_DeclusteredConfigInfo_t *) layoutPtr->layoutSpecificInfo)->TotSparePUsPerDisk );
690}
691
692
693void rf_FreeSpareTable(raidPtr)
694  RF_Raid_t  *raidPtr;
695{
696  long i;
697  RF_RaidLayout_t *layoutPtr = &raidPtr->Layout;
698  RF_DeclusteredConfigInfo_t *info = (RF_DeclusteredConfigInfo_t *) layoutPtr->layoutSpecificInfo;
699  RF_SpareTableEntry_t **table = info->SpareTable;
700
701  for (i=0; i<info->TablesPerSpareRegion; i++) {RF_Free(table[i], info->BlocksPerTable * sizeof(RF_SpareTableEntry_t));}
702  RF_Free(table, info->TablesPerSpareRegion * sizeof(RF_SpareTableEntry_t *));
703  info->SpareTable = (RF_SpareTableEntry_t **) NULL;
704}
705