rf_reconbuffer.c revision 1.9
1/* $NetBSD: rf_reconbuffer.c,v 1.9 2002/09/21 01:21:19 oster Exp $ */ 2/* 3 * Copyright (c) 1995 Carnegie-Mellon University. 4 * All rights reserved. 5 * 6 * Author: Mark Holland 7 * 8 * Permission to use, copy, modify and distribute this software and 9 * its documentation is hereby granted, provided that both the copyright 10 * notice and this permission notice appear in all copies of the 11 * software, derivative works or modified versions, and any portions 12 * thereof, and that both notices appear in supporting documentation. 13 * 14 * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS" 15 * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND 16 * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE. 17 * 18 * Carnegie Mellon requests users of this software to return to 19 * 20 * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU 21 * School of Computer Science 22 * Carnegie Mellon University 23 * Pittsburgh PA 15213-3890 24 * 25 * any improvements or extensions that they make and grant Carnegie the 26 * rights to redistribute these changes. 27 */ 28 29/*************************************************** 30 * 31 * rf_reconbuffer.c -- reconstruction buffer manager 32 * 33 ***************************************************/ 34 35#include <sys/cdefs.h> 36__KERNEL_RCSID(0, "$NetBSD: rf_reconbuffer.c,v 1.9 2002/09/21 01:21:19 oster Exp $"); 37 38#include "rf_raid.h" 39#include "rf_reconbuffer.h" 40#include "rf_acctrace.h" 41#include "rf_etimer.h" 42#include "rf_general.h" 43#include "rf_revent.h" 44#include "rf_reconutil.h" 45#include "rf_nwayxor.h" 46 47#ifdef DEBUG 48 49#define Dprintf1(s,a) if (rf_reconbufferDebug) printf(s,a) 50#define Dprintf2(s,a,b) if (rf_reconbufferDebug) printf(s,a,b) 51#define Dprintf3(s,a,b,c) if (rf_reconbufferDebug) printf(s,a,b,c) 52#define Dprintf4(s,a,b,c,d) if (rf_reconbufferDebug) printf(s,a,b,c,d) 53#define Dprintf5(s,a,b,c,d,e) if (rf_reconbufferDebug) printf(s,a,b,c,d,e) 54 55#else /* DEBUG */ 56 57#define Dprintf1(s,a) {} 58#define Dprintf2(s,a,b) {} 59#define Dprintf3(s,a,b,c) {} 60#define Dprintf4(s,a,b,c,d) {} 61#define Dprintf5(s,a,b,c,d,e) {} 62 63#endif 64 65/***************************************************************************** 66 * 67 * Submit a reconstruction buffer to the manager for XOR. We can only 68 * submit a buffer if (1) we can xor into an existing buffer, which 69 * means we don't have to acquire a new one, (2) we can acquire a 70 * floating recon buffer, or (3) the caller has indicated that we are 71 * allowed to keep the submitted buffer. 72 * 73 * Returns non-zero if and only if we were not able to submit. 74 * In this case, we append the current disk ID to the wait list on the 75 * indicated RU, so that it will be re-enabled when we acquire a buffer 76 * for this RU. 77 * 78 ****************************************************************************/ 79 80/* 81 * nWayXorFuncs[i] is a pointer to a function that will xor "i" 82 * bufs into the accumulating sum. 83 */ 84static RF_VoidFuncPtr nWayXorFuncs[] = { 85 NULL, 86 (RF_VoidFuncPtr) rf_nWayXor1, 87 (RF_VoidFuncPtr) rf_nWayXor2, 88 (RF_VoidFuncPtr) rf_nWayXor3, 89 (RF_VoidFuncPtr) rf_nWayXor4, 90 (RF_VoidFuncPtr) rf_nWayXor5, 91 (RF_VoidFuncPtr) rf_nWayXor6, 92 (RF_VoidFuncPtr) rf_nWayXor7, 93 (RF_VoidFuncPtr) rf_nWayXor8, 94 (RF_VoidFuncPtr) rf_nWayXor9 95}; 96 97int 98rf_SubmitReconBuffer(rbuf, keep_it, use_committed) 99 RF_ReconBuffer_t *rbuf; /* the recon buffer to submit */ 100 int keep_it; /* whether we can keep this buffer or we have 101 * to return it */ 102 int use_committed; /* whether to use a committed or an available 103 * recon buffer */ 104{ 105 RF_LayoutSW_t *lp; 106 int rc; 107 108 lp = rbuf->raidPtr->Layout.map; 109 rc = lp->SubmitReconBuffer(rbuf, keep_it, use_committed); 110 return (rc); 111} 112 113int 114rf_SubmitReconBufferBasic(rbuf, keep_it, use_committed) 115 RF_ReconBuffer_t *rbuf; /* the recon buffer to submit */ 116 int keep_it; /* whether we can keep this buffer or we have 117 * to return it */ 118 int use_committed; /* whether to use a committed or an available 119 * recon buffer */ 120{ 121 RF_Raid_t *raidPtr = rbuf->raidPtr; 122 RF_RaidLayout_t *layoutPtr = &raidPtr->Layout; 123 RF_ReconCtrl_t *reconCtrlPtr = raidPtr->reconControl[rbuf->row]; 124 RF_ReconParityStripeStatus_t *pssPtr; 125 RF_ReconBuffer_t *targetRbuf, *t = NULL; /* temporary rbuf 126 * pointers */ 127 caddr_t ta; /* temporary data buffer pointer */ 128 RF_CallbackDesc_t *cb, *p; 129 int retcode = 0, created = 0; 130 131 RF_Etimer_t timer; 132 133 /* makes no sense to have a submission from the failed disk */ 134 RF_ASSERT(rbuf); 135 RF_ASSERT(rbuf->col != reconCtrlPtr->fcol); 136 137 Dprintf5("RECON: submission by row %d col %d for psid %ld ru %d (failed offset %ld)\n", 138 rbuf->row, rbuf->col, (long) rbuf->parityStripeID, rbuf->which_ru, (long) rbuf->failedDiskSectorOffset); 139 140 RF_LOCK_PSS_MUTEX(raidPtr, rbuf->row, rbuf->parityStripeID); 141 142 RF_LOCK_MUTEX(reconCtrlPtr->rb_mutex); 143 144 pssPtr = rf_LookupRUStatus(raidPtr, reconCtrlPtr->pssTable, rbuf->parityStripeID, rbuf->which_ru, RF_PSS_NONE, &created); 145 RF_ASSERT(pssPtr); /* if it didn't exist, we wouldn't have gotten 146 * an rbuf for it */ 147 148 /* check to see if enough buffers have accumulated to do an XOR. If 149 * so, there's no need to acquire a floating rbuf. Before we can do 150 * any XORing, we must have acquired a destination buffer. If we 151 * have, then we can go ahead and do the XOR if (1) including this 152 * buffer, enough bufs have accumulated, or (2) this is the last 153 * submission for this stripe. Otherwise, we have to go acquire a 154 * floating rbuf. */ 155 156 targetRbuf = (RF_ReconBuffer_t *) pssPtr->rbuf; 157 if ((targetRbuf != NULL) && 158 ((pssPtr->xorBufCount == rf_numBufsToAccumulate - 1) || (targetRbuf->count + pssPtr->xorBufCount + 1 == layoutPtr->numDataCol))) { 159 pssPtr->rbufsForXor[pssPtr->xorBufCount++] = rbuf; /* install this buffer */ 160 Dprintf3("RECON: row %d col %d invoking a %d-way XOR\n", rbuf->row, rbuf->col, pssPtr->xorBufCount); 161 RF_ETIMER_START(timer); 162 rf_MultiWayReconXor(raidPtr, pssPtr); 163 RF_ETIMER_STOP(timer); 164 RF_ETIMER_EVAL(timer); 165 raidPtr->accumXorTimeUs += RF_ETIMER_VAL_US(timer); 166 if (!keep_it) { 167 raidPtr->recon_tracerecs[rbuf->col].xor_us = RF_ETIMER_VAL_US(timer); 168 RF_ETIMER_STOP(raidPtr->recon_tracerecs[rbuf->col].recon_timer); 169 RF_ETIMER_EVAL(raidPtr->recon_tracerecs[rbuf->col].recon_timer); 170 raidPtr->recon_tracerecs[rbuf->col].specific.recon.recon_return_to_submit_us += 171 RF_ETIMER_VAL_US(raidPtr->recon_tracerecs[rbuf->col].recon_timer); 172 RF_ETIMER_START(raidPtr->recon_tracerecs[rbuf->col].recon_timer); 173 174 rf_LogTraceRec(raidPtr, &raidPtr->recon_tracerecs[rbuf->col]); 175 } 176 rf_CheckForFullRbuf(raidPtr, reconCtrlPtr, pssPtr, layoutPtr->numDataCol); 177 178 /* if use_committed is on, we _must_ consume a buffer off the 179 * committed list. */ 180 if (use_committed) { 181 t = reconCtrlPtr->committedRbufs; 182 RF_ASSERT(t); 183 reconCtrlPtr->committedRbufs = t->next; 184 rf_ReleaseFloatingReconBuffer(raidPtr, rbuf->row, t); 185 } 186 if (keep_it) { 187 RF_UNLOCK_PSS_MUTEX(raidPtr, rbuf->row, rbuf->parityStripeID); 188 RF_UNLOCK_MUTEX(reconCtrlPtr->rb_mutex); 189 rf_FreeReconBuffer(rbuf); 190 return (retcode); 191 } 192 goto out; 193 } 194 /* set the value of "t", which we'll use as the rbuf from here on */ 195 if (keep_it) { 196 t = rbuf; 197 } else { 198 if (use_committed) { /* if a buffer has been committed to 199 * us, use it */ 200 t = reconCtrlPtr->committedRbufs; 201 RF_ASSERT(t); 202 reconCtrlPtr->committedRbufs = t->next; 203 t->next = NULL; 204 } else 205 if (reconCtrlPtr->floatingRbufs) { 206 t = reconCtrlPtr->floatingRbufs; 207 reconCtrlPtr->floatingRbufs = t->next; 208 t->next = NULL; 209 } 210 } 211 212 /* If we weren't able to acquire a buffer, append to the end of the 213 * buf list in the recon ctrl struct. */ 214 if (!t) { 215 RF_ASSERT(!keep_it && !use_committed); 216 Dprintf2("RECON: row %d col %d failed to acquire floating rbuf\n", rbuf->row, rbuf->col); 217 218 raidPtr->procsInBufWait++; 219 if ((raidPtr->procsInBufWait == raidPtr->numCol - 1) && (raidPtr->numFullReconBuffers == 0)) { 220 printf("Buffer wait deadlock detected. Exiting.\n"); 221 rf_PrintPSStatusTable(raidPtr, rbuf->row); 222 RF_PANIC(); 223 } 224 pssPtr->flags |= RF_PSS_BUFFERWAIT; 225 cb = rf_AllocCallbackDesc(); /* append to buf wait list in 226 * recon ctrl structure */ 227 cb->row = rbuf->row; 228 cb->col = rbuf->col; 229 cb->callbackArg.v = rbuf->parityStripeID; 230 cb->callbackArg2.v = rbuf->which_ru; 231 cb->next = NULL; 232 if (!reconCtrlPtr->bufferWaitList) 233 reconCtrlPtr->bufferWaitList = cb; 234 else { /* might want to maintain head/tail pointers 235 * here rather than search for end of list */ 236 for (p = reconCtrlPtr->bufferWaitList; p->next; p = p->next); 237 p->next = cb; 238 } 239 retcode = 1; 240 goto out; 241 } 242 Dprintf2("RECON: row %d col %d acquired rbuf\n", rbuf->row, rbuf->col); 243 RF_ETIMER_STOP(raidPtr->recon_tracerecs[rbuf->col].recon_timer); 244 RF_ETIMER_EVAL(raidPtr->recon_tracerecs[rbuf->col].recon_timer); 245 raidPtr->recon_tracerecs[rbuf->col].specific.recon.recon_return_to_submit_us += 246 RF_ETIMER_VAL_US(raidPtr->recon_tracerecs[rbuf->col].recon_timer); 247 RF_ETIMER_START(raidPtr->recon_tracerecs[rbuf->col].recon_timer); 248 249 rf_LogTraceRec(raidPtr, &raidPtr->recon_tracerecs[rbuf->col]); 250 251 /* initialize the buffer */ 252 if (t != rbuf) { 253 t->row = rbuf->row; 254 t->col = reconCtrlPtr->fcol; 255 t->parityStripeID = rbuf->parityStripeID; 256 t->which_ru = rbuf->which_ru; 257 t->failedDiskSectorOffset = rbuf->failedDiskSectorOffset; 258 t->spRow = rbuf->spRow; 259 t->spCol = rbuf->spCol; 260 t->spOffset = rbuf->spOffset; 261 262 ta = t->buffer; 263 t->buffer = rbuf->buffer; 264 rbuf->buffer = ta; /* swap buffers */ 265 } 266 /* the first installation always gets installed as the destination 267 * buffer. subsequent installations get stacked up to allow for 268 * multi-way XOR */ 269 if (!pssPtr->rbuf) { 270 pssPtr->rbuf = t; 271 t->count = 1; 272 } else 273 pssPtr->rbufsForXor[pssPtr->xorBufCount++] = t; /* install this buffer */ 274 275 rf_CheckForFullRbuf(raidPtr, reconCtrlPtr, pssPtr, layoutPtr->numDataCol); /* the buffer is full if 276 * G=2 */ 277 278out: 279 RF_UNLOCK_PSS_MUTEX(raidPtr, rbuf->row, rbuf->parityStripeID); 280 RF_UNLOCK_MUTEX(reconCtrlPtr->rb_mutex); 281 return (retcode); 282} 283 284int 285rf_MultiWayReconXor(raidPtr, pssPtr) 286 RF_Raid_t *raidPtr; 287 RF_ReconParityStripeStatus_t *pssPtr; /* the pss descriptor for this 288 * parity stripe */ 289{ 290 int i, numBufs = pssPtr->xorBufCount; 291 int numBytes = rf_RaidAddressToByte(raidPtr, raidPtr->Layout.sectorsPerStripeUnit * raidPtr->Layout.SUsPerRU); 292 RF_ReconBuffer_t **rbufs = (RF_ReconBuffer_t **) pssPtr->rbufsForXor; 293 RF_ReconBuffer_t *targetRbuf = (RF_ReconBuffer_t *) pssPtr->rbuf; 294 295 RF_ASSERT(pssPtr->rbuf != NULL); 296 RF_ASSERT(numBufs > 0 && numBufs < RF_PS_MAX_BUFS); 297#ifdef _KERNEL 298#ifndef __NetBSD__ 299 thread_block(); /* yield the processor before doing a big XOR */ 300#endif 301#endif /* _KERNEL */ 302 /* 303 * XXX 304 * 305 * What if more than 9 bufs? 306 */ 307 nWayXorFuncs[numBufs] (pssPtr->rbufsForXor, targetRbuf, numBytes / sizeof(long)); 308 309 /* release all the reconstruction buffers except the last one, which 310 * belongs to the disk whose submission caused this XOR to take place */ 311 for (i = 0; i < numBufs - 1; i++) { 312 if (rbufs[i]->type == RF_RBUF_TYPE_FLOATING) 313 rf_ReleaseFloatingReconBuffer(raidPtr, rbufs[i]->row, rbufs[i]); 314 else 315 if (rbufs[i]->type == RF_RBUF_TYPE_FORCED) 316 rf_FreeReconBuffer(rbufs[i]); 317 else 318 RF_ASSERT(0); 319 } 320 targetRbuf->count += pssPtr->xorBufCount; 321 pssPtr->xorBufCount = 0; 322 return (0); 323} 324/* removes one full buffer from one of the full-buffer lists and returns it. 325 * 326 * ASSUMES THE RB_MUTEX IS UNLOCKED AT ENTRY. 327 */ 328RF_ReconBuffer_t * 329rf_GetFullReconBuffer(reconCtrlPtr) 330 RF_ReconCtrl_t *reconCtrlPtr; 331{ 332 RF_ReconBuffer_t *p; 333 334 RF_LOCK_MUTEX(reconCtrlPtr->rb_mutex); 335 336 if ((p = reconCtrlPtr->priorityList) != NULL) { 337 reconCtrlPtr->priorityList = p->next; 338 p->next = NULL; 339 goto out; 340 } 341 if ((p = reconCtrlPtr->fullBufferList) != NULL) { 342 reconCtrlPtr->fullBufferList = p->next; 343 p->next = NULL; 344 goto out; 345 } 346out: 347 RF_UNLOCK_MUTEX(reconCtrlPtr->rb_mutex); 348 return (p); 349} 350 351 352/* if the reconstruction buffer is full, move it to the full list, 353 * which is maintained sorted by failed disk sector offset 354 * 355 * ASSUMES THE RB_MUTEX IS LOCKED AT ENTRY. */ 356int 357rf_CheckForFullRbuf(raidPtr, reconCtrl, pssPtr, numDataCol) 358 RF_Raid_t *raidPtr; 359 RF_ReconCtrl_t *reconCtrl; 360 RF_ReconParityStripeStatus_t *pssPtr; 361 int numDataCol; 362{ 363 RF_ReconBuffer_t *p, *pt, *rbuf = (RF_ReconBuffer_t *) pssPtr->rbuf; 364 365 if (rbuf->count == numDataCol) { 366 raidPtr->numFullReconBuffers++; 367 Dprintf2("RECON: rbuf for psid %ld ru %d has filled\n", 368 (long) rbuf->parityStripeID, rbuf->which_ru); 369 if (!reconCtrl->fullBufferList || (rbuf->failedDiskSectorOffset < reconCtrl->fullBufferList->failedDiskSectorOffset)) { 370 Dprintf2("RECON: rbuf for psid %ld ru %d is head of list\n", 371 (long) rbuf->parityStripeID, rbuf->which_ru); 372 rbuf->next = reconCtrl->fullBufferList; 373 reconCtrl->fullBufferList = rbuf; 374 } else { 375 for (pt = reconCtrl->fullBufferList, p = pt->next; p && p->failedDiskSectorOffset < rbuf->failedDiskSectorOffset; pt = p, p = p->next); 376 rbuf->next = p; 377 pt->next = rbuf; 378 Dprintf2("RECON: rbuf for psid %ld ru %d is in list\n", 379 (long) rbuf->parityStripeID, rbuf->which_ru); 380 } 381#if 0 382 pssPtr->writeRbuf = pssPtr->rbuf; /* DEBUG ONLY: we like 383 * to be able to find 384 * this rbuf while it's 385 * awaiting write */ 386#else 387 rbuf->pssPtr = pssPtr; 388#endif 389 pssPtr->rbuf = NULL; 390 rf_CauseReconEvent(raidPtr, rbuf->row, rbuf->col, NULL, RF_REVENT_BUFREADY); 391 } 392 return (0); 393} 394 395 396/* release a floating recon buffer for someone else to use. 397 * assumes the rb_mutex is LOCKED at entry 398 */ 399void 400rf_ReleaseFloatingReconBuffer(raidPtr, row, rbuf) 401 RF_Raid_t *raidPtr; 402 RF_RowCol_t row; 403 RF_ReconBuffer_t *rbuf; 404{ 405 RF_ReconCtrl_t *rcPtr = raidPtr->reconControl[row]; 406 RF_CallbackDesc_t *cb; 407 408 Dprintf2("RECON: releasing rbuf for psid %ld ru %d\n", 409 (long) rbuf->parityStripeID, rbuf->which_ru); 410 411 /* if anyone is waiting on buffers, wake one of them up. They will 412 * subsequently wake up anyone else waiting on their RU */ 413 if (rcPtr->bufferWaitList) { 414 rbuf->next = rcPtr->committedRbufs; 415 rcPtr->committedRbufs = rbuf; 416 cb = rcPtr->bufferWaitList; 417 rcPtr->bufferWaitList = cb->next; 418 rf_CauseReconEvent(raidPtr, cb->row, cb->col, (void *) 1, RF_REVENT_BUFCLEAR); /* arg==1 => we've 419 * committed a buffer */ 420 rf_FreeCallbackDesc(cb); 421 raidPtr->procsInBufWait--; 422 } else { 423 rbuf->next = rcPtr->floatingRbufs; 424 rcPtr->floatingRbufs = rbuf; 425 } 426} 427