/*	$NetBSD: rf_reconbuffer.c,v 1.12 2002/11/23 02:38:59 oster Exp $	*/
/*
 * Copyright (c) 1995 Carnegie-Mellon University.
 * All rights reserved.
 *
 * Author: Mark Holland
 *
 * Permission to use, copy, modify and distribute this software and
 * its documentation is hereby granted, provided that both the copyright
 * notice and this permission notice appear in all copies of the
 * software, derivative works or modified versions, and any portions
 * thereof, and that both notices appear in supporting documentation.
 *
 * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
 * CONDITION.  CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
 * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
 *
 * Carnegie Mellon requests users of this software to return to
 *
 *  Software Distribution Coordinator  or  Software.Distribution@CS.CMU.EDU
 *  School of Computer Science
 *  Carnegie Mellon University
 *  Pittsburgh PA 15213-3890
 *
 * any improvements or extensions that they make and grant Carnegie the
 * rights to redistribute these changes.
 */

/***************************************************
 *
 * rf_reconbuffer.c -- reconstruction buffer manager
 *
 ***************************************************/

#include <sys/cdefs.h>
__KERNEL_RCSID(0, "$NetBSD: rf_reconbuffer.c,v 1.12 2002/11/23 02:38:59 oster Exp $");

#include "rf_raid.h"
#include "rf_reconbuffer.h"
#include "rf_acctrace.h"
#include "rf_etimer.h"
#include "rf_general.h"
#include "rf_revent.h"
#include "rf_reconutil.h"
#include "rf_nwayxor.h"

#ifdef DEBUG

#define Dprintf1(s,a) if (rf_reconbufferDebug) printf(s,a)
#define Dprintf2(s,a,b) if (rf_reconbufferDebug) printf(s,a,b)
#define Dprintf3(s,a,b,c) if (rf_reconbufferDebug) printf(s,a,b,c)
#define Dprintf4(s,a,b,c,d) if (rf_reconbufferDebug) printf(s,a,b,c,d)
#define Dprintf5(s,a,b,c,d,e) if (rf_reconbufferDebug) printf(s,a,b,c,d,e)

#else /* DEBUG */

#define Dprintf1(s,a) {}
#define Dprintf2(s,a,b) {}
#define Dprintf3(s,a,b,c) {}
#define Dprintf4(s,a,b,c,d) {}
#define Dprintf5(s,a,b,c,d,e) {}

#endif

/*****************************************************************************
 *
 * Submit a reconstruction buffer to the manager for XOR.  We can only
 * submit a buffer if (1) we can xor into an existing buffer, which
 * means we don't have to acquire a new one, (2) we can acquire a
 * floating recon buffer, or (3) the caller has indicated that we are
 * allowed to keep the submitted buffer.
 *
 * Returns non-zero if and only if we were not able to submit.
 * In this case, we append the current disk ID to the wait list on the
 * indicated RU, so that it will be re-enabled when we acquire a buffer
 * for this RU.
 *
 ****************************************************************************/

/*
 * nWayXorFuncs[i] is a pointer to a function that will xor "i"
 * bufs into the accumulating sum.
 */
static RF_VoidFuncPtr nWayXorFuncs[] = {
	NULL,
	(RF_VoidFuncPtr) rf_nWayXor1,
	(RF_VoidFuncPtr) rf_nWayXor2,
	(RF_VoidFuncPtr) rf_nWayXor3,
	(RF_VoidFuncPtr) rf_nWayXor4,
	(RF_VoidFuncPtr) rf_nWayXor5,
	(RF_VoidFuncPtr) rf_nWayXor6,
	(RF_VoidFuncPtr) rf_nWayXor7,
	(RF_VoidFuncPtr) rf_nWayXor8,
	(RF_VoidFuncPtr) rf_nWayXor9
};

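/*
 * Generic entry point: hand the buffer to the layout-specific submission
 * routine selected through the layout switch (e.g. rf_SubmitReconBufferBasic()
 * below for the standard layouts).
 */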
int
rf_SubmitReconBuffer(rbuf, keep_it, use_committed)
	RF_ReconBuffer_t *rbuf;	/* the recon buffer to submit */
	int     keep_it;	/* whether we can keep this buffer or we have
				 * to return it */
	int     use_committed;	/* whether to use a committed or an available
				 * recon buffer */
{
	RF_LayoutSW_t *lp;
	int     rc;

	lp = rbuf->raidPtr->Layout.map;
	rc = lp->SubmitReconBuffer(rbuf, keep_it, use_committed);
	return (rc);
}

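/*
 * Basic submission routine.  If a destination buffer already exists for
 * this RU and enough sources have accumulated (or this is the last
 * submission for the stripe), XOR immediately.  Otherwise acquire an rbuf
 * to hold the data: keep the submitted one if the caller allows it, take
 * one off the committed list if one was committed to us, or grab a
 * floating buffer.  If no buffer can be had, park the submitting disk on
 * the buffer wait list and return non-zero.
 */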
int
rf_SubmitReconBufferBasic(rbuf, keep_it, use_committed)
	RF_ReconBuffer_t *rbuf;	/* the recon buffer to submit */
	int     keep_it;	/* whether we can keep this buffer or we have
				 * to return it */
	int     use_committed;	/* whether to use a committed or an available
				 * recon buffer */
{
	RF_Raid_t *raidPtr = rbuf->raidPtr;
	RF_RaidLayout_t *layoutPtr = &raidPtr->Layout;
	RF_ReconCtrl_t *reconCtrlPtr = raidPtr->reconControl[rbuf->row];
	RF_ReconParityStripeStatus_t *pssPtr;
	RF_ReconBuffer_t *targetRbuf, *t = NULL;	/* temporary rbuf
							 * pointers */
	caddr_t ta;		/* temporary data buffer pointer */
	RF_CallbackDesc_t *cb, *p;
	int     retcode = 0, created = 0;

	RF_Etimer_t timer;

	/* makes no sense to have a submission from the failed disk */
	RF_ASSERT(rbuf);
	RF_ASSERT(rbuf->col != reconCtrlPtr->fcol);

	Dprintf5("RECON: submission by row %d col %d for psid %ld ru %d (failed offset %ld)\n",
	    rbuf->row, rbuf->col, (long) rbuf->parityStripeID, rbuf->which_ru, (long) rbuf->failedDiskSectorOffset);

	RF_LOCK_PSS_MUTEX(raidPtr, rbuf->row, rbuf->parityStripeID);

	RF_LOCK_MUTEX(reconCtrlPtr->rb_mutex);

	pssPtr = rf_LookupRUStatus(raidPtr, reconCtrlPtr->pssTable, rbuf->parityStripeID, rbuf->which_ru, RF_PSS_NONE, &created);
	RF_ASSERT(pssPtr);	/* if it didn't exist, we wouldn't have gotten
				 * an rbuf for it */

	/* check to see if enough buffers have accumulated to do an XOR.  If
	 * so, there's no need to acquire a floating rbuf.  Before we can do
	 * any XORing, we must have acquired a destination buffer.  If we
	 * have, then we can go ahead and do the XOR if (1) including this
	 * buffer, enough bufs have accumulated, or (2) this is the last
	 * submission for this stripe. Otherwise, we have to go acquire a
	 * floating rbuf. */

	targetRbuf = (RF_ReconBuffer_t *) pssPtr->rbuf;
	if ((targetRbuf != NULL) &&
	    ((pssPtr->xorBufCount == rf_numBufsToAccumulate - 1) || (targetRbuf->count + pssPtr->xorBufCount + 1 == layoutPtr->numDataCol))) {
		pssPtr->rbufsForXor[pssPtr->xorBufCount++] = rbuf;	/* install this buffer */
		Dprintf3("RECON: row %d col %d invoking a %d-way XOR\n", rbuf->row, rbuf->col, pssPtr->xorBufCount);
		RF_ETIMER_START(timer);
		rf_MultiWayReconXor(raidPtr, pssPtr);
		RF_ETIMER_STOP(timer);
		RF_ETIMER_EVAL(timer);
		raidPtr->accumXorTimeUs += RF_ETIMER_VAL_US(timer);
		if (!keep_it) {
			raidPtr->recon_tracerecs[rbuf->col].xor_us = RF_ETIMER_VAL_US(timer);
			RF_ETIMER_STOP(raidPtr->recon_tracerecs[rbuf->col].recon_timer);
			RF_ETIMER_EVAL(raidPtr->recon_tracerecs[rbuf->col].recon_timer);
			raidPtr->recon_tracerecs[rbuf->col].specific.recon.recon_return_to_submit_us +=
			    RF_ETIMER_VAL_US(raidPtr->recon_tracerecs[rbuf->col].recon_timer);
			RF_ETIMER_START(raidPtr->recon_tracerecs[rbuf->col].recon_timer);

			rf_LogTraceRec(raidPtr, &raidPtr->recon_tracerecs[rbuf->col]);
		}
		rf_CheckForFullRbuf(raidPtr, reconCtrlPtr, pssPtr, layoutPtr->numDataCol);

		/* if use_committed is on, we _must_ consume a buffer off the
		 * committed list. */
		if (use_committed) {
			t = reconCtrlPtr->committedRbufs;
			RF_ASSERT(t);
			reconCtrlPtr->committedRbufs = t->next;
			rf_ReleaseFloatingReconBuffer(raidPtr, rbuf->row, t);
		}
		if (keep_it) {
			RF_UNLOCK_PSS_MUTEX(raidPtr, rbuf->row, rbuf->parityStripeID);
			RF_UNLOCK_MUTEX(reconCtrlPtr->rb_mutex);
			rf_FreeReconBuffer(rbuf);
			return (retcode);
		}
		goto out;
	}
	/* set the value of "t", which we'll use as the rbuf from here on */
	if (keep_it) {
		t = rbuf;
	} else {
		if (use_committed) {	/* if a buffer has been committed to
					 * us, use it */
			t = reconCtrlPtr->committedRbufs;
			RF_ASSERT(t);
			reconCtrlPtr->committedRbufs = t->next;
			t->next = NULL;
		} else
			if (reconCtrlPtr->floatingRbufs) {
				t = reconCtrlPtr->floatingRbufs;
				reconCtrlPtr->floatingRbufs = t->next;
				t->next = NULL;
			}
	}

	/* If we weren't able to acquire a buffer, append to the end of the
	 * buf list in the recon ctrl struct. */
	if (!t) {
		RF_ASSERT(!keep_it && !use_committed);
		Dprintf2("RECON: row %d col %d failed to acquire floating rbuf\n", rbuf->row, rbuf->col);

		raidPtr->procsInBufWait++;
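		/* if every surviving disk is now waiting for a buffer and no
		 * full buffers are pending to be drained, nobody can ever
		 * make progress: this is a deadlock */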
		if ((raidPtr->procsInBufWait == raidPtr->numCol - 1) && (raidPtr->numFullReconBuffers == 0)) {
			printf("Buffer wait deadlock detected.  Exiting.\n");
			rf_PrintPSStatusTable(raidPtr, rbuf->row);
			RF_PANIC();
		}
		pssPtr->flags |= RF_PSS_BUFFERWAIT;
		cb = rf_AllocCallbackDesc();	/* append to buf wait list in
						 * recon ctrl structure */
		cb->row = rbuf->row;
		cb->col = rbuf->col;
		cb->callbackArg.v = rbuf->parityStripeID;
		cb->callbackArg2.v = rbuf->which_ru;
		cb->next = NULL;
		if (!reconCtrlPtr->bufferWaitList)
			reconCtrlPtr->bufferWaitList = cb;
		else {		/* might want to maintain head/tail pointers
				 * here rather than search for end of list */
			for (p = reconCtrlPtr->bufferWaitList; p->next; p = p->next);
			p->next = cb;
		}
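		/* tell the caller the submission failed; this disk will be
		 * re-enabled when a buffer is committed to this RU */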
		retcode = 1;
		goto out;
	}
	Dprintf2("RECON: row %d col %d acquired rbuf\n", rbuf->row, rbuf->col);
	RF_ETIMER_STOP(raidPtr->recon_tracerecs[rbuf->col].recon_timer);
	RF_ETIMER_EVAL(raidPtr->recon_tracerecs[rbuf->col].recon_timer);
	raidPtr->recon_tracerecs[rbuf->col].specific.recon.recon_return_to_submit_us +=
	    RF_ETIMER_VAL_US(raidPtr->recon_tracerecs[rbuf->col].recon_timer);
	RF_ETIMER_START(raidPtr->recon_tracerecs[rbuf->col].recon_timer);

	rf_LogTraceRec(raidPtr, &raidPtr->recon_tracerecs[rbuf->col]);

	/* initialize the buffer */
	if (t != rbuf) {
		t->row = rbuf->row;
		t->col = reconCtrlPtr->fcol;
		t->parityStripeID = rbuf->parityStripeID;
		t->which_ru = rbuf->which_ru;
		t->failedDiskSectorOffset = rbuf->failedDiskSectorOffset;
		t->spRow = rbuf->spRow;
		t->spCol = rbuf->spCol;
		t->spOffset = rbuf->spOffset;

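		/* hand the freshly read data to the acquired buffer and give
		 * its data area to the submitted rbuf, which the caller will
		 * reuse for its next read */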
		ta = t->buffer;
		t->buffer = rbuf->buffer;
		rbuf->buffer = ta;	/* swap buffers */
	}
	/* the first installation always gets installed as the destination
	 * buffer. subsequent installations get stacked up to allow for
	 * multi-way XOR */
	if (!pssPtr->rbuf) {
		pssPtr->rbuf = t;
		t->count = 1;
	} else
		pssPtr->rbufsForXor[pssPtr->xorBufCount++] = t;	/* install this buffer */

	rf_CheckForFullRbuf(raidPtr, reconCtrlPtr, pssPtr, layoutPtr->numDataCol);	/* the buffer is full if
											 * G=2 */

out:
	RF_UNLOCK_PSS_MUTEX(raidPtr, rbuf->row, rbuf->parityStripeID);
	RF_UNLOCK_MUTEX(reconCtrlPtr->rb_mutex);
	return (retcode);
}

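/*
 * XOR the accumulated source buffers for this parity stripe into the
 * destination buffer, then release the sources.  The last source is not
 * released here: it is the buffer whose submission triggered the XOR and
 * is still owned by the submitting disk.
 */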
int
rf_MultiWayReconXor(raidPtr, pssPtr)
	RF_Raid_t *raidPtr;
	RF_ReconParityStripeStatus_t *pssPtr;	/* the pss descriptor for this
						 * parity stripe */
{
	int     i, numBufs = pssPtr->xorBufCount;
	int     numBytes = rf_RaidAddressToByte(raidPtr, raidPtr->Layout.sectorsPerStripeUnit * raidPtr->Layout.SUsPerRU);
	RF_ReconBuffer_t **rbufs = (RF_ReconBuffer_t **) pssPtr->rbufsForXor;
	RF_ReconBuffer_t *targetRbuf = (RF_ReconBuffer_t *) pssPtr->rbuf;

	RF_ASSERT(pssPtr->rbuf != NULL);
	RF_ASSERT(numBufs > 0 && numBufs < RF_PS_MAX_BUFS);
#ifdef _KERNEL
#ifndef __NetBSD__
	thread_block();		/* yield the processor before doing a big XOR */
#endif
#endif				/* _KERNEL */
	/*
	 * XXX
	 *
	 * What if more than 9 bufs?
	 */
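	/* the XOR routines operate a word at a time, so the RU size is
	 * passed as a count of longs rather than bytes */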
	nWayXorFuncs[numBufs] (pssPtr->rbufsForXor, targetRbuf, numBytes / sizeof(long));

	/* release all the reconstruction buffers except the last one, which
	 * belongs to the disk whose submission caused this XOR to take place */
	for (i = 0; i < numBufs - 1; i++) {
		if (rbufs[i]->type == RF_RBUF_TYPE_FLOATING)
			rf_ReleaseFloatingReconBuffer(raidPtr, rbufs[i]->row, rbufs[i]);
		else
			if (rbufs[i]->type == RF_RBUF_TYPE_FORCED)
				rf_FreeReconBuffer(rbufs[i]);
			else
				RF_ASSERT(0);
	}
	targetRbuf->count += pssPtr->xorBufCount;
	pssPtr->xorBufCount = 0;
	return (0);
}
/* removes one full buffer from the full-buffer list and returns it.
 *
 * ASSUMES THE RB_MUTEX IS UNLOCKED AT ENTRY.
 */
RF_ReconBuffer_t *
rf_GetFullReconBuffer(reconCtrlPtr)
	RF_ReconCtrl_t *reconCtrlPtr;
{
	RF_ReconBuffer_t *p;

	RF_LOCK_MUTEX(reconCtrlPtr->rb_mutex);

	if ((p = reconCtrlPtr->fullBufferList) != NULL) {
		reconCtrlPtr->fullBufferList = p->next;
		p->next = NULL;
	}
	RF_UNLOCK_MUTEX(reconCtrlPtr->rb_mutex);
	return (p);
}


/* if the reconstruction buffer is full, move it to the full list,
 * which is maintained sorted by failed disk sector offset
 *
 * ASSUMES THE RB_MUTEX IS LOCKED AT ENTRY.  */
int
rf_CheckForFullRbuf(raidPtr, reconCtrl, pssPtr, numDataCol)
	RF_Raid_t *raidPtr;
	RF_ReconCtrl_t *reconCtrl;
	RF_ReconParityStripeStatus_t *pssPtr;
	int     numDataCol;
{
	RF_ReconBuffer_t *p, *pt, *rbuf = (RF_ReconBuffer_t *) pssPtr->rbuf;

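	/* one contribution per data column means the failed unit has been
	 * completely reconstructed; move the buffer to the full-buffer list
	 * and signal BUFREADY so it can be written out */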
	if (rbuf->count == numDataCol) {
		raidPtr->numFullReconBuffers++;
		Dprintf2("RECON: rbuf for psid %ld ru %d has filled\n",
		    (long) rbuf->parityStripeID, rbuf->which_ru);
		if (!reconCtrl->fullBufferList || (rbuf->failedDiskSectorOffset < reconCtrl->fullBufferList->failedDiskSectorOffset)) {
			Dprintf2("RECON: rbuf for psid %ld ru %d is head of list\n",
			    (long) rbuf->parityStripeID, rbuf->which_ru);
			rbuf->next = reconCtrl->fullBufferList;
			reconCtrl->fullBufferList = rbuf;
		} else {
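			/* walk the list, which is kept sorted by failed disk
			 * sector offset, to find the insertion point */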
			for (pt = reconCtrl->fullBufferList, p = pt->next; p && p->failedDiskSectorOffset < rbuf->failedDiskSectorOffset; pt = p, p = p->next);
			rbuf->next = p;
			pt->next = rbuf;
			Dprintf2("RECON: rbuf for psid %ld ru %d is in list\n",
			    (long) rbuf->parityStripeID, rbuf->which_ru);
		}
		rbuf->pssPtr = pssPtr;
		pssPtr->rbuf = NULL;
		rf_CauseReconEvent(raidPtr, rbuf->row, rbuf->col, NULL, RF_REVENT_BUFREADY);
	}
	return (0);
}


/* release a floating recon buffer for someone else to use.
 * assumes the rb_mutex is LOCKED at entry
 */
void
rf_ReleaseFloatingReconBuffer(raidPtr, row, rbuf)
	RF_Raid_t *raidPtr;
	RF_RowCol_t row;
	RF_ReconBuffer_t *rbuf;
{
	RF_ReconCtrl_t *rcPtr = raidPtr->reconControl[row];
	RF_CallbackDesc_t *cb;

	Dprintf2("RECON: releasing rbuf for psid %ld ru %d\n",
	    (long) rbuf->parityStripeID, rbuf->which_ru);

	/* if anyone is waiting on buffers, wake one of them up.  They will
	 * subsequently wake up anyone else waiting on their RU */
	if (rcPtr->bufferWaitList) {
		rbuf->next = rcPtr->committedRbufs;
		rcPtr->committedRbufs = rbuf;
		cb = rcPtr->bufferWaitList;
		rcPtr->bufferWaitList = cb->next;
		rf_CauseReconEvent(raidPtr, cb->row, cb->col, (void *) 1, RF_REVENT_BUFCLEAR);	/* arg==1 => we've
												 * committed a buffer */
		rf_FreeCallbackDesc(cb);
		raidPtr->procsInBufWait--;
	} else {
		rbuf->next = rcPtr->floatingRbufs;
		rcPtr->floatingRbufs = rbuf;
	}
}