rf_reconbuffer.c revision 1.25
1/*	$NetBSD: rf_reconbuffer.c,v 1.25 2011/05/02 07:29:18 mrg Exp $	*/
2/*
3 * Copyright (c) 1995 Carnegie-Mellon University.
4 * All rights reserved.
5 *
6 * Author: Mark Holland
7 *
8 * Permission to use, copy, modify and distribute this software and
9 * its documentation is hereby granted, provided that both the copyright
10 * notice and this permission notice appear in all copies of the
11 * software, derivative works or modified versions, and any portions
12 * thereof, and that both notices appear in supporting documentation.
13 *
14 * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
15 * CONDITION.  CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
16 * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
17 *
18 * Carnegie Mellon requests users of this software to return to
19 *
20 *  Software Distribution Coordinator  or  Software.Distribution@CS.CMU.EDU
21 *  School of Computer Science
22 *  Carnegie Mellon University
23 *  Pittsburgh PA 15213-3890
24 *
25 * any improvements or extensions that they make and grant Carnegie the
26 * rights to redistribute these changes.
27 */
28
29/***************************************************
30 *
31 * rf_reconbuffer.c -- reconstruction buffer manager
32 *
33 ***************************************************/
34
35#include <sys/cdefs.h>
36__KERNEL_RCSID(0, "$NetBSD: rf_reconbuffer.c,v 1.25 2011/05/02 07:29:18 mrg Exp $");
37
38#include "rf_raid.h"
39#include "rf_reconbuffer.h"
40#include "rf_acctrace.h"
41#include "rf_etimer.h"
42#include "rf_general.h"
43#include "rf_revent.h"
44#include "rf_reconutil.h"
45#include "rf_nwayxor.h"
46
/*
 * Debug tracing helpers.  DprintfN(fmt, a1..aN) prints through printf()
 * when rf_reconbufferDebug is non-zero in DEBUG builds, and expands to an
 * empty statement (arguments are not evaluated) otherwise.
 *
 * Every variant is wrapped in do { } while (0) so that the macro behaves
 * as exactly one statement: it can be followed by ';' inside an unbraced
 * if/else without capturing the else or leaving a stray empty statement.
 */
#ifdef DEBUG

#define Dprintf1(s,a) \
	do { if (rf_reconbufferDebug) printf(s,a); } while (0)
#define Dprintf2(s,a,b) \
	do { if (rf_reconbufferDebug) printf(s,a,b); } while (0)
#define Dprintf3(s,a,b,c) \
	do { if (rf_reconbufferDebug) printf(s,a,b,c); } while (0)
#define Dprintf4(s,a,b,c,d) \
	do { if (rf_reconbufferDebug) printf(s,a,b,c,d); } while (0)
#define Dprintf5(s,a,b,c,d,e) \
	do { if (rf_reconbufferDebug) printf(s,a,b,c,d,e); } while (0)

#else /* DEBUG */

#define Dprintf1(s,a) do { } while (0)
#define Dprintf2(s,a,b) do { } while (0)
#define Dprintf3(s,a,b,c) do { } while (0)
#define Dprintf4(s,a,b,c,d) do { } while (0)
#define Dprintf5(s,a,b,c,d,e) do { } while (0)

#endif
64
65/*****************************************************************************
66 *
67 * Submit a reconstruction buffer to the manager for XOR.  We can only
68 * submit a buffer if (1) we can xor into an existing buffer, which
69 * means we don't have to acquire a new one, (2) we can acquire a
70 * floating recon buffer, or (3) the caller has indicated that we are
71 * allowed to keep the submitted buffer.
72 *
73 * Returns non-zero if and only if we were not able to submit.
74 * In this case, we append the current disk ID to the wait list on the
75 * indicated RU, so that it will be re-enabled when we acquire a buffer
76 * for this RU.
77 *
78 ****************************************************************************/
79
80/*
81 * nWayXorFuncs[i] is a pointer to a function that will xor "i"
82 * bufs into the accumulating sum.
83 */
84static const RF_VoidFuncPtr nWayXorFuncs[] = {
85	NULL,
86	(RF_VoidFuncPtr) rf_nWayXor1,
87	(RF_VoidFuncPtr) rf_nWayXor2,
88	(RF_VoidFuncPtr) rf_nWayXor3,
89	(RF_VoidFuncPtr) rf_nWayXor4,
90	(RF_VoidFuncPtr) rf_nWayXor5,
91	(RF_VoidFuncPtr) rf_nWayXor6,
92	(RF_VoidFuncPtr) rf_nWayXor7,
93	(RF_VoidFuncPtr) rf_nWayXor8,
94	(RF_VoidFuncPtr) rf_nWayXor9
95};
96
97/*
98 * rbuf          - the recon buffer to submit
99 * keep_it       - whether we can keep this buffer or we have to return it
100 * use_committed - whether to use a committed or an available recon buffer
101 */
102int
103rf_SubmitReconBuffer(RF_ReconBuffer_t *rbuf, int keep_it, int use_committed)
104{
105	const RF_LayoutSW_t *lp;
106	int     rc;
107
108	lp = rbuf->raidPtr->Layout.map;
109	rc = lp->SubmitReconBuffer(rbuf, keep_it, use_committed);
110	return (rc);
111}
112
113/*
114 * rbuf          - the recon buffer to submit
115 * keep_it       - whether we can keep this buffer or we have to return it
116 * use_committed - whether to use a committed or an available recon buffer
117 */
118int
119rf_SubmitReconBufferBasic(RF_ReconBuffer_t *rbuf, int keep_it,
120			  int use_committed)
121{
122	RF_Raid_t *raidPtr = rbuf->raidPtr;
123	RF_RaidLayout_t *layoutPtr = &raidPtr->Layout;
124	RF_ReconCtrl_t *reconCtrlPtr = raidPtr->reconControl;
125	RF_ReconParityStripeStatus_t *pssPtr;
126	RF_ReconBuffer_t *targetRbuf, *t = NULL;	/* temporary rbuf
127							 * pointers */
128	void *ta;		/* temporary data buffer pointer */
129	RF_CallbackDesc_t *cb, *p;
130	int     retcode = 0;
131
132	RF_Etimer_t timer;
133
134	/* makes no sense to have a submission from the failed disk */
135	RF_ASSERT(rbuf);
136	RF_ASSERT(rbuf->col != reconCtrlPtr->fcol);
137
138	Dprintf4("RECON: submission by col %d for psid %ld ru %d (failed offset %ld)\n",
139	    rbuf->col, (long) rbuf->parityStripeID, rbuf->which_ru, (long) rbuf->failedDiskSectorOffset);
140
141	RF_LOCK_PSS_MUTEX(raidPtr, rbuf->parityStripeID);
142
143	rf_lock_mutex2(reconCtrlPtr->rb_mutex);
144	while(reconCtrlPtr->rb_lock) {
145		rf_wait_cond2(reconCtrlPtr->rb_cv, reconCtrlPtr->rb_mutex);
146	}
147	reconCtrlPtr->rb_lock = 1;
148	rf_unlock_mutex2(reconCtrlPtr->rb_mutex);
149
150	pssPtr = rf_LookupRUStatus(raidPtr, reconCtrlPtr->pssTable, rbuf->parityStripeID, rbuf->which_ru, RF_PSS_NONE, NULL);
151	RF_ASSERT(pssPtr);	/* if it didn't exist, we wouldn't have gotten
152				 * an rbuf for it */
153
154	/* check to see if enough buffers have accumulated to do an XOR.  If
155	 * so, there's no need to acquire a floating rbuf.  Before we can do
156	 * any XORing, we must have acquired a destination buffer.  If we
157	 * have, then we can go ahead and do the XOR if (1) including this
158	 * buffer, enough bufs have accumulated, or (2) this is the last
159	 * submission for this stripe. Otherwise, we have to go acquire a
160	 * floating rbuf. */
161
162	targetRbuf = (RF_ReconBuffer_t *) pssPtr->rbuf;
163	if ((targetRbuf != NULL) &&
164	    ((pssPtr->xorBufCount == rf_numBufsToAccumulate - 1) || (targetRbuf->count + pssPtr->xorBufCount + 1 == layoutPtr->numDataCol))) {
165		pssPtr->rbufsForXor[pssPtr->xorBufCount++] = rbuf;	/* install this buffer */
166		Dprintf2("RECON: col %d invoking a %d-way XOR\n", rbuf->col, pssPtr->xorBufCount);
167		RF_ETIMER_START(timer);
168		rf_MultiWayReconXor(raidPtr, pssPtr);
169		RF_ETIMER_STOP(timer);
170		RF_ETIMER_EVAL(timer);
171		raidPtr->accumXorTimeUs += RF_ETIMER_VAL_US(timer);
172		if (!keep_it) {
173#if RF_ACC_TRACE > 0
174			raidPtr->recon_tracerecs[rbuf->col].xor_us = RF_ETIMER_VAL_US(timer);
175			RF_ETIMER_STOP(raidPtr->recon_tracerecs[rbuf->col].recon_timer);
176			RF_ETIMER_EVAL(raidPtr->recon_tracerecs[rbuf->col].recon_timer);
177			raidPtr->recon_tracerecs[rbuf->col].specific.recon.recon_return_to_submit_us +=
178			    RF_ETIMER_VAL_US(raidPtr->recon_tracerecs[rbuf->col].recon_timer);
179			RF_ETIMER_START(raidPtr->recon_tracerecs[rbuf->col].recon_timer);
180
181			rf_LogTraceRec(raidPtr, &raidPtr->recon_tracerecs[rbuf->col]);
182#endif
183		}
184		rf_CheckForFullRbuf(raidPtr, reconCtrlPtr, pssPtr, layoutPtr->numDataCol);
185
186		/* if use_committed is on, we _must_ consume a buffer off the
187		 * committed list. */
188		if (use_committed) {
189			t = reconCtrlPtr->committedRbufs;
190			RF_ASSERT(t);
191			reconCtrlPtr->committedRbufs = t->next;
192			rf_ReleaseFloatingReconBuffer(raidPtr, t);
193		}
194		if (keep_it) {
195			RF_UNLOCK_PSS_MUTEX(raidPtr, rbuf->parityStripeID);
196			rf_lock_mutex2(reconCtrlPtr->rb_mutex);
197			reconCtrlPtr->rb_lock = 0;
198			rf_broadcast_cond2(reconCtrlPtr->rb_cv);
199			rf_unlock_mutex2(reconCtrlPtr->rb_mutex);
200			rf_FreeReconBuffer(rbuf);
201			return (retcode);
202		}
203		goto out;
204	}
205	/* set the value of "t", which we'll use as the rbuf from here on */
206	if (keep_it) {
207		t = rbuf;
208	} else {
209		if (use_committed) {	/* if a buffer has been committed to
210					 * us, use it */
211			t = reconCtrlPtr->committedRbufs;
212			RF_ASSERT(t);
213			reconCtrlPtr->committedRbufs = t->next;
214			t->next = NULL;
215		} else
216			if (reconCtrlPtr->floatingRbufs) {
217				t = reconCtrlPtr->floatingRbufs;
218				reconCtrlPtr->floatingRbufs = t->next;
219				t->next = NULL;
220			}
221	}
222
223	/* If we weren't able to acquire a buffer, append to the end of the
224	 * buf list in the recon ctrl struct. */
225	if (!t) {
226		RF_ASSERT(!keep_it && !use_committed);
227		Dprintf1("RECON: col %d failed to acquire floating rbuf\n", rbuf->col);
228
229		raidPtr->procsInBufWait++;
230		if ((raidPtr->procsInBufWait == raidPtr->numCol - 1) && (raidPtr->numFullReconBuffers == 0)) {
231			printf("Buffer wait deadlock detected.  Exiting.\n");
232			rf_PrintPSStatusTable(raidPtr);
233			RF_PANIC();
234		}
235		pssPtr->flags |= RF_PSS_BUFFERWAIT;
236		cb = rf_AllocCallbackDesc();	/* append to buf wait list in
237						 * recon ctrl structure */
238		cb->col = rbuf->col;
239		cb->callbackArg.v = rbuf->parityStripeID;
240		cb->next = NULL;
241		if (!reconCtrlPtr->bufferWaitList)
242			reconCtrlPtr->bufferWaitList = cb;
243		else {		/* might want to maintain head/tail pointers
244				 * here rather than search for end of list */
245			for (p = reconCtrlPtr->bufferWaitList; p->next; p = p->next);
246			p->next = cb;
247		}
248		retcode = 1;
249		goto out;
250	}
251	Dprintf1("RECON: col %d acquired rbuf\n", rbuf->col);
252#if RF_ACC_TRACE > 0
253	RF_ETIMER_STOP(raidPtr->recon_tracerecs[rbuf->col].recon_timer);
254	RF_ETIMER_EVAL(raidPtr->recon_tracerecs[rbuf->col].recon_timer);
255	raidPtr->recon_tracerecs[rbuf->col].specific.recon.recon_return_to_submit_us +=
256	    RF_ETIMER_VAL_US(raidPtr->recon_tracerecs[rbuf->col].recon_timer);
257	RF_ETIMER_START(raidPtr->recon_tracerecs[rbuf->col].recon_timer);
258
259	rf_LogTraceRec(raidPtr, &raidPtr->recon_tracerecs[rbuf->col]);
260#endif
261
262	/* initialize the buffer */
263	if (t != rbuf) {
264		t->col = reconCtrlPtr->fcol;
265		t->parityStripeID = rbuf->parityStripeID;
266		t->which_ru = rbuf->which_ru;
267		t->failedDiskSectorOffset = rbuf->failedDiskSectorOffset;
268		t->spCol = rbuf->spCol;
269		t->spOffset = rbuf->spOffset;
270
271		ta = t->buffer;
272		t->buffer = rbuf->buffer;
273		rbuf->buffer = ta;	/* swap buffers */
274	}
275	/* the first installation always gets installed as the destination
276	 * buffer. subsequent installations get stacked up to allow for
277	 * multi-way XOR */
278	if (!pssPtr->rbuf) {
279		pssPtr->rbuf = t;
280		t->count = 1;
281	} else
282		pssPtr->rbufsForXor[pssPtr->xorBufCount++] = t;	/* install this buffer */
283
284	rf_CheckForFullRbuf(raidPtr, reconCtrlPtr, pssPtr, layoutPtr->numDataCol);	/* the buffer is full if
285											 * G=2 */
286
287out:
288	RF_UNLOCK_PSS_MUTEX(raidPtr, rbuf->parityStripeID);
289	rf_lock_mutex2(reconCtrlPtr->rb_mutex);
290	reconCtrlPtr->rb_lock = 0;
291	rf_broadcast_cond2(reconCtrlPtr->rb_cv);
292	rf_unlock_mutex2(reconCtrlPtr->rb_mutex);
293	return (retcode);
294}
295/* pssPtr - the pss descriptor for this parity stripe */
296int
297rf_MultiWayReconXor(RF_Raid_t *raidPtr, RF_ReconParityStripeStatus_t *pssPtr)
298{
299	int     i, numBufs = pssPtr->xorBufCount;
300	int     numBytes = rf_RaidAddressToByte(raidPtr, raidPtr->Layout.sectorsPerStripeUnit * raidPtr->Layout.SUsPerRU);
301	RF_ReconBuffer_t **rbufs = (RF_ReconBuffer_t **) pssPtr->rbufsForXor;
302	RF_ReconBuffer_t *targetRbuf = (RF_ReconBuffer_t *) pssPtr->rbuf;
303
304	RF_ASSERT(pssPtr->rbuf != NULL);
305	RF_ASSERT(numBufs > 0 && numBufs < RF_PS_MAX_BUFS);
306#ifdef _KERNEL
307#ifndef __NetBSD__
308	thread_block();		/* yield the processor before doing a big XOR */
309#endif
310#endif				/* _KERNEL */
311	/*
312         * XXX
313         *
314         * What if more than 9 bufs?
315         */
316	nWayXorFuncs[numBufs] (pssPtr->rbufsForXor, targetRbuf, numBytes / sizeof(long));
317
318	/* release all the reconstruction buffers except the last one, which
319	 * belongs to the disk whose submission caused this XOR to take place */
320	for (i = 0; i < numBufs - 1; i++) {
321		if (rbufs[i]->type == RF_RBUF_TYPE_FLOATING)
322			rf_ReleaseFloatingReconBuffer(raidPtr, rbufs[i]);
323		else
324			if (rbufs[i]->type == RF_RBUF_TYPE_FORCED)
325				rf_FreeReconBuffer(rbufs[i]);
326			else
327				RF_ASSERT(0);
328	}
329	targetRbuf->count += pssPtr->xorBufCount;
330	pssPtr->xorBufCount = 0;
331	return (0);
332}
333/* removes one full buffer from one of the full-buffer lists and returns it.
334 *
335 * ASSUMES THE RB_MUTEX IS UNLOCKED AT ENTRY.
336 */
337RF_ReconBuffer_t *
338rf_GetFullReconBuffer(RF_ReconCtrl_t *reconCtrlPtr)
339{
340	RF_ReconBuffer_t *p;
341
342	rf_lock_mutex2(reconCtrlPtr->rb_mutex);
343	while(reconCtrlPtr->rb_lock) {
344		rf_wait_cond2(reconCtrlPtr->rb_cv, reconCtrlPtr->rb_mutex);
345	}
346	reconCtrlPtr->rb_lock = 1;
347	rf_unlock_mutex2(reconCtrlPtr->rb_mutex);
348
349	if ((p = reconCtrlPtr->fullBufferList) != NULL) {
350		reconCtrlPtr->fullBufferList = p->next;
351		p->next = NULL;
352	}
353	rf_lock_mutex2(reconCtrlPtr->rb_mutex);
354	reconCtrlPtr->rb_lock = 0;
355	rf_broadcast_cond2(reconCtrlPtr->rb_cv);
356	rf_unlock_mutex2(reconCtrlPtr->rb_mutex);
357	return (p);
358}
359
360
361/* if the reconstruction buffer is full, move it to the full list,
362 * which is maintained sorted by failed disk sector offset
363 *
364 * ASSUMES THE RB_MUTEX IS LOCKED AT ENTRY.  */
365int
366rf_CheckForFullRbuf(RF_Raid_t *raidPtr, RF_ReconCtrl_t *reconCtrl,
367		    RF_ReconParityStripeStatus_t *pssPtr, int numDataCol)
368{
369	RF_ReconBuffer_t *p, *pt, *rbuf = (RF_ReconBuffer_t *) pssPtr->rbuf;
370
371	if (rbuf->count == numDataCol) {
372		raidPtr->numFullReconBuffers++;
373		Dprintf2("RECON: rbuf for psid %ld ru %d has filled\n",
374		    (long) rbuf->parityStripeID, rbuf->which_ru);
375		if (!reconCtrl->fullBufferList || (rbuf->failedDiskSectorOffset < reconCtrl->fullBufferList->failedDiskSectorOffset)) {
376			Dprintf2("RECON: rbuf for psid %ld ru %d is head of list\n",
377			    (long) rbuf->parityStripeID, rbuf->which_ru);
378			rbuf->next = reconCtrl->fullBufferList;
379			reconCtrl->fullBufferList = rbuf;
380		} else {
381			for (pt = reconCtrl->fullBufferList, p = pt->next; p && p->failedDiskSectorOffset < rbuf->failedDiskSectorOffset; pt = p, p = p->next);
382			rbuf->next = p;
383			pt->next = rbuf;
384			Dprintf2("RECON: rbuf for psid %ld ru %d is in list\n",
385			    (long) rbuf->parityStripeID, rbuf->which_ru);
386		}
387		rbuf->pssPtr = pssPtr;
388		pssPtr->rbuf = NULL;
389		rf_CauseReconEvent(raidPtr, rbuf->col, NULL, RF_REVENT_BUFREADY);
390	}
391	return (0);
392}
393
394
395/* release a floating recon buffer for someone else to use.
396 * assumes the rb_mutex is LOCKED at entry
397 */
398void
399rf_ReleaseFloatingReconBuffer(RF_Raid_t *raidPtr, RF_ReconBuffer_t *rbuf)
400{
401	RF_ReconCtrl_t *rcPtr = raidPtr->reconControl;
402	RF_CallbackDesc_t *cb;
403
404	Dprintf2("RECON: releasing rbuf for psid %ld ru %d\n",
405	    (long) rbuf->parityStripeID, rbuf->which_ru);
406
407	/* if anyone is waiting on buffers, wake one of them up.  They will
408	 * subsequently wake up anyone else waiting on their RU */
409	if (rcPtr->bufferWaitList) {
410		rbuf->next = rcPtr->committedRbufs;
411		rcPtr->committedRbufs = rbuf;
412		cb = rcPtr->bufferWaitList;
413		rcPtr->bufferWaitList = cb->next;
414		rf_CauseReconEvent(raidPtr, cb->col, (void *) 1, RF_REVENT_BUFCLEAR);	/* arg==1 => we've
415												 * committed a buffer */
416		rf_FreeCallbackDesc(cb);
417		raidPtr->procsInBufWait--;
418	} else {
419		rbuf->next = rcPtr->floatingRbufs;
420		rcPtr->floatingRbufs = rbuf;
421	}
422}
423