softraid_raid0.c revision 1.5
1/* $OpenBSD: softraid_raid0.c,v 1.5 2008/01/24 19:58:08 marco Exp $ */
2/*
3 * Copyright (c) 2008 Marco Peereboom <marco@peereboom.us>
4 *
5 * Permission to use, copy, modify, and distribute this software for any
6 * purpose with or without fee is hereby granted, provided that the above
7 * copyright notice and this permission notice appear in all copies.
8 *
9 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
10 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
11 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
12 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
13 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
14 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
15 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
16 */
17
18#include "bio.h"
19
20#include <sys/param.h>
21#include <sys/systm.h>
22#include <sys/buf.h>
23#include <sys/device.h>
24#include <sys/ioctl.h>
25#include <sys/proc.h>
26#include <sys/malloc.h>
27#include <sys/kernel.h>
28#include <sys/disk.h>
29#include <sys/rwlock.h>
30#include <sys/queue.h>
31#include <sys/fcntl.h>
32#include <sys/disklabel.h>
33#include <sys/mount.h>
34#include <sys/sensors.h>
35#include <sys/stat.h>
36#include <sys/conf.h>
37#include <sys/uio.h>
38
39#include <scsi/scsi_all.h>
40#include <scsi/scsiconf.h>
41#include <scsi/scsi_disk.h>
42
43#include <dev/softraidvar.h>
44#include <dev/rndvar.h>
45
46/* RAID 0 functions */
47int
48sr_raid0_alloc_resources(struct sr_discipline *sd)
49{
50	int			rv = EINVAL;
51
52	if (!sd)
53		return (rv);
54
55	DNPRINTF(SR_D_DIS, "%s: sr_raid0_alloc_resources\n",
56	    DEVNAME(sd->sd_sc));
57
58	if (sr_alloc_wu(sd))
59		goto bad;
60	if (sr_alloc_ccb(sd))
61		goto bad;
62
63	/* setup runtime values */
64	sd->mds.mdd_raid0.sr0_strip_bits =
65	    sr_validate_stripsize(sd->sd_vol.sv_meta.svm_strip_size);
66	if (sd->mds.mdd_raid0.sr0_strip_bits == -1)
67		goto bad;
68
69	rv = 0;
70bad:
71	return (rv);
72}
73
74int
75sr_raid0_free_resources(struct sr_discipline *sd)
76{
77	int			rv = EINVAL;
78
79	if (!sd)
80		return (rv);
81
82	DNPRINTF(SR_D_DIS, "%s: sr_raid0_free_resources\n",
83	    DEVNAME(sd->sd_sc));
84
85	sr_free_wu(sd);
86	sr_free_ccb(sd);
87
88	if (sd->sd_meta)
89		free(sd->sd_meta, M_DEVBUF);
90
91	rv = 0;
92	return (rv);
93}
94
95void
96sr_raid0_set_chunk_state(struct sr_discipline *sd, int c, int new_state)
97{
98	int			old_state, s;
99
100	DNPRINTF(SR_D_STATE, "%s: %s: %s: sr_raid_set_chunk_state %d -> %d\n",
101	    DEVNAME(sd->sd_sc), sd->sd_vol.sv_meta.svm_devname,
102	    sd->sd_vol.sv_chunks[c]->src_meta.scm_devname, c, new_state);
103
104	/* ok to go to splbio since this only happens in error path */
105	s = splbio();
106	old_state = sd->sd_vol.sv_chunks[c]->src_meta.scm_status;
107
108	/* multiple IOs to the same chunk that fail will come through here */
109	if (old_state == new_state)
110		goto done;
111
112	switch (old_state) {
113	case BIOC_SDONLINE:
114		if (new_state == BIOC_SDOFFLINE)
115			break;
116		else
117			goto die;
118		break;
119
120	case BIOC_SDOFFLINE:
121		goto die;
122
123	default:
124die:
125		splx(s); /* XXX */
126		panic("%s: %s: %s: invalid chunk state transition "
127		    "%d -> %d\n", DEVNAME(sd->sd_sc),
128		    sd->sd_vol.sv_meta.svm_devname,
129		    sd->sd_vol.sv_chunks[c]->src_meta.scm_devname,
130		    old_state, new_state);
131		/* NOTREACHED */
132	}
133
134	sd->sd_vol.sv_chunks[c]->src_meta.scm_status = new_state;
135	sd->sd_set_vol_state(sd);
136
137	sd->sd_must_flush = 1;
138	workq_add_task(NULL, 0, sr_save_metadata_callback, sd, NULL);
139done:
140	splx(s);
141}
142
143void
144sr_raid0_set_vol_state(struct sr_discipline *sd)
145{
146	int			states[SR_MAX_STATES];
147	int			new_state, i, s, nd;
148	int			old_state = sd->sd_vol.sv_meta.svm_status;
149
150	DNPRINTF(SR_D_STATE, "%s: %s: sr_raid_set_vol_state\n",
151	    DEVNAME(sd->sd_sc), sd->sd_vol.sv_meta.svm_devname);
152
153	nd = sd->sd_vol.sv_meta.svm_no_chunk;
154
155	for (i = 0; i < SR_MAX_STATES; i++)
156		states[i] = 0;
157
158	for (i = 0; i < nd; i++) {
159		s = sd->sd_vol.sv_chunks[i]->src_meta.scm_status;
160		if (s > SR_MAX_STATES)
161			panic("%s: %s: %s: invalid chunk state",
162			    DEVNAME(sd->sd_sc),
163			    sd->sd_vol.sv_meta.svm_devname,
164			    sd->sd_vol.sv_chunks[i]->src_meta.scm_devname);
165		states[s]++;
166	}
167
168	if (states[BIOC_SDONLINE] == nd)
169		new_state = BIOC_SVONLINE;
170	else
171		new_state = BIOC_SVOFFLINE;
172
173	DNPRINTF(SR_D_STATE, "%s: %s: sr_raid_set_vol_state %d -> %d\n",
174	    DEVNAME(sd->sd_sc), sd->sd_vol.sv_meta.svm_devname,
175	    old_state, new_state);
176
177	switch (old_state) {
178	case BIOC_SVONLINE:
179		if (new_state == BIOC_SVOFFLINE)
180			break;
181		else
182			goto die;
183		break;
184
185	case BIOC_SVOFFLINE:
186		/* XXX this might be a little too much */
187		goto die;
188
189	default:
190die:
191		panic("%s: %s: invalid volume state transition "
192		    "%d -> %d\n", DEVNAME(sd->sd_sc),
193		    sd->sd_vol.sv_meta.svm_devname,
194		    old_state, new_state);
195		/* NOTREACHED */
196	}
197
198	sd->sd_vol.sv_meta.svm_status = new_state;
199}
200
201int
202sr_raid0_rw(struct sr_workunit *wu)
203{
204	struct sr_discipline	*sd = wu->swu_dis;
205	struct scsi_xfer	*xs = wu->swu_xs;
206	struct sr_workunit	*wup;
207	struct sr_ccb		*ccb;
208	struct sr_chunk		*scp;
209	int			s;
210	daddr64_t		blk, lbaoffs, strip_no, chunk, stripoffs;
211	daddr64_t		strip_size, no_chunk, chunkoffs, physoffs;
212	daddr64_t		strip_bits, length, leftover;
213	u_int8_t		*data;
214
215	DNPRINTF(SR_D_DIS, "%s: sr_raid0_rw 0x%02x\n", DEVNAME(sd->sd_sc),
216	    xs->cmd->opcode);
217
218	if (sd->sd_vol.sv_meta.svm_status == BIOC_SVOFFLINE) {
219		DNPRINTF(SR_D_DIS, "%s: sr_raid0_rw device offline\n",
220		    DEVNAME(sd->sd_sc));
221		goto bad;
222	}
223
224	if (xs->datalen == 0) {
225		printf("%s: %s: illegal block count\n",
226		    DEVNAME(sd->sd_sc), sd->sd_vol.sv_meta.svm_devname);
227		goto bad;
228	}
229
230	if (xs->cmdlen == 10)
231		blk = _4btol(((struct scsi_rw_big *)xs->cmd)->addr);
232	else if (xs->cmdlen == 16)
233		blk = _8btol(((struct scsi_rw_16 *)xs->cmd)->addr);
234	else if (xs->cmdlen == 6)
235		blk = _3btol(((struct scsi_rw *)xs->cmd)->addr);
236	else {
237		printf("%s: %s: illegal cmdlen\n", DEVNAME(sd->sd_sc),
238		    sd->sd_vol.sv_meta.svm_devname);
239		goto bad;
240	}
241
242	wu->swu_blk_start = blk;
243	wu->swu_blk_end = blk + (xs->datalen >> DEV_BSHIFT) - 1;
244
245	if (wu->swu_blk_end > sd->sd_vol.sv_meta.svm_size) {
246		DNPRINTF(SR_D_DIS, "%s: sr_raid0_rw out of bounds start: %lld "
247		    "end: %lld length: %d\n", wu->swu_blk_start,
248		    wu->swu_blk_end, xs->datalen);
249
250		sd->sd_scsi_sense.error_code = SSD_ERRCODE_CURRENT |
251		    SSD_ERRCODE_VALID;
252		sd->sd_scsi_sense.flags = SKEY_ILLEGAL_REQUEST;
253		sd->sd_scsi_sense.add_sense_code = 0x21;
254		sd->sd_scsi_sense.add_sense_code_qual = 0x00;
255		sd->sd_scsi_sense.extra_len = 4;
256		goto bad;
257	}
258
259	strip_size = sd->sd_vol.sv_meta.svm_strip_size;
260	strip_bits = sd->mds.mdd_raid0.sr0_strip_bits;
261	no_chunk = sd->sd_vol.sv_meta.svm_no_chunk;
262
263	DNPRINTF(SR_D_DIS, "%s: %s: front end io: lba %lld size %d\n",
264	    DEVNAME(sc), sd->sd_vol.sv_meta.svm_devname, blk, xs->datalen);
265
266	/* all offs are in bytes */
267	lbaoffs = blk << DEV_BSHIFT;
268	strip_no = lbaoffs >> strip_bits;
269	chunk = strip_no % no_chunk;
270	stripoffs = lbaoffs & (strip_size - 1);
271	chunkoffs = (strip_no / no_chunk) << strip_bits;
272	physoffs = chunkoffs + stripoffs +
273	    ((SR_META_OFFSET + SR_META_SIZE) << DEV_BSHIFT);
274	length = MIN(xs->datalen, strip_size - stripoffs);
275	leftover = xs->datalen;
276	data = xs->data;
277	for (wu->swu_io_count = 1;; wu->swu_io_count++) {
278		/* make sure chunk is online */
279		scp = sd->sd_vol.sv_chunks[chunk];
280		if (scp->src_meta.scm_status != BIOC_SDONLINE) {
281			sr_put_ccb(ccb);
282			goto bad;
283		}
284
285		ccb = sr_get_ccb(sd);
286		if (!ccb) {
287			/* should never happen but handle more gracefully */
288			printf("%s: %s: too many ccbs queued\n",
289			    DEVNAME(sd->sd_sc),
290			    sd->sd_vol.sv_meta.svm_devname);
291			goto bad;
292		}
293
294		DNPRINTF(SR_D_DIS, "%s: %s raid io: lbaoffs: %lld "
295		    "strip_no: %lld chunk: %lld stripoffs: %lld "
296		    "chunkoffs: %lld physoffs: %lld length: %lld "
297		    "leftover: %lld data: %p\n",
298		    DEVNAME(sc), sd->sd_vol.sv_meta.svm_devname, lbaoffs,
299		    strip_no, chunk, stripoffs, chunkoffs, physoffs, length,
300		    leftover, data);
301
302		ccb->ccb_buf.b_flags = B_CALL;
303		ccb->ccb_buf.b_iodone = sr_raid0_intr;
304		ccb->ccb_buf.b_blkno = physoffs >> DEV_BSHIFT;
305		ccb->ccb_buf.b_bcount = length;
306		ccb->ccb_buf.b_bufsize = length;
307		ccb->ccb_buf.b_resid = length;
308		ccb->ccb_buf.b_data = data;
309		ccb->ccb_buf.b_error = 0;
310		ccb->ccb_buf.b_proc = curproc;
311		ccb->ccb_wu = wu;
312		ccb->ccb_buf.b_flags |= xs->flags & SCSI_DATA_IN ?
313		    B_READ : B_WRITE;
314		ccb->ccb_target = chunk;
315		ccb->ccb_buf.b_dev = sd->sd_vol.sv_chunks[chunk]->src_dev_mm;
316		ccb->ccb_buf.b_vp = NULL;
317		LIST_INIT(&ccb->ccb_buf.b_dep);
318		TAILQ_INSERT_TAIL(&wu->swu_ccb, ccb, ccb_link);
319
320		DNPRINTF(SR_D_DIS, "%s: %s: sr_raid0: b_bcount: %d "
321		    "b_blkno: %lld b_flags 0x%0x b_data %p\n",
322		    DEVNAME(sd->sd_sc), sd->sd_vol.sv_meta.svm_devname,
323		    ccb->ccb_buf.b_bcount, ccb->ccb_buf.b_blkno,
324		    ccb->ccb_buf.b_flags, ccb->ccb_buf.b_data);
325
326		leftover -= length;
327		if (leftover == 0)
328			break;
329
330		data += length;
331		if (++chunk > no_chunk - 1) {
332			chunk = 0;
333			physoffs += length;
334		} else if (wu->swu_io_count == 1)
335			physoffs -= stripoffs;
336		length = MIN(leftover,strip_size);
337	}
338
339	s = splbio();
340
341	/* walk queue backwards and fill in collider if we have one */
342	TAILQ_FOREACH_REVERSE(wup, &sd->sd_wu_pendq, sr_wu_list, swu_link) {
343		if (wu->swu_blk_end < wup->swu_blk_start ||
344		    wup->swu_blk_end < wu->swu_blk_start)
345			continue;
346
347		/* we have an LBA collision, defer wu */
348		wu->swu_state = SR_WU_DEFERRED;
349		if (wup->swu_collider)
350			/* wu is on deferred queue, append to last wu */
351			while (wup->swu_collider)
352				wup = wup->swu_collider;
353
354		wup->swu_collider = wu;
355		TAILQ_INSERT_TAIL(&sd->sd_wu_defq, wu, swu_link);
356		sd->sd_wu_collisions++;
357		goto queued;
358	}
359	sr_raid_startwu(wu);
360queued:
361	splx(s);
362	return (0);
363bad:
364	/* wu is unwound by sr_put_wu */
365	return (1);
366}
367
368void
369sr_raid0_intr(struct buf *bp)
370{
371	struct sr_ccb		*ccb = (struct sr_ccb *)bp;
372	struct sr_workunit	*wu = ccb->ccb_wu, *wup;
373	struct sr_discipline	*sd = wu->swu_dis;
374	struct scsi_xfer	*xs = wu->swu_xs;
375	struct sr_softc		*sc = sd->sd_sc;
376	int			s, pend;
377
378	DNPRINTF(SR_D_INTR, "%s: sr_intr bp %x xs %x\n",
379	    DEVNAME(sc), bp, xs);
380
381	DNPRINTF(SR_D_INTR, "%s: sr_intr: b_bcount: %d b_resid: %d"
382	    " b_flags: 0x%0x block: %lld target: %d\n", DEVNAME(sc),
383	    ccb->ccb_buf.b_bcount, ccb->ccb_buf.b_resid, ccb->ccb_buf.b_flags,
384	    ccb->ccb_buf.b_blkno, ccb->ccb_target);
385
386	s = splbio();
387
388	if (ccb->ccb_buf.b_flags & B_ERROR) {
389		printf("%s: i/o error on block %lld target: %d b_error: %d\n",
390		    DEVNAME(sc), ccb->ccb_buf.b_blkno, ccb->ccb_target,
391		    ccb->ccb_buf.b_error);
392		DNPRINTF(SR_D_INTR, "%s: i/o error on block %lld target: %d\n",
393		    DEVNAME(sc), ccb->ccb_buf.b_blkno, ccb->ccb_target);
394		wu->swu_ios_failed++;
395		ccb->ccb_state = SR_CCB_FAILED;
396		if (ccb->ccb_target != -1)
397			sd->sd_set_chunk_state(sd, ccb->ccb_target,
398			    BIOC_SDOFFLINE);
399		else
400			panic("%s: invalid target on wu: %p", DEVNAME(sc), wu);
401	} else {
402		ccb->ccb_state = SR_CCB_OK;
403		wu->swu_ios_succeeded++;
404	}
405	wu->swu_ios_complete++;
406
407	DNPRINTF(SR_D_INTR, "%s: sr_intr: comp: %d count: %d failed: %d\n",
408	    DEVNAME(sc), wu->swu_ios_complete, wu->swu_io_count,
409	    wu->swu_ios_failed);
410
411	if (wu->swu_ios_complete >= wu->swu_io_count) {
412		if (wu->swu_ios_failed)
413			goto bad;
414
415		xs->error = XS_NOERROR;
416		xs->resid = 0;
417		xs->flags |= ITSDONE;
418
419		pend = 0;
420		TAILQ_FOREACH(wup, &sd->sd_wu_pendq, swu_link) {
421			if (wu == wup) {
422				/* wu on pendq, remove */
423				TAILQ_REMOVE(&sd->sd_wu_pendq, wu, swu_link);
424				pend = 1;
425
426				if (wu->swu_collider) {
427					/* restart deferred wu */
428					wu->swu_collider->swu_state =
429					    SR_WU_INPROGRESS;
430					TAILQ_REMOVE(&sd->sd_wu_defq,
431					    wu->swu_collider, swu_link);
432					sr_raid_startwu(wu->swu_collider);
433				}
434				break;
435			}
436		}
437
438		if (!pend)
439			printf("%s: wu: %p not on pending queue\n",
440			    DEVNAME(sc), wu);
441
442		/* do not change the order of these 2 functions */
443		sr_put_wu(wu);
444		scsi_done(xs);
445
446		if (sd->sd_sync && sd->sd_wu_pending == 0)
447			wakeup(sd);
448	}
449
450	splx(s);
451	return;
452bad:
453	xs->error = XS_DRIVER_STUFFUP;
454	xs->flags |= ITSDONE;
455	sr_put_wu(wu);
456	scsi_done(xs);
457	splx(s);
458}
459