/* softraid_raid0.c revision 1.13 */
/* $OpenBSD: softraid_raid0.c,v 1.13 2009/05/11 14:06:21 jsing Exp $ */
/*
 * Copyright (c) 2008 Marco Peereboom <marco@peereboom.us>
 *
 * Permission to use, copy, modify, and distribute this software for any
 * purpose with or without fee is hereby granted, provided that the above
 * copyright notice and this permission notice appear in all copies.
 *
 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
 */
17
18#include "bio.h"
19
20#include <sys/param.h>
21#include <sys/systm.h>
22#include <sys/buf.h>
23#include <sys/device.h>
24#include <sys/ioctl.h>
25#include <sys/proc.h>
26#include <sys/malloc.h>
27#include <sys/kernel.h>
28#include <sys/disk.h>
29#include <sys/rwlock.h>
30#include <sys/queue.h>
31#include <sys/fcntl.h>
32#include <sys/disklabel.h>
33#include <sys/mount.h>
34#include <sys/sensors.h>
35#include <sys/stat.h>
36#include <sys/conf.h>
37#include <sys/uio.h>
38
39#include <scsi/scsi_all.h>
40#include <scsi/scsiconf.h>
41#include <scsi/scsi_disk.h>
42
43#include <dev/softraidvar.h>
44#include <dev/rndvar.h>
45
/* Prototypes for the RAID 0 discipline functions defined in this file. */
int	sr_raid0_alloc_resources(struct sr_discipline *sd);
int	sr_raid0_free_resources(struct sr_discipline *sd);
int	sr_raid0_rw(struct sr_workunit *wu);
void	sr_raid0_intr(struct buf *bp);
void	sr_raid0_set_chunk_state(struct sr_discipline *sd, int c,
	    int new_state);
void	sr_raid0_set_vol_state(struct sr_discipline *sd);
53
54/* Discipline initialisation. */
55void
56sr_raid0_discipline_init(struct sr_discipline *sd)
57{
58
59	/* Fill out discipline members. */
60	sd->sd_type = SR_MD_RAID0;
61	sd->sd_max_ccb_per_wu =
62	    (MAXPHYS / sd->sd_meta->ssdi.ssd_strip_size + 1) *
63	    SR_RAID0_NOWU * sd->sd_meta->ssdi.ssd_chunk_no;
64	sd->sd_max_wu = SR_RAID0_NOWU;
65
66	/* Setup discipline pointers. */
67	sd->sd_alloc_resources = sr_raid0_alloc_resources;
68	sd->sd_free_resources = sr_raid0_free_resources;
69	sd->sd_start_discipline = NULL;
70	sd->sd_scsi_inquiry = sr_raid_inquiry;
71	sd->sd_scsi_read_cap = sr_raid_read_cap;
72	sd->sd_scsi_tur = sr_raid_tur;
73	sd->sd_scsi_req_sense = sr_raid_request_sense;
74	sd->sd_scsi_start_stop = sr_raid_start_stop;
75	sd->sd_scsi_sync = sr_raid_sync;
76	sd->sd_scsi_rw = sr_raid0_rw;
77	sd->sd_set_chunk_state = sr_raid0_set_chunk_state;
78	sd->sd_set_vol_state = sr_raid0_set_vol_state;
79}
80
81int
82sr_raid0_alloc_resources(struct sr_discipline *sd)
83{
84	int			rv = EINVAL;
85
86	if (!sd)
87		return (rv);
88
89	DNPRINTF(SR_D_DIS, "%s: sr_raid0_alloc_resources\n",
90	    DEVNAME(sd->sd_sc));
91
92	if (sr_wu_alloc(sd))
93		goto bad;
94	if (sr_ccb_alloc(sd))
95		goto bad;
96
97	/* setup runtime values */
98	sd->mds.mdd_raid0.sr0_strip_bits =
99	    sr_validate_stripsize(sd->sd_meta->ssdi.ssd_strip_size);
100	if (sd->mds.mdd_raid0.sr0_strip_bits == -1)
101		goto bad;
102
103	rv = 0;
104bad:
105	return (rv);
106}
107
108int
109sr_raid0_free_resources(struct sr_discipline *sd)
110{
111	int			rv = EINVAL;
112
113	if (!sd)
114		return (rv);
115
116	DNPRINTF(SR_D_DIS, "%s: sr_raid0_free_resources\n",
117	    DEVNAME(sd->sd_sc));
118
119	sr_wu_free(sd);
120	sr_ccb_free(sd);
121
122	rv = 0;
123	return (rv);
124}
125
126void
127sr_raid0_set_chunk_state(struct sr_discipline *sd, int c, int new_state)
128{
129	int			old_state, s;
130
131	DNPRINTF(SR_D_STATE, "%s: %s: %s: sr_raid_set_chunk_state %d -> %d\n",
132	    DEVNAME(sd->sd_sc), sd->sd_meta->ssd_devname,
133	    sd->sd_vol.sv_chunks[c]->src_meta.scmi.scm_devname, c, new_state);
134
135	/* ok to go to splbio since this only happens in error path */
136	s = splbio();
137	old_state = sd->sd_vol.sv_chunks[c]->src_meta.scm_status;
138
139	/* multiple IOs to the same chunk that fail will come through here */
140	if (old_state == new_state)
141		goto done;
142
143	switch (old_state) {
144	case BIOC_SDONLINE:
145		if (new_state == BIOC_SDOFFLINE)
146			break;
147		else
148			goto die;
149		break;
150
151	case BIOC_SDOFFLINE:
152		goto die;
153
154	default:
155die:
156		splx(s); /* XXX */
157		panic("%s: %s: %s: invalid chunk state transition "
158		    "%d -> %d\n", DEVNAME(sd->sd_sc),
159		    sd->sd_meta->ssd_devname,
160		    sd->sd_vol.sv_chunks[c]->src_meta.scmi.scm_devname,
161		    old_state, new_state);
162		/* NOTREACHED */
163	}
164
165	sd->sd_vol.sv_chunks[c]->src_meta.scm_status = new_state;
166	sd->sd_set_vol_state(sd);
167
168	sd->sd_must_flush = 1;
169	workq_add_task(NULL, 0, sr_meta_save_callback, sd, NULL);
170done:
171	splx(s);
172}
173
174void
175sr_raid0_set_vol_state(struct sr_discipline *sd)
176{
177	int			states[SR_MAX_STATES];
178	int			new_state, i, s, nd;
179	int			old_state = sd->sd_vol_status;
180
181	DNPRINTF(SR_D_STATE, "%s: %s: sr_raid_set_vol_state\n",
182	    DEVNAME(sd->sd_sc), sd->sd_meta->ssd_devname);
183
184	nd = sd->sd_meta->ssdi.ssd_chunk_no;
185
186	for (i = 0; i < SR_MAX_STATES; i++)
187		states[i] = 0;
188
189	for (i = 0; i < nd; i++) {
190		s = sd->sd_vol.sv_chunks[i]->src_meta.scm_status;
191		if (s > SR_MAX_STATES)
192			panic("%s: %s: %s: invalid chunk state",
193			    DEVNAME(sd->sd_sc),
194			    sd->sd_meta->ssd_devname,
195			    sd->sd_vol.sv_chunks[i]->src_meta.scmi.scm_devname);
196		states[s]++;
197	}
198
199	if (states[BIOC_SDONLINE] == nd)
200		new_state = BIOC_SVONLINE;
201	else
202		new_state = BIOC_SVOFFLINE;
203
204	DNPRINTF(SR_D_STATE, "%s: %s: sr_raid_set_vol_state %d -> %d\n",
205	    DEVNAME(sd->sd_sc), sd->sd_meta->ssd_devname,
206	    old_state, new_state);
207
208	switch (old_state) {
209	case BIOC_SVONLINE:
210		if (new_state == BIOC_SVOFFLINE || new_state == BIOC_SVONLINE)
211			break;
212		else
213			goto die;
214		break;
215
216	case BIOC_SVOFFLINE:
217		/* XXX this might be a little too much */
218		goto die;
219
220	default:
221die:
222		panic("%s: %s: invalid volume state transition "
223		    "%d -> %d\n", DEVNAME(sd->sd_sc),
224		    sd->sd_meta->ssd_devname,
225		    old_state, new_state);
226		/* NOTREACHED */
227	}
228
229	sd->sd_vol_status = new_state;
230}
231
232int
233sr_raid0_rw(struct sr_workunit *wu)
234{
235	struct sr_discipline	*sd = wu->swu_dis;
236	struct scsi_xfer	*xs = wu->swu_xs;
237	struct sr_ccb		*ccb;
238	struct sr_chunk		*scp;
239	int			s;
240	daddr64_t		blk, lbaoffs, strip_no, chunk, stripoffs;
241	daddr64_t		strip_size, no_chunk, chunkoffs, physoffs;
242	daddr64_t		strip_bits, length, leftover;
243	u_int8_t		*data;
244
245	/* blk and scsi error will be handled by sr_validate_io */
246	if (sr_validate_io(wu, &blk, "sr_raid0_rw"))
247		goto bad;
248
249	strip_size = sd->sd_meta->ssdi.ssd_strip_size;
250	strip_bits = sd->mds.mdd_raid0.sr0_strip_bits;
251	no_chunk = sd->sd_meta->ssdi.ssd_chunk_no;
252
253	DNPRINTF(SR_D_DIS, "%s: %s: front end io: lba %lld size %d\n",
254	    DEVNAME(sd->sd_sc), sd->sd_meta->ssd_devname,
255	    blk, xs->datalen);
256
257	/* all offs are in bytes */
258	lbaoffs = blk << DEV_BSHIFT;
259	strip_no = lbaoffs >> strip_bits;
260	chunk = strip_no % no_chunk;
261	stripoffs = lbaoffs & (strip_size - 1);
262	chunkoffs = (strip_no / no_chunk) << strip_bits;
263	physoffs = chunkoffs + stripoffs +
264	    ((SR_META_OFFSET + SR_META_SIZE) << DEV_BSHIFT);
265	length = MIN(xs->datalen, strip_size - stripoffs);
266	leftover = xs->datalen;
267	data = xs->data;
268	for (wu->swu_io_count = 1;; wu->swu_io_count++) {
269		/* make sure chunk is online */
270		scp = sd->sd_vol.sv_chunks[chunk];
271		if (scp->src_meta.scm_status != BIOC_SDONLINE) {
272			goto bad;
273		}
274
275		ccb = sr_ccb_get(sd);
276		if (!ccb) {
277			/* should never happen but handle more gracefully */
278			printf("%s: %s: too many ccbs queued\n",
279			    DEVNAME(sd->sd_sc),
280			    sd->sd_meta->ssd_devname);
281			goto bad;
282		}
283
284		DNPRINTF(SR_D_DIS, "%s: %s raid io: lbaoffs: %lld "
285		    "strip_no: %lld chunk: %lld stripoffs: %lld "
286		    "chunkoffs: %lld physoffs: %lld length: %lld "
287		    "leftover: %lld data: %p\n",
288		    DEVNAME(sd->sd_sc), sd->sd_meta->ssd_devname, lbaoffs,
289		    strip_no, chunk, stripoffs, chunkoffs, physoffs, length,
290		    leftover, data);
291
292		ccb->ccb_buf.b_flags = B_CALL;
293		ccb->ccb_buf.b_iodone = sr_raid0_intr;
294		ccb->ccb_buf.b_blkno = physoffs >> DEV_BSHIFT;
295		ccb->ccb_buf.b_bcount = length;
296		ccb->ccb_buf.b_bufsize = length;
297		ccb->ccb_buf.b_resid = length;
298		ccb->ccb_buf.b_data = data;
299		ccb->ccb_buf.b_error = 0;
300		ccb->ccb_buf.b_proc = curproc;
301		ccb->ccb_wu = wu;
302		ccb->ccb_buf.b_flags |= xs->flags & SCSI_DATA_IN ?
303		    B_READ : B_WRITE;
304		ccb->ccb_target = chunk;
305		ccb->ccb_buf.b_dev = sd->sd_vol.sv_chunks[chunk]->src_dev_mm;
306		ccb->ccb_buf.b_vp = NULL;
307		LIST_INIT(&ccb->ccb_buf.b_dep);
308		TAILQ_INSERT_TAIL(&wu->swu_ccb, ccb, ccb_link);
309
310		DNPRINTF(SR_D_DIS, "%s: %s: sr_raid0: b_bcount: %d "
311		    "b_blkno: %lld b_flags 0x%0x b_data %p\n",
312		    DEVNAME(sd->sd_sc), sd->sd_meta->ssd_devname,
313		    ccb->ccb_buf.b_bcount, ccb->ccb_buf.b_blkno,
314		    ccb->ccb_buf.b_flags, ccb->ccb_buf.b_data);
315
316		leftover -= length;
317		if (leftover == 0)
318			break;
319
320		data += length;
321		if (++chunk > no_chunk - 1) {
322			chunk = 0;
323			physoffs += length;
324		} else if (wu->swu_io_count == 1)
325			physoffs -= stripoffs;
326		length = MIN(leftover,strip_size);
327	}
328
329	s = splbio();
330
331	if (sr_check_io_collision(wu))
332		goto queued;
333
334	sr_raid_startwu(wu);
335queued:
336	splx(s);
337	return (0);
338bad:
339	/* wu is unwound by sr_wu_put */
340	return (1);
341}
342
343void
344sr_raid0_intr(struct buf *bp)
345{
346	struct sr_ccb		*ccb = (struct sr_ccb *)bp;
347	struct sr_workunit	*wu = ccb->ccb_wu, *wup;
348	struct sr_discipline	*sd = wu->swu_dis;
349	struct scsi_xfer	*xs = wu->swu_xs;
350	struct sr_softc		*sc = sd->sd_sc;
351	int			s, pend;
352
353	DNPRINTF(SR_D_INTR, "%s: sr_intr bp %x xs %x\n",
354	    DEVNAME(sc), bp, xs);
355
356	DNPRINTF(SR_D_INTR, "%s: sr_intr: b_bcount: %d b_resid: %d"
357	    " b_flags: 0x%0x block: %lld target: %d\n", DEVNAME(sc),
358	    ccb->ccb_buf.b_bcount, ccb->ccb_buf.b_resid, ccb->ccb_buf.b_flags,
359	    ccb->ccb_buf.b_blkno, ccb->ccb_target);
360
361	s = splbio();
362
363	if (ccb->ccb_buf.b_flags & B_ERROR) {
364		printf("%s: i/o error on block %lld target: %d b_error: %d\n",
365		    DEVNAME(sc), ccb->ccb_buf.b_blkno, ccb->ccb_target,
366		    ccb->ccb_buf.b_error);
367		DNPRINTF(SR_D_INTR, "%s: i/o error on block %lld target: %d\n",
368		    DEVNAME(sc), ccb->ccb_buf.b_blkno, ccb->ccb_target);
369		wu->swu_ios_failed++;
370		ccb->ccb_state = SR_CCB_FAILED;
371		if (ccb->ccb_target != -1)
372			sd->sd_set_chunk_state(sd, ccb->ccb_target,
373			    BIOC_SDOFFLINE);
374		else
375			panic("%s: invalid target on wu: %p", DEVNAME(sc), wu);
376	} else {
377		ccb->ccb_state = SR_CCB_OK;
378		wu->swu_ios_succeeded++;
379	}
380	wu->swu_ios_complete++;
381
382	DNPRINTF(SR_D_INTR, "%s: sr_intr: comp: %d count: %d failed: %d\n",
383	    DEVNAME(sc), wu->swu_ios_complete, wu->swu_io_count,
384	    wu->swu_ios_failed);
385
386	if (wu->swu_ios_complete >= wu->swu_io_count) {
387		if (wu->swu_ios_failed)
388			goto bad;
389
390		xs->error = XS_NOERROR;
391		xs->resid = 0;
392		xs->flags |= ITSDONE;
393
394		pend = 0;
395		TAILQ_FOREACH(wup, &sd->sd_wu_pendq, swu_link) {
396			if (wu == wup) {
397				/* wu on pendq, remove */
398				TAILQ_REMOVE(&sd->sd_wu_pendq, wu, swu_link);
399				pend = 1;
400
401				if (wu->swu_collider) {
402					/* restart deferred wu */
403					wu->swu_collider->swu_state =
404					    SR_WU_INPROGRESS;
405					TAILQ_REMOVE(&sd->sd_wu_defq,
406					    wu->swu_collider, swu_link);
407					sr_raid_startwu(wu->swu_collider);
408				}
409				break;
410			}
411		}
412
413		if (!pend)
414			printf("%s: wu: %p not on pending queue\n",
415			    DEVNAME(sc), wu);
416
417		/* do not change the order of these 2 functions */
418		sr_wu_put(wu);
419		sr_scsi_done(sd, xs);
420
421		if (sd->sd_sync && sd->sd_wu_pending == 0)
422			wakeup(sd);
423	}
424
425	splx(s);
426	return;
427bad:
428	xs->error = XS_DRIVER_STUFFUP;
429	xs->flags |= ITSDONE;
430	sr_wu_put(wu);
431	sr_scsi_done(sd, xs);
432	splx(s);
433}
434