1/* $OpenBSD: softraid_raid1.c,v 1.67 2021/05/16 15:12:37 deraadt Exp $ */
2/*
3 * Copyright (c) 2007 Marco Peereboom <marco@peereboom.us>
4 *
5 * Permission to use, copy, modify, and distribute this software for any
6 * purpose with or without fee is hereby granted, provided that the above
7 * copyright notice and this permission notice appear in all copies.
8 *
9 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
10 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
11 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
12 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
13 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
14 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
15 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
16 */
17
18#include "bio.h"
19
20#include <sys/param.h>
21#include <sys/systm.h>
22#include <sys/buf.h>
23#include <sys/device.h>
24#include <sys/ioctl.h>
25#include <sys/malloc.h>
26#include <sys/kernel.h>
27#include <sys/disk.h>
28#include <sys/rwlock.h>
29#include <sys/queue.h>
30#include <sys/fcntl.h>
31#include <sys/mount.h>
32#include <sys/sensors.h>
33#include <sys/stat.h>
34#include <sys/task.h>
35#include <sys/conf.h>
36#include <sys/uio.h>
37
38#include <scsi/scsi_all.h>
39#include <scsi/scsiconf.h>
40#include <scsi/scsi_disk.h>
41
42#include <dev/softraidvar.h>
43
44/* RAID 1 functions. */
45int	sr_raid1_create(struct sr_discipline *, struct bioc_createraid *,
46	    int, int64_t);
47int	sr_raid1_assemble(struct sr_discipline *, struct bioc_createraid *,
48	    int, void *);
49int	sr_raid1_init(struct sr_discipline *sd);
50int	sr_raid1_rw(struct sr_workunit *);
51int	sr_raid1_wu_done(struct sr_workunit *);
52void	sr_raid1_set_chunk_state(struct sr_discipline *, int, int);
53void	sr_raid1_set_vol_state(struct sr_discipline *);
54
55/* Discipline initialisation. */
56void
57sr_raid1_discipline_init(struct sr_discipline *sd)
58{
59	/* Fill out discipline members. */
60	sd->sd_type = SR_MD_RAID1;
61	strlcpy(sd->sd_name, "RAID 1", sizeof(sd->sd_name));
62	sd->sd_capabilities = SR_CAP_SYSTEM_DISK | SR_CAP_AUTO_ASSEMBLE |
63	    SR_CAP_REBUILD | SR_CAP_REDUNDANT;
64	sd->sd_max_wu = SR_RAID1_NOWU;
65
66	/* Setup discipline specific function pointers. */
67	sd->sd_assemble = sr_raid1_assemble;
68	sd->sd_create = sr_raid1_create;
69	sd->sd_scsi_rw = sr_raid1_rw;
70	sd->sd_scsi_wu_done = sr_raid1_wu_done;
71	sd->sd_set_chunk_state = sr_raid1_set_chunk_state;
72	sd->sd_set_vol_state = sr_raid1_set_vol_state;
73}
74
75int
76sr_raid1_create(struct sr_discipline *sd, struct bioc_createraid *bc,
77    int no_chunk, int64_t coerced_size)
78{
79	if (no_chunk < 2) {
80		sr_error(sd->sd_sc, "%s requires two or more chunks",
81		    sd->sd_name);
82		return EINVAL;
83	}
84
85	sd->sd_meta->ssdi.ssd_size = coerced_size;
86
87	return sr_raid1_init(sd);
88}
89
/*
 * Assemble handler for a RAID 1 volume.
 *
 * Only `sd' is used; `bc', `no_chunk' and `data' are ignored.  The work is
 * delegated to sr_raid1_init() and its return value is passed back.
 */
int
sr_raid1_assemble(struct sr_discipline *sd, struct bioc_createraid *bc,
    int no_chunk, void *data)
{
	int			rv;

	rv = sr_raid1_init(sd);

	return rv;
}
96
97int
98sr_raid1_init(struct sr_discipline *sd)
99{
100	sd->sd_max_ccb_per_wu = sd->sd_meta->ssdi.ssd_chunk_no;
101
102	return 0;
103}
104
105void
106sr_raid1_set_chunk_state(struct sr_discipline *sd, int c, int new_state)
107{
108	int			old_state, s;
109
110	DNPRINTF(SR_D_STATE, "%s: %s: %s: sr_raid1_set_chunk_state %d -> %d\n",
111	    DEVNAME(sd->sd_sc), sd->sd_meta->ssd_devname,
112	    sd->sd_vol.sv_chunks[c]->src_meta.scmi.scm_devname, c, new_state);
113
114	/* ok to go to splbio since this only happens in error path */
115	s = splbio();
116	old_state = sd->sd_vol.sv_chunks[c]->src_meta.scm_status;
117
118	/* multiple IOs to the same chunk that fail will come through here */
119	if (old_state == new_state)
120		goto done;
121
122	switch (old_state) {
123	case BIOC_SDONLINE:
124		switch (new_state) {
125		case BIOC_SDOFFLINE:
126		case BIOC_SDSCRUB:
127			break;
128		default:
129			goto die;
130		}
131		break;
132
133	case BIOC_SDOFFLINE:
134		switch (new_state) {
135		case BIOC_SDREBUILD:
136		case BIOC_SDHOTSPARE:
137			break;
138		default:
139			goto die;
140		}
141		break;
142
143	case BIOC_SDSCRUB:
144		if (new_state == BIOC_SDONLINE) {
145			;
146		} else
147			goto die;
148		break;
149
150	case BIOC_SDREBUILD:
151		switch (new_state) {
152		case BIOC_SDONLINE:
153			break;
154		case BIOC_SDOFFLINE:
155			/* Abort rebuild since the rebuild chunk disappeared. */
156			sd->sd_reb_abort = 1;
157			break;
158		default:
159			goto die;
160		}
161		break;
162
163	case BIOC_SDHOTSPARE:
164		switch (new_state) {
165		case BIOC_SDOFFLINE:
166		case BIOC_SDREBUILD:
167			break;
168		default:
169			goto die;
170		}
171		break;
172
173	default:
174die:
175		splx(s); /* XXX */
176		panic("%s: %s: %s: invalid chunk state transition %d -> %d",
177		    DEVNAME(sd->sd_sc),
178		    sd->sd_meta->ssd_devname,
179		    sd->sd_vol.sv_chunks[c]->src_meta.scmi.scm_devname,
180		    old_state, new_state);
181		/* NOTREACHED */
182	}
183
184	sd->sd_vol.sv_chunks[c]->src_meta.scm_status = new_state;
185	sd->sd_set_vol_state(sd);
186
187	sd->sd_must_flush = 1;
188	task_add(systq, &sd->sd_meta_save_task);
189done:
190	splx(s);
191}
192
193void
194sr_raid1_set_vol_state(struct sr_discipline *sd)
195{
196	int			states[SR_MAX_STATES];
197	int			new_state, i, s, nd;
198	int			old_state = sd->sd_vol_status;
199
200	DNPRINTF(SR_D_STATE, "%s: %s: sr_raid1_set_vol_state\n",
201	    DEVNAME(sd->sd_sc), sd->sd_meta->ssd_devname);
202
203	nd = sd->sd_meta->ssdi.ssd_chunk_no;
204
205#ifdef SR_DEBUG
206	for (i = 0; i < nd; i++)
207		DNPRINTF(SR_D_STATE, "%s: chunk %d status = %u\n",
208		    DEVNAME(sd->sd_sc), i,
209		    sd->sd_vol.sv_chunks[i]->src_meta.scm_status);
210#endif
211
212	for (i = 0; i < SR_MAX_STATES; i++)
213		states[i] = 0;
214
215	for (i = 0; i < nd; i++) {
216		s = sd->sd_vol.sv_chunks[i]->src_meta.scm_status;
217		if (s >= SR_MAX_STATES)
218			panic("%s: %s: %s: invalid chunk state",
219			    DEVNAME(sd->sd_sc),
220			    sd->sd_meta->ssd_devname,
221			    sd->sd_vol.sv_chunks[i]->src_meta.scmi.scm_devname);
222		states[s]++;
223	}
224
225	if (states[BIOC_SDONLINE] == nd)
226		new_state = BIOC_SVONLINE;
227	else if (states[BIOC_SDONLINE] == 0)
228		new_state = BIOC_SVOFFLINE;
229	else if (states[BIOC_SDSCRUB] != 0)
230		new_state = BIOC_SVSCRUB;
231	else if (states[BIOC_SDREBUILD] != 0)
232		new_state = BIOC_SVREBUILD;
233	else if (states[BIOC_SDOFFLINE] != 0)
234		new_state = BIOC_SVDEGRADED;
235	else {
236		DNPRINTF(SR_D_STATE, "%s: invalid volume state, old state "
237		    "was %d\n", DEVNAME(sd->sd_sc), old_state);
238		panic("invalid volume state");
239	}
240
241	DNPRINTF(SR_D_STATE, "%s: %s: sr_raid1_set_vol_state %d -> %d\n",
242	    DEVNAME(sd->sd_sc), sd->sd_meta->ssd_devname,
243	    old_state, new_state);
244
245	switch (old_state) {
246	case BIOC_SVONLINE:
247		switch (new_state) {
248		case BIOC_SVONLINE: /* can go to same state */
249		case BIOC_SVOFFLINE:
250		case BIOC_SVDEGRADED:
251		case BIOC_SVREBUILD: /* happens on boot */
252			break;
253		default:
254			goto die;
255		}
256		break;
257
258	case BIOC_SVOFFLINE:
259		/* XXX this might be a little too much */
260		goto die;
261
262	case BIOC_SVDEGRADED:
263		switch (new_state) {
264		case BIOC_SVOFFLINE:
265		case BIOC_SVREBUILD:
266		case BIOC_SVDEGRADED: /* can go to the same state */
267			break;
268		default:
269			goto die;
270		}
271		break;
272
273	case BIOC_SVBUILDING:
274		switch (new_state) {
275		case BIOC_SVONLINE:
276		case BIOC_SVOFFLINE:
277		case BIOC_SVBUILDING: /* can go to the same state */
278			break;
279		default:
280			goto die;
281		}
282		break;
283
284	case BIOC_SVSCRUB:
285		switch (new_state) {
286		case BIOC_SVONLINE:
287		case BIOC_SVOFFLINE:
288		case BIOC_SVDEGRADED:
289		case BIOC_SVSCRUB: /* can go to same state */
290			break;
291		default:
292			goto die;
293		}
294		break;
295
296	case BIOC_SVREBUILD:
297		switch (new_state) {
298		case BIOC_SVONLINE:
299		case BIOC_SVOFFLINE:
300		case BIOC_SVDEGRADED:
301		case BIOC_SVREBUILD: /* can go to the same state */
302			break;
303		default:
304			goto die;
305		}
306		break;
307
308	default:
309die:
310		panic("%s: %s: invalid volume state transition %d -> %d",
311		    DEVNAME(sd->sd_sc),
312		    sd->sd_meta->ssd_devname,
313		    old_state, new_state);
314		/* NOTREACHED */
315	}
316
317	sd->sd_vol_status = new_state;
318
319	/* If we have just become degraded, look for a hotspare. */
320	if (new_state == BIOC_SVDEGRADED)
321		task_add(systq, &sd->sd_hotspare_rebuild_task);
322}
323
324int
325sr_raid1_rw(struct sr_workunit *wu)
326{
327	struct sr_discipline	*sd = wu->swu_dis;
328	struct scsi_xfer	*xs = wu->swu_xs;
329	struct sr_ccb		*ccb;
330	struct sr_chunk		*scp;
331	int			ios, chunk, i, rt;
332	daddr_t			blkno;
333
334	/* blkno and scsi error will be handled by sr_validate_io */
335	if (sr_validate_io(wu, &blkno, "sr_raid1_rw"))
336		goto bad;
337
338	if (xs->flags & SCSI_DATA_IN)
339		ios = 1;
340	else
341		ios = sd->sd_meta->ssdi.ssd_chunk_no;
342
343	for (i = 0; i < ios; i++) {
344		if (xs->flags & SCSI_DATA_IN) {
345			rt = 0;
346ragain:
347			/* interleave reads */
348			chunk = sd->mds.mdd_raid1.sr1_counter++ %
349			    sd->sd_meta->ssdi.ssd_chunk_no;
350			scp = sd->sd_vol.sv_chunks[chunk];
351			switch (scp->src_meta.scm_status) {
352			case BIOC_SDONLINE:
353			case BIOC_SDSCRUB:
354				break;
355
356			case BIOC_SDOFFLINE:
357			case BIOC_SDREBUILD:
358			case BIOC_SDHOTSPARE:
359				if (rt++ < sd->sd_meta->ssdi.ssd_chunk_no)
360					goto ragain;
361
362				/* FALLTHROUGH */
363			default:
364				/* volume offline */
365				printf("%s: is offline, cannot read\n",
366				    DEVNAME(sd->sd_sc));
367				goto bad;
368			}
369		} else {
370			/* writes go on all working disks */
371			chunk = i;
372			scp = sd->sd_vol.sv_chunks[chunk];
373			switch (scp->src_meta.scm_status) {
374			case BIOC_SDONLINE:
375			case BIOC_SDSCRUB:
376			case BIOC_SDREBUILD:
377				break;
378
379			case BIOC_SDHOTSPARE: /* should never happen */
380			case BIOC_SDOFFLINE:
381				continue;
382
383			default:
384				goto bad;
385			}
386		}
387
388		ccb = sr_ccb_rw(sd, chunk, blkno, xs->datalen, xs->data,
389		    xs->flags, 0);
390		if (!ccb) {
391			/* should never happen but handle more gracefully */
392			printf("%s: %s: too many ccbs queued\n",
393			    DEVNAME(sd->sd_sc),
394			    sd->sd_meta->ssd_devname);
395			goto bad;
396		}
397		sr_wu_enqueue_ccb(wu, ccb);
398	}
399
400	sr_schedule_wu(wu);
401
402	return (0);
403
404bad:
405	/* wu is unwound by sr_wu_put */
406	return (1);
407}
408
409int
410sr_raid1_wu_done(struct sr_workunit *wu)
411{
412	struct sr_discipline	*sd = wu->swu_dis;
413	struct scsi_xfer	*xs = wu->swu_xs;
414
415	/* If at least one I/O succeeded, we are okay. */
416	if (wu->swu_ios_succeeded > 0) {
417		xs->error = XS_NOERROR;
418		return SR_WU_OK;
419	}
420
421	/* If all I/O failed, retry reads and give up on writes. */
422	if (xs->flags & SCSI_DATA_IN) {
423		printf("%s: retrying read on block %lld\n",
424		    sd->sd_meta->ssd_devname, (long long)wu->swu_blk_start);
425		if (wu->swu_cb_active == 1)
426			panic("%s: sr_raid1_intr_cb",
427			    DEVNAME(sd->sd_sc));
428		sr_wu_release_ccbs(wu);
429		wu->swu_state = SR_WU_RESTART;
430		if (sd->sd_scsi_rw(wu) == 0)
431			return SR_WU_RESTART;
432	} else {
433		printf("%s: permanently failing write on block %lld\n",
434		    sd->sd_meta->ssd_devname, (long long)wu->swu_blk_start);
435	}
436
437	wu->swu_state = SR_WU_FAILED;
438	xs->error = XS_DRIVER_STUFFUP;
439
440	return SR_WU_FAILED;
441}
442