geom_disk.c revision 226737
1/*-
2 * Copyright (c) 2002 Poul-Henning Kamp
3 * Copyright (c) 2002 Networks Associates Technology, Inc.
4 * All rights reserved.
5 *
6 * This software was developed for the FreeBSD Project by Poul-Henning Kamp
7 * and NAI Labs, the Security Research Division of Network Associates, Inc.
8 * under DARPA/SPAWAR contract N66001-01-C-8035 ("CBOSS"), as part of the
9 * DARPA CHATS research program.
10 *
11 * Redistribution and use in source and binary forms, with or without
12 * modification, are permitted provided that the following conditions
13 * are met:
14 * 1. Redistributions of source code must retain the above copyright
15 *    notice, this list of conditions and the following disclaimer.
16 * 2. Redistributions in binary form must reproduce the above copyright
17 *    notice, this list of conditions and the following disclaimer in the
18 *    documentation and/or other materials provided with the distribution.
19 * 3. The names of the authors may not be used to endorse or promote
20 *    products derived from this software without specific prior written
21 *    permission.
22 *
23 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
24 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
25 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
26 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
27 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
28 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
29 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
30 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
31 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
32 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
33 * SUCH DAMAGE.
34 */
35
36#include <sys/cdefs.h>
37__FBSDID("$FreeBSD: head/sys/geom/geom_disk.c 226737 2011-10-25 14:07:17Z pjd $");
38
39#include "opt_geom.h"
40
41#include <sys/param.h>
42#include <sys/systm.h>
43#include <sys/kernel.h>
44#include <sys/sysctl.h>
45#include <sys/bio.h>
46#include <sys/ctype.h>
47#include <sys/fcntl.h>
48#include <sys/malloc.h>
49#include <sys/sbuf.h>
50#include <sys/sysctl.h>
51#include <sys/devicestat.h>
52#include <machine/md_var.h>
53
54#include <sys/lock.h>
55#include <sys/mutex.h>
56#include <geom/geom.h>
57#include <geom/geom_disk.h>
58#include <geom/geom_int.h>
59
60#include <dev/led/led.h>
61
62struct g_disk_softc {
63	struct disk		*dp;
64	struct sysctl_ctx_list	sysctl_ctx;
65	struct sysctl_oid	*sysctl_tree;
66	char			led[64];
67	uint32_t		state;
68};
69
70static struct mtx g_disk_done_mtx;
71
72static g_access_t g_disk_access;
73static g_init_t g_disk_init;
74static g_fini_t g_disk_fini;
75static g_start_t g_disk_start;
76static g_ioctl_t g_disk_ioctl;
77static g_dumpconf_t g_disk_dumpconf;
78
79static struct g_class g_disk_class = {
80	.name = "DISK",
81	.version = G_VERSION,
82	.init = g_disk_init,
83	.fini = g_disk_fini,
84	.start = g_disk_start,
85	.access = g_disk_access,
86	.ioctl = g_disk_ioctl,
87	.dumpconf = g_disk_dumpconf,
88};
89
90SYSCTL_DECL(_kern_geom);
91SYSCTL_NODE(_kern_geom, OID_AUTO, disk, CTLFLAG_RW, 0, "GEOM_DISK stuff");
92
93static void
94g_disk_init(struct g_class *mp __unused)
95{
96
97	mtx_init(&g_disk_done_mtx, "g_disk_done", NULL, MTX_DEF);
98}
99
100static void
101g_disk_fini(struct g_class *mp __unused)
102{
103
104	mtx_destroy(&g_disk_done_mtx);
105}
106
107DECLARE_GEOM_CLASS(g_disk_class, g_disk);
108
109static void __inline
110g_disk_lock_giant(struct disk *dp)
111{
112
113	if (dp->d_flags & DISKFLAG_NEEDSGIANT)
114		mtx_lock(&Giant);
115}
116
117static void __inline
118g_disk_unlock_giant(struct disk *dp)
119{
120
121	if (dp->d_flags & DISKFLAG_NEEDSGIANT)
122		mtx_unlock(&Giant);
123}
124
125static int
126g_disk_access(struct g_provider *pp, int r, int w, int e)
127{
128	struct disk *dp;
129	struct g_disk_softc *sc;
130	int error;
131
132	g_trace(G_T_ACCESS, "g_disk_access(%s, %d, %d, %d)",
133	    pp->name, r, w, e);
134	g_topology_assert();
135	sc = pp->geom->softc;
136	if (sc == NULL || (dp = sc->dp) == NULL || dp->d_destroyed) {
137		/*
138		 * Allow decreasing access count even if disk is not
139		 * avaliable anymore.
140		 */
141		if (r <= 0 && w <= 0 && e <= 0)
142			return (0);
143		return (ENXIO);
144	}
145	r += pp->acr;
146	w += pp->acw;
147	e += pp->ace;
148	error = 0;
149	if ((pp->acr + pp->acw + pp->ace) == 0 && (r + w + e) > 0) {
150		if (dp->d_open != NULL) {
151			g_disk_lock_giant(dp);
152			error = dp->d_open(dp);
153			if (bootverbose && error != 0)
154				printf("Opened disk %s -> %d\n",
155				    pp->name, error);
156			g_disk_unlock_giant(dp);
157		}
158		pp->mediasize = dp->d_mediasize;
159		pp->sectorsize = dp->d_sectorsize;
160		if (dp->d_flags & DISKFLAG_CANDELETE)
161			pp->flags |= G_PF_CANDELETE;
162		else
163			pp->flags &= ~G_PF_CANDELETE;
164		pp->stripeoffset = dp->d_stripeoffset;
165		pp->stripesize = dp->d_stripesize;
166		dp->d_flags |= DISKFLAG_OPEN;
167		if (dp->d_maxsize == 0) {
168			printf("WARNING: Disk drive %s%d has no d_maxsize\n",
169			    dp->d_name, dp->d_unit);
170			dp->d_maxsize = DFLTPHYS;
171		}
172	} else if ((pp->acr + pp->acw + pp->ace) > 0 && (r + w + e) == 0) {
173		if (dp->d_close != NULL) {
174			g_disk_lock_giant(dp);
175			error = dp->d_close(dp);
176			if (error != 0)
177				printf("Closed disk %s -> %d\n",
178				    pp->name, error);
179			g_disk_unlock_giant(dp);
180		}
181		sc->state = G_STATE_ACTIVE;
182		if (sc->led[0] != 0)
183			led_set(sc->led, "0");
184		dp->d_flags &= ~DISKFLAG_OPEN;
185	}
186	return (error);
187}
188
189static void
190g_disk_kerneldump(struct bio *bp, struct disk *dp)
191{
192	struct g_kerneldump *gkd;
193	struct g_geom *gp;
194
195	gkd = (struct g_kerneldump*)bp->bio_data;
196	gp = bp->bio_to->geom;
197	g_trace(G_T_TOPOLOGY, "g_disk_kernedump(%s, %jd, %jd)",
198		gp->name, (intmax_t)gkd->offset, (intmax_t)gkd->length);
199	if (dp->d_dump == NULL) {
200		g_io_deliver(bp, ENODEV);
201		return;
202	}
203	gkd->di.dumper = dp->d_dump;
204	gkd->di.priv = dp;
205	gkd->di.blocksize = dp->d_sectorsize;
206	gkd->di.maxiosize = dp->d_maxsize;
207	gkd->di.mediaoffset = gkd->offset;
208	if ((gkd->offset + gkd->length) > dp->d_mediasize)
209		gkd->length = dp->d_mediasize - gkd->offset;
210	gkd->di.mediasize = gkd->length;
211	g_io_deliver(bp, 0);
212}
213
214static void
215g_disk_setstate(struct bio *bp, struct g_disk_softc *sc)
216{
217	const char *cmd;
218
219	memcpy(&sc->state, bp->bio_data, sizeof(sc->state));
220	if (sc->led[0] != 0) {
221		switch (sc->state) {
222		case G_STATE_FAILED:
223			cmd = "1";
224			break;
225		case G_STATE_REBUILD:
226			cmd = "f5";
227			break;
228		case G_STATE_RESYNC:
229			cmd = "f1";
230			break;
231		default:
232			cmd = "0";
233			break;
234		}
235		led_set(sc->led, cmd);
236	}
237	g_io_deliver(bp, 0);
238}
239
240static void
241g_disk_done(struct bio *bp)
242{
243	struct bio *bp2;
244	struct disk *dp;
245	struct g_disk_softc *sc;
246
247	/* See "notes" for why we need a mutex here */
248	/* XXX: will witness accept a mix of Giant/unGiant drivers here ? */
249	mtx_lock(&g_disk_done_mtx);
250	bp->bio_completed = bp->bio_length - bp->bio_resid;
251
252	bp2 = bp->bio_parent;
253	if (bp2->bio_error == 0)
254		bp2->bio_error = bp->bio_error;
255	bp2->bio_completed += bp->bio_completed;
256	if ((bp->bio_cmd & (BIO_READ|BIO_WRITE|BIO_DELETE)) != 0 &&
257	    (sc = bp2->bio_to->geom->softc) != NULL &&
258	    (dp = sc->dp) != NULL) {
259		devstat_end_transaction_bio(dp->d_devstat, bp);
260	}
261	g_destroy_bio(bp);
262	bp2->bio_inbed++;
263	if (bp2->bio_children == bp2->bio_inbed) {
264		bp2->bio_resid = bp2->bio_bcount - bp2->bio_completed;
265		g_io_deliver(bp2, bp2->bio_error);
266	}
267	mtx_unlock(&g_disk_done_mtx);
268}
269
270static int
271g_disk_ioctl(struct g_provider *pp, u_long cmd, void * data, int fflag, struct thread *td)
272{
273	struct g_geom *gp;
274	struct disk *dp;
275	struct g_disk_softc *sc;
276	int error;
277
278	gp = pp->geom;
279	sc = gp->softc;
280	dp = sc->dp;
281
282	if (dp->d_ioctl == NULL)
283		return (ENOIOCTL);
284	g_disk_lock_giant(dp);
285	error = dp->d_ioctl(dp, cmd, data, fflag, td);
286	g_disk_unlock_giant(dp);
287	return (error);
288}
289
290static void
291g_disk_start(struct bio *bp)
292{
293	struct bio *bp2, *bp3;
294	struct disk *dp;
295	struct g_disk_softc *sc;
296	int error;
297	off_t off;
298
299	sc = bp->bio_to->geom->softc;
300	if (sc == NULL || (dp = sc->dp) == NULL || dp->d_destroyed) {
301		g_io_deliver(bp, ENXIO);
302		return;
303	}
304	error = EJUSTRETURN;
305	switch(bp->bio_cmd) {
306	case BIO_DELETE:
307		if (!(dp->d_flags & DISKFLAG_CANDELETE)) {
308			error = EOPNOTSUPP;
309			break;
310		}
311		/* fall-through */
312	case BIO_READ:
313	case BIO_WRITE:
314		off = 0;
315		bp3 = NULL;
316		bp2 = g_clone_bio(bp);
317		if (bp2 == NULL) {
318			error = ENOMEM;
319			break;
320		}
321		do {
322			bp2->bio_offset += off;
323			bp2->bio_length -= off;
324			bp2->bio_data += off;
325			if (bp2->bio_length > dp->d_maxsize) {
326				/*
327				 * XXX: If we have a stripesize we should really
328				 * use it here.
329				 */
330				bp2->bio_length = dp->d_maxsize;
331				off += dp->d_maxsize;
332				/*
333				 * To avoid a race, we need to grab the next bio
334				 * before we schedule this one.  See "notes".
335				 */
336				bp3 = g_clone_bio(bp);
337				if (bp3 == NULL)
338					bp->bio_error = ENOMEM;
339			}
340			bp2->bio_done = g_disk_done;
341			bp2->bio_pblkno = bp2->bio_offset / dp->d_sectorsize;
342			bp2->bio_bcount = bp2->bio_length;
343			bp2->bio_disk = dp;
344			devstat_start_transaction_bio(dp->d_devstat, bp2);
345			g_disk_lock_giant(dp);
346			dp->d_strategy(bp2);
347			g_disk_unlock_giant(dp);
348			bp2 = bp3;
349			bp3 = NULL;
350		} while (bp2 != NULL);
351		break;
352	case BIO_GETATTR:
353		/* Give the driver a chance to override */
354		if (dp->d_getattr != NULL) {
355			if (bp->bio_disk == NULL)
356				bp->bio_disk = dp;
357			error = dp->d_getattr(bp);
358			if (error != -1)
359				break;
360			error = EJUSTRETURN;
361		}
362		if (g_handleattr_int(bp, "GEOM::candelete",
363		    (dp->d_flags & DISKFLAG_CANDELETE) != 0))
364			break;
365		else if (g_handleattr_int(bp, "GEOM::fwsectors",
366		    dp->d_fwsectors))
367			break;
368		else if (g_handleattr_int(bp, "GEOM::fwheads", dp->d_fwheads))
369			break;
370		else if (g_handleattr_off_t(bp, "GEOM::frontstuff", 0))
371			break;
372		else if (g_handleattr_str(bp, "GEOM::ident", dp->d_ident))
373			break;
374		else if (g_handleattr(bp, "GEOM::hba_vendor",
375		    &dp->d_hba_vendor, 2))
376			break;
377		else if (g_handleattr(bp, "GEOM::hba_device",
378		    &dp->d_hba_device, 2))
379			break;
380		else if (g_handleattr(bp, "GEOM::hba_subvendor",
381		    &dp->d_hba_subvendor, 2))
382			break;
383		else if (g_handleattr(bp, "GEOM::hba_subdevice",
384		    &dp->d_hba_subdevice, 2))
385			break;
386		else if (!strcmp(bp->bio_attribute, "GEOM::kerneldump"))
387			g_disk_kerneldump(bp, dp);
388		else if (!strcmp(bp->bio_attribute, "GEOM::setstate"))
389			g_disk_setstate(bp, sc);
390		else
391			error = ENOIOCTL;
392		break;
393	case BIO_FLUSH:
394		g_trace(G_T_TOPOLOGY, "g_disk_flushcache(%s)",
395		    bp->bio_to->name);
396		if (!(dp->d_flags & DISKFLAG_CANFLUSHCACHE)) {
397			error = EOPNOTSUPP;
398			break;
399		}
400		bp2 = g_clone_bio(bp);
401		if (bp2 == NULL) {
402			g_io_deliver(bp, ENOMEM);
403			return;
404		}
405		bp2->bio_done = g_disk_done;
406		bp2->bio_disk = dp;
407		g_disk_lock_giant(dp);
408		dp->d_strategy(bp2);
409		g_disk_unlock_giant(dp);
410		break;
411	default:
412		error = EOPNOTSUPP;
413		break;
414	}
415	if (error != EJUSTRETURN)
416		g_io_deliver(bp, error);
417	return;
418}
419
420static void
421g_disk_dumpconf(struct sbuf *sb, const char *indent, struct g_geom *gp, struct g_consumer *cp, struct g_provider *pp)
422{
423	struct disk *dp;
424	struct g_disk_softc *sc;
425
426	sc = gp->softc;
427	if (sc == NULL || (dp = sc->dp) == NULL)
428		return;
429	if (indent == NULL) {
430		sbuf_printf(sb, " hd %u", dp->d_fwheads);
431		sbuf_printf(sb, " sc %u", dp->d_fwsectors);
432		return;
433	}
434	if (pp != NULL) {
435		sbuf_printf(sb, "%s<fwheads>%u</fwheads>\n",
436		    indent, dp->d_fwheads);
437		sbuf_printf(sb, "%s<fwsectors>%u</fwsectors>\n",
438		    indent, dp->d_fwsectors);
439		sbuf_printf(sb, "%s<ident>%s</ident>\n", indent, dp->d_ident);
440		sbuf_printf(sb, "%s<descr>%s</descr>\n", indent, dp->d_descr);
441	}
442}
443
444static void
445g_disk_create(void *arg, int flag)
446{
447	struct g_geom *gp;
448	struct g_provider *pp;
449	struct disk *dp;
450	struct g_disk_softc *sc;
451	char tmpstr[80];
452
453	if (flag == EV_CANCEL)
454		return;
455	g_topology_assert();
456	dp = arg;
457	sc = g_malloc(sizeof(*sc), M_WAITOK | M_ZERO);
458	sc->dp = dp;
459	gp = g_new_geomf(&g_disk_class, "%s%d", dp->d_name, dp->d_unit);
460	gp->softc = sc;
461	pp = g_new_providerf(gp, "%s", gp->name);
462	pp->mediasize = dp->d_mediasize;
463	pp->sectorsize = dp->d_sectorsize;
464	if (dp->d_flags & DISKFLAG_CANDELETE)
465		pp->flags |= G_PF_CANDELETE;
466	pp->stripeoffset = dp->d_stripeoffset;
467	pp->stripesize = dp->d_stripesize;
468	if (bootverbose)
469		printf("GEOM: new disk %s\n", gp->name);
470	sysctl_ctx_init(&sc->sysctl_ctx);
471	snprintf(tmpstr, sizeof(tmpstr), "GEOM disk %s", gp->name);
472	sc->sysctl_tree = SYSCTL_ADD_NODE(&sc->sysctl_ctx,
473		SYSCTL_STATIC_CHILDREN(_kern_geom_disk), OID_AUTO, gp->name,
474		CTLFLAG_RD, 0, tmpstr);
475	if (sc->sysctl_tree != NULL) {
476		snprintf(tmpstr, sizeof(tmpstr),
477		    "kern.geom.disk.%s.led", gp->name);
478		TUNABLE_STR_FETCH(tmpstr, sc->led, sizeof(sc->led));
479		SYSCTL_ADD_STRING(&sc->sysctl_ctx,
480		    SYSCTL_CHILDREN(sc->sysctl_tree), OID_AUTO, "led",
481		    CTLFLAG_RW | CTLFLAG_TUN, sc->led, sizeof(sc->led),
482		    "LED name");
483	}
484	pp->private = sc;
485	dp->d_geom = gp;
486	g_error_provider(pp, 0);
487}
488
489static void
490g_disk_destroy(void *ptr, int flag)
491{
492	struct disk *dp;
493	struct g_geom *gp;
494	struct g_disk_softc *sc;
495
496	g_topology_assert();
497	dp = ptr;
498	gp = dp->d_geom;
499	if (gp != NULL) {
500		sc = gp->softc;
501		if (sc->sysctl_tree != NULL) {
502			sysctl_ctx_free(&sc->sysctl_ctx);
503			sc->sysctl_tree = NULL;
504		}
505		if (sc->led[0] != 0) {
506			led_set(sc->led, "0");
507			sc->led[0] = 0;
508		}
509		g_free(sc);
510		gp->softc = NULL;
511		g_wither_geom(gp, ENXIO);
512	}
513	g_free(dp);
514}
515
516/*
517 * We only allow printable characters in disk ident,
518 * the rest is converted to 'x<HH>'.
519 */
520static void
521g_disk_ident_adjust(char *ident, size_t size)
522{
523	char *p, tmp[4], newid[DISK_IDENT_SIZE];
524
525	newid[0] = '\0';
526	for (p = ident; *p != '\0'; p++) {
527		if (isprint(*p)) {
528			tmp[0] = *p;
529			tmp[1] = '\0';
530		} else {
531			snprintf(tmp, sizeof(tmp), "x%02hhx",
532			    *(unsigned char *)p);
533		}
534		if (strlcat(newid, tmp, sizeof(newid)) >= sizeof(newid))
535			break;
536	}
537	bzero(ident, size);
538	strlcpy(ident, newid, size);
539}
540
541struct disk *
542disk_alloc(void)
543{
544
545	return (g_malloc(sizeof(struct disk), M_WAITOK | M_ZERO));
546}
547
548void
549disk_create(struct disk *dp, int version)
550{
551
552	if (version != DISK_VERSION_00 && version != DISK_VERSION_01) {
553		printf("WARNING: Attempt to add disk %s%d %s",
554		    dp->d_name, dp->d_unit,
555		    " using incompatible ABI version of disk(9)\n");
556		printf("WARNING: Ignoring disk %s%d\n",
557		    dp->d_name, dp->d_unit);
558		return;
559	}
560	KASSERT(dp->d_strategy != NULL, ("disk_create need d_strategy"));
561	KASSERT(dp->d_name != NULL, ("disk_create need d_name"));
562	KASSERT(*dp->d_name != 0, ("disk_create need d_name"));
563	KASSERT(strlen(dp->d_name) < SPECNAMELEN - 4, ("disk name too long"));
564	if (dp->d_devstat == NULL)
565		dp->d_devstat = devstat_new_entry(dp->d_name, dp->d_unit,
566		    dp->d_sectorsize, DEVSTAT_ALL_SUPPORTED,
567		    DEVSTAT_TYPE_DIRECT, DEVSTAT_PRIORITY_MAX);
568	dp->d_geom = NULL;
569	g_disk_ident_adjust(dp->d_ident, sizeof(dp->d_ident));
570	g_post_event(g_disk_create, dp, M_WAITOK, dp, NULL);
571}
572
573void
574disk_destroy(struct disk *dp)
575{
576
577	g_cancel_event(dp);
578	dp->d_destroyed = 1;
579	if (dp->d_devstat != NULL)
580		devstat_remove_entry(dp->d_devstat);
581	g_post_event(g_disk_destroy, dp, M_WAITOK, NULL);
582}
583
584void
585disk_gone(struct disk *dp)
586{
587	struct g_geom *gp;
588	struct g_provider *pp;
589
590	gp = dp->d_geom;
591	if (gp != NULL)
592		LIST_FOREACH(pp, &gp->provider, provider)
593			g_wither_provider(pp, ENXIO);
594}
595
596void
597disk_attr_changed(struct disk *dp, const char *attr, int flag)
598{
599	struct g_geom *gp;
600	struct g_provider *pp;
601
602	gp = dp->d_geom;
603	if (gp != NULL)
604		LIST_FOREACH(pp, &gp->provider, provider)
605			(void)g_attr_changed(pp, attr, flag);
606}
607
608static void
609g_kern_disks(void *p, int flag __unused)
610{
611	struct sbuf *sb;
612	struct g_geom *gp;
613	char *sp;
614
615	sb = p;
616	sp = "";
617	g_topology_assert();
618	LIST_FOREACH(gp, &g_disk_class.geom, geom) {
619		sbuf_printf(sb, "%s%s", sp, gp->name);
620		sp = " ";
621	}
622	sbuf_finish(sb);
623}
624
625static int
626sysctl_disks(SYSCTL_HANDLER_ARGS)
627{
628	int error;
629	struct sbuf *sb;
630
631	sb = sbuf_new_auto();
632	g_waitfor_event(g_kern_disks, sb, M_WAITOK, NULL);
633	error = SYSCTL_OUT(req, sbuf_data(sb), sbuf_len(sb) + 1);
634	sbuf_delete(sb);
635	return error;
636}
637
638SYSCTL_PROC(_kern, OID_AUTO, disks,
639    CTLTYPE_STRING | CTLFLAG_RD | CTLFLAG_MPSAFE, NULL, 0,
640    sysctl_disks, "A", "names of available disks");
641