geom_disk.c revision 226735
1/*-
2 * Copyright (c) 2002 Poul-Henning Kamp
3 * Copyright (c) 2002 Networks Associates Technology, Inc.
4 * All rights reserved.
5 *
6 * This software was developed for the FreeBSD Project by Poul-Henning Kamp
7 * and NAI Labs, the Security Research Division of Network Associates, Inc.
8 * under DARPA/SPAWAR contract N66001-01-C-8035 ("CBOSS"), as part of the
9 * DARPA CHATS research program.
10 *
11 * Redistribution and use in source and binary forms, with or without
12 * modification, are permitted provided that the following conditions
13 * are met:
14 * 1. Redistributions of source code must retain the above copyright
15 *    notice, this list of conditions and the following disclaimer.
16 * 2. Redistributions in binary form must reproduce the above copyright
17 *    notice, this list of conditions and the following disclaimer in the
18 *    documentation and/or other materials provided with the distribution.
19 * 3. The names of the authors may not be used to endorse or promote
20 *    products derived from this software without specific prior written
21 *    permission.
22 *
23 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
24 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
25 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
26 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
27 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
28 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
29 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
30 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
31 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
32 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
33 * SUCH DAMAGE.
34 */
35
36#include <sys/cdefs.h>
37__FBSDID("$FreeBSD: head/sys/geom/geom_disk.c 226735 2011-10-25 14:04:59Z pjd $");
38
39#include "opt_geom.h"
40
41#include <sys/param.h>
42#include <sys/systm.h>
43#include <sys/kernel.h>
44#include <sys/sysctl.h>
45#include <sys/bio.h>
46#include <sys/ctype.h>
47#include <sys/fcntl.h>
48#include <sys/malloc.h>
49#include <sys/sbuf.h>
50#include <sys/sysctl.h>
51#include <sys/devicestat.h>
52#include <machine/md_var.h>
53
54#include <sys/lock.h>
55#include <sys/mutex.h>
56#include <geom/geom.h>
57#include <geom/geom_disk.h>
58#include <geom/geom_int.h>
59
60#include <dev/led/led.h>
61
62struct g_disk_softc {
63	struct disk		*dp;
64	struct sysctl_ctx_list	sysctl_ctx;
65	struct sysctl_oid	*sysctl_tree;
66	char			led[64];
67	uint32_t		state;
68};
69
70static struct mtx g_disk_done_mtx;
71
72static g_access_t g_disk_access;
73static g_init_t g_disk_init;
74static g_fini_t g_disk_fini;
75static g_start_t g_disk_start;
76static g_ioctl_t g_disk_ioctl;
77static g_dumpconf_t g_disk_dumpconf;
78
79static struct g_class g_disk_class = {
80	.name = "DISK",
81	.version = G_VERSION,
82	.init = g_disk_init,
83	.fini = g_disk_fini,
84	.start = g_disk_start,
85	.access = g_disk_access,
86	.ioctl = g_disk_ioctl,
87	.dumpconf = g_disk_dumpconf,
88};
89
90SYSCTL_DECL(_kern_geom);
91SYSCTL_NODE(_kern_geom, OID_AUTO, disk, CTLFLAG_RW, 0, "GEOM_DISK stuff");
92
93static void
94g_disk_init(struct g_class *mp __unused)
95{
96
97	mtx_init(&g_disk_done_mtx, "g_disk_done", NULL, MTX_DEF);
98}
99
100static void
101g_disk_fini(struct g_class *mp __unused)
102{
103
104	mtx_destroy(&g_disk_done_mtx);
105}
106
/* Register the DISK class descriptor with GEOM. */
DECLARE_GEOM_CLASS(g_disk_class, g_disk);
108
109static void __inline
110g_disk_lock_giant(struct disk *dp)
111{
112	if (dp->d_flags & DISKFLAG_NEEDSGIANT)
113		mtx_lock(&Giant);
114}
115
116static void __inline
117g_disk_unlock_giant(struct disk *dp)
118{
119	if (dp->d_flags & DISKFLAG_NEEDSGIANT)
120		mtx_unlock(&Giant);
121}
122
123static int
124g_disk_access(struct g_provider *pp, int r, int w, int e)
125{
126	struct disk *dp;
127	struct g_disk_softc *sc;
128	int error;
129
130	g_trace(G_T_ACCESS, "g_disk_access(%s, %d, %d, %d)",
131	    pp->name, r, w, e);
132	g_topology_assert();
133	sc = pp->geom->softc;
134	if (sc == NULL || (dp = sc->dp) == NULL || dp->d_destroyed) {
135		/*
136		 * Allow decreasing access count even if disk is not
137		 * avaliable anymore.
138		 */
139		if (r <= 0 && w <= 0 && e <= 0)
140			return (0);
141		return (ENXIO);
142	}
143	r += pp->acr;
144	w += pp->acw;
145	e += pp->ace;
146	error = 0;
147	if ((pp->acr + pp->acw + pp->ace) == 0 && (r + w + e) > 0) {
148		if (dp->d_open != NULL) {
149			g_disk_lock_giant(dp);
150			error = dp->d_open(dp);
151			if (bootverbose && error != 0)
152				printf("Opened disk %s -> %d\n",
153				    pp->name, error);
154			g_disk_unlock_giant(dp);
155		}
156		pp->mediasize = dp->d_mediasize;
157		pp->sectorsize = dp->d_sectorsize;
158		if (dp->d_flags & DISKFLAG_CANDELETE)
159			pp->flags |= G_PF_CANDELETE;
160		else
161			pp->flags &= ~G_PF_CANDELETE;
162		pp->stripeoffset = dp->d_stripeoffset;
163		pp->stripesize = dp->d_stripesize;
164		dp->d_flags |= DISKFLAG_OPEN;
165		if (dp->d_maxsize == 0) {
166			printf("WARNING: Disk drive %s%d has no d_maxsize\n",
167			    dp->d_name, dp->d_unit);
168			dp->d_maxsize = DFLTPHYS;
169		}
170	} else if ((pp->acr + pp->acw + pp->ace) > 0 && (r + w + e) == 0) {
171		if (dp->d_close != NULL) {
172			g_disk_lock_giant(dp);
173			error = dp->d_close(dp);
174			if (error != 0)
175				printf("Closed disk %s -> %d\n",
176				    pp->name, error);
177			g_disk_unlock_giant(dp);
178		}
179		sc->state = G_STATE_ACTIVE;
180		if (sc->led[0] != 0)
181			led_set(sc->led, "0");
182		dp->d_flags &= ~DISKFLAG_OPEN;
183	}
184	return (error);
185}
186
187static void
188g_disk_kerneldump(struct bio *bp, struct disk *dp)
189{
190	struct g_kerneldump *gkd;
191	struct g_geom *gp;
192
193	gkd = (struct g_kerneldump*)bp->bio_data;
194	gp = bp->bio_to->geom;
195	g_trace(G_T_TOPOLOGY, "g_disk_kernedump(%s, %jd, %jd)",
196		gp->name, (intmax_t)gkd->offset, (intmax_t)gkd->length);
197	if (dp->d_dump == NULL) {
198		g_io_deliver(bp, ENODEV);
199		return;
200	}
201	gkd->di.dumper = dp->d_dump;
202	gkd->di.priv = dp;
203	gkd->di.blocksize = dp->d_sectorsize;
204	gkd->di.maxiosize = dp->d_maxsize;
205	gkd->di.mediaoffset = gkd->offset;
206	if ((gkd->offset + gkd->length) > dp->d_mediasize)
207		gkd->length = dp->d_mediasize - gkd->offset;
208	gkd->di.mediasize = gkd->length;
209	g_io_deliver(bp, 0);
210}
211
212static void
213g_disk_setstate(struct bio *bp, struct g_disk_softc *sc)
214{
215	const char *cmd;
216
217	memcpy(&sc->state, bp->bio_data, sizeof(sc->state));
218	if (sc->led[0] != 0) {
219		switch (sc->state) {
220		case G_STATE_FAILED:
221			cmd = "1";
222			break;
223		case G_STATE_REBUILD:
224			cmd = "f5";
225			break;
226		case G_STATE_RESYNC:
227			cmd = "f1";
228			break;
229		default:
230			cmd = "0";
231			break;
232		}
233		led_set(sc->led, cmd);
234	}
235	g_io_deliver(bp, 0);
236}
237
238static void
239g_disk_done(struct bio *bp)
240{
241	struct bio *bp2;
242	struct disk *dp;
243	struct g_disk_softc *sc;
244
245	/* See "notes" for why we need a mutex here */
246	/* XXX: will witness accept a mix of Giant/unGiant drivers here ? */
247	mtx_lock(&g_disk_done_mtx);
248	bp->bio_completed = bp->bio_length - bp->bio_resid;
249
250	bp2 = bp->bio_parent;
251	if (bp2->bio_error == 0)
252		bp2->bio_error = bp->bio_error;
253	bp2->bio_completed += bp->bio_completed;
254	if ((bp->bio_cmd & (BIO_READ|BIO_WRITE|BIO_DELETE)) &&
255	    (sc = bp2->bio_to->geom->softc) &&
256	    (dp = sc->dp)) {
257		devstat_end_transaction_bio(dp->d_devstat, bp);
258	}
259	g_destroy_bio(bp);
260	bp2->bio_inbed++;
261	if (bp2->bio_children == bp2->bio_inbed) {
262		bp2->bio_resid = bp2->bio_bcount - bp2->bio_completed;
263		g_io_deliver(bp2, bp2->bio_error);
264	}
265	mtx_unlock(&g_disk_done_mtx);
266}
267
268static int
269g_disk_ioctl(struct g_provider *pp, u_long cmd, void * data, int fflag, struct thread *td)
270{
271	struct g_geom *gp;
272	struct disk *dp;
273	struct g_disk_softc *sc;
274	int error;
275
276	gp = pp->geom;
277	sc = gp->softc;
278	dp = sc->dp;
279
280	if (dp->d_ioctl == NULL)
281		return (ENOIOCTL);
282	g_disk_lock_giant(dp);
283	error = dp->d_ioctl(dp, cmd, data, fflag, td);
284	g_disk_unlock_giant(dp);
285	return(error);
286}
287
288static void
289g_disk_start(struct bio *bp)
290{
291	struct bio *bp2, *bp3;
292	struct disk *dp;
293	struct g_disk_softc *sc;
294	int error;
295	off_t off;
296
297	sc = bp->bio_to->geom->softc;
298	if (sc == NULL || (dp = sc->dp) == NULL || dp->d_destroyed) {
299		g_io_deliver(bp, ENXIO);
300		return;
301	}
302	error = EJUSTRETURN;
303	switch(bp->bio_cmd) {
304	case BIO_DELETE:
305		if (!(dp->d_flags & DISKFLAG_CANDELETE)) {
306			error = 0;
307			break;
308		}
309		/* fall-through */
310	case BIO_READ:
311	case BIO_WRITE:
312		off = 0;
313		bp3 = NULL;
314		bp2 = g_clone_bio(bp);
315		if (bp2 == NULL) {
316			error = ENOMEM;
317			break;
318		}
319		do {
320			bp2->bio_offset += off;
321			bp2->bio_length -= off;
322			bp2->bio_data += off;
323			if (bp2->bio_length > dp->d_maxsize) {
324				/*
325				 * XXX: If we have a stripesize we should really
326				 * use it here.
327				 */
328				bp2->bio_length = dp->d_maxsize;
329				off += dp->d_maxsize;
330				/*
331				 * To avoid a race, we need to grab the next bio
332				 * before we schedule this one.  See "notes".
333				 */
334				bp3 = g_clone_bio(bp);
335				if (bp3 == NULL)
336					bp->bio_error = ENOMEM;
337			}
338			bp2->bio_done = g_disk_done;
339			bp2->bio_pblkno = bp2->bio_offset / dp->d_sectorsize;
340			bp2->bio_bcount = bp2->bio_length;
341			bp2->bio_disk = dp;
342			devstat_start_transaction_bio(dp->d_devstat, bp2);
343			g_disk_lock_giant(dp);
344			dp->d_strategy(bp2);
345			g_disk_unlock_giant(dp);
346			bp2 = bp3;
347			bp3 = NULL;
348		} while (bp2 != NULL);
349		break;
350	case BIO_GETATTR:
351		/* Give the driver a chance to override */
352		if (dp->d_getattr != NULL) {
353			if (bp->bio_disk == NULL)
354				bp->bio_disk = dp;
355			error = dp->d_getattr(bp);
356			if (error != -1)
357				break;
358			error = EJUSTRETURN;
359		}
360		if (g_handleattr_int(bp, "GEOM::candelete",
361		    (dp->d_flags & DISKFLAG_CANDELETE) != 0))
362			break;
363		else if (g_handleattr_int(bp, "GEOM::fwsectors",
364		    dp->d_fwsectors))
365			break;
366		else if (g_handleattr_int(bp, "GEOM::fwheads", dp->d_fwheads))
367			break;
368		else if (g_handleattr_off_t(bp, "GEOM::frontstuff", 0))
369			break;
370		else if (g_handleattr_str(bp, "GEOM::ident", dp->d_ident))
371			break;
372		else if (g_handleattr(bp, "GEOM::hba_vendor",
373		    &dp->d_hba_vendor, 2))
374			break;
375		else if (g_handleattr(bp, "GEOM::hba_device",
376		    &dp->d_hba_device, 2))
377			break;
378		else if (g_handleattr(bp, "GEOM::hba_subvendor",
379		    &dp->d_hba_subvendor, 2))
380			break;
381		else if (g_handleattr(bp, "GEOM::hba_subdevice",
382		    &dp->d_hba_subdevice, 2))
383			break;
384		else if (!strcmp(bp->bio_attribute, "GEOM::kerneldump"))
385			g_disk_kerneldump(bp, dp);
386		else if (!strcmp(bp->bio_attribute, "GEOM::setstate"))
387			g_disk_setstate(bp, sc);
388		else
389			error = ENOIOCTL;
390		break;
391	case BIO_FLUSH:
392		g_trace(G_T_TOPOLOGY, "g_disk_flushcache(%s)",
393		    bp->bio_to->name);
394		if (!(dp->d_flags & DISKFLAG_CANFLUSHCACHE)) {
395			g_io_deliver(bp, ENODEV);
396			return;
397		}
398		bp2 = g_clone_bio(bp);
399		if (bp2 == NULL) {
400			g_io_deliver(bp, ENOMEM);
401			return;
402		}
403		bp2->bio_done = g_disk_done;
404		bp2->bio_disk = dp;
405		g_disk_lock_giant(dp);
406		dp->d_strategy(bp2);
407		g_disk_unlock_giant(dp);
408		break;
409	default:
410		error = EOPNOTSUPP;
411		break;
412	}
413	if (error != EJUSTRETURN)
414		g_io_deliver(bp, error);
415	return;
416}
417
418static void
419g_disk_dumpconf(struct sbuf *sb, const char *indent, struct g_geom *gp, struct g_consumer *cp, struct g_provider *pp)
420{
421	struct disk *dp;
422	struct g_disk_softc *sc;
423
424	sc = gp->softc;
425	if (sc == NULL || (dp = sc->dp) == NULL)
426		return;
427	if (indent == NULL) {
428		sbuf_printf(sb, " hd %u", dp->d_fwheads);
429		sbuf_printf(sb, " sc %u", dp->d_fwsectors);
430		return;
431	}
432	if (pp != NULL) {
433		sbuf_printf(sb, "%s<fwheads>%u</fwheads>\n",
434		    indent, dp->d_fwheads);
435		sbuf_printf(sb, "%s<fwsectors>%u</fwsectors>\n",
436		    indent, dp->d_fwsectors);
437		sbuf_printf(sb, "%s<ident>%s</ident>\n", indent, dp->d_ident);
438		sbuf_printf(sb, "%s<descr>%s</descr>\n", indent, dp->d_descr);
439	}
440}
441
442static void
443g_disk_create(void *arg, int flag)
444{
445	struct g_geom *gp;
446	struct g_provider *pp;
447	struct disk *dp;
448	struct g_disk_softc *sc;
449	char tmpstr[80];
450
451	if (flag == EV_CANCEL)
452		return;
453	g_topology_assert();
454	dp = arg;
455	sc = g_malloc(sizeof(*sc), M_WAITOK | M_ZERO);
456	sc->dp = dp;
457	gp = g_new_geomf(&g_disk_class, "%s%d", dp->d_name, dp->d_unit);
458	gp->softc = sc;
459	pp = g_new_providerf(gp, "%s", gp->name);
460	pp->mediasize = dp->d_mediasize;
461	pp->sectorsize = dp->d_sectorsize;
462	if (dp->d_flags & DISKFLAG_CANDELETE)
463		pp->flags |= G_PF_CANDELETE;
464	pp->stripeoffset = dp->d_stripeoffset;
465	pp->stripesize = dp->d_stripesize;
466	if (bootverbose)
467		printf("GEOM: new disk %s\n", gp->name);
468	sysctl_ctx_init(&sc->sysctl_ctx);
469	snprintf(tmpstr, sizeof(tmpstr), "GEOM disk %s", gp->name);
470	sc->sysctl_tree = SYSCTL_ADD_NODE(&sc->sysctl_ctx,
471		SYSCTL_STATIC_CHILDREN(_kern_geom_disk), OID_AUTO, gp->name,
472		CTLFLAG_RD, 0, tmpstr);
473	if (sc->sysctl_tree != NULL) {
474		snprintf(tmpstr, sizeof(tmpstr),
475		    "kern.geom.disk.%s.led", gp->name);
476		TUNABLE_STR_FETCH(tmpstr, sc->led, sizeof(sc->led));
477		SYSCTL_ADD_STRING(&sc->sysctl_ctx,
478		    SYSCTL_CHILDREN(sc->sysctl_tree), OID_AUTO, "led",
479		    CTLFLAG_RW | CTLFLAG_TUN, sc->led, sizeof(sc->led),
480		    "LED name");
481	}
482	pp->private = sc;
483	dp->d_geom = gp;
484	g_error_provider(pp, 0);
485}
486
487static void
488g_disk_destroy(void *ptr, int flag)
489{
490	struct disk *dp;
491	struct g_geom *gp;
492	struct g_disk_softc *sc;
493
494	g_topology_assert();
495	dp = ptr;
496	gp = dp->d_geom;
497	if (gp != NULL) {
498		sc = gp->softc;
499		if (sc->sysctl_tree != NULL) {
500			sysctl_ctx_free(&sc->sysctl_ctx);
501			sc->sysctl_tree = NULL;
502		}
503		if (sc->led[0] != 0) {
504			led_set(sc->led, "0");
505			sc->led[0] = 0;
506		}
507		g_free(sc);
508		gp->softc = NULL;
509		g_wither_geom(gp, ENXIO);
510	}
511	g_free(dp);
512}
513
514/*
515 * We only allow printable characters in disk ident,
516 * the rest is converted to 'x<HH>'.
517 */
518static void
519g_disk_ident_adjust(char *ident, size_t size)
520{
521	char *p, tmp[4], newid[DISK_IDENT_SIZE];
522
523	newid[0] = '\0';
524	for (p = ident; *p != '\0'; p++) {
525		if (isprint(*p)) {
526			tmp[0] = *p;
527			tmp[1] = '\0';
528		} else {
529			snprintf(tmp, sizeof(tmp), "x%02hhx",
530			    *(unsigned char *)p);
531		}
532		if (strlcat(newid, tmp, sizeof(newid)) >= sizeof(newid))
533			break;
534	}
535	bzero(ident, size);
536	strlcpy(ident, newid, size);
537}
538
539struct disk *
540disk_alloc(void)
541{
542
543	return (g_malloc(sizeof(struct disk), M_WAITOK | M_ZERO));
544}
545
546void
547disk_create(struct disk *dp, int version)
548{
549	if (version != DISK_VERSION_00 && version != DISK_VERSION_01) {
550		printf("WARNING: Attempt to add disk %s%d %s",
551		    dp->d_name, dp->d_unit,
552		    " using incompatible ABI version of disk(9)\n");
553		printf("WARNING: Ignoring disk %s%d\n",
554		    dp->d_name, dp->d_unit);
555		return;
556	}
557	KASSERT(dp->d_strategy != NULL, ("disk_create need d_strategy"));
558	KASSERT(dp->d_name != NULL, ("disk_create need d_name"));
559	KASSERT(*dp->d_name != 0, ("disk_create need d_name"));
560	KASSERT(strlen(dp->d_name) < SPECNAMELEN - 4, ("disk name too long"));
561	if (dp->d_devstat == NULL)
562		dp->d_devstat = devstat_new_entry(dp->d_name, dp->d_unit,
563		    dp->d_sectorsize, DEVSTAT_ALL_SUPPORTED,
564		    DEVSTAT_TYPE_DIRECT, DEVSTAT_PRIORITY_MAX);
565	dp->d_geom = NULL;
566	g_disk_ident_adjust(dp->d_ident, sizeof(dp->d_ident));
567	g_post_event(g_disk_create, dp, M_WAITOK, dp, NULL);
568}
569
570void
571disk_destroy(struct disk *dp)
572{
573
574	g_cancel_event(dp);
575	dp->d_destroyed = 1;
576	if (dp->d_devstat != NULL)
577		devstat_remove_entry(dp->d_devstat);
578	g_post_event(g_disk_destroy, dp, M_WAITOK, NULL);
579}
580
581void
582disk_gone(struct disk *dp)
583{
584	struct g_geom *gp;
585	struct g_provider *pp;
586
587	gp = dp->d_geom;
588	if (gp != NULL)
589		LIST_FOREACH(pp, &gp->provider, provider)
590			g_wither_provider(pp, ENXIO);
591}
592
593void
594disk_attr_changed(struct disk *dp, const char *attr, int flag)
595{
596	struct g_geom *gp;
597	struct g_provider *pp;
598
599	gp = dp->d_geom;
600	if (gp != NULL)
601		LIST_FOREACH(pp, &gp->provider, provider)
602			(void)g_attr_changed(pp, attr, flag);
603}
604
605static void
606g_kern_disks(void *p, int flag __unused)
607{
608	struct sbuf *sb;
609	struct g_geom *gp;
610	char *sp;
611
612	sb = p;
613	sp = "";
614	g_topology_assert();
615	LIST_FOREACH(gp, &g_disk_class.geom, geom) {
616		sbuf_printf(sb, "%s%s", sp, gp->name);
617		sp = " ";
618	}
619	sbuf_finish(sb);
620}
621
622static int
623sysctl_disks(SYSCTL_HANDLER_ARGS)
624{
625	int error;
626	struct sbuf *sb;
627
628	sb = sbuf_new_auto();
629	g_waitfor_event(g_kern_disks, sb, M_WAITOK, NULL);
630	error = SYSCTL_OUT(req, sbuf_data(sb), sbuf_len(sb) + 1);
631	sbuf_delete(sb);
632	return error;
633}
634
635SYSCTL_PROC(_kern, OID_AUTO, disks,
636    CTLTYPE_STRING | CTLFLAG_RD | CTLFLAG_MPSAFE, NULL, 0,
637    sysctl_disks, "A", "names of available disks");
638
639