geom_dev.c revision 126080
192108Sphk/*-
292108Sphk * Copyright (c) 2002 Poul-Henning Kamp
392108Sphk * Copyright (c) 2002 Networks Associates Technology, Inc.
492108Sphk * All rights reserved.
592108Sphk *
692108Sphk * This software was developed for the FreeBSD Project by Poul-Henning Kamp
792108Sphk * and NAI Labs, the Security Research Division of Network Associates, Inc.
892108Sphk * under DARPA/SPAWAR contract N66001-01-C-8035 ("CBOSS"), as part of the
992108Sphk * DARPA CHATS research program.
1092108Sphk *
1192108Sphk * Redistribution and use in source and binary forms, with or without
1292108Sphk * modification, are permitted provided that the following conditions
1392108Sphk * are met:
1492108Sphk * 1. Redistributions of source code must retain the above copyright
1592108Sphk *    notice, this list of conditions and the following disclaimer.
1692108Sphk * 2. Redistributions in binary form must reproduce the above copyright
1792108Sphk *    notice, this list of conditions and the following disclaimer in the
1892108Sphk *    documentation and/or other materials provided with the distribution.
1992108Sphk * 3. The names of the authors may not be used to endorse or promote
2092108Sphk *    products derived from this software without specific prior written
2192108Sphk *    permission.
2292108Sphk *
2392108Sphk * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
2492108Sphk * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
2592108Sphk * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
2692108Sphk * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
2792108Sphk * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
2892108Sphk * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
2992108Sphk * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
3092108Sphk * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
3192108Sphk * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
3292108Sphk * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
3392108Sphk * SUCH DAMAGE.
3492108Sphk */
3592108Sphk
36116196Sobrien#include <sys/cdefs.h>
37116196Sobrien__FBSDID("$FreeBSD: head/sys/geom/geom_dev.c 126080 2004-02-21 21:10:55Z phk $");
38116196Sobrien
3992108Sphk#include <sys/param.h>
4092108Sphk#include <sys/systm.h>
4192108Sphk#include <sys/malloc.h>
4292108Sphk#include <sys/kernel.h>
4392108Sphk#include <sys/conf.h>
4492108Sphk#include <sys/bio.h>
4592108Sphk#include <sys/lock.h>
4692108Sphk#include <sys/mutex.h>
4792108Sphk#include <sys/errno.h>
4892108Sphk#include <sys/time.h>
4992108Sphk#include <sys/disk.h>
5092108Sphk#include <sys/fcntl.h>
51114216Skan#include <sys/limits.h>
5292108Sphk#include <geom/geom.h>
5395323Sphk#include <geom/geom_int.h>
5492108Sphk
5592108Sphkstatic d_open_t		g_dev_open;
5692108Sphkstatic d_close_t	g_dev_close;
5792108Sphkstatic d_strategy_t	g_dev_strategy;
5892108Sphkstatic d_ioctl_t	g_dev_ioctl;
5992108Sphk
6092108Sphkstatic struct cdevsw g_dev_cdevsw = {
61126080Sphk	.d_version =	D_VERSION,
62111815Sphk	.d_open =	g_dev_open,
63111815Sphk	.d_close =	g_dev_close,
64111815Sphk	.d_read =	physread,
65111815Sphk	.d_write =	physwrite,
66111815Sphk	.d_ioctl =	g_dev_ioctl,
67111815Sphk	.d_strategy =	g_dev_strategy,
68111815Sphk	.d_name =	"g_dev",
69111815Sphk	.d_maj =	GEOM_MAJOR,
70126080Sphk	.d_flags =	D_DISK | D_TRACKCLOSE,
7192108Sphk};
7292108Sphk
7392108Sphkstatic g_taste_t g_dev_taste;
7492108Sphkstatic g_orphan_t g_dev_orphan;
7592108Sphk
7693248Sphkstatic struct g_class g_dev_class	= {
77112552Sphk	.name = "DEV",
78112552Sphk	.taste = g_dev_taste,
7992108Sphk};
8092108Sphk
81115960Sphkvoid
82105947Sphkg_dev_print(void)
83105947Sphk{
84105947Sphk	struct g_geom *gp;
85115960Sphk	char const *p = "";
86105947Sphk
87115960Sphk	LIST_FOREACH(gp, &g_dev_class.geom, geom) {
88115960Sphk		printf("%s%s", p, gp->name);
89115960Sphk		p = " ";
90115960Sphk	}
91105947Sphk	printf("\n");
92105947Sphk}
93105947Sphk
94108294Sphk/*
95108294Sphk * XXX: This is disgusting and wrong in every way imaginable:  The only reason
96108294Sphk * XXX: we have a clone function is because of the root-mount hack we currently
97108294Sphk * XXX: employ.  An improvment would be to unregister this cloner once we know
98108294Sphk * XXX: we no longer need it.  Ideally, root-fs would be mounted through DEVFS
99108294Sphk * XXX: eliminating the need for this hack.
100108294Sphk */
10192108Sphkstatic void
102104087Sphkg_dev_clone(void *arg __unused, char *name, int namelen __unused, dev_t *dev)
10392108Sphk{
10492108Sphk	struct g_geom *gp;
10592108Sphk
10692108Sphk	if (*dev != NODEV)
10792108Sphk		return;
10892108Sphk
10998066Sphk	g_waitidle();
11092108Sphk
11192108Sphk	/* g_topology_lock(); */
11293248Sphk	LIST_FOREACH(gp, &g_dev_class.geom, geom) {
11392108Sphk		if (strcmp(gp->name, name))
11492108Sphk			continue;
11592108Sphk		*dev = gp->softc;
11692108Sphk		g_trace(G_T_TOPOLOGY, "g_dev_clone(%s) = %p", name, *dev);
11792108Sphk		return;
11892108Sphk	}
11992108Sphk	/* g_topology_unlock(); */
12092108Sphk	return;
12192108Sphk}
12292108Sphk
12392108Sphkstatic void
12492108Sphkg_dev_register_cloner(void *foo __unused)
12592108Sphk{
12692108Sphk	static int once;
12792108Sphk
12896987Sphk	/* XXX: why would this happen more than once ?? */
12992108Sphk	if (!once) {
13096987Sphk		EVENTHANDLER_REGISTER(dev_clone, g_dev_clone, 0, 1000);
13192108Sphk		once++;
13292108Sphk	}
13392108Sphk}
13492108Sphk
13592108SphkSYSINIT(geomdev,SI_SUB_DRIVERS,SI_ORDER_MIDDLE,g_dev_register_cloner,NULL);
13692108Sphk
137119593Sphkstruct g_provider *
138119593Sphkg_dev_getprovider(dev_t dev)
139119593Sphk{
140119593Sphk	struct g_consumer *cp;
141119593Sphk
142119593Sphk	if (dev == NULL)
143119593Sphk		return (NULL);
144119593Sphk	if (devsw(dev) != &g_dev_cdevsw)
145119593Sphk		return (NULL);
146119593Sphk	cp = dev->si_drv2;
147119593Sphk	return (cp->provider);
148119593Sphk}
149119593Sphk
150119593Sphk
15192108Sphkstatic struct g_geom *
15293250Sphkg_dev_taste(struct g_class *mp, struct g_provider *pp, int insist __unused)
15392108Sphk{
15492108Sphk	struct g_geom *gp;
15592108Sphk	struct g_consumer *cp;
156110540Sphk	static int unit = GEOM_MINOR_PROVIDERS;
15796987Sphk	int error;
15892108Sphk	dev_t dev;
15992108Sphk
16092108Sphk	g_trace(G_T_TOPOLOGY, "dev_taste(%s,%s)", mp->name, pp->name);
16192108Sphk	g_topology_assert();
16292108Sphk	LIST_FOREACH(cp, &pp->consumers, consumers)
16393248Sphk		if (cp->geom->class == mp)
16492108Sphk			return (NULL);
16592108Sphk	gp = g_new_geomf(mp, pp->name);
16693776Sphk	gp->orphan = g_dev_orphan;
16792108Sphk	cp = g_new_consumer(gp);
16896987Sphk	error = g_attach(cp, pp);
16996987Sphk	KASSERT(error == 0,
17096987Sphk	    ("g_dev_taste(%s) failed to g_attach, err=%d", pp->name, error));
17196987Sphk	/*
17296987Sphk	 * XXX: I'm not 100% sure we can call make_dev(9) without Giant
17396987Sphk	 * yet.  Once we can, we don't need to drop topology here either.
17496987Sphk	 */
17592108Sphk	g_topology_unlock();
17692108Sphk	mtx_lock(&Giant);
17796987Sphk	dev = make_dev(&g_dev_cdevsw, unit2minor(unit++),
178104316Sphk	    UID_ROOT, GID_OPERATOR, 0640, gp->name);
179110700Sphk	if (pp->flags & G_PF_CANDELETE)
180110700Sphk		dev->si_flags |= SI_CANDELETE;
18196987Sphk	mtx_unlock(&Giant);
18296987Sphk	g_topology_lock();
183110728Sphk	dev->si_iosize_max = MAXPHYS;
184110710Sphk	dev->si_stripesize = pp->stripesize;
185110710Sphk	dev->si_stripeoffset = pp->stripeoffset;
18692108Sphk	gp->softc = dev;
18792108Sphk	dev->si_drv1 = gp;
18892108Sphk	dev->si_drv2 = cp;
18992108Sphk	return (gp);
19092108Sphk}
19192108Sphk
19292108Sphkstatic int
19392108Sphkg_dev_open(dev_t dev, int flags, int fmt, struct thread *td)
19492108Sphk{
19592108Sphk	struct g_geom *gp;
19692108Sphk	struct g_consumer *cp;
19792108Sphk	int error, r, w, e;
19892108Sphk
19992108Sphk	gp = dev->si_drv1;
20092108Sphk	cp = dev->si_drv2;
201112978Sphk	if (gp == NULL || cp == NULL || gp->softc != dev)
202112978Sphk		return(ENXIO);		/* g_dev_taste() not done yet */
203112978Sphk
20492108Sphk	g_trace(G_T_ACCESS, "g_dev_open(%s, %d, %d, %p)",
20592108Sphk	    gp->name, flags, fmt, td);
20692108Sphk	r = flags & FREAD ? 1 : 0;
20792108Sphk	w = flags & FWRITE ? 1 : 0;
208103004Sphk#ifdef notyet
20992108Sphk	e = flags & O_EXCL ? 1 : 0;
210103004Sphk#else
211103004Sphk	e = 0;
212103004Sphk#endif
213112978Sphk	g_topology_lock();
214112978Sphk	if (dev->si_devsw == NULL)
215112978Sphk		error = ENXIO;		/* We were orphaned */
216112978Sphk	else
217125755Sphk		error = g_access(cp, r, w, e);
21892108Sphk	g_topology_unlock();
21998066Sphk	g_waitidle();
220112978Sphk	if (!error)
221112978Sphk		dev->si_bsize_phys = cp->provider->sectorsize;
22292108Sphk	return(error);
22392108Sphk}
22492108Sphk
22592108Sphkstatic int
22692108Sphkg_dev_close(dev_t dev, int flags, int fmt, struct thread *td)
22792108Sphk{
22892108Sphk	struct g_geom *gp;
22992108Sphk	struct g_consumer *cp;
230114864Sphk	int error, r, w, e, i;
23192108Sphk
23292108Sphk	gp = dev->si_drv1;
23392108Sphk	cp = dev->si_drv2;
23492108Sphk	if (gp == NULL || cp == NULL)
23592108Sphk		return(ENXIO);
23692108Sphk	g_trace(G_T_ACCESS, "g_dev_close(%s, %d, %d, %p)",
23792108Sphk	    gp->name, flags, fmt, td);
23892108Sphk	r = flags & FREAD ? -1 : 0;
23992108Sphk	w = flags & FWRITE ? -1 : 0;
240103004Sphk#ifdef notyet
24192108Sphk	e = flags & O_EXCL ? -1 : 0;
242103004Sphk#else
243103004Sphk	e = 0;
244103004Sphk#endif
245112978Sphk	g_topology_lock();
246112978Sphk	if (dev->si_devsw == NULL)
247112978Sphk		error = ENXIO;		/* We were orphaned */
248112978Sphk	else
249125755Sphk		error = g_access(cp, r, w, e);
250114864Sphk	for (i = 0; i < 10 * hz;) {
251114864Sphk		if (cp->acr != 0 || cp->acw != 0)
252114864Sphk			break;
253114864Sphk 		if (cp->nstart == cp->nend)
254114864Sphk			break;
255114864Sphk		tsleep(&i, PRIBIO, "gdevwclose", hz / 10);
256114864Sphk		i += hz / 10;
257114864Sphk	}
258114864Sphk	if (cp->acr == 0 && cp->acw == 0 && cp->nstart != cp->nend) {
259124880Sphk		printf("WARNING: Final close of geom_dev(%s) %s %s\n",
260114864Sphk		    gp->name,
261114864Sphk		    "still has outstanding I/O after 10 seconds.",
262114864Sphk		    "Completing close anyway, panic may happen later.");
263114864Sphk	}
26492108Sphk	g_topology_unlock();
26598066Sphk	g_waitidle();
26692108Sphk	return (error);
26792108Sphk}
26892108Sphk
269112978Sphk/*
270112978Sphk * XXX: Until we have unmessed the ioctl situation, there is a race against
271112978Sphk * XXX: a concurrent orphanization.  We cannot close it by holding topology
272112978Sphk * XXX: since that would prevent us from doing our job, and stalling events
273112978Sphk * XXX: will break (actually: stall) the BSD disklabel hacks.
274112978Sphk */
27592108Sphkstatic int
27692108Sphkg_dev_ioctl(dev_t dev, u_long cmd, caddr_t data, int fflag, struct thread *td)
27792108Sphk{
278115515Sphk	struct g_geom *gp;
27992108Sphk	struct g_consumer *cp;
28095038Sphk	struct g_kerneldump kd;
28192108Sphk	int i, error;
28295038Sphk	u_int u;
28392108Sphk
28492108Sphk	gp = dev->si_drv1;
28592108Sphk	cp = dev->si_drv2;
28692108Sphk
28792108Sphk	error = 0;
288112978Sphk	KASSERT(cp->acr || cp->acw,
289112978Sphk	    ("Consumer with zero access count in g_dev_ioctl"));
29092403Sphk
29192698Sphk	i = IOCPARM_LEN(cmd);
29292698Sphk	switch (cmd) {
29392698Sphk	case DIOCGSECTORSIZE:
294105551Sphk		*(u_int *)data = cp->provider->sectorsize;
295105551Sphk		if (*(u_int *)data == 0)
296105180Snjl			error = ENOENT;
29792698Sphk		break;
29892698Sphk	case DIOCGMEDIASIZE:
299105551Sphk		*(off_t *)data = cp->provider->mediasize;
300105551Sphk		if (*(off_t *)data == 0)
301105180Snjl			error = ENOENT;
30292698Sphk		break;
30392698Sphk	case DIOCGFWSECTORS:
30493250Sphk		error = g_io_getattr("GEOM::fwsectors", cp, &i, data);
305105180Snjl		if (error == 0 && *(u_int *)data == 0)
306105180Snjl			error = ENOENT;
30792698Sphk		break;
30892698Sphk	case DIOCGFWHEADS:
30993250Sphk		error = g_io_getattr("GEOM::fwheads", cp, &i, data);
310105180Snjl		if (error == 0 && *(u_int *)data == 0)
311105180Snjl			error = ENOENT;
31292698Sphk		break;
31394287Sphk	case DIOCGFRONTSTUFF:
31494287Sphk		error = g_io_getattr("GEOM::frontstuff", cp, &i, data);
31594287Sphk		break;
31695038Sphk	case DIOCSKERNELDUMP:
31795038Sphk		u = *((u_int *)data);
31895038Sphk		if (!u) {
31995038Sphk			set_dumper(NULL);
32095038Sphk			error = 0;
32195038Sphk			break;
32295038Sphk		}
32395038Sphk		kd.offset = 0;
32495038Sphk		kd.length = OFF_MAX;
32595038Sphk		i = sizeof kd;
32695038Sphk		error = g_io_getattr("GEOM::kerneldump", cp, &i, &kd);
32795038Sphk		if (!error)
32895038Sphk			dev->si_flags |= SI_DUMPDEV;
32995038Sphk		break;
330104602Sphk
33192698Sphk	default:
332119660Sphk		if (cp->provider->geom->ioctl != NULL) {
333119660Sphk			error = cp->provider->geom->ioctl(cp->provider, cmd, data, td);
334119749Sphk		} else {
335119749Sphk			error = ENOIOCTL;
336119660Sphk		}
33792698Sphk	}
33892403Sphk
33998066Sphk	g_waitidle();
34092108Sphk	return (error);
34192108Sphk}
34292108Sphk
34392108Sphkstatic void
34492108Sphkg_dev_done(struct bio *bp2)
34592108Sphk{
34692108Sphk	struct bio *bp;
34792108Sphk
348110517Sphk	bp = bp2->bio_parent;
34992108Sphk	bp->bio_error = bp2->bio_error;
35092108Sphk	if (bp->bio_error != 0) {
35192108Sphk		g_trace(G_T_BIO, "g_dev_done(%p) had error %d",
35292108Sphk		    bp2, bp->bio_error);
35392108Sphk		bp->bio_flags |= BIO_ERROR;
35492108Sphk	} else {
355105540Sphk		g_trace(G_T_BIO, "g_dev_done(%p/%p) resid %ld completed %jd",
356105540Sphk		    bp2, bp, bp->bio_resid, (intmax_t)bp2->bio_completed);
35792108Sphk	}
35892108Sphk	bp->bio_resid = bp->bio_bcount - bp2->bio_completed;
35992108Sphk	g_destroy_bio(bp2);
36092108Sphk	biodone(bp);
36192108Sphk}
36292108Sphk
36392108Sphkstatic void
36492108Sphkg_dev_strategy(struct bio *bp)
36592108Sphk{
36692108Sphk	struct g_consumer *cp;
36792108Sphk	struct bio *bp2;
36892108Sphk	dev_t dev;
36992108Sphk
370106300Sphk	KASSERT(bp->bio_cmd == BIO_READ ||
371106300Sphk	        bp->bio_cmd == BIO_WRITE ||
372106300Sphk	        bp->bio_cmd == BIO_DELETE,
373106300Sphk		("Wrong bio_cmd bio=%p cmd=%d", bp, bp->bio_cmd));
37492108Sphk	dev = bp->bio_dev;
37592108Sphk	cp = dev->si_drv2;
376112978Sphk	KASSERT(cp->acr || cp->acw,
377112978Sphk	    ("Consumer with zero access count in g_dev_strategy"));
378112978Sphk
379118869Sphk	for (;;) {
380118869Sphk		/*
381118869Sphk		 * XXX: This is not an ideal solution, but I belive it to
382118869Sphk		 * XXX: deadlock safe, all things considered.
383118869Sphk		 */
384118869Sphk		bp2 = g_clone_bio(bp);
385118869Sphk		if (bp2 != NULL)
386118869Sphk			break;
387118869Sphk		tsleep(&bp, PRIBIO, "gdstrat", hz / 10);
388118869Sphk	}
389107834Sphk	KASSERT(bp2 != NULL, ("XXX: ENOMEM in a bad place"));
39092108Sphk	bp2->bio_length = (off_t)bp->bio_bcount;
39192108Sphk	bp2->bio_done = g_dev_done;
39292108Sphk	g_trace(G_T_BIO,
393105540Sphk	    "g_dev_strategy(%p/%p) offset %jd length %jd data %p cmd %d",
394105540Sphk	    bp, bp2, (intmax_t)bp->bio_offset, (intmax_t)bp2->bio_length,
395105540Sphk	    bp2->bio_data, bp2->bio_cmd);
39692108Sphk	g_io_request(bp2, cp);
397112978Sphk	KASSERT(cp->acr || cp->acw,
398112978Sphk	    ("g_dev_strategy raced with g_dev_close and lost"));
399112978Sphk
40092108Sphk}
40192108Sphk
40296987Sphk/*
40396987Sphk * g_dev_orphan()
40496987Sphk *
405112024Sphk * Called from below when the provider orphaned us.
406112024Sphk * - Clear any dump settings.
407112024Sphk * - Destroy the dev_t to prevent any more request from coming in.  The
408112024Sphk *   provider is already marked with an error, so anything which comes in
409112024Sphk *   in the interrim will be returned immediately.
410112024Sphk * - Wait for any outstanding I/O to finish.
411112024Sphk * - Set our access counts to zero, whatever they were.
412112024Sphk * - Detach and self-destruct.
41396987Sphk */
41492108Sphk
41592108Sphkstatic void
41693250Sphkg_dev_orphan(struct g_consumer *cp)
41792108Sphk{
41892108Sphk	struct g_geom *gp;
41992108Sphk	dev_t dev;
42092108Sphk
421112024Sphk	g_topology_assert();
42292108Sphk	gp = cp->geom;
423112024Sphk	dev = gp->softc;
42492108Sphk	g_trace(G_T_TOPOLOGY, "g_dev_orphan(%p(%s))", cp, gp->name);
425112024Sphk
426112024Sphk	/* Reset any dump-area set on this device */
42795038Sphk	if (dev->si_flags & SI_DUMPDEV)
42895038Sphk		set_dumper(NULL);
429112024Sphk
430112024Sphk	/* Destroy the dev_t so we get no more requests */
43192108Sphk	destroy_dev(dev);
432112024Sphk
433112024Sphk	/* Wait for the cows to come home */
434112024Sphk	while (cp->nstart != cp->nend)
435112978Sphk		msleep(&dev, NULL, PRIBIO, "gdevorphan", hz / 10);
436112024Sphk
43792108Sphk	if (cp->acr > 0 || cp->acw > 0 || cp->ace > 0)
438125755Sphk		g_access(cp, -cp->acr, -cp->acw, -cp->ace);
439112024Sphk
44098066Sphk	g_detach(cp);
44192108Sphk	g_destroy_consumer(cp);
44292108Sphk	g_destroy_geom(gp);
44392108Sphk}
44492108Sphk
44596987SphkDECLARE_GEOM_CLASS(g_dev_class, g_dev);
446